fadliaulawi committed on
Commit
7ed8b46
·
1 Parent(s): ef06798

Add Azure auth

Browse files
Files changed (2) hide show
  1. app.py +168 -152
  2. requirements.txt +1 -0
app.py CHANGED
@@ -7,162 +7,178 @@ import zipfile
7
 
8
  from azure.core.credentials import AzureKeyCredential
9
  from azure.ai.translation.document import DocumentTranslationClient
10
- from docx import Document
11
  from dotenv import load_dotenv
12
  from streamlit_pdf_viewer import pdf_viewer
13
  from utils import blob_service_client, upload_to_azure, download_from_azure, delete_from_azure
 
14
 
15
  load_dotenv()
16
-
17
- # Streamlit UI
18
  st.set_page_config(layout="wide")
19
- st.title("Azure Translation Tools")
20
- uploaded_files = st.file_uploader("Upload files to start the process", accept_multiple_files=True)
21
-
22
- # Initialize a new instance of the DocumentTranslationClient
23
- client = DocumentTranslationClient(os.environ["AZURE_AI_ENDPOINT_URL"], AzureKeyCredential(os.environ["AZURE_AI_TRANSLATOR_KEY"]))
24
- sourceUri = "https://cbdtranslation.blob.core.windows.net/source"
25
- targetUri = "https://cbdtranslation.blob.core.windows.net/target"
26
-
27
- # Define available language options with their codes and names
28
- langs = (
29
- 'id - Indonesian',
30
- 'en - English',
31
- 'es - Spanish',
32
- 'zh - Chinese',
33
- 'ar - Arabic',
34
- 'fr - French',
35
- 'ru - Russian',
36
- 'hi - Hindi',
37
- 'pt - Portuguese',
38
- 'de - German',
39
- 'ms - Malay',
40
- 'ta - Tamil',
41
- 'ko - Korean',
42
- 'th - Thai',
43
- )
44
-
45
- # Get user's language selection and extract language code and name
46
- lang = st.selectbox('Target language selection:', langs, key='lang')
47
- lang_id = lang.split()[0] # Get language code (e.g., 'en')
48
- lang_name = lang.split()[-1] # Get language name (e.g., 'English')
49
-
50
- def process_sync(file_name, file_content):
51
-
52
- # Set up Azure Translator API headers
53
- headers = {
54
- "Ocp-Apim-Subscription-Key": os.environ["AZURE_AI_TRANSLATOR_KEY"],
55
- }
56
-
57
- # Prepare file for translation
58
- files = {
59
- "document": (file_name, file_content, "ContentType/file-extension"),
60
- }
61
-
62
- # Construct API URL with target language and version
63
- url = f"{os.environ["AZURE_AI_ENDPOINT_URL"]}/translator/document:translate?targetLanguage={lang_id}&api-version={os.environ["AZURE_AI_API_VERSION"]}"
64
-
65
- # Send translation request to Azure
66
- response = requests.post(url, headers=headers, files=files)
67
-
68
- return response.status_code == 200, response.content
69
-
70
- def process_async(file_name, file_content):
71
-
72
- # Upload the original file to Azure Blob Storage source container
73
- upload_to_azure(blob_service_client, "source", file_content, file_name)
74
-
75
- # Initialize translation job using the DocumentTranslationClient
76
- # Wait for the translation to complete and get the result
77
- poller = client.begin_translation(sourceUri, targetUri, lang_id)
78
- result = poller.result()
79
-
80
- # Download the translated file from Azure Blob Storage target container
81
- downloaded_file_content = download_from_azure(blob_service_client, "target", file_name)
82
-
83
- # Clean up: Remove files from both source and target containers
84
- delete_from_azure(blob_service_client, "source", file_name)
85
- delete_from_azure(blob_service_client, "target", file_name)
86
-
87
- # Return translation status and the translated content
88
- for document in result:
89
- return document.status == 'Succeeded', downloaded_file_content
90
-
91
- if uploaded_files:
92
- submit = st.button("Get Result", key='submit')
93
-
94
- if uploaded_files and submit:
95
- # Create an in-memory zip file to store translated documents
96
- zip_buffer = io.BytesIO()
97
- with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
98
- # Add progress bar for translation status
99
- progress_bar = st.progress(0)
100
- for idx, uploaded_file in enumerate(uploaded_files):
101
- # Start timing
102
- start_time = time.time()
103
-
104
- file_name = uploaded_file.name
105
- file_content = uploaded_file.read()
106
- file_type = file_name.split('.')[-1]
107
-
108
- # Check file extension to determine translation method
109
- if file_type in ['txt', 'tsv', 'tab', 'csv', 'html', 'htm', 'mthml', 'mht', 'pptx', 'xlsx', 'docx', 'msg', 'xlf', 'xliff']:
110
- result, response = process_sync(file_name, file_content)
111
- elif file_type in ['pdf', 'odt', 'odp', 'ods', 'rtf']:
112
- result, response = process_async(file_name, file_content)
113
-
114
- # Calculate duration
115
- duration = time.time() - start_time
116
-
117
- # Check if translation was successful
118
- if result:
119
- # Add successfully translated file to zip archive
120
- zip_file.writestr(f"{lang_name}-translated-{uploaded_file.name}", response)
121
- st.success(f"Successfully translated: {uploaded_file.name} (Time taken: {duration:.2f} seconds)")
122
- else:
123
- st.error(f"Failed to translate {uploaded_file.name} with status code {response.status_code}: {response.text} (Time taken: {duration:.2f} seconds)")
124
-
125
- if file_type == 'pdf':
126
- # Display the original and translated files side by side
127
- col1, col2 = st.columns(2)
128
- with col1:
129
- st.write(f"Original File: {uploaded_file.name}")
130
- st.divider()
131
- pdf_viewer(file_content)
132
- with col2:
133
- st.write(f"Translated File: {lang_name}-translated-{uploaded_file.name}")
134
- st.divider()
135
- pdf_viewer(response)
136
- elif file_type == 'docx':
137
- col1, col2 = st.columns(2)
138
- with col1:
139
- st.write(f"Original File: {uploaded_file.name}")
140
- st.divider()
141
- st.write("On development")
142
- with col2:
143
- st.write(f"Translated File: {lang_name}-translated-{uploaded_file.name}")
144
- st.divider()
145
- st.write("On development")
146
- elif file_type == 'txt':
147
- # Display the original and translated files side by side
148
- col1, col2 = st.columns(2)
149
- with col1:
150
- st.write(f"Original File: {uploaded_file.name}")
151
- st.divider()
152
- st.write(file_content)
153
- with col2:
154
- st.write(f"Translated File: {lang_name}-translated-{uploaded_file.name}")
155
- st.divider()
156
- st.write(response)
157
-
158
- # Update progress bar based on completed translations
159
- progress = (idx + 1) / len(uploaded_files)
160
- progress_bar.progress(progress)
161
-
162
- # Create download button for the zip file containing all translations
163
- st.download_button(
164
- label="Download All Translated Files",
165
- data=zip_buffer.getvalue(),
166
- file_name=f"{lang_name}-translated-files.zip",
167
- mime="application/zip"
168
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  from azure.core.credentials import AzureKeyCredential
9
  from azure.ai.translation.document import DocumentTranslationClient
 
10
  from dotenv import load_dotenv
11
  from streamlit_pdf_viewer import pdf_viewer
12
  from utils import blob_service_client, upload_to_azure, download_from_azure, delete_from_azure
13
+ from streamlit_msal import Msal
14
 
15
  load_dotenv()
 
 
16
  st.set_page_config(layout="wide")
17
+
18
+ # Authenticate user with Azure Active Directory
19
+ with st.sidebar:
20
+ auth_data = Msal.initialize_ui(
21
+ client_id=os.environ['AZURE_CLIENT_ID'],
22
+ authority=os.environ['AZURE_AUTHORITY_URL'],
23
+ scopes=[],
24
+ connecting_label="Connecting",
25
+ disconnected_label="Disconnected",
26
+ sign_in_label="Sign in",
27
+ sign_out_label="Sign out"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  )
29
+
30
+ if not auth_data:
31
+ st.warning("Please login to continue")
32
+ st.stop()
33
+ else:
34
+ # Streamlit UI
35
+ st.title("Azure Translation Tools")
36
+ uploaded_files = st.file_uploader("Upload files to start the process", accept_multiple_files=True)
37
+
38
+ # Initialize a new instance of the DocumentTranslationClient
39
+ client = DocumentTranslationClient(os.environ["AZURE_AI_ENDPOINT_URL"], AzureKeyCredential(os.environ["AZURE_AI_TRANSLATOR_KEY"]))
40
+ sourceUri = "https://cbdtranslation.blob.core.windows.net/source"
41
+ targetUri = "https://cbdtranslation.blob.core.windows.net/target"
42
+
43
+ # Define available language options with their codes and names
44
+ langs = (
45
+ 'id - Indonesian',
46
+ 'en - English',
47
+ 'es - Spanish',
48
+ 'zh - Chinese',
49
+ 'ar - Arabic',
50
+ 'fr - French',
51
+ 'ru - Russian',
52
+ 'hi - Hindi',
53
+ 'pt - Portuguese',
54
+ 'de - German',
55
+ 'ms - Malay',
56
+ 'ta - Tamil',
57
+ 'ko - Korean',
58
+ 'th - Thai',
59
+ )
60
+
61
+ # Get user's language selection and extract language code and name
62
+ lang = st.selectbox('Target language selection:', langs, key='lang')
63
+ lang_id = lang.split()[0] # Get language code (e.g., 'en')
64
+ lang_name = lang.split()[-1] # Get language name (e.g., 'English')
65
+
66
def process_sync(file_name, file_content):
    """Translate one document with the synchronous Azure Translator endpoint.

    Reads the endpoint URL, API version and subscription key from the
    ``AZURE_AI_ENDPOINT_URL``, ``AZURE_AI_API_VERSION`` and
    ``AZURE_AI_TRANSLATOR_KEY`` environment variables, and the target language
    from the module-level ``lang_id``.

    Args:
        file_name: Name of the uploaded file, sent as the multipart filename.
        file_content: Raw content of the uploaded file.

    Returns:
        A ``(succeeded, content)`` tuple: ``succeeded`` is True when the HTTP
        status is 200, and ``content`` is the raw response body (the translated
        document on success, whatever the service returned otherwise).

    Raises:
        KeyError: If one of the required environment variables is missing.
    """
    # Look the settings up before building the URL. Nesting
    # os.environ["..."] inside a double-quoted f-string is a SyntaxError on
    # Python versions older than 3.12.
    endpoint = os.environ["AZURE_AI_ENDPOINT_URL"]
    api_version = os.environ["AZURE_AI_API_VERSION"]

    # Set up Azure Translator API headers
    headers = {
        "Ocp-Apim-Subscription-Key": os.environ["AZURE_AI_TRANSLATOR_KEY"],
    }

    # Prepare file for translation
    # NOTE(review): the content type below is a placeholder string that is
    # sent as-is; confirm whether the service needs the real MIME type.
    files = {
        "document": (file_name, file_content, "ContentType/file-extension"),
    }

    # Construct API URL with target language and version
    url = (
        f"{endpoint}/translator/document:translate"
        f"?targetLanguage={lang_id}&api-version={api_version}"
    )

    # Send translation request to Azure
    response = requests.post(url, headers=headers, files=files)

    return response.status_code == 200, response.content
85
+
86
def process_async(file_name, file_content):
    """Translate one document through the asynchronous (blob storage) flow.

    Uploads the file to the "source" container, starts a translation job with
    the module-level ``client`` from ``sourceUri`` to ``targetUri`` for the
    language ``lang_id``, waits for the job to finish, then downloads the blob
    with the same name from the "target" container.

    Args:
        file_name: Blob name used in both the source and target containers.
        file_content: Raw content of the uploaded file.

    Returns:
        A ``(succeeded, content)`` tuple. ``succeeded`` is True when the first
        document reported by the job has status ``'Succeeded'``; it is False
        when the job reports no documents at all. ``content`` is whatever was
        downloaded from the target container.
    """
    # Upload the original file to Azure Blob Storage source container
    upload_to_azure(blob_service_client, "source", file_content, file_name)

    try:
        # Start the translation job and block until it completes
        poller = client.begin_translation(sourceUri, targetUri, lang_id)
        documents = list(poller.result())

        # Download the translated file from Azure Blob Storage target container
        downloaded_file_content = download_from_azure(blob_service_client, "target", file_name)
    finally:
        # Always remove the uploaded source blob, even when translation or
        # download fails, so it does not linger in the source container.
        delete_from_azure(blob_service_client, "source", file_name)

    # Clean up the translated blob once it has been downloaded
    delete_from_azure(blob_service_client, "target", file_name)

    # An empty result must still yield a tuple; the caller unpacks the return
    # value, so falling off the end with None would raise a TypeError.
    succeeded = bool(documents) and documents[0].status == 'Succeeded'
    return succeeded, downloaded_file_content
106
+
107
+ if uploaded_files:
108
+ submit = st.button("Get Result", key='submit')
109
+
110
# File extensions routed to the synchronous single-document endpoint and to
# the asynchronous blob-storage flow, respectively.
SYNC_FILE_TYPES = frozenset({
    'txt', 'tsv', 'tab', 'csv', 'html', 'htm', 'mhtml', 'mht',
    'pptx', 'xlsx', 'docx', 'msg', 'xlf', 'xliff',
})
ASYNC_FILE_TYPES = frozenset({'pdf', 'odt', 'odp', 'ods', 'rtf'})


def _failure_detail(payload, limit=500):
    """Return a short printable description of a failed translation payload.

    The translation helpers return raw bytes, not an HTTP response object, so
    the payload is decoded for display when it is valid UTF-8 text.
    """
    if isinstance(payload, bytes):
        try:
            payload = payload.decode('utf-8')
        except UnicodeDecodeError:
            return "no readable error details"
    return str(payload)[:limit]


if uploaded_files and submit:
    # Create an in-memory zip file to store translated documents
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
        # Add progress bar for translation status
        progress_bar = st.progress(0)
        for idx, uploaded_file in enumerate(uploaded_files):
            # Start timing
            start_time = time.time()

            file_name = uploaded_file.name
            file_content = uploaded_file.read()
            # Lower-case the extension so "REPORT.PDF" is routed like "report.pdf"
            file_type = file_name.split('.')[-1].lower()
            translated_name = f"{lang_name}-translated-{file_name}"

            # Check file extension to determine translation method
            if file_type in SYNC_FILE_TYPES:
                translate = process_sync
            elif file_type in ASYNC_FILE_TYPES:
                translate = process_async
            else:
                translate = None

            if translate is None:
                # Without this branch `result` would be undefined (or left
                # over from the previous file) for unsupported extensions.
                st.error(f"Unsupported file type for {file_name}; skipping")
            else:
                result, response = translate(file_name, file_content)

                # Calculate duration
                duration = time.time() - start_time

                if not result:
                    # `response` is raw bytes here, so it has no
                    # status_code/text attributes to report.
                    st.error(f"Failed to translate {file_name}: {_failure_detail(response)} (Time taken: {duration:.2f} seconds)")
                else:
                    # Add successfully translated file to zip archive
                    zip_file.writestr(translated_name, response)
                    st.success(f"Successfully translated: {file_name} (Time taken: {duration:.2f} seconds)")

                    if file_type == 'pdf':
                        # Display the original and translated files side by side
                        col1, col2 = st.columns(2)
                        with col1:
                            st.write(f"Original File: {file_name}")
                            st.divider()
                            pdf_viewer(file_content)
                        with col2:
                            st.write(f"Translated File: {translated_name}")
                            st.divider()
                            pdf_viewer(response)
                    elif file_type == 'docx':
                        # Preview for docx is not implemented yet
                        col1, col2 = st.columns(2)
                        with col1:
                            st.write(f"Original File: {file_name}")
                            st.divider()
                            st.write("On development")
                        with col2:
                            st.write(f"Translated File: {translated_name}")
                            st.divider()
                            st.write("On development")
                    elif file_type == 'txt':
                        # Display the original and translated files side by side
                        col1, col2 = st.columns(2)
                        with col1:
                            st.write(f"Original File: {file_name}")
                            st.divider()
                            st.write(file_content)
                        with col2:
                            st.write(f"Translated File: {translated_name}")
                            st.divider()
                            st.write(response)

            # Update progress bar based on processed files (translated or not)
            progress = (idx + 1) / len(uploaded_files)
            progress_bar.progress(progress)

    # Create download button for the zip file containing all translations.
    # This runs after the ZipFile context has closed so the archive is
    # finalized before its bytes are read.
    st.download_button(
        label="Download All Translated Files",
        data=zip_buffer.getvalue(),
        file_name=f"{lang_name}-translated-files.zip",
        mime="application/zip"
    )
requirements.txt CHANGED
@@ -48,6 +48,7 @@ rpds-py==0.22.3
48
  six==1.17.0
49
  smmap==5.0.1
50
  streamlit==1.40.2
 
51
  streamlit-pdf-viewer==0.0.19
52
  tenacity==9.0.0
53
  toml==0.10.2
 
48
  six==1.17.0
49
  smmap==5.0.1
50
  streamlit==1.40.2
51
+ streamlit-msal==0.2.0
52
  streamlit-pdf-viewer==0.0.19
53
  tenacity==9.0.0
54
  toml==0.10.2