File size: 7,731 Bytes
1f48a9f
 
c52dbb5
d8661f4
 
1f48a9f
d8661f4
 
 
1f48a9f
cdda69e
6ce9ced
7ed8b46
d8661f4
1f48a9f
3cc4808
7ed8b46
 
 
 
 
 
 
 
 
 
 
d8661f4
7ed8b46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import io
import os
import time
import streamlit as st
import requests
import zipfile

from azure.core.credentials import AzureKeyCredential
from azure.ai.translation.document import DocumentTranslationClient
from dotenv import load_dotenv
from streamlit_pdf_viewer import pdf_viewer
from utils import blob_service_client, upload_to_azure, download_from_azure, delete_from_azure
from streamlit_msal import Msal

# Load environment variables (python-dotenv) before the os.environ lookups below.
load_dotenv()
# Use the wide page layout.
st.set_page_config(layout="wide")

# Authenticate user with Azure Active Directory
# The MSAL sign-in widget is rendered inside the sidebar. The value it returns
# is checked right after this block and treated as "not signed in" when falsy.
with st.sidebar:
    auth_data = Msal.initialize_ui(
        client_id=os.environ['AZURE_CLIENT_ID'],  # KeyError if the variable is unset
        authority=os.environ['AZURE_AUTHORITY_URL'],  # KeyError if the variable is unset
        scopes=[],  # no additional scopes are requested
        connecting_label="Connecting",
        disconnected_label="Disconnected",
        sign_in_label="Sign in",
        sign_out_label="Sign out"
    )

# Gate the whole app behind sign-in: without auth data only a warning is shown
# and st.stop() ends this script run.
if not auth_data:
    st.warning("Please login to continue")
    st.stop()
else:
    # Streamlit UI
    st.title("Azure Translation Tools")
    # Several files may be uploaded at once; they are translated one by one below.
    uploaded_files = st.file_uploader("Upload files to start the process", accept_multiple_files=True)

    # Initialize a new instance of the DocumentTranslationClient
    # (used by process_async; process_sync calls the REST endpoint directly).
    client = DocumentTranslationClient(os.environ["AZURE_AI_ENDPOINT_URL"], AzureKeyCredential(os.environ["AZURE_AI_TRANSLATOR_KEY"]))
    # Container URLs handed to client.begin_translation() in process_async.
    # NOTE(review): the storage account is hard-coded here — confirm this is
    # intended for every deployment.
    sourceUri = "https://cbdtranslation.blob.core.windows.net/source"
    targetUri = "https://cbdtranslation.blob.core.windows.net/target"

    # Define available language options with their codes and names
    # Every entry has the form '<code> - <name>'; the parsing below relies on it.
    langs = (
        'id - Indonesian',
        'en - English',
        'es - Spanish',
        'zh - Chinese',
        'ar - Arabic',
        'fr - French',
        'ru - Russian',
        'hi - Hindi',
        'pt - Portuguese',
        'de - German',
        'ms - Malay',
        'ta - Tamil',
        'ko - Korean',
        'th - Thai',
    )

    # Get user's language selection and extract language code and name
    lang = st.selectbox('Target language selection:', langs, key='lang')
    # Whitespace split: first token is the code, last token is the name. This
    # only works while every language name is a single word.
    lang_id = lang.split()[0]  # Get language code (e.g., 'en')
    lang_name = lang.split()[-1]  # Get language name (e.g., 'English')

    def process_sync(file_name, file_content):
        """Translate one document with the synchronous Translator REST endpoint.

        The target language is read from the enclosing script's ``lang_id`` at
        call time.

        Args:
            file_name: Name of the uploaded file; sent as the multipart filename.
            file_content: Raw content of the document to translate.

        Returns:
            A ``(succeeded, payload)`` tuple. ``succeeded`` is True only for an
            HTTP 200 response. ``payload`` is always bytes: the response body
            returned by the service, or the UTF-8 encoded error text when the
            request itself could not be completed (connection error, timeout).

        Raises:
            KeyError: If a required ``AZURE_AI_*`` environment variable is unset.
        """
        # Read the environment into locals first: nesting double quotes inside
        # a double-quoted f-string is a SyntaxError before Python 3.12.
        endpoint = os.environ["AZURE_AI_ENDPOINT_URL"]
        api_version = os.environ["AZURE_AI_API_VERSION"]

        # Set up Azure Translator API headers
        headers = {
            "Ocp-Apim-Subscription-Key": os.environ["AZURE_AI_TRANSLATOR_KEY"],
        }

        # Prepare file for translation
        # NOTE(review): the content type is the literal placeholder string the
        # script has always sent; confirm whether a real MIME type is expected.
        files = {
            "document": (file_name, file_content, "ContentType/file-extension"),
        }

        # Target language and API version go through `params` so that requests
        # URL-encodes them instead of pasting them into the URL verbatim.
        url = f"{endpoint}/translator/document:translate"
        params = {
            "targetLanguage": lang_id,
            "api-version": api_version,
        }

        # Send translation request to Azure. Without a timeout a stalled
        # connection would block the Streamlit run forever.
        # NOTE(review): 10 s connect / 300 s read are assumed limits — tune.
        try:
            response = requests.post(
                url,
                headers=headers,
                params=params,
                files=files,
                timeout=(10, 300),
            )
        except requests.RequestException as exc:
            # Keep the (bool, bytes) contract so the caller can report the
            # failure for this file and continue with the remaining ones.
            return False, str(exc).encode("utf-8")

        return response.status_code == 200, response.content

    def process_async(file_name, file_content):
        """Translate one document through a batch translation job.

        The file is uploaded to the "source" container, a translation job is
        started from ``sourceUri`` to ``targetUri`` for the enclosing script's
        ``lang_id``, and the blob with the same name is then downloaded from
        the "target" container.

        Args:
            file_name: Blob name used in both the source and target containers.
            file_content: Raw content of the document to translate.

        Returns:
            A ``(succeeded, content)`` tuple. ``succeeded`` is True when the
            first document reported by the job has status ``'Succeeded'`` and
            False when the job reports no documents at all. ``content`` is
            whatever ``download_from_azure`` returned for the target blob.

        Raises:
            Any exception raised by the upload, the translation job, the
            download or the cleanup helpers is propagated to the caller.
        """
        # Upload the original file to Azure Blob Storage source container
        upload_to_azure(blob_service_client, "source", file_content, file_name)

        try:
            # Initialize translation job using the DocumentTranslationClient
            # Wait for the translation to complete and get the result
            poller = client.begin_translation(sourceUri, targetUri, lang_id)
            # Materialise the per-document statuses now so the outcome is known
            # independently of the cleanup that follows.
            documents = list(poller.result())

            # Download the translated file from Azure Blob Storage target container
            downloaded_file_content = download_from_azure(blob_service_client, "target", file_name)
        finally:
            # Clean up: remove the file from both containers even when the job
            # or the download failed. The job is started on the whole source
            # container, so a leftover blob would be picked up by later jobs.
            try:
                delete_from_azure(blob_service_client, "source", file_name)
            finally:
                delete_from_azure(blob_service_client, "target", file_name)

        # A job that reports no documents used to fall off the end of the
        # function and return None, which broke the caller's tuple unpacking.
        if not documents:
            return False, downloaded_file_content

        # NOTE(review): only the first reported document is inspected, which
        # presumes the source container holds just this file — confirm.
        return documents[0].status == 'Succeeded', downloaded_file_content

    # Extensions (lower-case, without the dot) handled by each translation path.
    # 'mhtml' replaces the former 'mthml' typo, which could never match a file.
    _SYNC_EXTENSIONS = frozenset({
        'txt', 'tsv', 'tab', 'csv', 'html', 'htm', 'mhtml', 'mht',
        'pptx', 'xlsx', 'docx', 'msg', 'xlf', 'xliff',
    })
    _ASYNC_EXTENSIONS = frozenset({'pdf', 'odt', 'odp', 'ods', 'rtf'})
    # File types for which a side-by-side preview is rendered.
    _PREVIEW_EXTENSIONS = frozenset({'pdf', 'docx', 'txt'})

    def _file_extension(name):
        """Return the lower-cased extension of *name* without the dot ('' if none)."""
        _, dot, extension = name.rpartition('.')
        return extension.lower() if dot else ''

    def _error_detail(payload, limit=500):
        """Return a short printable description of a failed translation payload.

        Payloads that are not valid UTF-8 (e.g. binary documents) yield ''.
        """
        if payload is None:
            return ''
        if isinstance(payload, (bytes, bytearray)):
            try:
                text = bytes(payload).decode('utf-8')
            except UnicodeDecodeError:
                return ''
        else:
            text = str(payload)
        return text.strip()[:limit]

    def _preview_text(payload):
        """Decode *payload* for the plain-text preview, replacing invalid bytes."""
        if isinstance(payload, (bytes, bytearray)):
            return bytes(payload).decode('utf-8', errors='replace')
        return str(payload)

    def _render_preview(file_type, original_name, translated_name, original, translated):
        """Show the original and the translated document side by side."""
        if file_type not in _PREVIEW_EXTENSIONS:
            return
        panes = (
            (f"Original File: {original_name}", original),
            (f"Translated File: {translated_name}", translated),
        )
        for column, (caption, payload) in zip(st.columns(2), panes):
            with column:
                st.write(caption)
                st.divider()
                if file_type == 'pdf':
                    pdf_viewer(payload)
                elif file_type == 'docx':
                    st.write("On development")
                else:
                    # Plain text: show the decoded content rather than the
                    # repr of a bytes object.
                    st.text(_preview_text(payload))

    # The button only exists once at least one file has been uploaded.
    submit = bool(uploaded_files) and st.button("Get Result", key='submit')

    if submit:
        # Create an in-memory zip file to store translated documents
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
            # Add progress bar for translation status
            progress_bar = st.progress(0)
            total_files = len(uploaded_files)

            for position, uploaded_file in enumerate(uploaded_files, start=1):
                # Start timing
                start_time = time.perf_counter()

                file_name = uploaded_file.name
                file_content = uploaded_file.read()
                # Case-insensitive, so 'REPORT.PDF' is routed like 'report.pdf'.
                file_type = _file_extension(file_name)

                # Check file extension to determine translation method
                if file_type in _SYNC_EXTENSIONS:
                    outcome = process_sync(file_name, file_content)
                elif file_type in _ASYNC_EXTENSIONS:
                    outcome = process_async(file_name, file_content)
                else:
                    # Previously this case reused the previous file's result
                    # or raised NameError on the first file.
                    st.error(f"Unsupported file type, skipped: {file_name}")
                    progress_bar.progress(position / total_files)
                    continue

                # Tolerate a translator that returns nothing instead of a tuple.
                result, response = outcome if outcome is not None else (False, b"")

                # Calculate duration
                duration = time.perf_counter() - start_time
                translated_name = f"{lang_name}-translated-{file_name}"

                # Check if translation was successful
                if result:
                    # Add successfully translated file to zip archive
                    zip_file.writestr(translated_name, response)
                    st.success(f"Successfully translated: {file_name} (Time taken: {duration:.2f} seconds)")
                    # Preview only real translations, never a failed payload.
                    _render_preview(file_type, file_name, translated_name, file_content, response)
                else:
                    # `response` is a raw payload, not an HTTP response object,
                    # so there is no status code or `.text` to read from it.
                    detail = _error_detail(response)
                    reason = f": {detail}" if detail else ""
                    st.error(f"Failed to translate {file_name}{reason} (Time taken: {duration:.2f} seconds)")

                # Update progress bar based on completed translations
                progress_bar.progress(position / total_files)

        # Create download button for the zip file containing all translations
        # (the archive is closed above, so the buffer holds a complete zip).
        st.download_button(
            label="Download All Translated Files",
            data=zip_buffer.getvalue(),
            file_name=f"{lang_name}-translated-files.zip",
            mime="application/zip"
        )