File size: 6,758 Bytes
1f48a9f
 
c52dbb5
d8661f4
 
1f48a9f
d8661f4
 
 
cdda69e
1f48a9f
cdda69e
6ce9ced
d8661f4
1f48a9f
d8661f4
 
3cc4808
d8661f4
1f48a9f
d8661f4
6ce9ced
 
 
 
 
1f7f38a
d8661f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f7f38a
d8661f4
1f7f38a
 
d8661f4
cdda69e
6ce9ced
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cdda69e
6ce9ced
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f48a9f
d8661f4
 
1f48a9f
1f7f38a
1f48a9f
 
1f7f38a
1f48a9f
 
c52dbb5
 
6ce9ced
cdda69e
 
3cc4808
cdda69e
6ce9ced
3cc4808
cdda69e
3cc4808
cdda69e
6ce9ced
c52dbb5
e6af8fc
c52dbb5
6ce9ced
 
1f7f38a
6ce9ced
c52dbb5
1f48a9f
c52dbb5
6ce9ced
3cc4808
 
 
 
 
e6af8fc
3cc4808
 
 
e6af8fc
3cc4808
 
e6af8fc
 
 
 
 
 
 
 
 
3cc4808
 
 
 
 
 
 
 
 
 
 
cdda69e
1f7f38a
1f48a9f
 
d8661f4
1f7f38a
d8661f4
1f48a9f
 
 
 
d8661f4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import io
import os
import time
import streamlit as st
import requests
import zipfile

from azure.core.credentials import AzureKeyCredential
from azure.ai.translation.document import DocumentTranslationClient
from docx import Document
from dotenv import load_dotenv
from streamlit_pdf_viewer import pdf_viewer
from utils import blob_service_client, upload_to_azure, download_from_azure, delete_from_azure

load_dotenv()

# Page scaffolding: wide layout, heading, and the multi-file upload widget.
st.set_page_config(layout="wide")
st.title("Azure Translation Tools")
uploaded_files = st.file_uploader(
    "Upload files to start the process",
    accept_multiple_files=True,
)

# Document Translation client (endpoint and key come from the environment),
# together with the blob container URLs passed to it as source and target.
_translator_endpoint = os.environ["AZURE_AI_ENDPOINT_URL"]
_translator_credential = AzureKeyCredential(os.environ["AZURE_AI_TRANSLATOR_KEY"])
client = DocumentTranslationClient(_translator_endpoint, _translator_credential)
sourceUri = "https://cbdtranslation.blob.core.windows.net/source"
targetUri = "https://cbdtranslation.blob.core.windows.net/target"

# Selectable target languages; every entry is written as "<code> - <name>".
langs = (
    'id - Indonesian',
    'en - English',
    'es - Spanish',
    'zh - Chinese',
    'ar - Arabic',
    'fr - French',
    'ru - Russian',
    'hi - Hindi',
    'pt - Portuguese',
    'de - German',
    'ms - Malay',
    'ta - Tamil',
    'ko - Korean',
    'th - Thai',
)

# Ask for the target language, then break the chosen entry into its code
# (e.g. 'en') and its display name (e.g. 'English').
lang = st.selectbox('Target language selection:', langs, key='lang')
lang_id, lang_name = lang.split(' - ')

def process_sync(file_name, file_content):
    """Translate one document through the synchronous translation endpoint.

    Posts the file as multipart form data to
    ``<AZURE_AI_ENDPOINT_URL>/translator/document:translate``, targeting the
    language held in the module-level ``lang_id``.

    Args:
        file_name: Name sent as the filename of the multipart ``document`` part.
        file_content: Content of the document to translate.

    Returns:
        A ``(succeeded, body)`` tuple. ``succeeded`` is True only when the
        reply has HTTP status 200; ``body`` is the raw response content in
        either case.

    Raises:
        KeyError: If one of the required environment variables is not set.
    """
    # Set up Azure Translator API headers
    headers = {
        "Ocp-Apim-Subscription-Key": os.environ["AZURE_AI_TRANSLATOR_KEY"],
    }

    # Prepare file for translation
    # NOTE(review): the part's content type is a placeholder string rather
    # than a real MIME type - confirm the service does not depend on it.
    files = {
        "document": (file_name, file_content, "ContentType/file-extension"),
    }

    # Read the settings into locals before formatting: reusing the enclosing
    # quote character inside an f-string replacement field is a SyntaxError
    # on interpreters older than Python 3.12. Dropping a trailing slash keeps
    # the path from starting with "//" when the endpoint is stored with one.
    endpoint = os.environ["AZURE_AI_ENDPOINT_URL"].rstrip("/")
    api_version = os.environ["AZURE_AI_API_VERSION"]

    # Construct API URL with target language and version
    url = (
        f"{endpoint}/translator/document:translate"
        f"?targetLanguage={lang_id}&api-version={api_version}"
    )

    # Send translation request to Azure
    response = requests.post(url, headers=headers, files=files)

    return response.status_code == 200, response.content

def process_async(file_name, file_content):
    """Translate one document through a blob-storage translation job.

    Uploads the file to the "source" container, runs a translation job from
    ``sourceUri`` to ``targetUri`` for the module-level ``lang_id``, and, when
    the job's first reported document succeeded, downloads the blob of the
    same name from the "target" container.

    Args:
        file_name: Blob name used for the upload, the download and cleanup.
        file_content: Content of the document to translate.

    Returns:
        A ``(succeeded, content)`` tuple. ``content`` is the downloaded
        translation on success and ``b""`` otherwise (including when the job
        reports no documents at all), so callers can always unpack two values.
    """
    # Upload the original file to Azure Blob Storage source container
    upload_to_azure(blob_service_client, "source", file_content, file_name)

    try:
        # Start the job over the whole source container and block until done.
        # NOTE(review): every blob present in the source container is part of
        # this job; concurrent sessions sharing the container would need a
        # per-file filter - confirm against the deployment.
        poller = client.begin_translation(sourceUri, targetUri, lang_id)
        first_document = next(iter(poller.result()), None)

        # Nothing reported, or a non-successful status: there is no
        # translated blob worth fetching.
        if first_document is None or first_document.status != 'Succeeded':
            return False, b""

        # Download the translated file, then remove it from the target container
        downloaded_file_content = download_from_azure(blob_service_client, "target", file_name)
        delete_from_azure(blob_service_client, "target", file_name)
        return True, downloaded_file_content
    finally:
        # Always remove the uploaded source blob, even when the job failed or
        # raised; otherwise it stays in the container that later jobs read.
        delete_from_azure(blob_service_client, "source", file_name)

# Extensions sent to the synchronous single-document endpoint.
SYNC_FILE_TYPES = frozenset({
    'txt', 'tsv', 'tab', 'csv', 'html', 'htm', 'mhtml', 'mht',
    'pptx', 'xlsx', 'docx', 'msg', 'xlf', 'xliff',
})
# Extensions that go through the blob-storage translation job instead.
ASYNC_FILE_TYPES = frozenset({'pdf', 'odt', 'odp', 'ods', 'rtf'})


def _as_text(payload):
    """Return *payload* as displayable text, decoding bytes leniently."""
    if isinstance(payload, (bytes, bytearray)):
        return bytes(payload).decode('utf-8', errors='replace')
    return str(payload)


# How each previewable file type is rendered inside a column.
_PREVIEW_RENDERERS = {
    'pdf': pdf_viewer,
    'docx': lambda _content: st.write("On development"),
    'txt': lambda content: st.text(_as_text(content)),
}


def _show_preview(file_type, original_name, translated_name, original, translated):
    """Show the original and translated content side by side, if previewable."""
    render = _PREVIEW_RENDERERS.get(file_type)
    if render is None:
        return
    left, right = st.columns(2)
    with left:
        st.write(f"Original File: {original_name}")
        st.divider()
        render(original)
    with right:
        st.write(f"Translated File: {translated_name}")
        st.divider()
        render(translated)


# The button is only drawn once files are uploaded; fall back to False so the
# check below never reads an undefined name.
submit = st.button("Get Result", key='submit') if uploaded_files else False

if uploaded_files and submit:
    # Create an in-memory zip file to store translated documents
    zip_buffer = io.BytesIO()
    translated_count = 0
    with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
        # Add progress bar for translation status
        progress_bar = st.progress(0)
        for idx, uploaded_file in enumerate(uploaded_files):
            # Start timing
            start_time = time.time()

            file_name = uploaded_file.name
            file_content = uploaded_file.read()
            # Lower-case the extension so "REPORT.PDF" routes like "report.pdf".
            file_type = file_name.rsplit('.', 1)[-1].lower()
            translated_name = f"{lang_name}-translated-{file_name}"

            # Check file extension to determine translation method
            if file_type in SYNC_FILE_TYPES:
                translate = process_sync
            elif file_type in ASYNC_FILE_TYPES:
                translate = process_async
            else:
                translate = None

            if translate is None:
                # Without this branch the outcome variables would be unbound
                # (or left over from the previous file) for unknown types.
                st.warning(f"Skipped {file_name}: unsupported file type '{file_type}'")
            else:
                result, response = translate(file_name, file_content)

                # Calculate duration
                duration = time.time() - start_time

                if result:
                    # Add successfully translated file to zip archive
                    zip_file.writestr(translated_name, response)
                    translated_count += 1
                    st.success(f"Successfully translated: {file_name} (Time taken: {duration:.2f} seconds)")
                    # Preview only real translations, never a failed reply.
                    _show_preview(file_type, file_name, translated_name, file_content, response)
                else:
                    # Both translation helpers return raw content, not a
                    # response object, so report that content as text.
                    st.error(f"Failed to translate {file_name}: {_as_text(response)} (Time taken: {duration:.2f} seconds)")

            # Update progress bar based on processed files
            progress_bar.progress((idx + 1) / len(uploaded_files))

    # Offer the archive only when it actually contains a translation
    if translated_count:
        st.download_button(
            label="Download All Translated Files",
            data=zip_buffer.getvalue(),
            file_name=f"{lang_name}-translated-files.zip",
            mime="application/zip"
        )