import io
import os
import time
import streamlit as st
import requests
import zipfile
from azure.core.credentials import AzureKeyCredential
from azure.ai.translation.document import DocumentTranslationClient
from docx import Document
from dotenv import load_dotenv
from streamlit_pdf_viewer import pdf_viewer
from utils import blob_service_client, upload_to_azure, download_from_azure, delete_from_azure
# Streamlit UI
st.title("Azure Translation Tools")
uploaded_files = st.file_uploader("Upload files to start the process", accept_multiple_files=True)

# Load settings from a local .env file (if one exists) before os.environ is
# read below; variables that are already set in the environment are kept.
load_dotenv()

# Initialize a new instance of the DocumentTranslationClient
client = DocumentTranslationClient(
    os.environ["AZURE_AI_ENDPOINT_URL"],
    AzureKeyCredential(os.environ["AZURE_AI_TRANSLATOR_KEY"]),
)

# Blob containers used by the batch translation job: documents are read from
# the source container and the translated output is written to the target one.
sourceUri = "https://cbdtranslation.blob.core.windows.net/source"
targetUri = "https://cbdtranslation.blob.core.windows.net/target"

# Define available language options, each formatted as "<code> - <name>"
langs = (
    'id - Indonesian',
    'en - English',
    'es - Spanish',
    'zh - Chinese',
    'ar - Arabic',
    'fr - French',
    'ru - Russian',
    'hi - Hindi',
    'pt - Portuguese',
    'de - German',
    'ms - Malay',
    'ta - Tamil',
    'ko - Korean',
    'th - Thai',
)

# Get user's language selection and extract language code and name
lang = st.selectbox('Target language selection:', langs, key='lang')
lang_parts = lang.split()
lang_id = lang_parts[0]     # Language code (e.g., 'en')
lang_name = lang_parts[-1]  # Language name (e.g., 'English')
def process_sync(file_name, file_content):
    """Translate one document with a single synchronous HTTP request.

    The target language is taken from the module-level ``lang_id``.

    Args:
        file_name: Name of the document; sent as the multipart filename.
        file_content: Raw content of the document to translate.

    Returns:
        A ``(succeeded, content)`` tuple. ``succeeded`` is True only when the
        service answered with HTTP 200; ``content`` is the raw response body
        in either case.

    Raises:
        KeyError: If a required environment variable is not set.
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    # Set up Azure Translator API headers
    headers = {
        "Ocp-Apim-Subscription-Key": os.environ["AZURE_AI_TRANSLATOR_KEY"],
    }
    # Prepare file for translation
    # NOTE(review): the content type below looks like a documentation
    # placeholder rather than a real MIME type -- confirm the service accepts it.
    files = {
        "document": (file_name, file_content, "ContentType/file-extension"),
    }
    # Read the settings into locals first: reusing double quotes inside a
    # double-quoted f-string is a SyntaxError before Python 3.12.
    endpoint = os.environ["AZURE_AI_ENDPOINT_URL"]
    api_version = os.environ["AZURE_AI_API_VERSION"]
    # Construct API URL with target language and version
    url = (
        f"{endpoint}/translator/document:translate"
        f"?targetLanguage={lang_id}&api-version={api_version}"
    )
    # Send translation request to Azure. The (connect, read) timeout keeps a
    # stalled connection from blocking the app indefinitely.
    response = requests.post(url, headers=headers, files=files, timeout=(10, 300))
    return response.status_code == 200, response.content
def process_async(file_name, file_content):
    """Translate one document through the blob-storage based translation job.

    The file is uploaded to the "source" container, a translation job is run
    from ``sourceUri`` to ``targetUri`` for the module-level ``lang_id``, and
    the blob with the same name is then fetched from the "target" container.

    Args:
        file_name: Blob name used for both the uploaded and translated file.
        file_content: Raw content of the document to translate.

    Returns:
        A ``(succeeded, content)`` tuple. ``succeeded`` is True when the first
        document reported by the job has status ``'Succeeded'`` and False
        otherwise, including when the job reports no documents at all.
        ``content`` is whatever ``download_from_azure`` returned.
    """
    # Upload the original file to Azure Blob Storage source container
    upload_to_azure(blob_service_client, "source", file_content, file_name)
    try:
        # Start the translation job and block until it has finished
        poller = client.begin_translation(sourceUri, targetUri, lang_id)
        result = poller.result()
        # Download the translated file from Azure Blob Storage target container
        downloaded_file_content = download_from_azure(blob_service_client, "target", file_name)
        delete_from_azure(blob_service_client, "target", file_name)
    finally:
        # Always remove the uploaded source blob, even when the job or the
        # download raised, so it is not left behind in the container.
        delete_from_azure(blob_service_client, "source", file_name)
    # Report the status of the first document only; an empty result counts as
    # a failure instead of implicitly returning None.
    first_document = next(iter(result), None)
    if first_document is None:
        return False, downloaded_file_content
    return first_document.status == 'Succeeded', downloaded_file_content
if uploaded_files:
    submit = st.button("Get Result", key='submit')
if uploaded_files and submit:
    # Extensions sent through process_sync vs. process_async.
    sync_types = {
        'txt', 'tsv', 'tab', 'csv', 'html', 'htm', 'mhtml', 'mht',
        'pptx', 'xlsx', 'docx', 'msg', 'xlf', 'xliff',
    }
    async_types = {'pdf', 'odt', 'odp', 'ods', 'rtf'}
    # Extensions for which the original/translated names are shown side by side.
    preview_types = ('pdf', 'docx', 'txt')

    # Create an in-memory zip file to store translated documents
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
        # Add progress bar for translation status
        progress_bar = st.progress(0)
        total_files = len(uploaded_files)
        for idx, uploaded_file in enumerate(uploaded_files):
            # Start timing
            start_time = time.time()
            file_name = uploaded_file.name
            file_content = uploaded_file.read()
            # Compare extensions case-insensitively so "REPORT.PDF" is routed
            # the same way as "report.pdf".
            file_type = file_name.rsplit('.', 1)[-1].lower()

            # Check file extension to determine translation method
            if file_type in sync_types:
                result, response = process_sync(file_name, file_content)
            elif file_type in async_types:
                result, response = process_async(file_name, file_content)
            else:
                # Without this branch `result`/`response` would be unbound
                # (or left over from the previous file).
                st.error(f"Unsupported file type: {uploaded_file.name}")
                progress_bar.progress((idx + 1) / total_files)
                continue

            # Calculate duration
            duration = time.time() - start_time

            # Check if translation was successful
            if result:
                # Add successfully translated file to zip archive
                zip_file.writestr(f"{lang_name}-translated-{uploaded_file.name}", response)
                st.success(f"Successfully translated: {uploaded_file.name} (Time taken: {duration:.2f} seconds)")
            else:
                # Both helpers hand back raw content rather than an HTTP
                # response object, so render it as text for the error message.
                if isinstance(response, (bytes, bytearray)):
                    detail = response.decode('utf-8', errors='replace')
                else:
                    detail = str(response)
                st.error(f"Failed to translate {uploaded_file.name}: {detail} (Time taken: {duration:.2f} seconds)")

            if file_type in preview_types:
                # Display the original and translated files side by side
                col1, col2 = st.columns(2)
                with col1:
                    st.write(f"Original File: {uploaded_file.name}")
                    if file_type == 'docx':
                        st.write("On development")
                with col2:
                    st.write(f"Translated File: {lang_name}-translated-{uploaded_file.name}")
                    if file_type == 'docx':
                        st.write("On development")

            # Update progress bar based on completed translations
            progress_bar.progress((idx + 1) / total_files)
# Create download button for the zip file containing all translations
label="Download All Translated Files",