# Source: Hugging Face file view (raw / history / blame), 7.73 kB
# Author: fadliaulawi
# Commit: be586f8 "Fix url quote"
import io
import os
import time
import streamlit as st
import requests
import zipfile
from azure.core.credentials import AzureKeyCredential
from azure.ai.translation.document import DocumentTranslationClient
from dotenv import load_dotenv
from streamlit_pdf_viewer import pdf_viewer
from utils import blob_service_client, upload_to_azure, download_from_azure, delete_from_azure
from streamlit_msal import Msal
# Pull configuration in via python-dotenv, then select the wide page layout.
# This is the first Streamlit call in the script.
load_dotenv()
st.set_page_config(layout="wide")

# Render the Azure Active Directory sign-in widget inside the sidebar.
# `auth_data` is falsy until the user has signed in.
with st.sidebar:
    msal_options = {
        "client_id": os.environ['AZURE_CLIENT_ID'],
        "authority": os.environ['AZURE_AUTHORITY_URL'],
        "scopes": [],
        "connecting_label": "Connecting",
        "disconnected_label": "Disconnected",
        "sign_in_label": "Sign in",
        "sign_out_label": "Sign out",
    }
    auth_data = Msal.initialize_ui(**msal_options)
if not auth_data:
    # No signed-in user yet: show a hint and stop this script run.
    st.warning("Please login to continue")
    st.stop()
else:
    # Page heading and the multi-file upload widget.
    st.title("Azure Translation Tools")
    uploaded_files = st.file_uploader(
        "Upload files to start the process",
        accept_multiple_files=True,
    )

    # Build the DocumentTranslationClient from the endpoint and key found in
    # the environment (endpoint is read first, then the key).
    translator_endpoint = os.environ["AZURE_AI_ENDPOINT_URL"]
    translator_credential = AzureKeyCredential(os.environ["AZURE_AI_TRANSLATOR_KEY"])
    client = DocumentTranslationClient(translator_endpoint, translator_credential)

    # Blob containers passed to the translation job as source and target.
    sourceUri = "https://cbdtranslation.blob.core.windows.net/source"
    targetUri = "https://cbdtranslation.blob.core.windows.net/target"

    # Selectable target languages, each written as "<code> - <name>".
    langs = (
        'id - Indonesian',
        'en - English',
        'es - Spanish',
        'zh - Chinese',
        'ar - Arabic',
        'fr - French',
        'ru - Russian',
        'hi - Hindi',
        'pt - Portuguese',
        'de - German',
        'ms - Malay',
        'ta - Tamil',
        'ko - Korean',
        'th - Thai',
    )

    # Let the user pick a target language, then separate the code
    # (e.g. 'en') from the display name (e.g. 'English').
    lang = st.selectbox('Target language selection:', langs, key='lang')
    lang_id, lang_name = lang.split(' - ')
def process_sync(file_name, file_content, timeout=300):
    """Translate one document with a single synchronous HTTP request.

    The document is posted as multipart form data to the
    ``/translator/document:translate`` route of ``AZURE_AI_ENDPOINT_URL``,
    targeting the module-level ``lang_id``.

    Args:
        file_name: Name sent along with the uploaded document.
        file_content: Raw bytes of the document.
        timeout: Seconds to wait for the service before giving up, so a
            stalled connection cannot hang the app indefinitely.

    Returns:
        A ``(ok, payload)`` tuple. ``ok`` is True only for HTTP 200.
        ``payload`` is the response body as bytes, or the UTF-8 encoded
        error text when the request itself could not be completed.

    Raises:
        KeyError: If a required environment variable is missing.
    """
    # Set up Azure Translator API headers
    headers = {
        "Ocp-Apim-Subscription-Key": os.environ["AZURE_AI_TRANSLATOR_KEY"],
    }

    # Prepare file for translation.
    # NOTE(review): the content-type string looks like a documentation
    # placeholder; it is kept unchanged because the service's handling of a
    # real MIME type cannot be confirmed from this file.
    files = {
        "document": (file_name, file_content, "ContentType/file-extension"),
    }

    # Tolerate a trailing slash on the configured endpoint and let requests
    # encode the query string instead of splicing it into the URL by hand.
    endpoint = os.environ["AZURE_AI_ENDPOINT_URL"].rstrip("/")
    url = f"{endpoint}/translator/document:translate"
    params = {
        "targetLanguage": lang_id,
        "api-version": os.environ["AZURE_AI_API_VERSION"],
    }

    # Send translation request to Azure. Network-level failures are reported
    # through the normal (ok, payload) contract instead of crashing the page.
    try:
        response = requests.post(
            url,
            headers=headers,
            params=params,
            files=files,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        return False, str(exc).encode("utf-8")

    return response.status_code == 200, response.content
def process_async(file_name, file_content):
    """Translate one document via a blob-storage translation job.

    The file is uploaded to the "source" container, a translation job is
    started from ``sourceUri`` to ``targetUri`` for the module-level
    ``lang_id``, and the blob with the same name is then fetched from the
    "target" container.

    Args:
        file_name: Blob name used in both containers.
        file_content: Raw bytes of the document to translate.

    Returns:
        A ``(ok, payload)`` tuple. On success ``payload`` is the downloaded
        translated content; otherwise it is a UTF-8 encoded explanation.

    Raises:
        Whatever the storage helpers or the translation client raise; the
        staged source blob is still removed in that case.
    """
    # Upload the original file to Azure Blob Storage source container
    upload_to_azure(blob_service_client, "source", file_content, file_name)

    try:
        # Start the job and block until it finishes.
        # NOTE(review): the job is given the container URLs rather than this
        # single blob, so presumably any other blob in "source" is included;
        # confirm whether concurrent users can interfere with each other.
        poller = client.begin_translation(sourceUri, targetUri, lang_id)
        documents = list(poller.result())
    finally:
        # Always remove the staged source blob, even if the job raised, so it
        # does not leak into later translation runs.
        delete_from_azure(blob_service_client, "source", file_name)

    # An empty result previously fell off the end of the function and
    # returned None, which broke tuple unpacking in the caller.
    if not documents:
        return False, b"Translation job returned no documents"

    # As before, the first reported document decides the outcome.
    document = documents[0]
    if document.status != 'Succeeded':
        error = getattr(document, "error", None)
        detail = getattr(error, "message", None) or f"Translation status: {document.status}"
        return False, str(detail).encode("utf-8")

    # Only fetch the translated blob once the job reports success, and clean
    # up the target container whether or not the download works.
    try:
        downloaded_file_content = download_from_azure(blob_service_client, "target", file_name)
    finally:
        delete_from_azure(blob_service_client, "target", file_name)

    return True, downloaded_file_content
# Extensions routed to the single-request translator ('mhtml' was previously
# misspelled as 'mthml', so .mhtml files were never matched).
_SYNC_FILE_TYPES = frozenset({
    'txt', 'tsv', 'tab', 'csv', 'html', 'htm', 'mhtml', 'mht',
    'pptx', 'xlsx', 'docx', 'msg', 'xlf', 'xliff',
})
# Extensions routed to the blob-storage translation job.
_ASYNC_FILE_TYPES = frozenset({'pdf', 'odt', 'odp', 'ods', 'rtf'})
# Extensions that get an original/translated side-by-side preview.
_PREVIEW_FILE_TYPES = frozenset({'pdf', 'docx', 'txt'})


def _failure_detail(response):
    """Return printable text describing a failed translation payload."""
    # Both translation helpers hand back raw bytes, which have no
    # `.status_code` / `.text`; decode them for display.
    if isinstance(response, (bytes, bytearray)):
        return bytes(response).decode("utf-8", errors="replace")
    # Tolerate a response-like object as well, matching what the previous
    # error message expected.
    status_code = getattr(response, "status_code", None)
    if status_code is not None:
        return f"status code {status_code}: {getattr(response, 'text', '')}"
    return str(response)


def _render_document(file_type, content):
    """Render one side of the preview for the given file type."""
    if file_type == 'pdf':
        pdf_viewer(content)
    elif file_type == 'docx':
        st.write("On development")
    elif file_type == 'txt':
        # Show the text itself rather than the repr of a bytes object.
        if isinstance(content, (bytes, bytearray)):
            content = bytes(content).decode("utf-8", errors="replace")
        st.text(content)


def _show_preview(file_type, original_name, translated_name, original, translated):
    """Display the original and translated files side by side."""
    col1, col2 = st.columns(2)
    with col1:
        st.write(f"Original File: {original_name}")
        st.divider()
        _render_document(file_type, original)
    with col2:
        st.write(f"Translated File: {translated_name}")
        st.divider()
        _render_document(file_type, translated)


if uploaded_files:
    submit = st.button("Get Result", key='submit')

if uploaded_files and submit:
    # Create an in-memory zip file to store translated documents
    zip_buffer = io.BytesIO()
    translated_count = 0

    with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
        # Add progress bar for translation status
        progress_bar = st.progress(0)
        total_files = len(uploaded_files)

        for idx, uploaded_file in enumerate(uploaded_files, start=1):
            # Start timing
            start_time = time.perf_counter()

            file_name = uploaded_file.name
            file_content = uploaded_file.read()
            # Match extensions case-insensitively; a name without a dot has
            # no extension at all.
            file_type = file_name.rsplit('.', 1)[-1].lower() if '.' in file_name else ''

            # Check file extension to determine translation method
            if file_type in _SYNC_FILE_TYPES:
                result, response = process_sync(file_name, file_content)
            elif file_type in _ASYNC_FILE_TYPES:
                result, response = process_async(file_name, file_content)
            else:
                # Previously this fell through with `result`/`response`
                # unset, or left over from the preceding file.
                st.error(f"Unsupported file type for {file_name}")
                progress_bar.progress(idx / total_files)
                continue

            # Calculate duration
            duration = time.perf_counter() - start_time

            # Check if translation was successful
            if result:
                translated_name = f"{lang_name}-translated-{file_name}"
                # Add successfully translated file to zip archive
                zip_file.writestr(translated_name, response)
                translated_count += 1
                st.success(f"Successfully translated: {file_name} (Time taken: {duration:.2f} seconds)")
                # Only preview real translations, never an error payload.
                if file_type in _PREVIEW_FILE_TYPES:
                    _show_preview(file_type, file_name, translated_name, file_content, response)
            else:
                st.error(
                    f"Failed to translate {file_name}: {_failure_detail(response)} "
                    f"(Time taken: {duration:.2f} seconds)"
                )

            # Update progress bar based on completed translations
            progress_bar.progress(idx / total_files)

    # The archive is only complete once the ZipFile context has closed, so
    # the download button is built after the `with` block. It is skipped when
    # nothing was translated, since the zip would be empty.
    if translated_count:
        st.download_button(
            label="Download All Translated Files",
            data=zip_buffer.getvalue(),
            file_name=f"{lang_name}-translated-files.zip",
            mime="application/zip"
        )