Spaces:
Sleeping
Sleeping
File size: 6,758 Bytes
1f48a9f c52dbb5 d8661f4 1f48a9f d8661f4 cdda69e 1f48a9f cdda69e 6ce9ced d8661f4 1f48a9f d8661f4 3cc4808 d8661f4 1f48a9f d8661f4 6ce9ced 1f7f38a d8661f4 1f7f38a d8661f4 1f7f38a d8661f4 cdda69e 6ce9ced cdda69e 6ce9ced 1f48a9f d8661f4 1f48a9f 1f7f38a 1f48a9f 1f7f38a 1f48a9f c52dbb5 6ce9ced cdda69e 3cc4808 cdda69e 6ce9ced 3cc4808 cdda69e 3cc4808 cdda69e 6ce9ced c52dbb5 e6af8fc c52dbb5 6ce9ced 1f7f38a 6ce9ced c52dbb5 1f48a9f c52dbb5 6ce9ced 3cc4808 e6af8fc 3cc4808 e6af8fc 3cc4808 e6af8fc 3cc4808 cdda69e 1f7f38a 1f48a9f d8661f4 1f7f38a d8661f4 1f48a9f d8661f4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
import io
import os
import time
import streamlit as st
import requests
import zipfile
from azure.core.credentials import AzureKeyCredential
from azure.ai.translation.document import DocumentTranslationClient
from docx import Document
from dotenv import load_dotenv
from streamlit_pdf_viewer import pdf_viewer
from utils import blob_service_client, upload_to_azure, download_from_azure, delete_from_azure
# Pull settings from a local .env file into the process environment.
load_dotenv()

# Streamlit UI: wide layout, page title, and the multi-file upload widget.
st.set_page_config(layout="wide")
st.title("Azure Translation Tools")
uploaded_files = st.file_uploader(
    "Upload files to start the process",
    accept_multiple_files=True,
)

# Document Translation client, authenticated with the translator key.
_translator_endpoint = os.environ["AZURE_AI_ENDPOINT_URL"]
_translator_credential = AzureKeyCredential(os.environ["AZURE_AI_TRANSLATOR_KEY"])
client = DocumentTranslationClient(_translator_endpoint, _translator_credential)

# Blob container URLs handed to the translation job as its source and target.
_STORAGE_ACCOUNT_URL = "https://cbdtranslation.blob.core.windows.net"
sourceUri = f"{_STORAGE_ACCOUNT_URL}/source"
targetUri = f"{_STORAGE_ACCOUNT_URL}/target"
# Supported target languages as (code, name) pairs; the select box shows them
# as "code - name".
_LANGUAGE_TABLE = (
    ('id', 'Indonesian'),
    ('en', 'English'),
    ('es', 'Spanish'),
    ('zh', 'Chinese'),
    ('ar', 'Arabic'),
    ('fr', 'French'),
    ('ru', 'Russian'),
    ('hi', 'Hindi'),
    ('pt', 'Portuguese'),
    ('de', 'German'),
    ('ms', 'Malay'),
    ('ta', 'Tamil'),
    ('ko', 'Korean'),
    ('th', 'Thai'),
)
langs = tuple(f"{code} - {name}" for code, name in _LANGUAGE_TABLE)

# Ask for the target language, then split the chosen label into its parts.
lang = st.selectbox('Target language selection:', langs, key='lang')
_lang_tokens = lang.split()
lang_id = _lang_tokens[0]     # first token is the language code (e.g., 'en')
lang_name = _lang_tokens[-1]  # last token is the language name (e.g., 'English')
def process_sync(file_name, file_content):
    """Translate one document with the synchronous Azure translation endpoint.

    The document is posted as multipart form data to the
    ``/translator/document:translate`` route under ``AZURE_AI_ENDPOINT_URL``.
    The target language is the module-level ``lang_id`` selection.

    Args:
        file_name: Name of the uploaded file, sent as the multipart filename.
        file_content: Raw bytes of the document to translate.

    Returns:
        A ``(succeeded, body)`` tuple. ``succeeded`` is True only when the
        service answers with HTTP 200. ``body`` is the raw response body, or
        the UTF-8 encoded exception text when the request could not be
        completed (connection failure, timeout, ...).

    Raises:
        KeyError: If a required ``AZURE_AI_*`` environment variable is missing.
    """
    # Set up Azure Translator API headers
    headers = {
        "Ocp-Apim-Subscription-Key": os.environ["AZURE_AI_TRANSLATOR_KEY"],
    }

    # Prepare file for translation.
    # NOTE(review): "ContentType/file-extension" looks like a documentation
    # placeholder rather than a real media type; kept unchanged - confirm
    # whether the service needs the document's actual MIME type.
    files = {
        "document": (file_name, file_content, "ContentType/file-extension"),
    }

    # Read the environment values into locals first: nesting double quotes
    # inside a double-quoted f-string is a SyntaxError before Python 3.12.
    endpoint = os.environ["AZURE_AI_ENDPOINT_URL"].rstrip("/")
    api_version = os.environ["AZURE_AI_API_VERSION"]
    url = f"{endpoint}/translator/document:translate"
    params = {"targetLanguage": lang_id, "api-version": api_version}

    # Send translation request to Azure. Without a timeout a stalled
    # connection would block the app forever; allow 10 s to connect and
    # 5 minutes for the service to return the translated document.
    try:
        response = requests.post(
            url,
            headers=headers,
            params=params,
            files=files,
            timeout=(10, 300),
        )
    except requests.RequestException as exc:
        return False, str(exc).encode("utf-8")

    return response.status_code == 200, response.content
def process_async(file_name, file_content):
    """Translate one document through the batch (blob-based) translation job.

    The file is uploaded to the "source" container, a translation job is
    started from ``sourceUri`` to ``targetUri`` for the module-level
    ``lang_id``, and the translated blob is downloaded from the "target"
    container once the job reports success.

    NOTE(review): the job is given the whole source and target containers, so
    any other blob present in "source" would be translated too, and only the
    first reported document's status is inspected - confirm that uploads are
    isolated per request.

    Args:
        file_name: Blob name used in both the source and target containers.
        file_content: Raw bytes of the document to translate.

    Returns:
        A ``(succeeded, content)`` tuple. ``succeeded`` is True when the first
        document reported by the job has status ``'Succeeded'``. ``content`` is
        whatever ``download_from_azure`` returns for the translated blob, or
        ``b""`` when the job reported no documents or did not succeed.
    """
    # Upload the original file to Azure Blob Storage source container
    upload_to_azure(blob_service_client, "source", file_content, file_name)

    succeeded = False
    translated_content = b""
    try:
        # Start the translation job and wait for it to complete
        poller = client.begin_translation(sourceUri, targetUri, lang_id)
        result = poller.result()

        # An empty result must count as a failure instead of falling off the
        # end of the function and returning None to a caller that unpacks two
        # values.
        first_document = next(iter(result), None)
        succeeded = first_document is not None and first_document.status == 'Succeeded'

        # Only a successful job has a translated blob worth downloading
        if succeeded:
            translated_content = download_from_azure(blob_service_client, "target", file_name)
    finally:
        # Clean up even when the job or the download raised, so blobs are
        # not left behind in the containers.
        try:
            delete_from_azure(blob_service_client, "source", file_name)
        finally:
            if succeeded:
                delete_from_azure(blob_service_client, "target", file_name)

    # Return translation status and the translated content
    return succeeded, translated_content
# Extensions handled by the single-request (synchronous) translation path.
_SYNC_FILE_TYPES = frozenset({
    'txt', 'tsv', 'tab', 'csv', 'html', 'htm', 'mhtml', 'mht',
    'pptx', 'xlsx', 'docx', 'msg', 'xlf', 'xliff',
})
# Extensions that go through the blob-based batch translation job.
_ASYNC_FILE_TYPES = frozenset({'pdf', 'odt', 'odp', 'ods', 'rtf'})


def _as_text(payload):
    """Return *payload* as displayable text; bytes are decoded leniently."""
    if payload is None:
        return ""
    if isinstance(payload, (bytes, bytearray)):
        return bytes(payload).decode("utf-8", errors="replace")
    return str(payload)


def _pick_translator(file_type):
    """Return the translation function for *file_type*, or None if unsupported."""
    if file_type in _SYNC_FILE_TYPES:
        return process_sync
    if file_type in _ASYNC_FILE_TYPES:
        return process_async
    return None


def _show_preview(file_type, original_name, translated_name, original, translated):
    """Show the original and translated file side by side for previewable types."""
    if file_type == 'pdf':
        renderer, left, right = pdf_viewer, original, translated
    elif file_type == 'docx':
        renderer, left, right = st.write, "On development", "On development"
    elif file_type == 'txt':
        # Decode so the text is shown instead of a bytes repr
        renderer, left, right = st.write, _as_text(original), _as_text(translated)
    else:
        return

    col1, col2 = st.columns(2)
    with col1:
        st.write(f"Original File: {original_name}")
        st.divider()
        renderer(left)
    with col2:
        st.write(f"Translated File: {translated_name}")
        st.divider()
        renderer(right)


if uploaded_files:
    submit = st.button("Get Result", key='submit')

if uploaded_files and submit:
    # Create an in-memory zip file to store translated documents
    zip_buffer = io.BytesIO()
    translated_count = 0
    with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
        # Add progress bar for translation status
        progress_bar = st.progress(0)
        for idx, uploaded_file in enumerate(uploaded_files):
            # Start timing
            start_time = time.time()
            file_name = uploaded_file.name
            file_content = uploaded_file.read()
            # Lower-case the extension so "REPORT.PDF" is routed like "report.pdf"
            file_type = os.path.splitext(file_name)[1].lstrip('.').lower()
            translated_name = f"{lang_name}-translated-{file_name}"

            # Check file extension to determine translation method
            translate = _pick_translator(file_type)
            if translate is None:
                # Report the file instead of reusing (or lacking) the previous
                # iteration's result/response variables.
                st.error(f"Unsupported file type, skipped: {file_name}")
            else:
                result, response = translate(file_name, file_content)
                # Calculate duration
                duration = time.time() - start_time
                if result:
                    # Add successfully translated file to zip archive
                    zip_file.writestr(translated_name, response)
                    translated_count += 1
                    st.success(f"Successfully translated: {file_name} (Time taken: {duration:.2f} seconds)")
                    _show_preview(file_type, file_name, translated_name, file_content, response)
                else:
                    # The translators return the raw body, not a Response
                    # object, so there is no status_code/text to read here.
                    st.error(f"Failed to translate {file_name}: {_as_text(response)} (Time taken: {duration:.2f} seconds)")

            # Update progress bar based on processed files
            progress_bar.progress((idx + 1) / len(uploaded_files))

    # The archive is read only after the ZipFile context has closed, which is
    # when its central directory is written. Offer the download only when at
    # least one file was translated.
    if translated_count:
        st.download_button(
            label="Download All Translated Files",
            data=zip_buffer.getvalue(),
            file_name=f"{lang_name}-translated-files.zip",
            mime="application/zip"
        )
|