Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -1,46 +1,128 @@
-#
-import
+# run.py
+import subprocess
+import sys
+import os
+
+def main():
+    # Start Streamlit server only
+    port = int(os.environ.get("PORT", 7860))  # Hugging Face Spaces uses port 7860
+    streamlit_process = subprocess.Popen([
+        sys.executable,
+        "-m",
+        "streamlit",
+        "run",
+        "app.py",
+        "--server.port",
+        str(port),
+        "--server.address",
+        "0.0.0.0"
+    ])
+
+    try:
+        streamlit_process.wait()
+    except KeyboardInterrupt:
+        streamlit_process.terminate()
+
+if __name__ == "__main__":
+    main()
+
+# api.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
+from app import translate_text
+
+app = FastAPI()
+
+class InputData(BaseModel):
+    sentences: List[str]
+    target_lang: str
+
+@app.get("/health")
+async def health_check():
+    return {"status": "healthy"}
+
+@app.post("/translate")
+async def translate(input_data: InputData):
+    try:
+        result = translate_text(
+            sentences=input_data.sentences,
+            target_lang=input_data.target_lang
+        )
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+# app.py
+import streamlit as st
+from fastapi import FastAPI
+from typing import List
import torch
-import asyncio
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from IndicTransToolkit import IndicProcessor
-import requests
import json
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+import uvicorn
+
+# Initialize FastAPI
+api = FastAPI()
+
+# Add CORS middleware
+api.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)

# Initialize models and processors
-model = AutoModelForSeq2SeqLM.from_pretrained(
-
+model = AutoModelForSeq2SeqLM.from_pretrained(
+    "ai4bharat/indictrans2-en-indic-1B",
+    trust_remote_code=True
+)
+tokenizer = AutoTokenizer.from_pretrained(
+    "ai4bharat/indictrans2-en-indic-1B",
+    trust_remote_code=True
+)
ip = IndicProcessor(inference=True)
-
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(DEVICE)

def translate_text(sentences: List[str], target_lang: str):
    try:
        src_lang = "eng_Latn"
-        batch = ip.preprocess_batch(
-
-
+        batch = ip.preprocess_batch(
+            sentences,
+            src_lang=src_lang,
+            tgt_lang=target_lang
+        )
+        inputs = tokenizer(
+            batch,
+            truncation=True,
+            padding="longest",
+            return_tensors="pt",
+            return_attention_mask=True
+        ).to(DEVICE)
+
        with torch.no_grad():
            generated_tokens = model.generate(
-                inputs,
+                **inputs,
                use_cache=True,
                min_length=0,
                max_length=256,
                num_beams=5,
                num_return_sequences=1
            )
-
+
        with tokenizer.as_target_tokenizer():
            generated_tokens = tokenizer.batch_decode(
                generated_tokens.detach().cpu().tolist(),
                skip_special_tokens=True,
                clean_up_tokenization_spaces=True
            )
-
+
        translations = ip.postprocess_batch(generated_tokens, lang=target_lang)
        return {
            "translations": translations,
@@ -50,13 +132,26 @@ def translate_text(sentences: List[str], target_lang: str):
    except Exception as e:
        raise Exception(f"Translation failed: {str(e)}")

+# FastAPI routes
+@api.get("/health")
+async def health_check():
+    return {"status": "healthy"}
+
+@api.post("/translate")
+async def translate_endpoint(sentences: List[str], target_lang: str):
+    try:
+        result = translate_text(sentences=sentences, target_lang=target_lang)
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
# Streamlit interface
def main():
    st.title("Indic Language Translator")
-
+
    # Input text
    text_input = st.text_area("Enter text to translate:", "Hello, how are you?")
-
+
    # Language selection
    target_languages = {
        "Hindi": "hin_Deva",
@@ -71,13 +166,17 @@ def main():
        "Odia": "ori_Orya"
    }

-    target_lang = st.selectbox(
-
+    target_lang = st.selectbox(
+        "Select target language:",
+        options=list(target_languages.keys())
+    )
+
    if st.button("Translate"):
        try:
-            result = translate_text(
-
-
+            result = translate_text(
+                sentences=[text_input],
+                target_lang=target_languages[target_lang]
+            )
            st.success("Translation:")
            st.write(result["translations"][0])
        except Exception as e:
@@ -88,8 +187,9 @@ def main():
    st.header("API Documentation")
    st.markdown("""
    To use the translation API, send POST requests to:
+    ```
    https://USERNAME-SPACE_NAME.hf.space/translate
-
+    ```
    Request body format:
    ```json
    {
@@ -97,19 +197,21 @@ def main():
        "target_lang": "hin_Deva"
    }
    ```
-
-    Available target languages:
-    - Hindi: hin_Deva
-    - Bengali: ben_Beng
-    - Tamil: tam_Taml
-    - Telugu: tel_Telu
-    - Marathi: mar_Deva
-    - Gujarati: guj_Gujr
-    - Kannada: kan_Knda
-    - Malayalam: mal_Mlym
-    - Punjabi: pan_Guru
-    - Odia: ori_Orya
    """)
+    st.markdown("Available target languages:")
+    for lang, code in target_languages.items():
+        st.markdown(f"- {lang}: `{code}`")

if __name__ == "__main__":
-
+    # Run both Streamlit and FastAPI
+    import threading
+
+    def run_fastapi():
+        uvicorn.run(api, host="0.0.0.0", port=8000)
+
+    # Start FastAPI in a separate thread
+    api_thread = threading.Thread(target=run_fastapi)
+    api_thread.start()
+
+    # Run Streamlit
+    main()
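The API documentation embedded above describes the request shape but not a concrete call. Below is a minimal client-side sketch of that request using Python's `requests` library. It assumes the `/translate` route is actually reachable at the documented Space URL; with the threaded setup above, uvicorn listens on port 8000 while Spaces exposes only the Streamlit port (7860), so the host and port may need adjusting. The URL placeholder from the docs is kept as-is.

```python
# Hypothetical client for the /translate endpoint documented above.
# The URL below is the placeholder from the docs, not a live endpoint;
# point it at wherever the FastAPI app is actually exposed.
import requests

API_URL = "https://USERNAME-SPACE_NAME.hf.space/translate"

payload = {
    "sentences": ["Hello, how are you?"],
    "target_lang": "hin_Deva",
}

response = requests.post(API_URL, json=payload, timeout=120)
response.raise_for_status()
print(response.json()["translations"])
```

The payload mirrors the `InputData` model defined in the api.py section (`sentences` plus `target_lang`).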