Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,22 +1,89 @@
|
|
1 |
-
#
|
2 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from fastapi import FastAPI, HTTPException
|
4 |
from pydantic import BaseModel
|
5 |
from typing import List
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
import torch
|
7 |
-
import asyncio
|
8 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
9 |
from IndicTransToolkit import IndicProcessor
|
10 |
-
import requests
|
11 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
# Initialize models and processors
|
14 |
model = AutoModelForSeq2SeqLM.from_pretrained(
|
15 |
-
"ai4bharat/indictrans2-en-indic-1B",
|
16 |
trust_remote_code=True
|
17 |
)
|
18 |
tokenizer = AutoTokenizer.from_pretrained(
|
19 |
-
"ai4bharat/indictrans2-en-indic-1B",
|
20 |
trust_remote_code=True
|
21 |
)
|
22 |
ip = IndicProcessor(inference=True)
|
@@ -31,7 +98,6 @@ def translate_text(sentences: List[str], target_lang: str):
|
|
31 |
src_lang=src_lang,
|
32 |
tgt_lang=target_lang
|
33 |
)
|
34 |
-
|
35 |
inputs = tokenizer(
|
36 |
batch,
|
37 |
truncation=True,
|
@@ -58,16 +124,27 @@ def translate_text(sentences: List[str], target_lang: str):
|
|
58 |
)
|
59 |
|
60 |
translations = ip.postprocess_batch(generated_tokens, lang=target_lang)
|
61 |
-
|
62 |
return {
|
63 |
"translations": translations,
|
64 |
"source_language": src_lang,
|
65 |
"target_language": target_lang
|
66 |
}
|
67 |
-
|
68 |
except Exception as e:
|
69 |
raise Exception(f"Translation failed: {str(e)}")
|
70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
# Streamlit interface
|
72 |
def main():
|
73 |
st.title("Indic Language Translator")
|
@@ -100,11 +177,8 @@ def main():
|
|
100 |
sentences=[text_input],
|
101 |
target_lang=target_languages[target_lang]
|
102 |
)
|
103 |
-
|
104 |
-
# Display result
|
105 |
st.success("Translation:")
|
106 |
st.write(result["translations"][0])
|
107 |
-
|
108 |
except Exception as e:
|
109 |
st.error(f"Translation failed: {str(e)}")
|
110 |
|
@@ -116,7 +190,6 @@ def main():
|
|
116 |
```
|
117 |
https://USERNAME-SPACE_NAME.hf.space/translate
|
118 |
```
|
119 |
-
|
120 |
Request body format:
|
121 |
```json
|
122 |
{
|
@@ -124,19 +197,21 @@ def main():
|
|
124 |
"target_lang": "hin_Deva"
|
125 |
}
|
126 |
```
|
127 |
-
|
128 |
-
Available target languages:
|
129 |
-
- Hindi: `hin_Deva`
|
130 |
-
- Bengali: `ben_Beng`
|
131 |
-
- Tamil: `tam_Taml`
|
132 |
-
- Telugu: `tel_Telu`
|
133 |
-
- Marathi: `mar_Deva`
|
134 |
-
- Gujarati: `guj_Gujr`
|
135 |
-
- Kannada: `kan_Knda`
|
136 |
-
- Malayalam: `mal_Mlym`
|
137 |
-
- Punjabi: `pan_Guru`
|
138 |
-
- Odia: `ori_Orya`
|
139 |
""")
|
|
|
|
|
|
|
140 |
|
141 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
main()
|
|
|
1 |
+
# run.py
|
2 |
+
import subprocess
|
3 |
+
import sys
|
4 |
+
import os
|
5 |
+
|
6 |
+
def main():
    """Launch the Streamlit UI for ``app.py`` and block until it exits.

    The port is read from the ``PORT`` environment variable and falls back
    to 7860; the server is bound to ``0.0.0.0``. On Ctrl-C the child
    process is asked to terminate and is then reaped, so it is not left
    behind when this launcher exits.
    """
    # Start Streamlit server only
    port = int(os.environ.get("PORT", 7860))  # Hugging Face Spaces uses port 7860
    command = [
        sys.executable,
        "-m",
        "streamlit",
        "run",
        "app.py",
        "--server.port",
        str(port),
        "--server.address",
        "0.0.0.0",
    ]
    streamlit_process = subprocess.Popen(command)

    try:
        streamlit_process.wait()
    except KeyboardInterrupt:
        streamlit_process.terminate()
        # terminate() only sends the request; wait so the child is reaped,
        # and escalate to kill() if it does not exit in a reasonable time.
        try:
            streamlit_process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            streamlit_process.kill()
            streamlit_process.wait()
|
25 |
+
|
26 |
+
if __name__ == "__main__":
|
27 |
+
main()
|
28 |
+
|
29 |
+
# api.py
|
30 |
from fastapi import FastAPI, HTTPException
|
31 |
from pydantic import BaseModel
|
32 |
from typing import List
|
33 |
+
from app import translate_text
|
34 |
+
|
35 |
+
app = FastAPI()
|
36 |
+
|
37 |
+
class InputData(BaseModel):
    """Request payload accepted by the ``/translate`` route."""

    # Sentences to translate; forwarded to translate_text as ``sentences``.
    sentences: List[str]
    # Target language code; forwarded to translate_text as ``target_lang``.
    target_lang: str
|
40 |
+
|
41 |
+
@app.get("/health")
async def health_check():
    """Health probe: always answers with a fixed "healthy" status payload."""
    status_payload = {"status": "healthy"}
    return status_payload
|
44 |
+
|
45 |
+
@app.post("/translate")
async def translate(input_data: InputData):
    """Translate the sentences carried by the request body.

    Returns whatever ``translate_text`` returns. Any exception raised
    during translation is reported to the client as an HTTP 500 whose
    detail is the exception message.
    """
    try:
        return translate_text(
            sentences=input_data.sentences,
            target_lang=input_data.target_lang,
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
55 |
+
|
56 |
+
# app.py
|
57 |
+
import streamlit as st
|
58 |
+
from fastapi import FastAPI
|
59 |
+
from typing import List
|
60 |
import torch
|
|
|
61 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
62 |
from IndicTransToolkit import IndicProcessor
|
|
|
63 |
import json
|
64 |
+
from fastapi.middleware.cors import CORSMiddleware
|
65 |
+
from fastapi.staticfiles import StaticFiles
|
66 |
+
import uvicorn
|
67 |
+
|
68 |
+
# Initialize FastAPI
api = FastAPI()

# Add CORS middleware.
# Origins, methods and headers are all wildcards, so credentialed requests
# are disabled: the CORS middleware documentation states that wildcards may
# not be combined with allow_credentials=True.
api.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
79 |
|
80 |
# Initialize models and processors
# The model and its tokenizer are loaded from the same checkpoint, so the
# identifier is spelled once.
MODEL_NAME = "ai4bharat/indictrans2-en-indic-1B"

model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
ip = IndicProcessor(inference=True)
|
|
|
98 |
src_lang=src_lang,
|
99 |
tgt_lang=target_lang
|
100 |
)
|
|
|
101 |
inputs = tokenizer(
|
102 |
batch,
|
103 |
truncation=True,
|
|
|
124 |
)
|
125 |
|
126 |
translations = ip.postprocess_batch(generated_tokens, lang=target_lang)
|
|
|
127 |
return {
|
128 |
"translations": translations,
|
129 |
"source_language": src_lang,
|
130 |
"target_language": target_lang
|
131 |
}
|
|
|
132 |
except Exception as e:
|
133 |
raise Exception(f"Translation failed: {str(e)}")
|
134 |
|
135 |
+
# FastAPI routes
|
136 |
+
@api.get("/health")
async def health_check():
    """Health probe: always answers with a fixed "healthy" status payload."""
    status_payload = {"status": "healthy"}
    return status_payload
|
139 |
+
|
140 |
+
@api.post("/translate")
async def translate_endpoint(sentences: List[str], target_lang: str):
    """Translate ``sentences`` into ``target_lang`` via ``translate_text``.

    Returns whatever ``translate_text`` returns. Any exception raised
    during translation is reported to the client as an HTTP 500 whose
    detail is the exception message.
    """
    # NOTE(review): with this signature FastAPI presumably reads
    # ``sentences`` from the request body and ``target_lang`` from the query
    # string, not from one JSON object with both keys - confirm against the
    # request format documented for this endpoint.
    try:
        return translate_text(sentences=sentences, target_lang=target_lang)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
|
147 |
+
|
148 |
# Streamlit interface
|
149 |
def main():
|
150 |
st.title("Indic Language Translator")
|
|
|
177 |
sentences=[text_input],
|
178 |
target_lang=target_languages[target_lang]
|
179 |
)
|
|
|
|
|
180 |
st.success("Translation:")
|
181 |
st.write(result["translations"][0])
|
|
|
182 |
except Exception as e:
|
183 |
st.error(f"Translation failed: {str(e)}")
|
184 |
|
|
|
190 |
```
|
191 |
https://USERNAME-SPACE_NAME.hf.space/translate
|
192 |
```
|
|
|
193 |
Request body format:
|
194 |
```json
|
195 |
{
|
|
|
197 |
"target_lang": "hin_Deva"
|
198 |
}
|
199 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
""")
|
201 |
+
st.markdown("Available target languages:")
|
202 |
+
for lang, code in target_languages.items():
|
203 |
+
st.markdown(f"- {lang}: `{code}`")
|
204 |
|
205 |
if __name__ == "__main__":
    # Run both Streamlit and FastAPI
    import threading

    # Name used to recognise an API server thread that is already running.
    API_THREAD_NAME = "fastapi-server"

    def run_fastapi():
        """Serve the FastAPI app on 0.0.0.0:8000; blocks the calling thread."""
        uvicorn.run(api, host="0.0.0.0", port=8000)

    # Start FastAPI in a separate thread, but only once per process. If this
    # script is executed again in the same process (presumably what Streamlit
    # does on every rerun - confirm), a second server would try to bind
    # port 8000 again and fail.
    already_running = any(
        thread.name == API_THREAD_NAME for thread in threading.enumerate()
    )
    if not already_running:
        api_thread = threading.Thread(target=run_fastapi, name=API_THREAD_NAME)
        api_thread.start()

    # Run Streamlit
    main()
|