Neha13 committed on
Commit
699ec42
·
verified ·
1 Parent(s): 5729c63

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from ai4bharat.transliteration import XlitEngine
3
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
4
+ import torch
5
+ from IndicTransTokenizer import IndicProcessor
6
+
7
# Initialize the transliteration engine and model.
#
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the expensive objects are built inside a cached loader and
# shared across reruns instead of being re-created each time.
@st.cache_resource
def _load_resources():
    """Build the transliteration engine, seq2seq model, tokenizer and processor.

    Returns:
        A 4-tuple ``(engine, model, tokenizer, processor)`` holding the
        ``XlitEngine``, the ``ai4bharat/indictrans2-indic-en-1B`` model, its
        tokenizer, and an ``IndicProcessor`` created with ``inference=True``.
    """
    engine = XlitEngine(["gu", 'en'], beam_width=10, src_script_type="en")
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(
        "ai4bharat/indictrans2-indic-en-1B", trust_remote_code=True
    )
    seq2seq_tokenizer = AutoTokenizer.from_pretrained(
        "ai4bharat/indictrans2-indic-en-1B", trust_remote_code=True
    )
    processor = IndicProcessor(inference=True)
    return engine, seq2seq_model, seq2seq_tokenizer, processor


# The original module-level names are kept so existing references keep working.
e, model, tokenizer, ip = _load_resources()
12
+
13
def english_to_gujarati(text):
    """Return the Gujarati (``'gu'``) transliteration of ``text``.

    The sentence is handed to the module-level transliteration engine ``e``
    and the entry stored under the ``'gu'`` key of its result is returned.

    Args:
        text: Sentence to transliterate.
            NOTE(review): presumably Gujarati written in Latin script, since
            the engine is created with ``src_script_type="en"`` - confirm.
    """
    transliterations = e.translit_sentence(text)
    return transliterations['gu']
15
+
16
def translate_question(english_question):
    """Transliterate the question into Gujarati and translate it to English.

    The input is first passed through ``english_to_gujarati``. The resulting
    sentence is preprocessed with ``src_lang="guj_Gujr"`` and
    ``tgt_lang="eng_Latn"``, tokenized, run through ``model.generate`` with
    beam search, decoded, and finally postprocessed for ``eng_Latn``.

    Args:
        english_question: Text entered by the user.

    Returns:
        Whatever ``ip.postprocess_batch`` returns for the single-sentence
        batch (the caller displays it as-is).
    """
    # The pipeline works on batches, so wrap the single sentence in a list.
    sentences = [english_to_gujarati(english_question)]

    prepared = ip.preprocess_batch(sentences, src_lang="guj_Gujr", tgt_lang="eng_Latn")
    encoded = tokenizer(
        prepared,
        padding="longest",
        truncation=True,
        max_length=256,
        return_tensors="pt",
    )

    # Generation only; inference mode disables autograd bookkeeping.
    with torch.inference_mode():
        generated = model.generate(
            **encoded,
            num_beams=5,
            num_return_sequences=1,
            max_length=256,
        )

    # Decode inside the tokenizer's target-side context.
    with tokenizer.as_target_tokenizer():
        decoded = tokenizer.batch_decode(
            generated,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )

    return ip.postprocess_batch(decoded, lang="eng_Latn")
31
+
32
# Streamlit UI: a title, a prompt, one text field, and a button that runs the
# transliterate-then-translate pipeline on the entered text.
st.title("English to Gujarati Translation")
st.write("Enter your question in English:")

english_question = st.text_input("Question:")

translate_clicked = st.button("Translate")
if translate_clicked and not english_question:
    # Button pressed with an empty field: ask for input instead of translating.
    st.write("Please enter a question.")
elif translate_clicked:
    translated = translate_question(english_question)
    st.write("Transliterated and Translated question:", translated)
44
+