shreyasvaidya
committed on
Upload folder using huggingface_hub
Browse files
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title: "
|
3 |
colorFrom: "purple"
|
4 |
colorTo: "pink"
|
5 |
sdk: "gradio"
|
@@ -15,7 +15,7 @@ app_port: 7865
|
|
15 |
<p align="center">
|
16 |
<img src="./static/pics/bharatOCR.png" alt="BharatOCR Logo" width="25%">
|
17 |
<h3 align="center">
|
18 |
-
|
19 |
</h3>
|
20 |
</p>
|
21 |
<div align="center">
|
|
|
1 |
---
|
2 |
+
title: "Image_to_text_translation"
|
3 |
colorFrom: "purple"
|
4 |
colorTo: "pink"
|
5 |
sdk: "gradio"
|
|
|
15 |
<p align="center">
|
16 |
<img src="./static/pics/bharatOCR.png" alt="BharatOCR Logo" width="25%">
|
17 |
<h3 align="center">
|
18 |
+
Scene Text to Text Translation
|
19 |
</h3>
|
20 |
</p>
|
21 |
<div align="center">
|
app.py
CHANGED
@@ -20,8 +20,8 @@ DEVICE = "cpu"
|
|
20 |
|
21 |
# Initialize the OCR object for text detection and recognition
|
22 |
ocr = OCR(device="cpu", verbose=False)
|
23 |
-
def
|
24 |
-
model_name = "ai4bharat/indictrans2-en-indic-1B"
|
25 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
26 |
|
27 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)
|
@@ -30,7 +30,7 @@ def translate_en_hin(given_str):
|
|
30 |
|
31 |
model = model.to(DEVICE)
|
32 |
model.eval()
|
33 |
-
src_lang, tgt_lang = "eng_Latn", "hin_Deva"
|
34 |
|
35 |
batch = ip.preprocess_batch(
|
36 |
[given_str],
|
@@ -276,20 +276,21 @@ def process_image(image):
|
|
276 |
for id,bbox in enumerate(detections):
|
277 |
# Identify the script and crop the image to this region
|
278 |
script_lang, cropped_path = ocr.crop_and_identify_script(pil_image, bbox)
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
y1 = min([bbox[i][1] for i in range(len(bbox))])
|
286 |
-
x2 = max([bbox[i][0] for i in range(len(bbox))])
|
287 |
-
y2 = max([bbox[i][1] for i in range(len(bbox))])
|
288 |
-
|
289 |
recognized_texts[f"img_{id}"] = {"txt":recognized_text,"bbox":[x1,y1,x2,y2]}
|
290 |
-
|
|
|
|
|
|
|
|
|
|
|
291 |
# Combine recognized texts into a single string for display
|
292 |
-
return output_image,
|
293 |
|
294 |
# Custom HTML for interface header with logos and alignment
|
295 |
interface_html = """
|
|
|
20 |
|
21 |
# Initialize the OCR object for text detection and recognition
|
22 |
ocr = OCR(device="cpu", verbose=False)
|
23 |
+
def translate(given_str,lang):
|
24 |
+
model_name = "ai4bharat/indictrans2-en-indic-1B" if lang=="english" else "ai4bharat/indictrans2-indic-en-1B"
|
25 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
26 |
|
27 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)
|
|
|
30 |
|
31 |
model = model.to(DEVICE)
|
32 |
model.eval()
|
33 |
+
# Pick the (source, target) language tags for the translation direction.
# Both pairs must be parenthesised: without the parentheses the conditional
# expression binds only to the middle operand, producing a 3-tuple that
# cannot be unpacked into two names (ValueError: too many values to unpack).
src_lang, tgt_lang = ("eng_Latn", "hin_Deva") if lang == "english" else ("hin_Deva", "eng_Latn")
|
34 |
|
35 |
batch = ip.preprocess_batch(
|
36 |
[given_str],
|
|
|
276 |
for id,bbox in enumerate(detections):
|
277 |
# Identify the script and crop the image to this region
|
278 |
script_lang, cropped_path = ocr.crop_and_identify_script(pil_image, bbox)
|
279 |
+
x1 = min([bbox[i][0] for i in range(len(bbox))])
|
280 |
+
y1 = min([bbox[i][1] for i in range(len(bbox))])
|
281 |
+
x2 = max([bbox[i][0] for i in range(len(bbox))])
|
282 |
+
y2 = max([bbox[i][1] for i in range(len(bbox))])
|
283 |
+
if script_lang:
|
284 |
+
recognized_text = ocr.recognise(cropped_path,script_lang)
|
|
|
|
|
|
|
|
|
285 |
recognized_texts[f"img_{id}"] = {"txt":recognized_text,"bbox":[x1,y1,x2,y2]}
|
286 |
+
|
287 |
+
|
288 |
+
|
289 |
+
translated = translate(recognized_texts,script_lang)
|
290 |
+
|
291 |
+
|
292 |
# Combine recognized texts into a single string for display
|
293 |
+
return output_image, translated
|
294 |
|
295 |
# Custom HTML for interface header with logos and alignment
|
296 |
interface_html = """
|