shreyasvaidya committed on
Commit
9eb10ea
·
verified ·
1 Parent(s): f52e5d3

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +2 -2
  2. app.py +16 -15
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: "IndicPhotoOCR"
3
  colorFrom: "purple"
4
  colorTo: "pink"
5
  sdk: "gradio"
@@ -15,7 +15,7 @@ app_port: 7865
15
  <p align="center">
16
  <img src="./static/pics/bharatOCR.png" alt="BharatOCR Logo" width="25%">
17
  <h3 align="center">
18
- IndicPhotoOCR - Comprehensive Scene Text Recognition Toolkit </br> across 13 Indian Languages
19
  </h3>
20
  </p>
21
  <div align="center">
 
1
  ---
2
+ title: "Image_to_text_translation"
3
  colorFrom: "purple"
4
  colorTo: "pink"
5
  sdk: "gradio"
 
15
  <p align="center">
16
  <img src="./static/pics/bharatOCR.png" alt="BharatOCR Logo" width="25%">
17
  <h3 align="center">
18
+ Scene Text to Text Translation
19
  </h3>
20
  </p>
21
  <div align="center">
app.py CHANGED
@@ -20,8 +20,8 @@ DEVICE = "cpu"
20
 
21
  # Initialize the OCR object for text detection and recognition
22
  ocr = OCR(device="cpu", verbose=False)
23
- def translate_en_hin(given_str):
24
- model_name = "ai4bharat/indictrans2-en-indic-1B"
25
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
26
 
27
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)
@@ -30,7 +30,7 @@ def translate_en_hin(given_str):
30
 
31
  model = model.to(DEVICE)
32
  model.eval()
33
- src_lang, tgt_lang = "eng_Latn", "hin_Deva"
34
 
35
  batch = ip.preprocess_batch(
36
  [given_str],
@@ -276,20 +276,21 @@ def process_image(image):
276
  for id,bbox in enumerate(detections):
277
  # Identify the script and crop the image to this region
278
  script_lang, cropped_path = ocr.crop_and_identify_script(pil_image, bbox)
279
-
280
- script_lang = "english"
281
- if script_lang: # Only proceed if a script language is identified
282
- # Recognize text in the cropped area
283
- recognized_text = ocr.recognise(cropped_path, "english")
284
- x1 = min([bbox[i][0] for i in range(len(bbox))])
285
- y1 = min([bbox[i][1] for i in range(len(bbox))])
286
- x2 = max([bbox[i][0] for i in range(len(bbox))])
287
- y2 = max([bbox[i][1] for i in range(len(bbox))])
288
-
289
  recognized_texts[f"img_{id}"] = {"txt":recognized_text,"bbox":[x1,y1,x2,y2]}
290
-
 
 
 
 
 
291
  # Combine recognized texts into a single string for display
292
- return output_image, translate_en_hin(detect_para(recognized_texts))
293
 
294
  # Custom HTML for interface header with logos and alignment
295
  interface_html = """
 
20
 
21
  # Initialize the OCR object for text detection and recognition
22
  ocr = OCR(device="cpu", verbose=False)
23
+ def translate(given_str,lang):
24
+ model_name = "ai4bharat/indictrans2-en-indic-1B" if lang=="english" else "ai4bharat/indictrans2-indic-en-1B"
25
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
26
 
27
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)
 
30
 
31
  model = model.to(DEVICE)
32
  model.eval()
33
+ src_lang, tgt_lang = "eng_Latn", "hin_Deva" if lang=="english" else "hin_Deva", "eng_Latn"
34
 
35
  batch = ip.preprocess_batch(
36
  [given_str],
 
276
  for id,bbox in enumerate(detections):
277
  # Identify the script and crop the image to this region
278
  script_lang, cropped_path = ocr.crop_and_identify_script(pil_image, bbox)
279
+ x1 = min([bbox[i][0] for i in range(len(bbox))])
280
+ y1 = min([bbox[i][1] for i in range(len(bbox))])
281
+ x2 = max([bbox[i][0] for i in range(len(bbox))])
282
+ y2 = max([bbox[i][1] for i in range(len(bbox))])
283
+ if script_lang:
284
+ recognized_text = ocr.recognise(cropped_path,script_lang)
 
 
 
 
285
  recognized_texts[f"img_{id}"] = {"txt":recognized_text,"bbox":[x1,y1,x2,y2]}
286
+
287
+
288
+
289
+ translated = translate(recognized_texts,script_lang)
290
+
291
+
292
  # Combine recognized texts into a single string for display
293
+ return output_image, translated
294
 
295
  # Custom HTML for interface header with logos and alignment
296
  interface_html = """