Sakshiw1 committed on
Commit
a8e3390
·
verified ·
1 Parent(s): e29c724

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -7
app.py CHANGED
@@ -1,13 +1,12 @@
1
- import pytesseract
2
- from PIL import Image
3
  import gradio as gr
 
4
  import re
5
 
6
- # Configure Tesseract path (update if needed)
7
- pytesseract.pytesseract.tesseract_cmd = r'C:/Program Files/Tesseract-OCR/tesseract.exe'
8
 
9
  def perform_ocr(image):
10
- text = pytesseract.image_to_string(image, lang='hin+eng')
11
  return text
12
 
13
  def search_first_keyword_in_text(text, keyword):
@@ -24,8 +23,6 @@ def search_first_keyword_in_text(text, keyword):
24
 
25
  def ocr_and_search(image, keyword):
26
  try:
27
- # Resize the image to a manageable size for processing
28
- image = image.resize((800, 600)) # Adjust size as needed
29
  extracted_text = perform_ocr(image)
30
  search_result = search_first_keyword_in_text(extracted_text, keyword)
31
  return extracted_text, search_result
 
 
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
  import re
4
 
5
+ # Load the OCR pipeline from Hugging Face
6
+ ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-base-stage1")
7
 
8
  def perform_ocr(image):
9
+ text = ocr_pipeline(image)[0]['generated_text']
10
  return text
11
 
12
  def search_first_keyword_in_text(text, keyword):
 
23
 
24
  def ocr_and_search(image, keyword):
25
  try:
 
 
26
  extracted_text = perform_ocr(image)
27
  search_result = search_first_keyword_in_text(extracted_text, keyword)
28
  return extracted_text, search_result