Update app.py
Browse files
app.py
CHANGED
@@ -1,13 +1,12 @@
|
|
1 |
-
import pytesseract
|
2 |
-
from PIL import Image
|
3 |
import gradio as gr
|
|
|
4 |
import re
|
5 |
|
6 |
-
#
|
7 |
-
|
8 |
|
9 |
def perform_ocr(image):
|
10 |
-
text =
|
11 |
return text
|
12 |
|
13 |
def search_first_keyword_in_text(text, keyword):
|
@@ -24,8 +23,6 @@ def search_first_keyword_in_text(text, keyword):
|
|
24 |
|
25 |
def ocr_and_search(image, keyword):
|
26 |
try:
|
27 |
-
# Resize the image to a manageable size for processing
|
28 |
-
image = image.resize((800, 600)) # Adjust size as needed
|
29 |
extracted_text = perform_ocr(image)
|
30 |
search_result = search_first_keyword_in_text(extracted_text, keyword)
|
31 |
return extracted_text, search_result
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import pipeline
|
3 |
import re
|
4 |
|
5 |
+
# Load the OCR pipeline from Hugging Face
|
6 |
+
ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-base-stage1")
|
7 |
|
8 |
def perform_ocr(image):
|
9 |
+
text = ocr_pipeline(image)[0]['generated_text']
|
10 |
return text
|
11 |
|
12 |
def search_first_keyword_in_text(text, keyword):
|
|
|
23 |
|
24 |
def ocr_and_search(image, keyword):
|
25 |
try:
|
|
|
|
|
26 |
extracted_text = perform_ocr(image)
|
27 |
search_result = search_first_keyword_in_text(extracted_text, keyword)
|
28 |
return extracted_text, search_result
|