File size: 1,896 Bytes
340c40c a8e3390 340c40c e74a5f3 340c40c a8e3390 e74a5f3 340c40c e74a5f3 a8e3390 340c40c e74a5f3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import html
import re

import gradio as gr
from PIL import ImageFilter
from transformers import pipeline
# Load the Hugging Face image-to-text (OCR) pipeline once, at import time, so
# that every call to perform_ocr() reuses the same model instance.
# NOTE(review): the "stage1" suffix in the model name suggests a pre-training
# checkpoint rather than a fine-tuned one -- confirm this is the intended model.
ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-large-stage1")
def preprocess_image(image):
    """Return a grayscale, sharpened version of *image* for OCR.

    Args:
        image: An object exposing ``convert`` and ``filter`` (the app passes
            the PIL image produced by the Gradio image input).

    Returns:
        The result of converting to mode ``'L'`` and applying
        ``ImageFilter.SHARPEN``.
    """
    # 'L' is the single-channel grayscale mode; sharpening is applied afterwards.
    return image.convert('L').filter(ImageFilter.SHARPEN)
def perform_ocr(image):
    """Run OCR on *image* and return the recognised text.

    The image is first passed through ``preprocess_image``; the text is read
    from the ``'generated_text'`` field of the pipeline's first prediction.
    """
    prepared = preprocess_image(image)
    predictions = ocr_pipeline(prepared)
    first_prediction = predictions[0]
    return first_prediction['generated_text']
def search_first_keyword_in_text(text, keyword):
    """Return the first sentence of *text* containing *keyword*, as HTML.

    The keyword is matched literally (not as a regular expression) and
    case-insensitively. Every occurrence inside the returned sentence is
    wrapped in ``<b>...</b>``; all other characters are HTML-escaped so the
    result is safe to render in an HTML component.

    Args:
        text: The text to search. Newlines are treated as spaces; ``None`` or
            an empty string is treated as text with no sentences.
        keyword: The literal text to look for.

    Returns:
        The highlighted sentence, ``"No matching sentence found."`` when no
        sentence contains the keyword, or
        ``"Please enter a keyword to search."`` when *keyword* is empty.
    """
    if not keyword:
        return "Please enter a keyword to search."

    # Escape the keyword so characters such as "+", "(" or "." are matched
    # literally; the same pattern is used for both searching and highlighting
    # so the two steps can never disagree.
    pattern = re.compile(f"({re.escape(keyword)})", re.IGNORECASE)

    flattened = (text or "").replace('\n', ' ')
    # Split after sentence-ending punctuation followed by one or more spaces.
    for sentence in re.split(r'(?<=[.!?]) +', flattened):
        if not pattern.search(sentence):
            continue
        # Splitting on a capturing group yields alternating pieces:
        # even indexes are plain text, odd indexes are keyword matches.
        pieces = pattern.split(sentence)
        highlighted = "".join(
            f"<b>{html.escape(piece)}</b>" if index % 2 else html.escape(piece)
            for index, piece in enumerate(pieces)
        )
        return highlighted.strip()

    return "No matching sentence found."
def ocr_and_search(image, keyword):
    """Run OCR on *image* and search the extracted text for *keyword*.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
        keyword: The keyword to look for in the extracted text.

    Returns:
        A ``(extracted_text, search_result)`` tuple of strings. When no image
        is supplied both entries are a prompt to upload one. When OCR or the
        search raises, both entries carry the error message; the second is
        HTML-escaped because it is rendered as HTML.
    """
    if image is None:
        # Avoid surfacing a raw AttributeError to the user for an empty upload.
        message = "Please upload an image."
        return message, message

    try:
        extracted_text = perform_ocr(image)
        search_result = search_first_keyword_in_text(extracted_text, keyword)
    except Exception as e:
        # UI boundary: report the failure in the outputs instead of crashing.
        error_text = str(e)
        return error_text, html.escape(error_text)
    return extracted_text, search_result
def web_app():
    """Build the Gradio interface for OCR plus keyword search and launch it.

    Inputs are an image upload (delivered as a PIL image) and a keyword
    textbox; outputs are the extracted text and an HTML search result, both
    produced by ``ocr_and_search``.
    """
    image_input = gr.Image(type="pil", label="Upload Image")
    keyword_input = gr.Textbox(placeholder="Enter keyword to search", label="Keyword Search")
    text_output = gr.Textbox(label="Extracted Text", lines=10)
    result_output = gr.HTML(label="Search Result (First Matching Sentence)")

    app = gr.Interface(
        fn=ocr_and_search,
        inputs=[image_input, keyword_input],
        outputs=[text_output, result_output],
        title="OCR and Keyword Search Application",
    )
    app.launch()
# Launch the web UI only when this file is executed as a script.
if __name__ == "__main__":
    web_app()