OCR-image-to-text

Sleeping

App Files Community

Genzo1010 committed on Sep 11, 2024

Commit

fd9f73a

verified ·

1 Parent(s): e7b8a3e

Removed gradio

Browse files

Files changed (1) hide show

app.py +30 -111

app.py CHANGED Viewed

@@ -1,138 +1,57 @@
-import gradio as gr
-import tensorflow as tf
-import keras_ocr
-import requests
-import cv2
-import os
-import csv
 import numpy as np
-import pandas as pd
-import huggingface_hub
-from huggingface_hub import Repository
-from datetime import datetime
-import scipy.ndimage.interpolation as inter
-import easyocr
-import datasets
-from datasets import load_dataset, Image
 from PIL import Image
 from paddleocr import PaddleOCR
 from doctr.io import DocumentFile
 from doctr.models import ocr_predictor
 ocr_model = ocr_predictor(pretrained=True)
-"""
-Perform OCR with doctr
-"""
 def ocr_with_doctr(file):
     text_output = ''
-    # Load the document
     doc = DocumentFile.from_pdf(file)
-    # Perform OCR
     result = ocr_model(doc)
-    # Extract text from OCR result
     for page in result.pages:
         for block in page.blocks:
             for line in block.lines:
                 text_output += " ".join([word.value for word in line.words]) + "\n"
     return text_output
-"""
-Paddle OCR
-"""
 def ocr_with_paddle(img):
     finaltext = ''
-    ocr = PaddleOCR(lang='en', use_angle_cls=True)
-    # img_path = 'exp.jpeg'
-    result = ocr.ocr(img)
     for i in range(len(result[0])):
         text = result[0][i][1][0]
-        finaltext += ' '+ text
     return finaltext
-"""
-Keras OCR
-"""
-def ocr_with_keras(img):
-    output_text = ''
-    pipeline=keras_ocr.pipeline.Pipeline()
-    images=[keras_ocr.tools.read(img)]
-    predictions=pipeline.recognize(images)
-    first=predictions[0]
-    for text,box in first:
-        output_text += ' '+ text
-    return output_text
-"""
-easy OCR
-"""
-# gray scale image
-def get_grayscale(image):
-    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-# Thresholding or Binarization
-def thresholding(src):
-    return cv2.threshold(src,127,255, cv2.THRESH_TOZERO)[1]
-def ocr_with_easy(img):
-    gray_scale_image=get_grayscale(img)
-    thresholding(gray_scale_image)
-    cv2.imwrite('image.png',gray_scale_image)
-    reader = easyocr.Reader(['th','en'])
-    bounds = reader.readtext('image.png',paragraph="False",detail = 0)
-    bounds = ''.join(bounds)
-    return bounds
-def generate_ocr(Method, file):
-    text_output = ''
-    if isinstance(file, bytes):  # Handle file uploaded as bytes
-        file = io.BytesIO(file)
-    if file.name.endswith('.pdf'):
-        # Perform OCR on the PDF using doctr
-        text_output = ocr_with_doctr(file)
     else:
-        # Handle image file
-        img_np = np.array(Image.open(file))
-        text_output = generate_text_from_image(Method, img_np)
-    return text_output
-def generate_text_from_image(Method, img):
-    text_output = ''
-    if Method == 'EasyOCR':
-        text_output = ocr_with_easy(img)
-    elif Method == 'KerasOCR':
-        text_output = ocr_with_keras(img)
-    elif Method == 'PaddleOCR':
-        text_output = ocr_with_paddle(img)
-    return text_output
-import gradio as gr
-image_or_pdf = gr.File(label="Upload an image or PDF")
-method = gr.Radio(["PaddleOCR", "EasyOCR", "KerasOCR"], value="PaddleOCR")
-output = gr.Textbox(label="Output")
-demo = gr.Interface(
-    generate_ocr,
-    [method, image_or_pdf],
-    output,
-    title="Optical Character Recognition",
-    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
-    article="""<p style='text-align: center;'>Feel free to give us your thoughts on this demo and please contact us at
-                <a href="mailto:[email protected]" target="_blank">[email protected]</a>
-                <p style='text-align: center;'>Developed by: <a href="https://www.pragnakalp.com" target="_blank">Pragnakalp Techlabs</a></p>"""
-)
-demo.launch(show_error=True)

+from fastapi import FastAPI, File, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.middleware.gzip import GZipMiddleware
 import numpy as np
 from PIL import Image
 from paddleocr import PaddleOCR
 from doctr.io import DocumentFile
 from doctr.models import ocr_predictor
+import io
+app = FastAPI()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"]
+)
+# Initialize models once at startup
 ocr_model = ocr_predictor(pretrained=True)
+paddle_ocr = PaddleOCR(lang='en', use_angle_cls=True)
 def ocr_with_doctr(file):
     text_output = ''
     doc = DocumentFile.from_pdf(file)
     result = ocr_model(doc)
     for page in result.pages:
         for block in page.blocks:
             for line in block.lines:
                 text_output += " ".join([word.value for word in line.words]) + "\n"
     return text_output
 def ocr_with_paddle(img):
     finaltext = ''
+    result = paddle_ocr.ocr(img)
     for i in range(len(result[0])):
         text = result[0][i][1][0]
+        finaltext += ' ' + text
     return finaltext
+def generate_text_from_image(img):
+    return ocr_with_paddle(img)
+@app.post("/ocr/")
+async def perform_ocr(file: UploadFile = File(...)):
+    file_bytes = await file.read()
+    if file.filename.endswith('.pdf'):
+        text_output = ocr_with_doctr(io.BytesIO(file_bytes))
     else:
+        img = np.array(Image.open(io.BytesIO(file_bytes)))
+        text_output = generate_text_from_image(img)
+    return {"ocr_text": text_output}
+@app.get("/test/")
+async def test_call():
+    return {"message": "Hi. I'm running"}