from fastapi import FastAPI, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
import numpy as np
from PIL import Image
from paddleocr import PaddleOCR
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
import io

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize models once at startup so requests don't pay the model-loading cost.
ocr_model = ocr_predictor(pretrained=True)
paddle_ocr = PaddleOCR(lang='en', use_angle_cls=True)


def ocr_with_doctr(file):
    """Run docTR over a PDF (path, bytes, or file-like) and return its text."""
    text_output = ''
    doc = DocumentFile.from_pdf(file)
    result = ocr_model(doc)
    # docTR nests results as pages -> blocks -> lines -> words.
    for page in result.pages:
        for block in page.blocks:
            for line in block.lines:
                text_output += " ".join(word.value for word in line.words) + "\n"
    return text_output


def ocr_with_paddle(img):
    """Run PaddleOCR over a numpy image array and return its text."""
    final_text = ''
    result = paddle_ocr.ocr(img)
    # PaddleOCR may return an empty/None first element when nothing is detected.
    if not result or not result[0]:
        return final_text
    # Each entry is (bounding box, (text, confidence)); keep only the text.
    for line in result[0]:
        final_text += ' ' + line[1][0]
    return final_text


def generate_text_from_image(img):
    return ocr_with_paddle(img)


@app.post("/ocr/")
async def perform_ocr(file: UploadFile = File(...)):
    """Accept an uploaded PDF or image and return the extracted text."""
    file_bytes = await file.read()
    if file.filename and file.filename.lower().endswith('.pdf'):
        # PDFs go through docTR, which handles multi-page documents.
        text_output = ocr_with_doctr(io.BytesIO(file_bytes))
    else:
        # Anything else is treated as an image and sent to PaddleOCR.
        img = np.array(Image.open(io.BytesIO(file_bytes)))
        text_output = generate_text_from_image(img)
    return {"ocr_text": text_output}


@app.get("/test/")
async def test_call():
    return {"message": "Hi. I'm running"}
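

# Example requests, as a sketch: this assumes the app is served with uvicorn on
# localhost:8000 (e.g. `uvicorn main:app`; the module name "main" and the port
# are assumptions, not part of this file). The multipart field must be named
# "file" to match the endpoint's parameter.
#   curl -X POST -F "file=@scan.png" http://localhost:8000/ocr/
#   curl -X POST -F "file=@document.pdf" http://localhost:8000/ocr/
#   curl http://localhost:8000/test/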