OCR-image-to-text

Sleeping

File size: 2,548 Bytes

import logging
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from paddleocr import PaddleOCR
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
import numpy as np
from PIL import Image
import io

# Set up logging
# The root logger is configured at INFO level; `logger` is the module-level
# logger that the OCR helpers and the /ocr/ endpoint in this file write to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Application instance; the route decorators in this file register on it.
app = FastAPI()

# CORS: every origin, method and header is accepted, and credentials are
# allowed.
# NOTE(review): the FastAPI/Starlette CORS documentation advises against
# combining allow_credentials=True with wildcard origins. If no client sends
# cookies or auth headers cross-origin, consider allow_credentials=False or an
# explicit origin list — confirm against the actual clients before changing.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize models once at startup
# Both engines are built at import time and shared by all requests.
# doctr predictor, requested with pretrained=True; used for PDF input.
ocr_model = ocr_predictor(pretrained=True)
# PaddleOCR instance configured with lang='en' and use_angle_cls=True; used
# for image input.
paddle_ocr = PaddleOCR(lang='en', use_angle_cls=True)

def ocr_with_doctr(file) -> str:
    """Run the doctr predictor on a PDF and return the recognised text.

    Args:
        file: PDF input passed unchanged to ``DocumentFile.from_pdf``. The
            /ocr/ endpoint in this file supplies an in-memory ``io.BytesIO``.

    Returns:
        All recognised lines in page/block/line order. Words within a line
        are joined by single spaces and every line ends with a newline.
        An empty string when the result contains no lines.

    Raises:
        HTTPException: status 500 when loading the PDF or running the model
            fails. The underlying exception is chained as ``__cause__``.
    """
    collected_lines = []
    try:
        logger.info("Processing PDF with Doctr...")
        doc = DocumentFile.from_pdf(file)
        result = ocr_model(doc)
        # Gather the lines and join once at the end rather than growing a
        # string inside three nested loops.
        for page in result.pages:
            for block in page.blocks:
                for line in block.lines:
                    collected_lines.append(
                        " ".join(word.value for word in line.words) + "\n"
                    )
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("Error processing PDF: %s", e)
        raise HTTPException(
            status_code=500, detail=f"Error processing PDF: {e}"
        ) from e
    return "".join(collected_lines)

def ocr_with_paddle(img) -> str:
    """Run PaddleOCR on an image and return the recognised text.

    Args:
        img: Image passed unchanged to ``paddle_ocr.ocr``. The /ocr/ endpoint
            in this file supplies a NumPy array built from a PIL image.

    Returns:
        The recognised text fragments of the first result entry, each
        preceded by a single space (so non-empty output starts with a
        space). An empty string when nothing was detected.

    Raises:
        HTTPException: status 500 when PaddleOCR fails. The underlying
            exception is chained as ``__cause__``.
    """
    try:
        logger.info("Processing image with PaddleOCR...")
        result = paddle_ocr.ocr(img)
        # NOTE(review): some PaddleOCR releases appear to return None or
        # [None] for an image with no detected text — confirm against the
        # installed version. Without this guard, len(None) would turn a blank
        # image into a 500 error.
        detections = result[0] if result else None
        if not detections:
            return ''
        # Each detection is indexed as [1][0] to obtain its text, exactly as
        # the original index-based loop did.
        finaltext = ''.join(' ' + detection[1][0] for detection in detections)
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("Error processing image: %s", e)
        raise HTTPException(
            status_code=500, detail=f"Error processing image: {e}"
        ) from e
    return finaltext

@app.post("/ocr/")
async def perform_ocr(file: UploadFile = File(...)):
    """Extract text from an uploaded PDF or image.

    Uploads whose filename ends in ``.pdf`` (case-insensitive) go to
    ``ocr_with_doctr``; everything else is opened with PIL and sent to
    ``ocr_with_paddle``.

    Args:
        file: The uploaded file from the multipart request.

    Returns:
        A dict ``{"ocr_text": <str>}`` containing the recognised text.

    Raises:
        HTTPException: status 500 on any failure. Errors already raised as
            ``HTTPException`` by the OCR helpers are passed through
            unchanged instead of being wrapped a second time.
    """
    # NOTE(review): both OCR calls below run synchronously inside this
    # coroutine, so a long OCR job occupies the event loop for its duration.
    try:
        logger.info(f"Received file: {file.filename}")
        file_bytes = await file.read()

        # The filename may be absent, and extensions such as ".PDF" must
        # still be routed to the PDF path.
        filename = (file.filename or "").lower()
        if filename.endswith('.pdf'):
            logger.info("Detected PDF file")
            text_output = ocr_with_doctr(io.BytesIO(file_bytes))
        else:
            logger.info("Detected image file")
            # Convert to an array while the image is open, then release it.
            with Image.open(io.BytesIO(file_bytes)) as pil_image:
                img = np.array(pil_image)
            text_output = ocr_with_paddle(img)

        logger.info("OCR completed successfully")
        return {"ocr_text": text_output}

    except HTTPException:
        # The helpers have already logged and formatted their own errors;
        # wrapping again would yield "Internal server error: 500: Error ...".
        raise
    except Exception as e:
        logger.exception("Internal server error: %s", e)
        raise HTTPException(
            status_code=500, detail=f"Internal server error: {e}"
        ) from e

@app.get("/test/")
async def test_call():
    """Return a fixed message showing that the service responds."""
    greeting = {"message": "Hi. I'm running"}
    return greeting