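# Hugging Face file-viewer header captured together with the source (not program code):
# Genzo1010's picture | Update app.py | 7adee70 verified | raw | history blame | 2.68 kB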
import logging
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from paddleocr import PaddleOCR
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
import numpy as np
from PIL import Image
import io
# Set up logging: configure the root logger at INFO level, then create the
# module-level logger that the functions in this file write to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Application object; the route decorators in this file register on it.
app = FastAPI()
# CORS policy: every origin, every method and every header is accepted, and
# credentials are allowed.
# NOTE(review): FastAPI's CORS documentation says wildcard origins/methods/
# headers should not be combined with allow_credentials=True -- confirm
# whether credentialed cross-origin requests are really needed here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Initialize models once at startup: both objects are built when this module
# is imported and are then shared by every request.
# Doctr predictor (pretrained weights), used for PDF uploads.
ocr_model = ocr_predictor(pretrained=True)
# PaddleOCR instance configured with lang='en' and use_angle_cls=True, used
# for image uploads.
paddle_ocr = PaddleOCR(lang='en', use_angle_cls=True)
def ocr_with_doctr(file):
    """Extract the text of a PDF with the module-level Doctr predictor.

    Args:
        file: PDF input handed unchanged to ``DocumentFile.from_pdf``. The
            ``/ocr/`` endpoint passes an ``io.BytesIO`` wrapping the upload.

    Returns:
        str: One output line per recognized text line, in page/block/line
        order. Words are joined by single spaces and every line ends with a
        newline. Empty string when no lines are recognized.

    Raises:
        HTTPException: Status 500 when loading the PDF or running the
            predictor fails; the original exception is chained as the cause.
    """
    try:
        logger.info("Processing PDF with Doctr...")
        doc = DocumentFile.from_pdf(file)
        result = ocr_model(doc)
        # Collect the lines and join once at the end instead of growing a
        # string inside three nested loops.
        lines = []
        for page in result.pages:
            for block in page.blocks:
                for line in block.lines:
                    lines.append(" ".join(word.value for word in line.words) + "\n")
    except Exception as e:
        # logger.exception keeps the same message but also records the
        # traceback, which the plain error log dropped.
        logger.exception("Error processing PDF: %s", e)
        raise HTTPException(status_code=500, detail=f"Error processing PDF: {e}") from e
    return "".join(lines)
def ocr_with_paddle(img):
    """Run the module-level PaddleOCR instance on an image and return its text.

    Args:
        img: Image passed unchanged to ``paddle_ocr.ocr``. The ``/ocr/``
            endpoint supplies a NumPy array built from the uploaded image.

    Returns:
        str: Every recognized text fragment, each preceded by a single space,
        concatenated in the order PaddleOCR reports them. Empty string when
        no text is detected.

    Raises:
        HTTPException: Status 500 when PaddleOCR fails or its result does not
            have the expected layout; the original exception is chained.
    """
    try:
        logger.info("Processing image with PaddleOCR...")
        result = paddle_ocr.ocr(img)
        # Only the first entry of the result is used (one input image).
        # Some PaddleOCR releases report "no text found" as None / [None]
        # rather than an empty list, so treat every falsy value as no text
        # instead of failing on len(None).
        detections = result[0] if result else None
        if not detections:
            return ''
        # The recognized string is read from detection[1][0]
        # (presumably a [box, (text, score)] layout -- see PaddleOCR docs).
        return "".join(" " + detection[1][0] for detection in detections)
    except Exception as e:
        logger.exception("Error processing image: %s", e)
        raise HTTPException(status_code=500, detail=f"Error processing image: {e}") from e
@app.post("/ocr/")
async def perform_ocr(file: UploadFile = File(...)):
    """OCR an uploaded file and return the recognized text.

    Uploads whose filename ends in ``.pdf`` (case-insensitive) are processed
    by ``ocr_with_doctr``; every other upload is decoded with Pillow and
    processed by ``ocr_with_paddle``.

    Args:
        file: The uploaded file from the multipart request.

    Returns:
        dict: ``{"ocr_text": <recognized text>}``.

    Raises:
        HTTPException: Status 500 on any failure. Errors already raised as
            ``HTTPException`` by the OCR helpers are passed through
            unchanged; anything else is wrapped as an internal server error.
    """
    try:
        logger.info(f"Received file: {file.filename}")
        file_bytes = await file.read()
        # The filename may be absent, and extensions such as ".PDF" must
        # still take the PDF route.
        filename = (file.filename or "").lower()
        if filename.endswith(".pdf"):
            logger.info("Detected PDF file")
            text_output = ocr_with_doctr(io.BytesIO(file_bytes))
        else:
            logger.info("Detected image file")
            with Image.open(io.BytesIO(file_bytes)) as opened:
                # RGB, RGBA and grayscale images are converted to arrays as
                # they are. Other modes (palette, bilevel, CMYK, ...) do not
                # yield plain gray/RGB pixel arrays, so normalize them to RGB.
                if opened.mode in ("RGB", "RGBA", "L"):
                    image = opened
                else:
                    image = opened.convert("RGB")
                img = np.array(image)
            text_output = ocr_with_paddle(img)
        logger.info("OCR completed successfully")
        return {"ocr_text": text_output}
    except HTTPException:
        # Already logged and formatted by the helper; re-wrapping it would
        # nest the message ("Internal server error: 500: Error ...").
        raise
    except Exception as e:
        logger.exception("Internal server error: %s", e)
        raise HTTPException(status_code=500, detail=f"Internal server error: {e}") from e
@app.get("/test/")
async def test_call():
    """Liveness check: answer with a fixed greeting message."""
    greeting = {"message": "Hi. I'm running"}
    return greeting
import uvicorn
# Main entry point: start the server only when this file is executed directly.
if __name__ == "__main__":
    # Pass the application object rather than the "app:app" import string.
    # With the string form uvicorn imports this file a second time as module
    # "app", which builds both OCR models again, and it only resolves while
    # the file is named app.py.
    # NOTE: uvicorn needs an import string if reload or multiple workers are
    # ever enabled.
    uvicorn.run(app, host="0.0.0.0", port=23333, reload=False)