Spaces:

Genzo1010
/

img_ocr_text_extractor

Runtime error

Update app.py

e4cd444 verified 5 months ago

1.17 kB

	import io
	import os
	from functools import lru_cache

	import numpy as np
	import uvicorn
	from fastapi import FastAPI, File, HTTPException, UploadFile
	from paddleocr import PaddleOCR
	from PIL import Image

	# FastAPI application instance: the route handler below registers itself on
	# it via @app.post, and uvicorn serves it when this file is run as a script.
	app = FastAPI()

	@lru_cache(maxsize=1)
	def _get_ocr_engine():
	    """Create the PaddleOCR engine on first use and reuse it afterwards.

	    Constructing ``PaddleOCR`` is expensive, so it must not happen once per
	    OCR call.
	    """
	    return PaddleOCR(lang='en', use_angle_cls=True)


	# Cache OCR results keyed on the exact pixel content of the image.
	@lru_cache(maxsize=128)
	def _ocr_cached(pixel_bytes, shape, dtype_str):
	    """Run OCR on an image described by hashable parts.

	    Args:
	        pixel_bytes: Raw, C-contiguous pixel buffer of the image.
	        shape: Array shape needed to rebuild the image from the buffer.
	        dtype_str: NumPy dtype string of the pixel buffer (e.g. ``'|u1'``).

	    Returns:
	        The recognized text of all detected lines, concatenated.
	    """
	    # frombuffer yields a read-only view; copy so the OCR engine receives an
	    # ordinary writable array.
	    img_array = np.frombuffer(pixel_bytes, dtype=dtype_str).reshape(shape).copy()
	    result = _get_ocr_engine().ocr(img_array)
	    # PaddleOCR reports "no text found" as [None]; treat that as no lines
	    # instead of crashing while iterating None.
	    detected_lines = result[0] if result and result[0] else []
	    # NOTE(review): lines are joined with no separator, as before, so words
	    # on adjacent lines run together. Confirm whether ' ' or '\n' was meant.
	    return ''.join(line[1][0] for line in detected_lines)


	def ocr_cache(img_array):
	    """Return the text recognized in ``img_array``, memoizing the result.

	    Args:
	        img_array: Image pixels as a NumPy array (or array-like).

	    Returns:
	        The recognized text as a single string ('' when nothing is found).
	    """
	    # ndarrays are not hashable, and flattening to a tuple both loses the
	    # shape and creates one Python int per pixel. Key the cache on the raw
	    # bytes plus shape/dtype, from which the array can be rebuilt.
	    pixels = np.ascontiguousarray(img_array)
	    return _ocr_cached(pixels.tobytes(), pixels.shape, pixels.dtype.str)


	# Keep the lru_cache introspection helpers available on the public name.
	ocr_cache.cache_info = _ocr_cached.cache_info
	ocr_cache.cache_clear = _ocr_cached.cache_clear


	@app.post("/extract-text/")
	async def extract_text(file: UploadFile = File(...)):
	    """Extract text from an uploaded image.

	    Args:
	        file: The uploaded image file.

	    Returns:
	        A dict ``{"text": <recognized text>}``.

	    Raises:
	        HTTPException: 400 when the upload cannot be decoded as an image.
	    """
	    payload = await file.read()
	    try:
	        # convert("RGB") normalizes every PIL mode (grayscale, palette,
	        # RGBA, CMYK, ...) to 3 channels and forces a full decode, so a
	        # truncated or corrupt file fails here rather than later.
	        image = Image.open(io.BytesIO(payload)).convert("RGB")
	    except (OSError, Image.DecompressionBombError) as exc:
	        raise HTTPException(
	            status_code=400,
	            detail="Uploaded file is not a readable image",
	        ) from exc

	    # Perform OCR using the cache
	    extracted_text = ocr_cache(np.array(image))
	    return {"text": extracted_text}

	# Script entry point: serve the app on all interfaces, using the PORT
	# environment variable when set and 8000 otherwise.
	if __name__ == "__main__":
	    listen_port = int(os.environ.get("PORT", 8000))
	    uvicorn.run(app, host="0.0.0.0", port=listen_port)