Spaces:

Genzo1010
/

img_ocr_text_extractor

Runtime error

Genzo1010 committed on Aug 20, 2024

Commit

e4cd444

verified ·

1 Parent(s): 3c6ec6c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,11 +5,17 @@ from PIL import Image
 import numpy as np
 import io
 import uvicorn
 app = FastAPI()
-# Initialize PaddleOCR
-ocr = PaddleOCR(lang='en', use_angle_cls=True)
 @app.post("/extract-text/")
 async def extract_text(file: UploadFile = File(...)):
@@ -24,10 +30,9 @@ async def extract_text(file: UploadFile = File(...)):
     elif img_array.shape[-1] == 4:  # RGBA image
         img_array = img_array[..., :3]
-    # Perform OCR
-    result = ocr.ocr(img_array)
-    extracted_text = ' '.join([line[1][0] for line in result[0]])
     return {"text": extracted_text}
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8000)))

 import numpy as np
 import io
 import uvicorn
+from functools import lru_cache
 app = FastAPI()
+# Define a cache to store the results of the OCR function
+@lru_cache(maxsize=128)
+def ocr_cache(img_array):
+    ocr = PaddleOCR(lang='en', use_angle_cls=True)
+    result = ocr.ocr(img_array)
+    extracted_text =''.join([line[1][0] for line in result[0]])
+    return extracted_text
 @app.post("/extract-text/")
 async def extract_text(file: UploadFile = File(...)):
     elif img_array.shape[-1] == 4:  # RGBA image
         img_array = img_array[..., :3]
+    # Perform OCR using the cache
+    extracted_text = ocr_cache(tuple(img_array.flatten()))
     return {"text": extracted_text}
 if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8000)))