verbisense / src /image_processor.py
HARISH20205's picture
first
c8c7a9e
import easyocr
import requests
import io
from PIL import Image
from typing import List, Dict, Any
import os
import numpy as np
from gradio_client import Client
def process_image_file(image_url: str) -> List[Dict[str, Any]]:
# Fetch the image content from the URL
response = requests.get(image_url)
# Check if the request was successful
if response.status_code == 200:
# Load the image from the response content using PIL
image_stream = io.BytesIO(response.content)
image = Image.open(image_stream)
# Convert the image to a NumPy array, which is supported by EasyOCR
image_np = np.array(image)
# Use EasyOCR to extract text from the image
reader = easyocr.Reader(['en'])
result = reader.readtext(image_np)
print("*" * 50 + image_url)
# Combine the extracted text from EasyOCR
extracted_text = "\n".join([detection[1] for detection in result])
if len(extracted_text.split())<5 :
# Use the BLIP model for image captioning
client = Client("HARISH20205/blip-image-caption")
caption_result = client.predict(image_url=image_url, api_name="/predict")
content = "\nImage Caption:\n" + str(caption_result)
return [{
"file_name": os.path.basename(image_url),
"text": content,
}]
# Format the content
content = "Image Data:\n" + extracted_text
return [{
"file_name": os.path.basename(image_url),
"text": content,
}]
else:
return [{
"file_name": os.path.basename(image_url),
"text": "Failed to retrieve image.",
}]