Spaces:
Running
Running
import easyocr | |
import requests | |
import io | |
from PIL import Image | |
from typing import List, Dict, Any | |
import os | |
import numpy as np | |
from gradio_client import Client | |
# Seconds to wait on the image host before treating the download as failed.
_REQUEST_TIMEOUT_SECONDS = 30

# OCR output with fewer whitespace-separated words than this is considered
# too sparse to be useful, and the image is captioned instead.
_MIN_OCR_WORDS = 5

# Text reported whenever the image cannot be downloaded or decoded.
_FETCH_FAILED_TEXT = "Failed to retrieve image."

# Lazily created EasyOCR reader shared by all calls (see _get_ocr_reader).
_ocr_reader = None


def _get_ocr_reader():
    """Return the shared English EasyOCR reader, creating it on first use."""
    global _ocr_reader
    if _ocr_reader is None:
        # Building a Reader is the expensive step, so do it only once
        # instead of on every processed image.
        _ocr_reader = easyocr.Reader(['en'])
    return _ocr_reader


def _build_record(image_url: str, text: str) -> List[Dict[str, Any]]:
    """Wrap *text* in the single-record list shape returned to callers."""
    return [{
        "file_name": os.path.basename(image_url),
        "text": text,
    }]


def process_image_file(image_url: str) -> List[Dict[str, Any]]:
    """Extract text from the image at *image_url*, or caption it.

    The image is downloaded and run through EasyOCR (English). When the OCR
    output contains fewer than ``_MIN_OCR_WORDS`` words, the remote
    ``HARISH20205/blip-image-caption`` Gradio app is asked for a caption of
    the same URL instead.

    Args:
        image_url: HTTP(S) URL of the image to process.

    Returns:
        A one-element list holding a dict with two keys:
        ``"file_name"`` (``os.path.basename`` of *image_url*) and ``"text"``,
        which is one of:

        * ``"Image Data:\\n<ocr text>"`` when OCR found enough words,
        * ``"\\nImage Caption:\\n<caption>"`` when the caption fallback ran,
        * ``"Failed to retrieve image."`` when the download failed, timed
          out, returned a non-200 status, or the body was not a decodable
          image.
    """
    try:
        # A timeout keeps a stalled host from blocking the caller forever.
        response = requests.get(image_url, timeout=_REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException:
        # Connection errors, timeouts and malformed URLs are reported the
        # same way as a non-200 response rather than propagating.
        return _build_record(image_url, _FETCH_FAILED_TEXT)

    if response.status_code != 200:
        return _build_record(image_url, _FETCH_FAILED_TEXT)

    try:
        with Image.open(io.BytesIO(response.content)) as image:
            # Normalise to 3-channel RGB before handing the pixels to
            # EasyOCR: palette ("P"), bilevel ("1"), CMYK and LA images
            # otherwise become arrays it cannot interpret correctly.
            image_np = np.array(image.convert("RGB"))
    except OSError:
        # Pillow raises OSError (including UnidentifiedImageError) when the
        # payload is not an image or is truncated.
        return _build_record(image_url, _FETCH_FAILED_TEXT)

    detections = _get_ocr_reader().readtext(image_np)
    print("*" * 50 + image_url)

    # Each detection is (bounding_box, text, confidence); keep only the text.
    extracted_text = "\n".join(detection[1] for detection in detections)

    if len(extracted_text.split()) < _MIN_OCR_WORDS:
        # Too little text to be useful: fall back to BLIP image captioning.
        client = Client("HARISH20205/blip-image-caption")
        caption_result = client.predict(image_url=image_url, api_name="/predict")
        return _build_record(
            image_url, "\nImage Caption:\n" + str(caption_result)
        )

    return _build_record(image_url, "Image Data:\n" + extracted_text)