Spaces:

HARISH20205
/

verbisense

Running

verbisense / src /image_processor.py

first

c8c7a9e 3 months ago

1.73 kB

	import easyocr
	import requests
	import io
	from PIL import Image
	from typing import List, Dict, Any
	import os
	import numpy as np
	from gradio_client import Client


	# Below this many OCR'd words the image is treated as "not a text image"
	# and a generated caption is returned instead.
	_MIN_OCR_WORDS = 5

	# Upper bound (seconds) for the image download so a stalled server cannot
	# block the caller indefinitely.
	_REQUEST_TIMEOUT_SECONDS = 30

	# Lazily created EasyOCR reader shared by all calls; see _get_ocr_reader().
	_ocr_reader = None


	def _get_ocr_reader():
	    """Return the shared English EasyOCR reader, creating it on first use.

	    Constructing ``easyocr.Reader`` loads the recognition models, so the
	    instance is built once and reused instead of once per image.
	    """
	    global _ocr_reader
	    if _ocr_reader is None:
	        _ocr_reader = easyocr.Reader(['en'])
	    return _ocr_reader


	def _image_record(image_url: str, text: str) -> List[Dict[str, Any]]:
	    """Build the single-element result list returned for *image_url*."""
	    return [{
	        "file_name": os.path.basename(image_url),
	        "text": text,
	    }]


	def process_image_file(image_url: str) -> List[Dict[str, Any]]:
	    """Extract text from the image at *image_url*, or caption it.

	    The image is downloaded and run through EasyOCR (English). When OCR
	    yields fewer than ``_MIN_OCR_WORDS`` whitespace-separated words, the
	    image URL is sent to the ``HARISH20205/blip-image-caption`` Gradio
	    Space and its caption is returned instead.

	    Args:
	        image_url: HTTP(S) URL of the image to process.

	    Returns:
	        A one-element list holding a dict with two keys:
	        ``"file_name"`` (``os.path.basename`` of the URL) and ``"text"``,
	        which is one of
	        ``"Image Data:\\n<ocr text>"``,
	        ``"\\nImage Caption:\\n<caption>"``, or
	        ``"Failed to retrieve image."`` when the response status is not 200.

	    Raises:
	        requests.RequestException: If the download fails at the transport
	            level or exceeds the timeout. Errors raised while decoding the
	            image, running OCR, or calling the captioning client are not
	            caught here either.
	    """
	    response = requests.get(image_url, timeout=_REQUEST_TIMEOUT_SECONDS)
	    if response.status_code != 200:
	        return _image_record(image_url, "Failed to retrieve image.")

	    # Decode with PIL and normalise to 3-channel RGB before handing the
	    # pixels to EasyOCR: palette ("P"), 1-bit and CMYK images would
	    # otherwise become index/boolean/4-channel arrays that do not
	    # represent the visible picture.
	    with Image.open(io.BytesIO(response.content)) as image:
	        image_np = np.array(image.convert("RGB"))

	    detections = _get_ocr_reader().readtext(image_np)

	    # Visual separator in the console output, followed by the URL.
	    print("*" * 50 + image_url)

	    # Each detection's second element is the recognised string.
	    extracted_text = "\n".join(detection[1] for detection in detections)

	    if len(extracted_text.split()) < _MIN_OCR_WORDS:
	        # Too little text to be useful: fall back to image captioning.
	        client = Client("HARISH20205/blip-image-caption")
	        caption_result = client.predict(image_url=image_url, api_name="/predict")
	        return _image_record(
	            image_url, "\nImage Caption:\n" + str(caption_result)
	        )

	    return _image_record(image_url, "Image Data:\n" + extracted_text)