Spaces:
Running
Running
import numpy as np | |
from download_images import download_image_as_pil | |
def batch_process_images(image_urls: str, encoder, batch_size: int):
    """
    Download every image URL and generate embedding results in batches.

    Args:
        image_urls (str): Comma-separated string of image URLs. Surrounding
            whitespace and blank entries are ignored; ``None`` is treated
            like an empty string.
        encoder: The encoder object handed through to ``process_batch``.
        batch_size (int): The maximum number of images to process in one
            batch. Values below 1 are treated as 1.

    Returns:
        list: Result dictionaries, each holding an ``"image_url"`` plus either
            the fields written by ``process_batch`` or an ``"error"`` message.
            Download failures are recorded as soon as they happen, while
            downloaded images are recorded when their batch is flushed, so
            the order can differ from the input order.
        dict: ``{"error": ...}`` when no usable URL was supplied. This stays
            a dict (not a list) for backward compatibility with callers.
    """
    # Split the input string by commas and strip whitespace; `or ""` lets a
    # missing (None) input take the same "no URLs" path as an empty string.
    urls = [url.strip() for url in (image_urls or "").split(",") if url.strip()]
    if not urls:
        return {"error": "No valid image URLs provided."}

    # An equality test against a non-positive size would never fire and the
    # whole input would pile up in one batch, so enforce a floor of 1.
    max_batch = max(1, batch_size)

    results = []
    batch_urls, batch_images = [], []
    for url in urls:
        # Only the download is guarded here: process_batch records its own
        # failures per URL, so a batch error must not be blamed on this URL.
        try:
            image = download_image_as_pil(url)
        except Exception as e:
            results.append({"image_url": url, "error": str(e)})
            continue

        if not image:
            results.append({"image_url": url, "error": "Failed to download image"})
            continue

        batch_urls.append(url)
        batch_images.append(image)

        # Flush as soon as the batch is full
        if len(batch_images) >= max_batch:
            process_batch(batch_urls, batch_images, results, encoder)
            batch_urls, batch_images = [], []

    # Process remaining images in the last, partially filled batch
    if batch_images:
        process_batch(batch_urls, batch_images, results, encoder)
    return results
def process_batch(batch_urls, batch_images, results, encoder):
    """
    Encode a batch of images and append one result per URL to ``results``.

    Args:
        batch_urls (list): List of image URLs in the batch.
        batch_images (list): List of images in the batch, in the same order
            as ``batch_urls``.
        results (list): List that receives the result dictionaries; it is
            mutated in place.
        encoder: The encoder object; its ``encode_images(batch_images)`` is
            expected to yield one embedding per image.

    Each URL gets exactly one entry: either
    ``{"image_url", "embedding_preview", "success"}`` with the first five
    values of the L2-normalized embedding, or ``{"image_url", "error"}``.
    Exceptions are not propagated; a failure of the batch as a whole is
    reported as an error entry for every URL in it.
    """
    try:
        # Materialize the output so its length can be checked; zip() alone
        # would silently drop URLs if the encoder returned too few items.
        embeddings = list(encoder.encode_images(batch_images))
        if len(embeddings) != len(batch_urls):
            raise ValueError(
                f"Encoder returned {len(embeddings)} embeddings "
                f"for {len(batch_urls)} images"
            )

        # Collect locally so a failure halfway through cannot leave success
        # entries behind that the error entries would then duplicate.
        batch_results = []
        for url, embedding in zip(batch_urls, embeddings):
            norm = np.linalg.norm(embedding)
            if not np.isfinite(norm) or norm == 0:
                # Dividing by such a norm would only produce NaN/inf values
                batch_results.append({
                    "image_url": url,
                    "error": "Embedding has zero or non-finite norm; cannot normalize",
                })
                continue

            # Normalize embedding
            embedding_normalized = embedding / norm
            batch_results.append({
                "image_url": url,
                "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
                "success": True,
            })
    except Exception as e:
        batch_results = [{"image_url": url, "error": str(e)} for url in batch_urls]

    results.extend(batch_results)