import numpy as np

from download_images import download_image_as_pil


def batch_process_images(image_urls: str, encoder, batch_size: int):
    """Download images from a comma-separated URL string and embed them in batches.

    Each URL is downloaded with ``download_image_as_pil``. Successfully
    downloaded images are accumulated and handed to ``process_batch`` every
    time ``batch_size`` images have been collected; any leftover images are
    processed as a final, smaller batch.

    Args:
        image_urls (str): Comma-separated string of image URLs. Whitespace
            around each entry is stripped and empty entries are ignored.
        encoder: Object exposing ``encode_images(images)``; it is passed
            through to ``process_batch`` unchanged.
        batch_size (int): Number of downloaded images that triggers a batch.
            The flush happens only when the pending count equals this value,
            so a value below 1 never triggers and all images end up in the
            single final batch.

    Returns:
        list | dict: Normally a list with one dictionary per URL, either
        ``{"image_url", "embedding_preview", "success"}`` or
        ``{"image_url", "error"}``. Download failures are recorded
        immediately while embedding results are recorded when their batch is
        flushed, so the list is not guaranteed to follow input order.
        When the input contains no usable URL, a single dictionary
        ``{"error": "No valid image URLs provided."}`` is returned instead
        of a list (kept for backward compatibility with existing callers).
    """
    # Split the input string by commas and drop blank entries.
    urls = [url.strip() for url in image_urls.split(",") if url.strip()]

    if not urls:
        return {"error": "No valid image URLs provided."}

    results = []
    batch_urls, batch_images = [], []

    for url in urls:
        # Keep the try body limited to the download so that an error is
        # always attributed to the URL that actually caused it.
        try:
            image = download_image_as_pil(url)
        except Exception as e:
            results.append({"image_url": url, "error": str(e)})
            continue

        if not image:
            results.append({"image_url": url, "error": "Failed to download image"})
            continue

        batch_urls.append(url)
        batch_images.append(image)

        # Flush once the batch is full; process_batch reports its own errors.
        if len(batch_images) == batch_size:
            process_batch(batch_urls, batch_images, results, encoder)
            batch_urls, batch_images = [], []

    # Process whatever is left over in the last, partially filled batch.
    if batch_images:
        process_batch(batch_urls, batch_images, results, encoder)

    return results


def process_batch(batch_urls, batch_images, results, encoder):
    """Encode one batch of images and append exactly one result per URL.

    Args:
        batch_urls (list): URLs of the images in the batch, in the same order
            as ``batch_images``.
        batch_images (list): Downloaded images for the batch (presumably PIL
            images, as produced by ``download_image_as_pil``).
        results (list): Output list, mutated in place. One dictionary is
            appended per URL: ``{"image_url", "embedding_preview", "success"}``
            on success or ``{"image_url", "error"}`` on failure.
        encoder: Object exposing ``encode_images(images)`` that yields one
            embedding per input image.

    Returns:
        None. All output is delivered through ``results``.
    """
    try:
        # Materialise the output so its length can be validated up front.
        embeddings = list(encoder.encode_images(batch_images))
        if len(embeddings) != len(batch_urls):
            # zip() would silently drop the unmatched URLs; report instead.
            raise ValueError(
                f"Encoder returned {len(embeddings)} embeddings "
                f"for {len(batch_urls)} images"
            )
    except Exception as e:
        # Encoding failed as a whole: every URL in the batch gets the error.
        message = str(e)
        results.extend({"image_url": url, "error": message} for url in batch_urls)
        return

    for url, embedding in zip(batch_urls, embeddings):
        # Handle failures per item so that a bad embedding can neither
        # duplicate the records of items that already succeeded nor discard
        # the ones that follow.
        try:
            vector = np.asarray(embedding)
            norm = np.linalg.norm(vector)
            # An all-zero embedding has no direction; leave it untouched
            # rather than dividing by zero and emitting NaN values.
            normalized = vector if norm == 0 else vector / norm
            preview = normalized[:5].tolist()  # First 5 values for preview
        except Exception as e:
            results.append({"image_url": url, "error": str(e)})
            continue

        results.append({
            "image_url": url,
            "embedding_preview": preview,
            "success": True,
        })