openFashionClip / process.py
im
divid
e9eeafb
raw
history blame
2.67 kB
import numpy as np
from download_images import download_image_as_pil
def batch_process_images(image_urls: str, encoder, batch_size: int):
    """
    Download every image URL, group the images into batches and encode them.

    Args:
        image_urls (str): Comma-separated string of image URLs. Surrounding
            whitespace is stripped and blank items are ignored.
        encoder: The encoder object handed to ``process_batch`` to generate
            embeddings.
        batch_size (int): The maximum number of images passed to
            ``process_batch`` in one call. Values below 1 (or ``None``) are
            treated as 1 so the limit can never be silently disabled.

    Returns:
        list: One dictionary per URL, holding either the entry written by
            ``process_batch`` or ``{"image_url": ..., "error": ...}`` when
            the download failed. Download failures are recorded immediately
            while successful downloads are recorded when their batch is
            flushed, so entries are not guaranteed to follow input order.
        dict: ``{"error": "No valid image URLs provided."}`` when the input
            contains no non-blank URL.
    """
    # Split the input string by commas and strip whitespace
    urls = [url.strip() for url in image_urls.split(",") if url.strip()]
    if not urls:
        return {"error": "No valid image URLs provided."}

    # A non-positive limit would never match an exact-size test and would
    # make the whole input accumulate in memory as one batch; clamp it.
    max_batch = max(batch_size or 1, 1)

    results = []
    batch_urls, batch_images = [], []
    for url in urls:
        # Only the download (and its validity check) is guarded, so a failure
        # is always attributed to the URL that actually caused it.
        try:
            image = download_image_as_pil(url)
            if not image:
                results.append({"image_url": url, "error": "Failed to download image"})
                continue
        except Exception as e:
            results.append({"image_url": url, "error": str(e)})
            continue

        batch_urls.append(url)
        batch_images.append(image)

        # Flush as soon as the batch is full. ">=" (rather than "==") keeps
        # the batch bounded even if it ever grew past the limit.
        if len(batch_images) >= max_batch:
            process_batch(batch_urls, batch_images, results, encoder)
            batch_urls, batch_images = [], []

    # Process remaining images in the last, possibly partial, batch
    if batch_images:
        process_batch(batch_urls, batch_images, results, encoder)
    return results
def process_batch(batch_urls, batch_images, results, encoder):
    """
    Encode one batch of images and append exactly one entry per URL to ``results``.

    Args:
        batch_urls (list): List of image URLs in the batch.
        batch_images (list): List of images in the batch, in the same order
            as ``batch_urls``; passed unchanged to ``encoder.encode_images``.
        results (list): List that receives the entries; it is mutated in place.
        encoder: The encoder object used to generate embeddings. It must
            expose ``encode_images(images)`` returning one embedding per image.

    Each appended entry is either
    ``{"image_url", "embedding_preview", "success": True}``, where the preview
    holds the first five values of the L2-normalized embedding, or
    ``{"image_url", "error"}``. If encoding the batch fails as a whole, every
    URL in the batch gets an error entry and no success entry.
    """
    try:
        # Generate embeddings; materialize so the count can be validated
        embeddings = list(encoder.encode_images(batch_images))
        if len(embeddings) != len(batch_urls):
            # zip() would silently drop the unmatched URLs otherwise
            raise ValueError(
                f"Encoder returned {len(embeddings)} embeddings "
                f"for {len(batch_urls)} images"
            )

        # Entries are collected locally and only published once the whole
        # batch succeeded, so a failure part-way through cannot leave a URL
        # with both a success entry and an error entry.
        batch_results = []
        for url, embedding in zip(batch_urls, embeddings):
            vector = np.asarray(embedding)
            norm = np.linalg.norm(vector)
            if not np.isfinite(norm) or norm == 0:
                # Dividing would produce NaN/inf values reported as a success
                batch_results.append({
                    "image_url": url,
                    "error": "Embedding has zero or non-finite norm; cannot normalize",
                })
                continue
            # Normalize embedding
            embedding_normalized = vector / norm
            batch_results.append({
                "image_url": url,
                "embedding_preview": embedding_normalized[:5].tolist(),  # First 5 values for preview
                "success": True,
            })
    except Exception as e:
        batch_results = [{"image_url": url, "error": str(e)} for url in batch_urls]
    results.extend(batch_results)