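# openFashionClip / app.py
# NOTE(review): the three tokens below look like repository-page residue, not
# code — confirm and remove: "im", "ok", "0116945"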
import io
import json
import os
from typing import Optional

import gradio as gr
import numpy as np
import requests
from dotenv import load_dotenv
from PIL import Image, UnidentifiedImageError
from pinecone import Pinecone

from encoder import FashionCLIPEncoder
# Load environment variables
# Must run before the os.getenv lookups below so values loaded by dotenv are visible to them.
load_dotenv()
# Constants
# Pinecone connection settings; os.getenv yields None for any variable that is not set.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
PINECONE_NAMESPACE = os.getenv("PINECONE_NAMESPACE")
# Headers sent with every image download; the User-Agent is a desktop Chrome string.
REQUESTS_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
# Number of downloaded images collected before they are embedded together.
BATCH_SIZE = 30
# Initialize services
# These run at import time: the Pinecone client, a handle to the configured index,
# and the encoder used by process_batch.
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(PINECONE_INDEX_NAME)
encoder = FashionCLIPEncoder()
def download_image_as_pil(url: str, timeout: int = 10) -> Optional[Image.Image]:
    """Download an image over HTTP and return it as an RGB PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Timeout, in seconds, passed to ``requests.get``.

    Returns:
        The decoded image converted to RGB, or ``None`` when the request
        fails, the response is not a 200 with an image Content-Type, or the
        payload cannot be decoded as an image. Failures are printed rather
        than raised.
    """
    try:
        # Using the response as a context manager releases the streamed
        # connection on every exit path instead of leaking it.
        with requests.get(
            url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout
        ) as response:
            content_type = response.headers.get('Content-Type', '')
            if response.status_code != 200 or 'image' not in content_type:
                print(
                    f"Failed to download image from URL: {url} "
                    f"(status {response.status_code}, content type {content_type!r})"
                )
                return None
            # response.content applies Content-Encoding decoding (gzip/deflate);
            # reading response.raw directly would hand PIL the still-compressed bytes.
            payload = response.content
        try:
            return Image.open(io.BytesIO(payload)).convert("RGB")
        except UnidentifiedImageError:
            print(f"Unidentified image file from URL: {url}")
            return None
    except Exception as e:
        # Best-effort helper: network and decoding errors are reported, not raised.
        print(f"Error downloading image: {e}")
        return None
def process_batch(batch_products, batch_images, results):
    """Embed one batch of images and append exactly one entry per product.

    Args:
        batch_products: Product dicts (read via "product_id" and "url"),
            paired position-by-position with ``batch_images``.
        batch_images: Images passed to ``encoder.encode_images``.
            NOTE(review): assumes the encoder yields one embedding per image,
            in the same order — confirm against the encoder module.
        results: List extended in place. Successful entries carry the
            L2-normalized "embedding", a five-value "embedding_preview" and
            ``"success": True``; failed entries carry an "error" message.

    Returns:
        None. All output goes into ``results``.
    """
    try:
        # Generate embeddings
        embeddings = encoder.encode_images(batch_images)
        # Entries are staged and only published once the whole batch has been
        # handled, so a failure midway cannot leave a product with both a
        # success entry and an error entry.
        staged = []
        for product, embedding in zip(batch_products, embeddings):
            # asarray lets the division work even if the encoder returns plain lists.
            vector = np.asarray(embedding)
            norm = np.linalg.norm(vector)
            if not np.isfinite(norm) or norm == 0:
                # Dividing by such a norm would produce NaN/inf values.
                staged.append({
                    "product_id": product["product_id"],
                    "image_url": product["url"],
                    "error": "Embedding has zero or non-finite norm",
                })
                continue
            # Normalize embedding
            embedding_normalized = vector / norm
            staged.append({
                "product_id": product["product_id"],
                "image_url": product["url"],
                "embedding": embedding_normalized.tolist(),
                "embedding_preview": embedding_normalized[:5].tolist(),
                "success": True,
            })
    except Exception as e:
        # The batch failed as a whole: report the same error for every product.
        # .get keeps this handler from raising on a malformed product dict.
        staged = [
            {
                "product_id": product.get("product_id"),
                "image_url": product.get("url"),
                "error": str(e),
            }
            for product in batch_products
        ]
    results.extend(staged)
def batch_process_images(json_input: str):
    """Generate embeddings for every product listed in a JSON request.

    Args:
        json_input: JSON text shaped like
            ``{"products": [{"product_id": ..., "url": ...}],
            "upload_to_pinecone": bool}``.

    Returns:
        ``{"processing_results": [...]}`` with one entry per product, plus a
        ``"pinecone_upload"`` entry when an upload was requested; or
        ``{"error": message}`` when the request as a whole cannot be handled.
        Errors are reported in the returned dict rather than raised.
    """
    try:
        # Parse JSON input
        data = json.loads(json_input)
        if not isinstance(data, dict):
            return {"error": "JSON input must be an object."}
        products = data.get("products", [])
        upload_to_pinecone = data.get("upload_to_pinecone", False)
        if not products:
            return {"error": "No products provided in JSON input."}
        if not isinstance(products, list):
            return {"error": '"products" must be a list.'}

        results = []
        batch_products, batch_images = [], []
        for product in products:
            # Read the fields defensively so one malformed product yields an
            # error entry instead of aborting the whole request.
            is_mapping = isinstance(product, dict)
            product_id = product.get("product_id") if is_mapping else None
            url = product.get("url") if is_mapping else None
            if product_id is None or not url:
                results.append({
                    "product_id": product_id,
                    "image_url": url,
                    "error": 'Each product must be an object with "product_id" and "url"',
                })
                continue
            try:
                # Download image
                image = download_image_as_pil(url)
                if image is None:
                    results.append({
                        "product_id": product_id,
                        "image_url": url,
                        "error": "Failed to download image",
                    })
                    continue
                batch_products.append(product)
                batch_images.append(image)
                # Process batch when reaching batch size
                if len(batch_images) >= BATCH_SIZE:
                    process_batch(batch_products, batch_images, results)
                    batch_products, batch_images = [], []
            except Exception as e:
                results.append({
                    "product_id": product_id,
                    "image_url": url,
                    "error": str(e),
                })

        # Process remaining images in the last batch
        if batch_images:
            process_batch(batch_products, batch_images, results)

        # Upload to Pinecone if requested
        if upload_to_pinecone:
            try:
                upload_result = upload_vector_to_pinecone(results)
            except Exception as e:
                # Keep the embeddings already computed even if the upload fails.
                upload_result = {"error": f"Pinecone upload failed: {e}"}
            return {
                "processing_results": results,
                "pinecone_upload": upload_result,
            }
        return {"processing_results": results}
    except json.JSONDecodeError:
        return {"error": "Invalid JSON format"}
    except Exception as e:
        return {"error": f"Unexpected error: {str(e)}"}
def upload_vector_to_pinecone(processed_results, batch_size: int = 100):
    """Upsert every successfully embedded result into the Pinecone index.

    Args:
        processed_results: Result dicts; only entries that have an
            "embedding" and no "error" are uploaded.
        batch_size: Maximum number of vectors sent per upsert request.

    Returns:
        ``{"uploaded_count": n}`` where ``n`` is the number of vectors sent.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        Exception: Whatever ``index.upsert`` raises is propagated unchanged.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    vectors_to_upsert = [
        {
            # Pinecone record IDs are strings; IDs parsed from JSON may be numbers.
            'id': str(result['product_id']),
            'values': result['embedding'],
            'metadata': {
                'image_url': result['image_url']
            },
        }
        for result in processed_results
        if 'error' not in result and 'embedding' in result
    ]

    # Send in bounded chunks so a large run does not become one oversized request.
    for start in range(0, len(vectors_to_upsert), batch_size):
        index.upsert(
            vectors=vectors_to_upsert[start:start + batch_size],
            namespace=PINECONE_NAMESPACE,
        )
    return {"uploaded_count": len(vectors_to_upsert)}
# Example JSON input
# Sample request in the shape batch_process_images reads: a "products" list of
# {"product_id", "url"} objects plus the "upload_to_pinecone" flag. It is
# serialized below to pre-fill the Gradio input editor.
EXAMPLE_INPUT = {
    "products": [
        {
            "product_id": "1",
            "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp"
        }
    ],
    "upload_to_pinecone": False
}
# Gradio Interface
# Wires batch_process_images to a JSON code editor (input) and a JSON viewer (output).
iface = gr.Interface(
    fn=batch_process_images,
    inputs=gr.Code(
        label="Input JSON",
        language="json",
        value=json.dumps(EXAMPLE_INPUT, indent=4)  # Pre-fill the editor with the example request
    ),
    outputs=gr.JSON(label="Processing Results"),
    title="Fashion CLIP Embedding Generator",
    description="Provide JSON input with product IDs, URLs, and Pinecone upload preference to generate embeddings.",
    # Markdown help text passed as the interface's "article"; its lines start at
    # column 0 because any leading whitespace would become part of the string.
    article="""
### Input JSON Format:
```json
{
"products": [
{
"product_id": "string",
"url": "string"
}
],
"upload_to_pinecone": boolean
}
```
### Features:
- Batch processing of multiple images
- Custom product ID support
- Embedding generation using Fashion CLIP
- Optional Pinecone database integration
- Error handling and detailed results
Make sure to set up your environment variables in a .env file:
- PINECONE_API_KEY
- PINECONE_INDEX_NAME
- PINECONE_NAMESPACE
"""
)
# Launch Gradio App
# Only launch when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    iface.launch()