Spaces:

Streetmarkets
/

openFashionClip

Running

File size: 6,335 Bytes

import io
import json
import os
from typing import Optional

import gradio as gr
import numpy as np
import requests
from dotenv import load_dotenv
from PIL import Image, UnidentifiedImageError
from pinecone import Pinecone

from encoder import FashionCLIPEncoder

# Load environment variables (python-dotenv) before any setting is read.
load_dotenv()

# Pinecone connection settings; each is None when its variable is unset.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
PINECONE_INDEX_NAME = os.environ.get("PINECONE_INDEX_NAME")
PINECONE_NAMESPACE = os.environ.get("PINECONE_NAMESPACE")

# Browser-like User-Agent header sent with every image download request.
REQUESTS_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.124 Safari/537.36'
    ),
}

# Number of images handed to the encoder in a single call.
BATCH_SIZE = 30

# Shared service handles used by the functions in this module.
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(PINECONE_INDEX_NAME)
encoder = FashionCLIPEncoder()

def download_image_as_pil(url: str, timeout: int = 10) -> Optional[Image.Image]:
    """Download the image at *url* and return it as an RGB PIL image.

    Args:
        url: Address of the image to fetch.
        timeout: Timeout, in seconds, passed to ``requests.get``.

    Returns:
        The decoded image converted to RGB, or ``None`` when the request
        fails, the response is not a 200 with an image Content-Type, or the
        payload cannot be decoded as an image. Failures are printed rather
        than raised, so callers only need to check for ``None``.
    """
    try:
        # The context manager releases the streamed connection on every path,
        # including the early return for non-image responses (whose body is
        # never downloaded thanks to stream=True).
        with requests.get(
            url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout
        ) as response:
            content_type = response.headers.get('Content-Type', '')
            if response.status_code != 200 or 'image' not in content_type:
                return None
            # response.content applies Content-Encoding (gzip/deflate)
            # decoding; response.raw would hand Pillow the undecoded bytes.
            payload = response.content

        try:
            return Image.open(io.BytesIO(payload)).convert("RGB")
        except UnidentifiedImageError:
            print(f"Unidentified image file from URL: {url}")
            return None
    except Exception as e:
        # Deliberate best-effort: any network or decoding problem is reported
        # and turned into None so one bad URL cannot abort a whole batch.
        print(f"Error downloading image: {e}")
        return None

def process_batch(batch_products, batch_images, results):
    """Encode one batch of images and append one record per product to *results*.

    Args:
        batch_products: Product dicts aligned with *batch_images*; their
            ``product_id`` and ``url`` entries are copied into the records.
        batch_images: Images passed to ``encoder.encode_images``.
        results: List mutated in place. A success record carries
            ``product_id``, ``image_url``, the normalized ``embedding``, a
            five-value ``embedding_preview`` and ``success: True``. A failure
            record carries ``product_id``, ``image_url`` and ``error``.

    Returns:
        None. Failures are reported as records, never raised.
    """
    if not batch_products:
        # Nothing to encode; avoid calling the model with an empty batch.
        return

    try:
        embeddings = encoder.encode_images(batch_images)
    except Exception as e:
        # Encoding failed for the batch as a whole: every product gets the
        # same error, and nothing has been appended for this batch yet.
        results.extend(
            {
                "product_id": product.get("product_id"),
                "image_url": product.get("url"),
                "error": str(e),
            }
            for product in batch_products
        )
        return

    # NOTE(review): zip stops at the shorter sequence; this assumes
    # encode_images returns one embedding per input image - confirm.
    for product, embedding in zip(batch_products, embeddings):
        # Failures are handled per product so that an error on one embedding
        # never produces a second, contradictory record for products that
        # were already appended as successes.
        try:
            vector = np.asarray(embedding)
            norm = np.linalg.norm(vector)
            if not np.isfinite(norm) or norm == 0:
                # Dividing would yield NaN/inf values that cannot be stored.
                raise ValueError("Embedding has zero or non-finite norm")
            embedding_normalized = vector / norm
            record = {
                "product_id": product["product_id"],
                "image_url": product["url"],
                "embedding": embedding_normalized.tolist(),
                "embedding_preview": embedding_normalized[:5].tolist(),
                "success": True,
            }
        except Exception as e:
            record = {
                "product_id": product.get("product_id"),
                "image_url": product.get("url"),
                "error": str(e),
            }
        results.append(record)

def batch_process_images(json_input: str):
    """Generate embeddings for every product listed in a JSON request.

    The request is a JSON object with a ``products`` list (each entry an
    object with ``product_id`` and ``url``) and an optional
    ``upload_to_pinecone`` flag.

    Args:
        json_input: The request as a JSON string.

    Returns:
        ``{"processing_results": [...]}`` with one record per product, plus a
        ``pinecone_upload`` entry when upload was requested. If the upload
        itself fails, the processing results are still returned and
        ``pinecone_upload`` holds ``{"error": ...}``. A request that cannot
        be processed at all yields ``{"error": <message>}``.
    """
    try:
        data = json.loads(json_input)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers a non-string input such as None.
        return {"error": "Invalid JSON format"}

    if not isinstance(data, dict):
        return {"error": "JSON input must be an object with a 'products' list."}

    products = data.get("products", [])
    upload_to_pinecone = data.get("upload_to_pinecone", False)

    if not products:
        return {"error": "No products provided in JSON input."}
    if not isinstance(products, list):
        return {"error": "'products' must be a list."}

    try:
        results = []
        batch_products, batch_images = [], []

        for product in products:
            # Validate up front so a malformed entry becomes an error record
            # instead of raising and discarding every other result.
            if (
                not isinstance(product, dict)
                or "product_id" not in product
                or not product.get("url")
            ):
                is_mapping = isinstance(product, dict)
                results.append({
                    "product_id": product.get("product_id") if is_mapping else None,
                    "image_url": product.get("url") if is_mapping else None,
                    "error": "Each product must be an object with 'product_id' and 'url'",
                })
                continue

            image = download_image_as_pil(product["url"])
            if image is None:
                results.append({
                    "product_id": product["product_id"],
                    "image_url": product["url"],
                    "error": "Failed to download image"
                })
                continue

            batch_products.append(product)
            batch_images.append(image)

            # Flush as soon as a full batch has been collected.
            if len(batch_images) >= BATCH_SIZE:
                process_batch(batch_products, batch_images, results)
                batch_products, batch_images = [], []

        # Process whatever is left in the final, partial batch.
        if batch_images:
            process_batch(batch_products, batch_images, results)

        if upload_to_pinecone:
            try:
                upload_result = upload_vector_to_pinecone(results)
            except Exception as e:
                # Keep the computed embeddings even when the upload fails.
                upload_result = {"error": str(e)}
            return {
                "processing_results": results,
                "pinecone_upload": upload_result
            }

        return {"processing_results": results}

    except Exception as e:
        return {"error": f"Unexpected error: {str(e)}"}

def upload_vector_to_pinecone(processed_results, batch_size: int = 100):
    """Upsert every successful embedding record into the Pinecone index.

    Args:
        processed_results: Records as produced by the processing step. Only
            records that have an ``embedding`` and no ``error`` are uploaded.
        batch_size: Maximum number of vectors sent per upsert request.

    Returns:
        ``{"uploaded_count": n}`` where *n* is the number of vectors sent.

    Raises:
        ValueError: If *batch_size* is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    vectors_to_upsert = [
        {
            # Pinecone record ids are strings; ids parsed from JSON may be
            # numbers, so coerce them here.
            'id': str(result['product_id']),
            'values': result['embedding'],
            'metadata': {
                'image_url': result['image_url']
            }
        }
        for result in processed_results
        if 'error' not in result and 'embedding' in result
    ]

    # Send the vectors in chunks so the size of a single request stays
    # bounded no matter how many products were processed.
    for start in range(0, len(vectors_to_upsert), batch_size):
        index.upsert(
            vectors=vectors_to_upsert[start:start + batch_size],
            namespace=PINECONE_NAMESPACE,
        )

    return {"uploaded_count": len(vectors_to_upsert)}

# Sample request shown pre-filled in the input editor.
_example_product = {
    "product_id": "1",
    "url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp",
}
EXAMPLE_INPUT = {
    "products": [_example_product],
    "upload_to_pinecone": False,
}

# Help text displayed with the interface (passed as the `article` argument).
_ARTICLE_TEXT = """
    ### Input JSON Format:
    ```json
    {
        "products": [
            {
                "product_id": "string",
                "url": "string"
            }
        ],
        "upload_to_pinecone": boolean
    }
    ```
    
    ### Features:
    - Batch processing of multiple images
    - Custom product ID support
    - Embedding generation using Fashion CLIP
    - Optional Pinecone database integration
    - Error handling and detailed results
    
    Make sure to set up your environment variables in a .env file:
    - PINECONE_API_KEY
    - PINECONE_INDEX_NAME
    - PINECONE_NAMESPACE
    """

# Input component: a JSON code editor seeded with the example request.
_json_editor = gr.Code(
    value=json.dumps(EXAMPLE_INPUT, indent=4),
    language="json",
    label="Input JSON",
)

# Output component: a JSON viewer for the dict returned by the handler.
_results_view = gr.JSON(label="Processing Results")

# Gradio interface wiring the handler to the components above.
iface = gr.Interface(
    fn=batch_process_images,
    inputs=_json_editor,
    outputs=_results_view,
    title="Fashion CLIP Embedding Generator",
    description="Provide JSON input with product IDs, URLs, and Pinecone upload preference to generate embeddings.",
    article=_ARTICLE_TEXT,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()