Spaces:

Streetmarkets
/

openFashionClip

Running

0116945 3 days ago

6.34 kB

	import os
	import requests
	from PIL import Image, UnidentifiedImageError
	import numpy as np
	import gradio as gr
	from encoder import FashionCLIPEncoder
	from pinecone import Pinecone
	from dotenv import load_dotenv
	import json

	# Load environment variables before the os.getenv() lookups below run.
	# (The UI text further down tells users to keep these values in a .env file.)
	load_dotenv()

	# Constants
	# Pinecone connection settings; each is None when the variable is not set.
	PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
	PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
	PINECONE_NAMESPACE = os.getenv("PINECONE_NAMESPACE")
	# Headers sent with every image download; presents a desktop Chrome
	# User-Agent (presumably because some image hosts reject the default
	# python-requests agent - TODO confirm).
	REQUESTS_HEADERS = {
	'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
	}
	# Number of downloaded images accumulated before they are encoded together.
	BATCH_SIZE = 30

	# Initialize services
	# These objects are created at import time and shared by all requests.
	pc = Pinecone(api_key=PINECONE_API_KEY)
	index = pc.Index(PINECONE_INDEX_NAME)
	encoder = FashionCLIPEncoder()

	def download_image_as_pil(url: str, timeout: int = 10) -> "Image.Image | None":
	    """Download the image at *url* and return it as an RGB PIL image.

	    This is a best-effort helper: every failure is reported with ``print``
	    and signalled by returning ``None`` rather than by raising.

	    Args:
	        url: Address of the image to fetch.
	        timeout: Timeout in seconds handed to ``requests.get``.

	    Returns:
	        The decoded image converted to RGB, or ``None`` when the request
	        fails, the response is not a 200 with an image Content-Type, or the
	        payload cannot be identified as an image.
	    """
	    try:
	        # With stream=True the connection is only released once the body is
	        # consumed or the response is closed; the context manager guarantees
	        # that on every exit path.
	        with requests.get(
	            url, stream=True, headers=REQUESTS_HEADERS, timeout=timeout
	        ) as response:
	            content_type = response.headers.get('Content-Type', '')
	            if response.status_code != 200 or 'image' not in content_type:
	                print(
	                    f"Unexpected response from URL: {url} "
	                    f"(status {response.status_code}, Content-Type {content_type!r})"
	                )
	                return None

	            # response.raw bypasses requests' transparent decoding, so a
	            # gzip/deflate-encoded body must be decoded explicitly before
	            # PIL sees the bytes.
	            response.raw.decode_content = True
	            try:
	                # convert() forces the full decode while the stream is open.
	                return Image.open(response.raw).convert("RGB")
	            except UnidentifiedImageError:
	                print(f"Unidentified image file from URL: {url}")
	                return None
	    except Exception as e:
	        # Deliberately broad: callers treat None as "skip this product".
	        print(f"Error downloading image: {e}")
	        return None

	def process_batch(batch_products, batch_images, results):
	    """Embed one batch of images and append one result per product to *results*.

	    Args:
	        batch_products: Product dicts (each with "product_id" and "url"),
	            aligned index-for-index with ``batch_images``.
	        batch_images: Images handed to ``encoder.encode_images``.
	        results: List that is extended in place. A successful product gets a
	            dict with "product_id", "image_url", "embedding" (L2-normalised),
	            "embedding_preview" (first five values) and "success": True; a
	            failed product gets "product_id", "image_url" and "error".

	    Any exception raised while encoding marks every product of the batch as
	    failed. Results are staged locally first so a failure part-way through
	    never leaves both a success entry and an error entry for one product.
	    """
	    try:
	        embeddings = list(encoder.encode_images(batch_images))
	        # zip() would silently drop products if the encoder returned fewer rows.
	        if len(embeddings) != len(batch_products):
	            raise ValueError(
	                f"Encoder returned {len(embeddings)} embeddings "
	                f"for {len(batch_products)} images"
	            )

	        batch_results = []
	        for product, embedding in zip(batch_products, embeddings):
	            vector = np.asarray(embedding)
	            norm = np.linalg.norm(vector)

	            # Dividing by a zero/non-finite norm would yield NaN/inf values,
	            # which are neither valid JSON nor usable as a vector.
	            if not np.isfinite(norm) or norm == 0:
	                batch_results.append({
	                    "product_id": product["product_id"],
	                    "image_url": product["url"],
	                    "error": "Embedding has zero or non-finite norm"
	                })
	                continue

	            embedding_normalized = vector / norm
	            batch_results.append({
	                "product_id": product["product_id"],
	                "image_url": product["url"],
	                "embedding": embedding_normalized.tolist(),
	                "embedding_preview": embedding_normalized[:5].tolist(),
	                "success": True
	            })
	    except Exception as e:
	        # .get() keeps the error path itself from raising on a malformed product.
	        batch_results = [
	            {
	                "product_id": product.get("product_id"),
	                "image_url": product.get("url"),
	                "error": str(e)
	            }
	            for product in batch_products
	        ]

	    results.extend(batch_results)

	def batch_process_images(json_input: str):
	    """Generate embeddings for every product described in *json_input*.

	    Args:
	        json_input: JSON text of the form
	            ``{"products": [{"product_id": ..., "url": ...}, ...],
	            "upload_to_pinecone": bool}``.

	    Returns:
	        ``{"processing_results": [...]}`` with one entry per product, plus a
	        ``"pinecone_upload"`` entry when an upload was requested. Request-level
	        problems (invalid JSON, wrong top-level shape, no products) are
	        returned as ``{"error": message}``; this function does not raise.

	    Failed downloads are recorded immediately, while successful ones are
	    recorded when their batch is encoded, so result order can differ from
	    input order.
	    """
	    try:
	        # Parse JSON input
	        data = json.loads(json_input)
	        if not isinstance(data, dict):
	            return {"error": "JSON input must be an object with a \"products\" list."}

	        products = data.get("products", [])
	        upload_to_pinecone = data.get("upload_to_pinecone", False)

	        if not products:
	            return {"error": "No products provided in JSON input."}
	        if not isinstance(products, list):
	            return {"error": "\"products\" must be a list."}

	        results = []
	        batch_products, batch_images = [], []

	        for product in products:
	            # Validate up front so one malformed entry becomes a per-product
	            # error instead of aborting the whole request.
	            if (
	                not isinstance(product, dict)
	                or "product_id" not in product
	                or "url" not in product
	            ):
	                is_mapping = isinstance(product, dict)
	                results.append({
	                    "product_id": product.get("product_id") if is_mapping else None,
	                    "image_url": product.get("url") if is_mapping else None,
	                    "error": "Product must be an object with \"product_id\" and \"url\""
	                })
	                continue

	            try:
	                # Download image
	                image = download_image_as_pil(product["url"])
	                if image is None:
	                    results.append({
	                        "product_id": product["product_id"],
	                        "image_url": product["url"],
	                        "error": "Failed to download image"
	                    })
	                    continue

	                batch_products.append(product)
	                batch_images.append(image)

	                # Process batch when reaching batch size
	                if len(batch_images) == BATCH_SIZE:
	                    process_batch(batch_products, batch_images, results)
	                    batch_products, batch_images = [], []

	            except Exception as e:
	                results.append({
	                    "product_id": product["product_id"],
	                    "image_url": product["url"],
	                    "error": str(e)
	                })

	        # Process remaining images in the last batch
	        if batch_images:
	            process_batch(batch_products, batch_images, results)

	        # Upload to Pinecone if requested
	        if upload_to_pinecone:
	            try:
	                upload_result = upload_vector_to_pinecone(results)
	            except Exception as e:
	                # Keep the embeddings already computed even if the upload fails.
	                upload_result = {"error": f"Pinecone upload failed: {e}"}
	            return {
	                "processing_results": results,
	                "pinecone_upload": upload_result
	            }

	        return {"processing_results": results}

	    except json.JSONDecodeError:
	        return {"error": "Invalid JSON format"}
	    except Exception as e:
	        # Top-level boundary for the UI: report instead of crashing the app.
	        return {"error": f"Unexpected error: {str(e)}"}

	def upload_vector_to_pinecone(processed_results, batch_size: int = 100):
	    """Upload the successful embeddings in *processed_results* to Pinecone.

	    Args:
	        processed_results: Result dicts as produced by ``process_batch``.
	            Entries that contain an "error" key or lack an "embedding" key
	            are skipped.
	        batch_size: Maximum number of vectors sent per upsert request, so a
	            large run is not sent as one oversized request.

	    Returns:
	        ``{"uploaded_count": n}`` where ``n`` is the number of vectors sent.

	    Raises:
	        ValueError: If ``batch_size`` is smaller than 1.
	    """
	    if batch_size < 1:
	        raise ValueError("batch_size must be at least 1")

	    vectors_to_upsert = [
	        {
	            # Pinecone record ids are strings; coerce numeric product ids.
	            'id': str(result['product_id']),
	            'values': result['embedding'],
	            'metadata': {
	                'image_url': result['image_url']
	            }
	        }
	        for result in processed_results
	        if 'error' not in result and 'embedding' in result
	    ]

	    for start in range(0, len(vectors_to_upsert), batch_size):
	        index.upsert(
	            vectors=vectors_to_upsert[start:start + batch_size],
	            namespace=PINECONE_NAMESPACE
	        )

	    return {"uploaded_count": len(vectors_to_upsert)}

	# Example JSON input
	# Serialized with json.dumps below to pre-fill the input editor. It has the
	# two top-level keys batch_process_images reads: "products" (a list of
	# {"product_id", "url"} objects) and "upload_to_pinecone".
	EXAMPLE_INPUT = {
	"products": [
	{
	"product_id": "1",
	"url": "https://cdn.shopify.com/s/files/1/0522/2239/4534/files/CT21355-22_1024x1024.webp"
	}
	],
	"upload_to_pinecone": False
	}

	# Gradio Interface
	# A single JSON code editor feeds batch_process_images; whatever dict that
	# function returns is shown in the JSON output component.
	iface = gr.Interface(
	fn=batch_process_images,
	inputs=gr.Code(
	label="Input JSON",
	language="json",
	value=json.dumps(EXAMPLE_INPUT, indent=4) # pre-fill the editor with the example payload
	),
	outputs=gr.JSON(label="Processing Results"),
	title="Fashion CLIP Embedding Generator",
	description="Provide JSON input with product IDs, URLs, and Pinecone upload preference to generate embeddings.",
	# Markdown passed as the interface's `article` text: documents the input
	# schema, the features, and the required environment variables.
	article="""
	### Input JSON Format:
	```json
	{
	"products": [
	{
	"product_id": "string",
	"url": "string"
	}
	],
	"upload_to_pinecone": boolean
	}
	```

	### Features:
	- Batch processing of multiple images
	- Custom product ID support
	- Embedding generation using Fashion CLIP
	- Optional Pinecone database integration
	- Error handling and detailed results

	Make sure to set up your environment variables in a .env file:
	- PINECONE_API_KEY
	- PINECONE_INDEX_NAME
	- PINECONE_NAMESPACE
	"""
	)

	# Launch Gradio App
	# Only start the app when this file is executed as a script, not when it is
	# imported as a module.
	if __name__ == "__main__":
	    iface.launch()