# Spaces: Running (page-scrape header, not part of the program)
import os | |
import clip | |
import torch | |
import logging | |
import json | |
import pandas as pd | |
from PIL import Image | |
import gradio as gr | |
from autogluon.tabular import TabularPredictor | |
# Load the AutoGluon TabularPredictor saved under "ag-20240618_230402".
# This runs once at import time; predict_fn reuses the loaded predictor.
predictor = TabularPredictor.load("ag-20240618_230402")
# Configure logging at import time: INFO level, with each record prefixed by
# timestamp, logger name and level.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
# Application logger; "AQ" is what appears in the %(name)s field.
logger = logging.getLogger("AQ")
# Name of the CLIP checkpoint used to embed images.
CLIP_MODEL_NAME = "ViT-B/32"
# clip.load returns the model together with its matching image preprocessing
# transform; both are loaded on the CPU and reused by predict_fn.
clip_model, preprocess = clip.load(CLIP_MODEL_NAME, device="cpu")
def predict_fn(input_img):
    """Score an avatar image and return the result as UTF-8 encoded JSON.

    The image is embedded with the CLIP image encoder, the embedding is
    wrapped in a single-row DataFrame, and the tabular predictor's first
    prediction is reported as the quality score.

    Args:
        input_img: Pixel array exposing ``astype`` (as delivered by the
            Gradio "image" input); it is cast to uint8 and read as RGB.

    Returns:
        bytes: ``{"quality_score": <float>}`` serialized as JSON and encoded
        as UTF-8.

    Raises:
        gr.Error: If no image was supplied (``input_img`` is None).
    """
    if input_img is None:
        # Without this guard the failure is a bare AttributeError on
        # `.astype`; gr.Error carries a readable message instead.
        raise gr.Error("Please provide an image to score.")

    pil_image = Image.fromarray(input_img.astype("uint8"), "RGB")
    # Add a leading batch dimension of size 1 for the encoder.
    image = preprocess(pil_image).unsqueeze(0)
    with torch.no_grad():  # inference only, no gradients needed
        image_features = clip_model.encode_image(image).numpy()

    # Single-row frame: one column per embedding component.
    input_df = pd.DataFrame(image_features[0].reshape(1, -1))
    quality_score = float(predictor.predict(input_df).iloc[0])
    logger.info("decision: %s", quality_score)

    # NOTE(review): the result is returned as bytes while the interface's
    # output component is "text", so it is presumably rendered as b'...';
    # confirm whether a str was intended before changing the return type.
    decision_json = json.dumps({"quality_score": quality_score}).encode("utf-8")
    logger.info("decision_json: %s", decision_json)
    return decision_json
# Gradio UI: one image input, one text output showing predict_fn's result.
iface = gr.Interface(
    fn=predict_fn,
    inputs="image",
    outputs="text",
    description="""
The model returns a quality score for an avatar based on visual appeal and humanoid appearance.
""",
    allow_flagging="manual",
)
# Start the web server for the interface.
iface.launch()