import clip
import cv2
import degirum as dg
import numpy as np
import streamlit as st
import torch
import torch.nn.functional as F
from PIL import Image


# Prompt list remembered from earlier processing. The script compares it with
# the current prompts to decide whether the text embeddings must be
# recomputed. None means no prompt list has been recorded yet.
prev_prompt = None


def cosine_similarity(a, b):
    """Return the cosine similarity between two vectors.

    Args:
        a: First vector (anything ``np.dot`` and ``np.linalg.norm`` accept).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``. When either vector has zero
        norm the similarity is undefined; ``0.0`` is returned instead of
        NaN so downstream scoring is not poisoned.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # Avoid the 0/0 division (NaN plus a RuntimeWarning).
        return 0.0
    return np.dot(a, b) / denominator


def compute_text_embeddings(text_prompts):
    """Encode text prompts with the CLIP "RN50" text encoder.

    Args:
        text_prompts: Iterable of prompt strings.

    Returns:
        A list with one entry per prompt, in input order. Each entry is a
        nested Python list holding a single row: the encoder output for that
        prompt. An empty input yields an empty list without loading the model.
    """
    text_prompts = list(text_prompts)
    if not text_prompts:
        # Nothing to encode; skip the expensive model load entirely.
        return []

    device = "cuda" if torch.cuda.is_available() else "cpu"
    clip_model, _ = clip.load("RN50", device=device)

    # Tokenize and encode all prompts as one batch; inference only, so no
    # autograd graph is needed.
    tokens = clip.tokenize(text_prompts).to(device)
    with torch.no_grad():
        features = clip_model.encode_text(tokens)

    # Keep the per-prompt single-row nesting ([[...]]) callers already expect.
    return [[row] for row in features.cpu().numpy().tolist()]


# NOTE(review): the original indentation of this script section was lost; the
# nesting below is a reconstruction -- confirm the intended layout.

# Connect to the DeGirum cloud model zoo; the token is read from Streamlit secrets.
zoo = dg.connect(
    dg.CLOUD,
    zoo_url='https://cs.degirum.com/degirum/kvk_upload_test',
    token=st.secrets["DG_TOKEN"],
)

st.title('DeGirum CLIP model Demo')

with st.sidebar:
    st.header('Specify Model Options Below')
    prompts = st.text_area(
        "Enter text prompts (comma-separated):",
        value="People Running, People sitting, People swimming, People sleeping, People watching television",
    )
    # Drop blank entries (e.g. from a trailing comma) so they are not scored.
    prompts = [prompt.strip() for prompt in prompts.split(',') if prompt.strip()]

st.text('Upload an image. Then click on the submit button')

with st.form("model_form"):
    uploaded_file = st.file_uploader('input image')
    submitted = st.form_submit_button("Submit")

    if submitted:
        if uploaded_file is None:
            # Image.open(None) would crash; ask for an upload instead.
            st.warning("Please upload an image before submitting.")
        elif not prompts:
            st.warning("Please enter at least one text prompt.")
        else:
            # Streamlit re-executes this script on every interaction, so the
            # embeddings live in session state and are only recomputed when
            # the prompt list changes.
            prev_prompt = st.session_state.get("prev_prompt")
            if prev_prompt != prompts or "text_embeddings" not in st.session_state:
                st.session_state["text_embeddings"] = compute_text_embeddings(prompts)
                st.session_state["prev_prompt"] = prompts
                prev_prompt = prompts
            embeddings = st.session_state["text_embeddings"]

            model = zoo.load_model(
                'clip--224x224_float_openvino_cpu_4',
                input_image_format="RAW",
            )

            image = Image.open(uploaded_file)
            # Force 3-channel RGB so grayscale, palette and RGBA uploads
            # convert cleanly to the BGR array handed to the model.
            rgb_array = np.array(image.convert("RGB"))
            opencv_image = cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)

            predictions = model(opencv_image).results[0]["data"]
            image_embedding = predictions.reshape(-1)

            similarities = [
                cosine_similarity(image_embedding, np.array(embedding).reshape(-1))
                for embedding in embeddings
            ]
            # NOTE(review): softmax is applied to unscaled cosine similarities;
            # CLIP's reference zero-shot example multiplies them by 100 first.
            # Confirm whether a scale factor is intended here.
            softmax_scores = F.softmax(
                torch.tensor(similarities, dtype=torch.float32), dim=0
            )

            st.image(image, caption="Uploaded Image", use_column_width=True)
            for prompt, score in zip(prompts, softmax_scores):
                st.write(f"{prompt} - {score*100:.2f}%")