# Spaces: Running
from __future__ import annotations | |
import functools | |
import io | |
import urllib | |
from typing import Tuple, List, Any | |
import huggingface_hub | |
import onnxruntime as rt | |
import pandas as pd | |
import numpy as np | |
import PIL.Image | |
import requests | |
import dbimutils | |
import piexif | |
import piexif.helper | |
from urllib.request import urlopen | |
import model | |
# Token handed to hf_hub_download() as ``use_auth_token``; left empty here.
HF_TOKEN = ""

# Hub repositories of the tagger models that change_model() can select.
SWIN_MODEL_REPO = "SmilingWolf/wd-v1-4-swinv2-tagger-v2"
CONV_MODEL_REPO = "SmilingWolf/wd-v1-4-convnext-tagger-v2"
CONV2_MODEL_REPO = "SmilingWolf/wd-v1-4-convnextv2-tagger-v2"
VIT_MODEL_REPO = "SmilingWolf/wd-v1-4-vit-tagger-v2"

# File names downloaded from those repositories.
MODEL_FILENAME = "model.onnx"
LABEL_FILENAME = "selected_tags.csv"

# Loaded models keyed by model name; ``None`` means "not loaded yet".
# Defined at module level so change_model() and predict() can be called
# without hitting a NameError when label_img() has not run first.
loaded_models = {"SwinV2": None, "ConvNext": None, "ConvNextV2": None, "ViT": None}
def change_model(model_name):
    """Load the tagger model called *model_name* and store it in the cache.

    Args:
        model_name: One of ``"SwinV2"``, ``"ConvNext"``, ``"ConvNextV2"`` or
            ``"ViT"``.

    Returns:
        The object returned by ``load_model`` for the matching repository.
        The same object is stored in ``loaded_models[model_name]``.

    Raises:
        ValueError: If *model_name* is not one of the supported names.
    """
    if model_name == "SwinV2":
        repo = SWIN_MODEL_REPO
    elif model_name == "ConvNext":
        repo = CONV_MODEL_REPO
    elif model_name == "ConvNextV2":
        repo = CONV2_MODEL_REPO
    elif model_name == "ViT":
        repo = VIT_MODEL_REPO
    else:
        # An unknown name used to fall through the chain and crash with an
        # UnboundLocalError; fail with a message that names the bad input.
        raise ValueError(f"Unknown model name: {model_name!r}")

    # ``session`` rather than ``model`` so the imported ``model`` module is
    # not shadowed inside this function.
    session = load_model(repo, MODEL_FILENAME)
    loaded_models[model_name] = session
    return session
def load_model(model_repo: str, model_filename: str) -> rt.InferenceSession:
    """Download *model_filename* from *model_repo* and open it with ONNX Runtime.

    Args:
        model_repo: Hub repository id passed to ``hf_hub_download``.
        model_filename: Name of the file to fetch from that repository.

    Returns:
        An ``rt.InferenceSession`` created from the downloaded file path.
    """
    local_file = huggingface_hub.hf_hub_download(
        model_repo,
        model_filename,
        use_auth_token=HF_TOKEN,
    )
    return rt.InferenceSession(local_file)
def load_labels() -> tuple[list[Any], list[Any], list[Any], list[Any]]:
    """Download the label CSV and split its rows by tag category.

    The file ``LABEL_FILENAME`` is fetched from ``CONV2_MODEL_REPO`` and read
    with pandas; it must provide ``name`` and ``category`` columns.

    Returns:
        A 4-tuple ``(tag_names, rating_indexes, general_indexes,
        character_indexes)``: every value of the ``name`` column, followed by
        the row positions whose ``category`` equals 9, 0 and 4 respectively.
    """
    csv_path = huggingface_hub.hf_hub_download(
        CONV2_MODEL_REPO, LABEL_FILENAME, use_auth_token=HF_TOKEN
    )
    table = pd.read_csv(csv_path)
    names = table["name"].tolist()
    categories = table["category"]

    def rows_in(category):
        # Row positions whose "category" value equals *category*.
        return list(np.where(categories == category)[0])

    return names, rows_in(9), rows_in(0), rows_in(4)
def _image_to_model_input(image: PIL.Image.Image, size: int) -> np.ndarray:
    """Turn a PIL image into the float32 batch-of-one array fed to the model."""
    # Alpha to white: composite the image onto an opaque white canvas.
    rgba = image.convert("RGBA")
    canvas = PIL.Image.new("RGBA", rgba.size, "WHITE")
    canvas.paste(rgba, mask=rgba)
    pixels = np.asarray(canvas.convert("RGB"))
    # PIL RGB to OpenCV BGR
    pixels = pixels[:, :, ::-1]
    pixels = dbimutils.make_square(pixels, size)
    pixels = dbimutils.smart_resize(pixels, size)
    return np.expand_dims(pixels.astype(np.float32), 0)


def _select_tags(labels, indexes, threshold) -> dict:
    """Map tag name to score for the entries at *indexes* scoring above *threshold*."""
    picked = (labels[i] for i in indexes)
    return {tag: score for tag, score in picked if score > threshold}


def _read_image_metadata(rawimage) -> tuple[dict, Any]:
    """Return a pruned copy of ``rawimage.info`` and the generation-info value."""
    # Work on a copy: the metadata dict belongs to the caller's image and used
    # to be modified in place (entries popped, "exif comment" added).
    items = dict(rawimage.info)
    geninfo = ""
    if "exif" in items:
        exif = piexif.load(items["exif"])
        exif_comment = (exif or {}).get("Exif", {}).get(piexif.ExifIFD.UserComment, b"")
        try:
            exif_comment = piexif.helper.UserComment.load(exif_comment)
        except ValueError:
            # Not a well-formed UserComment payload; decode the bytes leniently.
            exif_comment = exif_comment.decode("utf8", errors="ignore")
        items["exif comment"] = exif_comment
        geninfo = exif_comment
        # NOTE(review): the reviewed source had lost its indentation, so it is
        # unclear whether this pruning ran only for images with EXIF data or
        # for every image - confirm against the deployed file.
        for field in [
            "jfif",
            "jfif_version",
            "jfif_unit",
            "jfif_density",
            "dpi",
            "exif",
            "loop",
            "background",
            "timestamp",
            "duration",
        ]:
            items.pop(field, None)
    # A "parameters" entry, when present, takes precedence over the EXIF comment.
    geninfo = items.get("parameters", geninfo)
    return items, geninfo


def predict(
    image: PIL.Image.Image | str,
    model_name: str,
    general_threshold: float,
    character_threshold: float,
    tag_names: list[str],
    rating_indexes: list[np.int64],
    general_indexes: list[np.int64],
    character_indexes: list[np.int64],
) -> dict[str, Any]:
    """Tag an image with the model registered under *model_name*.

    Args:
        image: A PIL image, or a string handed to
            ``dbimutils.read_img_from_url``.
        model_name: Key into ``loaded_models``; the model is loaded through
            ``change_model`` when the cached entry is ``None``.
        general_threshold: General tags must score above this value.
        character_threshold: Character tags must score above this value.
        tag_names: Tag name for every model output position.
        rating_indexes: Output positions holding rating tags.
        general_indexes: Output positions holding general tags.
        character_indexes: Output positions holding character tags.

    Returns:
        A dict with these keys:

        * ``"a"``: the selected general tags joined with ``", "`` in
          descending score order, underscores replaced by spaces and
          parentheses backslash-escaped.
        * ``"c"``: the same list without the replacements.
        * ``"rating"``: rating tag name mapped to score (all rating entries).
        * ``"character_res"`` / ``"general_res"``: lists of
          ``{"tag": ..., "confidence": ...}`` for tags above their threshold
          that also score above 0.4.

    Raises:
        TypeError: If *image* is neither a string nor a PIL image.
        KeyError: If *model_name* is not a key of ``loaded_models``.
    """
    if isinstance(image, str):
        rawimage = dbimutils.read_img_from_url(image)
    elif isinstance(image, PIL.Image.Image):
        rawimage = image
    else:
        # TypeError is still caught by callers that handle ``Exception``.
        raise TypeError("Invalid image type")

    session = loaded_models[model_name]
    if session is None:
        session = change_model(model_name)

    # The second entry of the input shape is used as the target edge length
    # for both the squaring and the resizing step.
    _, height, _, _ = session.get_inputs()[0].shape
    batch = _image_to_model_input(rawimage, height)

    input_name = session.get_inputs()[0].name
    label_name = session.get_outputs()[0].name
    probs = session.run([label_name], {input_name: batch})[0]
    labels = list(zip(tag_names, probs[0].astype(float)))

    # Ratings are reported in full; general and character tags are thresholded.
    rating = dict(labels[i] for i in rating_indexes)
    general_res = _select_tags(labels, general_indexes, general_threshold)
    character_res = _select_tags(labels, character_indexes, character_threshold)

    ranked = dict(sorted(general_res.items(), key=lambda item: item[1], reverse=True))
    c = ", ".join(ranked)
    # "\\(" is the same two-character text the old "\(" literal produced, but
    # without relying on an invalid escape sequence.
    a = c.replace("_", " ").replace("(", "\\(").replace(")", "\\)")

    items, geninfo = _read_image_metadata(rawimage)

    for key, text in items.items():
        print(key)
        print(text)
    print("geninfo", geninfo)
    print("a", a)
    print("c", c)
    print("rating", rating)
    print("character_res", character_res)
    print("general_res", general_res)

    # The returned lists apply a fixed 0.4 cut-off on top of the thresholds.
    character_res = [
        {"tag": tag, "confidence": score}
        for tag, score in character_res.items()
        if score > 0.4
    ]
    general_res = [
        {"tag": tag, "confidence": score}
        for tag, score in general_res.items()
        if score > 0.4
    ]
    return {
        "a": a,
        "c": c,
        "rating": rating,
        "character_res": character_res,
        "general_res": general_res,
    }
def label_img(
    image: PIL.Image.Image | str,
    model: str,
    l_score_general_threshold: float,
    l_score_character_threshold: float,
):
    """Tag *image* with the requested model and return ``predict``'s result.

    Args:
        image: A PIL image or a string. Strings starting with ``"http"`` are
            fetched with ``dbimutils.read_img_from_url`` before prediction.
        model: Model name: ``"SwinV2"``, ``"ConvNext"``, ``"ConvNextV2"`` or
            ``"ViT"``.
        l_score_general_threshold: Forwarded to ``predict`` as
            ``general_threshold``.
        l_score_character_threshold: Forwarded to ``predict`` as
            ``character_threshold``.

    Returns:
        Whatever ``predict`` returns for the image.
    """
    if isinstance(image, str) and image.startswith("http"):
        image = dbimutils.read_img_from_url(image)

    # Create the model cache once and keep it between calls. It used to be
    # reset on every call, and "ConvNextV2" was loaded even when another model
    # was requested. predict() loads the requested model when its entry is
    # still None.
    cache = globals().setdefault("loaded_models", {})
    for name in ("SwinV2", "ConvNext", "ConvNextV2", "ViT"):
        cache.setdefault(name, None)

    tag_names, rating_indexes, general_indexes, character_indexes = load_labels()
    return predict(
        image=image,
        model_name=model,
        general_threshold=l_score_general_threshold,
        character_threshold=l_score_character_threshold,
        tag_names=tag_names,
        rating_indexes=rating_indexes,
        general_indexes=general_indexes,
        character_indexes=character_indexes,
    )
def write_image_tag(
    img_id: int,
    is_valid: bool,
    tags: List[model.ImageTag],
    callback_url: str,
):
    """Build an ``ImageScanCallbackRequest`` from a scan result.

    The request object is constructed and then discarded, and ``callback_url``
    is accepted but never read. The function returns ``None``.

    NOTE(review): presumably the request is meant to be delivered to
    ``callback_url`` - confirm the intended behavior.
    """
    model.ImageScanCallbackRequest(
        img_id=img_id,
        is_valid=is_valid,
        tags=tags,
    )
if __name__ == "__main__":
    # Manual run: tag one sample image with the SwinV2 model and print the
    # resulting dictionary.
    score_slider_step = 0.05
    score_general_threshold = 0.35
    score_character_threshold = 0.85

    ret = label_img(
        image="https://pub-9747017e9ec54620bfbe2385f14fe4d7.r2.dev/cnGirlYcy_v10_people_network_nannansleep/cnGirlYcy_v10_people_network_nannansleep_r_1679670778_0.png",
        model="SwinV2",
        l_score_general_threshold=score_general_threshold,
        l_score_character_threshold=score_character_threshold,
    )
    print(ret)