#!/usr/bin/env python
# -*- coding: utf-8 -*-
import html
import math
import sqlite3
import sys
from collections import Counter
from pathlib import Path

import gradio as gr
def _parse_tag_ids(raw_tags):
    """Parse the text of an entry's ``tags`` column into a set of tag ids.

    Surrounding ``"``, ``[`` and ``]`` characters are stripped and the rest is
    split on commas. A missing (``None``) or empty list yields an empty set
    instead of raising ``ValueError`` on ``int('')``.
    """
    body = (raw_tags or "").strip('"[]')
    return {int(part) for part in body.split(",") if part.strip()}


def load_database(db_path='danbooru2021.lfs.db'):
    """Load the tag dictionaries and every entry's tag-id set into memory.

    Progress messages are written to standard error while loading.

    Args:
        db_path: Path of the SQLite database file. It is queried for a
            ``tags`` table (``id``, ``name``, ``category``) and an
            ``entries`` table (``id``, ``tags``).

    Returns:
        A tuple ``(i2n, i2c, n2i, entries)`` where ``i2n`` maps tag id to the
        stripped tag name, ``i2c`` maps tag id to category, ``n2i`` maps the
        stripped tag name to tag id, and ``entries`` is a list with one set of
        tag ids per row of the ``entries`` table, in query order.

    Raises:
        sqlite3.Error: If the database cannot be queried.
        ValueError: If a tag list contains a non-integer item.
    """
    i2n = {}
    i2c = {}
    n2i = {}
    entries = []

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        sys.stderr.write("タグの辞書を作成中\n")
        cursor.execute("SELECT id, name, category FROM tags;")
        for tag_id, name, category in cursor:
            name = name.strip()
            i2n[tag_id] = name
            n2i[name] = tag_id
            i2c[tag_id] = category

        cursor.execute('SELECT id, tags FROM entries;')
        sys.stderr.write("データベースをメモリに読み込み中\n")
        for _entry_id, raw_tags in cursor:
            # Keep every entry's tag set in memory; queries scan this list.
            entries.append(_parse_tag_ids(raw_tags))
            sys.stderr.write("\r({})".format(len(entries)))
        sys.stderr.write("\n")
    finally:
        # Release the connection even when a query or a parse step fails.
        conn.close()

    return i2n, i2c, n2i, entries
# Load the tag lookup tables and all entry tag sets once at import time;
# greet() reads these module-level names on every query.
i2n, i2c, n2i, entries = load_database()
# Inline CSS applied to a tag name, indexed by the tag's category value.
_CATEGORY_STYLES = (
    'color: lightblue',
    'color: gold',
    'color: violet',
    'color: lightgreen',
    'color: tomato',
    'color: red',
    'color: whitesmoke',
    'color: seagreen',
)

# Above this many matched entries no per-tag rates are reported.
_MAX_MATCHED_ENTRIES = 5000


def _category_style(category):
    """Return the inline CSS for a tag category, or '' for an unknown one."""
    if isinstance(category, int) and 0 <= category < len(_CATEGORY_STYLES):
        return _CATEGORY_STYLES[category]
    return ''


def greet(query):
    """Report which tags co-occur with the tags named in ``query``.

    Args:
        query: Comma-separated tag names. Surrounding whitespace is removed,
            inner spaces become underscores, and empty items are skipped.

    Returns:
        A list of ``[html_text, number]`` rows, in this order:

        * one ``['Tag "..." has been ignored.', 0]`` row per unknown tag name;
        * if at least one tag is known, a summary row with ``-1`` giving the
          number of entries that contain all known query tags;
        * unless there are too many matches, one row per other tag found in
          those entries, with the percentage of matched entries that also
          carry it (floored to two decimals), highest first; ties are ordered
          by ascending tag id.

    Reads the module-level ``n2i``, ``i2n``, ``i2c`` and ``entries``.
    """
    results = []

    # Normalise first and filter afterwards, so that a whitespace-only item
    # is skipped instead of being reported as an ignored empty tag name.
    tag_names = [part.strip().replace(" ", "_") for part in (query or "").split(",")]
    target_ids = set()
    for tag_name in tag_names:
        if not tag_name:
            continue
        try:
            target_ids.add(n2i[tag_name])
        except KeyError:
            # The first column is rendered as HTML, so user text is escaped.
            results.append(['Tag "{}" has been ignored.'.format(html.escape(tag_name)), 0])

    if not target_ids:
        return results

    matched_entries = [entry for entry in entries if target_ids.issubset(entry)]
    print(len(matched_entries))

    if len(matched_entries) > _MAX_MATCHED_ENTRIES:
        # The cell is HTML, so the line break is written as an explicit <br>.
        results.append([
            'Too many {} entries have been matched.<br>'
            'Please change or increase tags for reduce matches.'.format(len(matched_entries)),
            -1,
        ])
        return results

    results.append(['{} entries have been matched.'.format(len(matched_entries)), -1])

    # Every matched entry already contains all query tags, so counting how
    # often each tag appears gives the co-occurrence count in a single pass.
    tag_counts = Counter()
    for entry in matched_entries:
        tag_counts.update(entry)
    for tag_id in target_ids:
        tag_counts.pop(tag_id, None)

    total = len(matched_entries)
    ranked = sorted(tag_counts.items(), key=lambda item: (-item[1], item[0]))
    for tag_id, count in ranked:
        # The "click2copy" class is what the page's click handler looks for;
        # the leading "?" is kept from the original cell text.
        cell = '? <span class="click2copy" style="{}">{}</span>'.format(
            _category_style(i2c[tag_id]),
            html.escape(i2n[tag_id]),
        )
        # Integer arithmetic keeps the floor exact (57 of 100 -> 57.0).
        results.append([cell, count * 10000 // total / 100])

    return results
# Client-side JavaScript handed to the Gradio interface. On a click on an
# element whose class list contains "click2copy" it writes that element's
# text to the clipboard and appends a " copied!" label next to it; after
# 500 ms the label fades out over one second and is then removed.
js = '''document.addEventListener("click", (e) => {
if (e.target instanceof HTMLElement && e.target.classList.contains("click2copy")) {
navigator.clipboard.writeText(e.target.innerText);
let el = document.createElement("span");
el.innerText = " copied!";
el.style.color = "#666";
e.target.parentNode.appendChild(el);
setTimeout(() => {
el.style.transition = "opacity 1s";
el.style.opacity = "0";
setTimeout(() => {
el.remove();
}, 1000);
}, 500);
}
})'''
# Build the web UI: a single text box feeds greet(), and the returned rows
# are shown in a two-column table (an HTML tag cell and a numeric rate cell).
# NOTE(review): '#component-4' presumably targets an auto-generated Gradio
# element id, so it may depend on the order components are created in —
# confirm it still matches after any change to the layout.
iface = gr.Interface(
    js=js,
    fn=greet,
    inputs="textbox",
    outputs=gr.Dataframe(
        headers=["tag (click to copy)", "rate"],
        datatype=["html", "number"],
    ),
    allow_flagging='never', css='#component-4 { max-width: 16rem; }')
# Launch the app with "0.0.0.0" as the server name (presumably so that it
# listens on all network interfaces — confirm this exposure is intended).
iface.launch(server_name="0.0.0.0")