File size: 2,343 Bytes
c532148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
702c8d6
c532148
 
 
 
 
 
 
 
 
702c8d6
4e8a334
 
702c8d6
c532148
 
 
 
 
 
 
46d9ba6
 
 
 
 
 
 
c532148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import gradio as gr
import duckdb
from annoy import AnnoyIndex
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used by search_query to encode the user's query.
model = SentenceTransformer("sentence-transformers/LaBSE")

# Annoy nearest-neighbour index over 768-dimensional vectors using the
# "angular" metric, loaded from a prebuilt file on disk.
# NOTE(review): presumably built from LaBSE embeddings of the definitions,
# with item ids equal to definitions.entry_id — confirm against the script
# that produced definitions.ann.
annoy_index = AnnoyIndex(768, "angular")
annoy_index.load("definitions.ann")

# DuckDB database that search_query reads the `definitions` and `words`
# tables from.
conn = duckdb.connect("sonajaht.db")


def _empty_results():
    """Return an empty results table with the same columns as a real search."""
    # Local import: the file's import block does not bring in pandas, but
    # DuckDB's fetchdf() already depends on it being installed.
    import pandas as pd

    return pd.DataFrame(columns=["#", "sõna", "definitsioon"])


def search_query(query, top_k=10):
    """Find the dictionary entries whose definitions best match *query*.

    The query is embedded with the module-level sentence model, its nearest
    neighbours are looked up in the Annoy index, and the matching words and
    definitions are fetched from DuckDB in nearest-first order.

    Args:
        query: Free-text search string. ``None``, empty and whitespace-only
            values yield an empty result table instead of a search.
        top_k: Maximum number of neighbours to request from the index.
            Non-positive values yield an empty result table.

    Returns:
        A pandas DataFrame with the columns ``#`` (1-based rank), ``sõna``
        (word) and ``definitsioon`` (definition).
    """
    # Guard before touching the model: a blank query has nothing to embed,
    # and a non-positive top_k cannot return any neighbours.
    if not query or not query.strip() or top_k <= 0:
        return _empty_results()

    query_vector = model.encode(query)
    similar_item_ids = annoy_index.get_nns_by_vector(query_vector, top_k)

    # With no neighbours the SQL below would contain `IN ()` and an empty
    # CASE expression, both of which are syntax errors.
    if not similar_item_ids:
        return _empty_results()

    # Coerce to int so only numeric literals are ever interpolated into SQL.
    entry_ids = [int(item_id) for item_id in similar_item_ids]
    id_list = ", ".join(map(str, entry_ids))
    # Map every entry id to its position in the neighbour list so the rows
    # come back in nearest-first order rather than table order.
    rank_cases = " ".join(
        f"WHEN {entry_id} THEN {rank}" for rank, entry_id in enumerate(entry_ids)
    )
    sql_query = f"""
    SELECT w.value AS sõna, d.value AS definitsioon
    FROM definitions d
    JOIN words w ON d.word_id = w.word_id
    WHERE d.entry_id IN ({id_list})
    ORDER BY CASE d.entry_id
    {rank_cases}
    END
    """

    # Gradio may run handlers on worker threads; DuckDB connections are not
    # meant to be shared between threads, so each call uses its own cursor.
    cursor = conn.cursor()
    try:
        results = cursor.execute(sql_query).fetchdf()
    finally:
        cursor.close()

    # Prepend the 1-based rank column shown in the UI.
    results.insert(0, "#", list(range(1, len(results) + 1)))
    return results


# Sample queries rendered as clickable buttons in the UI. They are written in
# several languages (Estonian, Russian, French, English).
examples = [
    "väga vana mees",
    "очень старый дед",
    "un très vieil homme",
    "a clear material that you can see through used to make windows",
    "to have a rule that you need a specific object or thing in some situation",
    "something that makes you happy or makes you laugh",
    "when an event happens or takes place",
    "часть стерео системы, из которой исходит музыка",
    "кто-то, кто использует что-то",
]


def handle_example(example):
    """Run a search for an example query.

    Returns a ``(query_text, results)`` pair so the caller can both echo the
    example text and show its search results.
    """
    search_results = search_query(example)
    return example, search_results


# Build the demo UI. Components are created in display order: title, query
# box, search button, a row of example buttons, then the results table.
with gr.Blocks() as demo:
    gr.Markdown("# Sõnajaht Demo")

    query_input = gr.Textbox(label="Sisestage teie otsingupäring")
    search_button = gr.Button("Otsi")

    # One button per example query, all placed in a single row.
    example_buttons = []
    with gr.Row():
        for example in examples:
            example_buttons.append(gr.Button(example))

    results_output = gr.Dataframe(label="Otsingutulemused")

    # Manual search: textbox contents in, results table out.
    search_button.click(
        fn=search_query,
        inputs=query_input,
        outputs=results_output,
    )

    # Example search: each button passes its own label as the query and
    # fills both the textbox and the results table.
    for button in example_buttons:
        button_query = gr.State(button.value)
        button.click(
            fn=handle_example,
            inputs=button_query,
            outputs=[query_input, results_output],
        )


# Launch the Gradio app only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    demo.launch()