terapyon committed on
Commit
2b32e82
·
1 Parent(s): 6fd8495

added select podcast for ui

Browse files
Files changed (1) hide show
  1. src/app.py +31 -7
src/app.py CHANGED
@@ -9,16 +9,29 @@ def get_conn():
9
  return duckdb.connect(DUCKDB_FILE)
10
 
11
 
12
- query = """WITH ordered_embeddings AS (
13
- SELECT embeddings.id, embeddings.part FROM embeddings
14
- ORDER BY array_distance(embedding, ?::FLOAT[1024])
 
 
 
 
 
 
 
 
 
 
 
15
  LIMIT 10
16
  )
17
  SELECT
18
  p.title,
19
  p.date,
20
  e.start,
21
- e.text
 
 
22
  FROM
23
  ordered_embeddings oe
24
  JOIN
@@ -33,15 +46,26 @@ SELECT
33
 
34
  st.title("terapyon cannel search")
35
 
 
 
 
 
 
 
 
 
 
 
36
  word = st.text_input("Search word")
37
  if word:
38
  st.write(f"Search word: {word}")
39
  embeddings = get_embeddings([word], query=True)
40
  word_embedding = embeddings[0, :]
41
 
42
- conn = get_conn()
43
- result = conn.execute(query, (word_embedding,)).df()
44
- selected = st.dataframe(result,
 
45
  on_select="rerun",
46
  selection_mode="single-row")
47
  if selected:
 
9
  return duckdb.connect(DUCKDB_FILE)
10
 
11
 
12
+ title_query = """SELECT id, title FROM podcasts
13
+ ORDER BY date DESC;
14
+ """
15
+
16
+ query = """WITH filtered_podcasts AS (
17
+ SELECT id
18
+ FROM podcasts
19
+ WHERE id in ?
20
+ ),
21
+ ordered_embeddings AS (
22
+ SELECT embeddings.id, embeddings.part
23
+ FROM embeddings
24
+ JOIN filtered_podcasts fp ON embeddings.id = fp.id
25
+ ORDER BY array_distance(embedding, ?::FLOAT[1024])
26
  LIMIT 10
27
  )
28
  SELECT
29
  p.title,
30
  p.date,
31
  e.start,
32
+ e.text,
33
+ e.part,
34
+ p.audio,
35
  FROM
36
  ordered_embeddings oe
37
  JOIN
 
46
 
47
  st.title("terapyon cannel search")
48
 
49
+ conn = get_conn()
50
+ titles = conn.execute(title_query).df()
51
+ selected_title: list[str] | None = st.multiselect("Select title", titles["title"])
52
+ if selected_title:
53
+ st.write(f"Selected title: {selected_title}")
54
+ selected_ids = titles.loc[titles.loc[:, "title"].isin(selected_title), "id"].tolist()
55
+ else:
56
+ st.write("All titles")
57
+ selected_ids = titles.loc[:, "id"].tolist()
58
+
59
  word = st.text_input("Search word")
60
  if word:
61
  st.write(f"Search word: {word}")
62
  embeddings = get_embeddings([word], query=True)
63
  word_embedding = embeddings[0, :]
64
 
65
+ result = conn.execute(query,
66
+ (selected_ids, word_embedding,)).df()
67
+ selected = st.dataframe(result,
68
+ column_order=["title", "date", "part", "start", "text", "audio"],
69
  on_select="rerun",
70
  selection_mode="single-row")
71
  if selected: