Juan Martínez committed on
Commit
37958d9
·
1 Parent(s): 8404ae6

add search

Browse files
Files changed (1) hide show
  1. app.py +27 -11
app.py CHANGED
@@ -1,18 +1,34 @@
1
  import streamlit as st
 
 
2
 
3
- st.markdown("# Buscador de Noticias Salvadoreñas")
 
 
 
 
 
 
4
 
5
- search_text = st.text_input(label="Búsqueda")
 
 
 
 
6
 
7
- # ds = load_dataset("justinian336/salvadoran-news-embedded")
 
 
 
8
 
 
 
9
 
10
- # def search(text, model, ds, n):
11
- # encoded_text = model.encode(text)
12
- # scores, retrieved_examples = ds.get_nearest_examples('embedding', encoded_text, k=n)
13
- # matching_titles = retrieved_examples["title"]
14
- # urls = retrieved_examples["link"]
15
- # contents = retrieved_examples["content"]
16
- # return list(zip(matching_titles, [c[:150] for c in contents], urls, scores))
17
 
18
- # search_results = search(search_text, model, embedded_ds, 10)
 
 
 
 
 
1
import html

import streamlit as st
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
4
 
5
def search(text: str, model, ds, n: int, *, snippet_length: int = 150) -> list:
    """Return the ``n`` nearest news articles for a free-text query.

    The query is embedded with ``model.encode`` and looked up with
    ``ds.get_nearest_examples`` on the ``"embedding"`` column, so ``ds`` must
    already have an index attached to that column.

    Args:
        text: Query string typed by the user.
        model: Object exposing ``encode(text)``; its output is passed
            unchanged to the dataset lookup.
        ds: Object exposing ``get_nearest_examples(column, query, k=...)``
            that returns ``(scores, examples)``, where ``examples`` maps the
            keys ``"title"``, ``"link"`` and ``"content"`` to parallel
            sequences.
        n: Number of neighbours to request.
        snippet_length: Maximum number of characters kept from each article
            body. Defaults to 150.

    Returns:
        A list of ``(title, snippet, url, score)`` tuples in the order the
        lookup returned them.
    """
    query_vector = model.encode(text)
    scores, examples = ds.get_nearest_examples("embedding", query_vector, k=n)

    # Only a short preview of each body is shown in the results list.
    snippets = [body[:snippet_length] for body in examples["content"]]

    return list(zip(examples["title"], snippets, examples["link"], scores))
12
 
13
# cache_resource keeps one shared, indexed dataset object alive across reruns;
# cache_data would hand back a serialized copy on every call.
@st.cache_resource
def get_dataset():
    """Load the embedded news dataset and attach a FAISS index to it.

    Returns:
        The loaded dataset with a FAISS index on its ``"embedding"`` column,
        ready for ``get_nearest_examples`` lookups.
    """
    # Without ``split`` load_dataset returns a DatasetDict, which has no
    # add_faiss_index method.
    # NOTE(review): assumes the Hub repo exposes a "train" split — confirm.
    ds = load_dataset("justinian336/salvadoran-news-embedded", split="train")
    # The index must be added to the object that is returned; the previous
    # code referenced an undefined name (embedded_ds) here.
    ds.add_faiss_index(column="embedding")
    return ds
18
 
19
# The decorator is spelled st.cache_resource (single underscore); the previous
# st.cache__resource does not exist and fails when the script is loaded.
@st.cache_resource
def get_model():
    """Load the sentence-embedding model used to encode search queries.

    Returns:
        A ``SentenceTransformer`` loaded from ``"justinian336/chupeto"``.
        The decorator caches it so it is not reloaded on every rerun.
    """
    return SentenceTransformer("justinian336/chupeto")
23
 
24
ds = get_dataset()
model = get_model()

st.markdown("# Buscador de Noticias Salvadoreñas")
search_text = st.text_input(label="Búsqueda")

# The text box is empty on first render; skip the lookup until the user has
# actually typed a query.
if search_text.strip():
    # Pass the dataset bound above (ds); embedded_ds was never defined.
    search_results = search(search_text, model, ds, 10)

    # search() yields 4-tuples (title, snippet, url, score); the score is not
    # displayed but must still be unpacked.
    for title, content, url, _score in search_results:
        # Titles, URLs and snippets come from scraped articles, so escape them
        # before placing them inside raw HTML.
        safe_title = html.escape(title)
        safe_url = html.escape(url, quote=True)
        safe_content = html.escape(content)

        st.markdown(
            f"""<div><a href="{safe_url}">{safe_title}</a></div>""",
            unsafe_allow_html=True,
        )
        # Without unsafe_allow_html the <div> tags are shown as literal text.
        st.markdown(f"""<div>{safe_content}...</div>""", unsafe_allow_html=True)
        st.markdown("---")