ugmSorcero committed on
Commit
27e0350
·
1 Parent(s): 17fa846

Implements clear index but tfidf still struggles

Browse files
.streamlit/config.toml CHANGED
@@ -1,5 +1,5 @@
1
  [theme]
2
- primaryColor="#ffbf00"
3
  backgroundColor="#0e1117"
4
  secondaryBackgroundColor="#282929"
5
  textColor = "#ffffff"
 
1
  [theme]
2
+ primaryColor="#e5ab00"
3
  backgroundColor="#0e1117"
4
  secondaryBackgroundColor="#282929"
5
  textColor = "#ffffff"
app.py CHANGED
@@ -29,7 +29,7 @@ def run_demo():
29
  with navigation:
30
 
31
  selected_page = option_menu(
32
- menu_title="Navigation",
33
  options=list(pages.keys()),
34
  icons=[f[1] for f in pages.values()],
35
  menu_icon="cast",
 
29
  with navigation:
30
 
31
  selected_page = option_menu(
32
+ menu_title=None,
33
  options=list(pages.keys()),
34
  icons=[f[1] for f in pages.values()],
35
  menu_icon="cast",
core/pipelines.py CHANGED
@@ -39,10 +39,7 @@ def keyword_search(index="documents", split_word_length=100):
39
  index_pipeline = Pipeline()
40
  index_pipeline.add_node(processor, name="Preprocessor", inputs=["File"])
41
  index_pipeline.add_node(
42
- keyword_retriever, name="TfidfRetriever", inputs=["Preprocessor"]
43
- )
44
- index_pipeline.add_node(
45
- document_store, name="DocumentStore", inputs=["TfidfRetriever"]
46
  )
47
 
48
  return search_pipeline, index_pipeline
 
39
  index_pipeline = Pipeline()
40
  index_pipeline.add_node(processor, name="Preprocessor", inputs=["File"])
41
  index_pipeline.add_node(
42
+ document_store, name="DocumentStore", inputs=["Preprocessor"]
 
 
 
43
  )
44
 
45
  return search_pipeline, index_pipeline
core/search_index.py CHANGED
@@ -1,4 +1,5 @@
1
  from haystack.schema import Document
 
2
  import uuid
3
 
4
 
@@ -17,8 +18,14 @@ def format_docs(documents):
17
  return db_docs, [doc.meta["id"] for doc in db_docs]
18
 
19
 
20
- def index(documents, pipeline):
21
  documents, doc_ids = format_docs(documents)
 
 
 
 
 
 
22
  pipeline.run(documents=documents)
23
  return doc_ids
24
 
 
1
  from haystack.schema import Document
2
+ from haystack.document_stores import BaseDocumentStore
3
  import uuid
4
 
5
 
 
18
  return db_docs, [doc.meta["id"] for doc in db_docs]
19
 
20
 
21
+ def index(documents, pipeline, clear_index=True):
22
  documents, doc_ids = format_docs(documents)
23
+ if clear_index:
24
+ document_stores = pipeline.get_nodes_by_class(
25
+ class_type=BaseDocumentStore
26
+ )
27
+ for docstore in document_stores:
28
+ docstore.delete_index(docstore.index)
29
  pipeline.run(documents=documents)
30
  return doc_ids
31
 
interface/pages.py CHANGED
@@ -78,6 +78,8 @@ def page_index(container):
78
  default_index=0,
79
  orientation="horizontal",
80
  )
 
 
81
 
82
  corpus = input_funcs[selected_input][0](container)
83
 
@@ -87,6 +89,7 @@ def page_index(container):
87
  index_results = index(
88
  corpus,
89
  st.session_state["pipeline"]["index_pipeline"],
 
90
  )
91
  if index_results:
92
  st.write(index_results)
 
78
  default_index=0,
79
  orientation="horizontal",
80
  )
81
+
82
+ clear_index = st.sidebar.checkbox('Clear Index', True)
83
 
84
  corpus = input_funcs[selected_input][0](container)
85
 
 
89
  index_results = index(
90
  corpus,
91
  st.session_state["pipeline"]["index_pipeline"],
92
+ clear_index
93
  )
94
  if index_results:
95
  st.write(index_results)