Sean-Case committed on
Commit
102df35
·
1 Parent(s): bc459f6

Adapted code to keep newly-loaded vectorstores within local user state

Browse files
Files changed (2) hide show
  1. app.py +18 -17
  2. chatfuncs/chatfuncs.py +7 -7
app.py CHANGED
@@ -89,16 +89,28 @@ def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
89
  #print(out_message)
90
  #print(f"> Saved to: {save_to}")
91
 
92
- return out_message
93
 
94
  # Gradio chat
95
 
96
  import gradio as gr
97
 
 
 
 
98
  block = gr.Blocks(css=".gradio-container {background-color: black}")
99
 
100
  with block:
101
- #with gr.Row():
 
 
 
 
 
 
 
 
 
102
  gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")
103
 
104
  gr.Markdown("Chat with a document (alpha). By default the Lambeth Borough Plan '[Lambeth 2030 : Our Future, Our Lambeth](https://www.lambeth.gov.uk/better-fairer-lambeth/projects/lambeth-2030-our-future-our-lambeth)' is loaded. If you want to talk about another document or web page (feature temporarily disabled), please select below. The chatbot will not answer questions where answered can't be found on the website. If switching topic, please click the 'New topic' button as the bot will assume follow up questions are linked to the first. Sources are shown underneath the chat area.\n\nWarnings: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.")
@@ -117,7 +129,6 @@ with block:
117
  lines=1,
118
  )
119
 
120
-
121
  submit = gr.Button(value="Send message", variant="secondary", scale = 1)
122
 
123
  examples_set = gr.Examples(label="Examples for the Lambeth Borough Plan",
@@ -151,42 +162,32 @@ with block:
151
  "<center>Powered by Flan Alpaca and Langchain</a></center>"
152
  )
153
 
154
- ingest_text = gr.State()
155
- ingest_metadata = gr.State()
156
- ingest_docs = gr.State()
157
-
158
- embeddings_state = gr.State()
159
- vectorstore_state = gr.State()
160
-
161
- chat_history_state = gr.State()
162
- instruction_prompt_out = gr.State()
163
-
164
  #def hide_examples():
165
  # return gr.Examples.update(visible=False)
166
 
167
  # Load in a pdf
168
  load_pdf_click = load_pdf.click(ing.parse_file, inputs=[in_pdf], outputs=[ingest_text, current_source]).\
169
  then(ing.text_to_docs, inputs=[ingest_text], outputs=[ingest_docs]).\
170
- then(docs_to_faiss_save, inputs=[ingest_docs], outputs=ingest_embed_out) # #then(load_embeddings, outputs=[embeddings_state]).\
171
  #then(hide_examples)
172
 
173
  # Load in a webpage
174
  load_web_click = load_web.click(ing.parse_html, inputs=[in_web, in_div], outputs=[ingest_text, ingest_metadata, current_source]).\
175
  then(ing.html_text_to_docs, inputs=[ingest_text, ingest_metadata], outputs=[ingest_docs]).\
176
- then(docs_to_faiss_save, inputs=[ingest_docs], outputs=ingest_embed_out)
177
  #then(hide_examples)
178
 
179
  # Load in a webpage
180
 
181
  # Click/enter to send message action
182
- response_click = submit.click(chatf.get_history_sources_final_input_prompt, inputs=[message, chat_history_state, current_topic], outputs=[chat_history_state, sources, instruction_prompt_out], queue=False).\
183
  then(chatf.turn_off_interactivity, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
184
  then(chatf.produce_streaming_answer_chatbot_hf, inputs=[chatbot, instruction_prompt_out], outputs=chatbot)
185
  response_click.then(chatf.highlight_found_text, [chatbot, sources], [sources]).\
186
  then(chatf.add_inputs_answer_to_history,[message, chatbot, current_topic], [chat_history_state, current_topic]).\
187
  then(lambda: gr.update(interactive=True), None, [message], queue=False)
188
 
189
- response_enter = message.submit(chatf.get_history_sources_final_input_prompt, inputs=[message, chat_history_state, current_topic], outputs=[chat_history_state, sources, instruction_prompt_out], queue=False).\
190
  then(chatf.turn_off_interactivity, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
191
  then(chatf.produce_streaming_answer_chatbot_hf, [chatbot, instruction_prompt_out], chatbot)
192
  response_enter.then(chatf.highlight_found_text, [chatbot, sources], [sources]).\
 
89
  #print(out_message)
90
  #print(f"> Saved to: {save_to}")
91
 
92
+ return out_message, vectorstore_func
93
 
94
  # Gradio chat
95
 
96
  import gradio as gr
97
 
98
+
99
+
100
+
101
  block = gr.Blocks(css=".gradio-container {background-color: black}")
102
 
103
  with block:
104
+ ingest_text = gr.State()
105
+ ingest_metadata = gr.State()
106
+ ingest_docs = gr.State()
107
+
108
+ embeddings_state = gr.State(globals()["embeddings"])
109
+ vectorstore_state = gr.State(globals()["vectorstore"])
110
+
111
+ chat_history_state = gr.State()
112
+ instruction_prompt_out = gr.State()
113
+
114
  gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")
115
 
116
  gr.Markdown("Chat with a document (alpha). By default the Lambeth Borough Plan '[Lambeth 2030 : Our Future, Our Lambeth](https://www.lambeth.gov.uk/better-fairer-lambeth/projects/lambeth-2030-our-future-our-lambeth)' is loaded. If you want to talk about another document or web page (feature temporarily disabled), please select below. The chatbot will not answer questions where answered can't be found on the website. If switching topic, please click the 'New topic' button as the bot will assume follow up questions are linked to the first. Sources are shown underneath the chat area.\n\nWarnings: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.")
 
129
  lines=1,
130
  )
131
 
 
132
  submit = gr.Button(value="Send message", variant="secondary", scale = 1)
133
 
134
  examples_set = gr.Examples(label="Examples for the Lambeth Borough Plan",
 
162
  "<center>Powered by Flan Alpaca and Langchain</a></center>"
163
  )
164
 
 
 
 
 
 
 
 
 
 
 
165
  #def hide_examples():
166
  # return gr.Examples.update(visible=False)
167
 
168
  # Load in a pdf
169
  load_pdf_click = load_pdf.click(ing.parse_file, inputs=[in_pdf], outputs=[ingest_text, current_source]).\
170
  then(ing.text_to_docs, inputs=[ingest_text], outputs=[ingest_docs]).\
171
+ then(docs_to_faiss_save, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state]) # #then(load_embeddings, outputs=[embeddings_state]).\
172
  #then(hide_examples)
173
 
174
  # Load in a webpage
175
  load_web_click = load_web.click(ing.parse_html, inputs=[in_web, in_div], outputs=[ingest_text, ingest_metadata, current_source]).\
176
  then(ing.html_text_to_docs, inputs=[ingest_text, ingest_metadata], outputs=[ingest_docs]).\
177
+ then(docs_to_faiss_save, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state])
178
  #then(hide_examples)
179
 
180
  # Load in a webpage
181
 
182
  # Click/enter to send message action
183
+ response_click = submit.click(chatf.get_history_sources_final_input_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_state], outputs=[chat_history_state, sources, instruction_prompt_out], queue=False).\
184
  then(chatf.turn_off_interactivity, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
185
  then(chatf.produce_streaming_answer_chatbot_hf, inputs=[chatbot, instruction_prompt_out], outputs=chatbot)
186
  response_click.then(chatf.highlight_found_text, [chatbot, sources], [sources]).\
187
  then(chatf.add_inputs_answer_to_history,[message, chatbot, current_topic], [chat_history_state, current_topic]).\
188
  then(lambda: gr.update(interactive=True), None, [message], queue=False)
189
 
190
+ response_enter = message.submit(chatf.get_history_sources_final_input_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_state], outputs=[chat_history_state, sources, instruction_prompt_out], queue=False).\
191
  then(chatf.turn_off_interactivity, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
192
  then(chatf.produce_streaming_answer_chatbot_hf, [chatbot, instruction_prompt_out], chatbot)
193
  response_enter.then(chatf.highlight_found_text, [chatbot, sources], [sources]).\
chatfuncs/chatfuncs.py CHANGED
@@ -238,11 +238,11 @@ def create_doc_df(docs_keep_out):
238
 
239
  return doc_df
240
 
241
- def hybrid_retrieval(new_question_kworded, k_val, out_passages,
242
  vec_score_cut_off, vec_weight, bm25_weight, svm_weight): # ,vectorstore, embeddings
243
 
244
- vectorstore=globals()["vectorstore"]
245
- embeddings=globals()["embeddings"]
246
 
247
 
248
  docs = vectorstore.similarity_search_with_score(new_question_kworded, k=k_val)
@@ -470,7 +470,7 @@ def get_expanded_passages(vectorstore, docs, width):
470
 
471
  return expanded_docs, doc_df
472
 
473
- def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prompt, extracted_memory): # ,
474
 
475
  question = inputs["question"]
476
  chat_history = inputs["chat_history"]
@@ -485,7 +485,7 @@ def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prom
485
  #docs_keep_as_doc, docs_content, docs_url = find_relevant_passages(new_question_kworded, k_val = 5, out_passages = 3,
486
  # vec_score_cut_off = 1.3, vec_weight = 1, tfidf_weight = 0.5, svm_weight = 1)
487
 
488
- docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, k_val = 5, out_passages = 2,
489
  vec_score_cut_off = 1, vec_weight = 1, bm25_weight = 1, svm_weight = 1)#,
490
  #vectorstore=globals()["vectorstore"], embeddings=globals()["embeddings"])
491
 
@@ -523,7 +523,7 @@ def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prom
523
 
524
  return instruction_prompt_out, sources_docs_content_string, new_question_kworded
525
 
526
- def get_history_sources_final_input_prompt(user_input, history, extracted_memory):#):
527
 
528
  #if chain_agent is None:
529
  # history.append((user_input, "Please click the button to submit the Huggingface API key before using the chatbot (top right)"))
@@ -539,7 +539,7 @@ def get_history_sources_final_input_prompt(user_input, history, extracted_memory
539
  instruction_prompt, content_prompt = create_prompt_templates()
540
  instruction_prompt_out, docs_content_string, new_question_kworded =\
541
  create_final_prompt({"question": user_input, "chat_history": history}, #vectorstore,
542
- instruction_prompt, content_prompt, extracted_memory)
543
 
544
 
545
  history.append(user_input)
 
238
 
239
  return doc_df
240
 
241
+ def hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val, out_passages,
242
  vec_score_cut_off, vec_weight, bm25_weight, svm_weight): # ,vectorstore, embeddings
243
 
244
+ #vectorstore=globals()["vectorstore"]
245
+ #embeddings=globals()["embeddings"]
246
 
247
 
248
  docs = vectorstore.similarity_search_with_score(new_question_kworded, k=k_val)
 
470
 
471
  return expanded_docs, doc_df
472
 
473
+ def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings): # ,
474
 
475
  question = inputs["question"]
476
  chat_history = inputs["chat_history"]
 
485
  #docs_keep_as_doc, docs_content, docs_url = find_relevant_passages(new_question_kworded, k_val = 5, out_passages = 3,
486
  # vec_score_cut_off = 1.3, vec_weight = 1, tfidf_weight = 0.5, svm_weight = 1)
487
 
488
+ docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val = 5, out_passages = 2,
489
  vec_score_cut_off = 1, vec_weight = 1, bm25_weight = 1, svm_weight = 1)#,
490
  #vectorstore=globals()["vectorstore"], embeddings=globals()["embeddings"])
491
 
 
523
 
524
  return instruction_prompt_out, sources_docs_content_string, new_question_kworded
525
 
526
+ def get_history_sources_final_input_prompt(user_input, history, extracted_memory, vectorstore, embeddings):#):
527
 
528
  #if chain_agent is None:
529
  # history.append((user_input, "Please click the button to submit the Huggingface API key before using the chatbot (top right)"))
 
539
  instruction_prompt, content_prompt = create_prompt_templates()
540
  instruction_prompt_out, docs_content_string, new_question_kworded =\
541
  create_final_prompt({"question": user_input, "chat_history": history}, #vectorstore,
542
+ instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings)
543
 
544
 
545
  history.append(user_input)