Ritvik19 committed on
Commit
bad08ae
·
verified ·
1 Parent(s): eed34fa

Various Improvements

Browse files
Files changed (3) hide show
  1. app.py +27 -8
  2. chat_chains.py +29 -23
  3. openai_configuration.py +7 -0
app.py CHANGED
@@ -20,27 +20,30 @@ from chain_of_density import chain_of_density_chain
20
  from insights_bullet_chain import insights_bullet_chain
21
  from synopsis_chain import synopsis_chain
22
  from custom_exceptions import InvalidArgumentError, InvalidCommandError
 
23
 
24
  st.set_page_config(layout="wide")
25
- os.environ["OPENAI_API_KEY"] = "sk-<REDACTED — leaked secret removed; revoke this key>"
26
 
27
 
28
  welcome_message = """
29
- Hi I'm Agent Zeta, your AI assistant, dedicated to making your journey through machine learning research papers as insightful and interactive as possible. Whether you're diving into the latest studies or brushing up on foundational papers, I'm here to help navigate, discuss, and analyze content with you.
 
30
 
31
  Here's a quick guide to getting started with me:
32
 
33
  | Command | Description |
34
  |---------|-------------|
 
35
  | `/add-papers <list of urls>` | Upload and process documents for our conversation. |
36
  | `/library` | View an index of processed documents to easily navigate your research. |
 
37
  | `/session-expense` | Calculate the cost of our conversation, ensuring transparency in resource usage. |
38
  | `/export` | Download conversation data for your records or further analysis. |
39
  | `/auto-insight <document id>` | Automatically generate questions and answers for the paper. |
40
- | `/deep-dive [<list of document ids>] <query>` | Query me with a specific context. |
41
  | `/condense-summary <document id>` | Generate increasingly concise, entity-dense summaries of the paper. |
42
  | `/insight-bullets <list of document ids>` | Extract and summarize key insights, methods, results, and conclusions. |
43
  | `/paper-synopsis <document id>` | Generate a synopsis of the paper. |
 
44
 
45
 
46
  <br>
@@ -70,6 +73,12 @@ def index_documents_wrapper(inputs=None):
70
  return (response, "dataframe")
71
 
72
 
 
 
 
 
 
 
73
  def calculate_cost_wrapper(inputs=None):
74
  try:
75
  stats_df = pd.DataFrame(st.session_state.costing)
@@ -122,7 +131,7 @@ def download_conversation_wrapper(inputs=None):
122
  def query_llm(inputs, relevant_docs):
123
  with get_openai_callback() as cb:
124
  response = (
125
- qa_chain(ChatOpenAI(model="gpt-4-0125-preview", temperature=0))
126
  .invoke({"context": format_docs(relevant_docs), "question": inputs})
127
  .content
128
  )
@@ -174,7 +183,7 @@ def chain_of_density_wrapper(inputs):
174
  if inputs == "":
175
  raise InvalidArgumentError("Please provide a document id")
176
  document = st.session_state.documents[inputs].page_content
177
- llm = ChatOpenAI(model="gpt-4-turbo-preview", temperature=0)
178
  with get_openai_callback() as cb:
179
  summary = chain_of_density_chain(llm).invoke({"paper": document})
180
  stats = cb
@@ -193,7 +202,7 @@ def synopsis_wrapper(inputs):
193
  if inputs == "":
194
  raise InvalidArgumentError("Please provide a document id")
195
  document = st.session_state.documents[inputs].page_content
196
- llm = ChatOpenAI(model="gpt-4-turbo-preview", temperature=0)
197
  with get_openai_callback() as cb:
198
  summary = synopsis_chain(llm).invoke({"paper": document})
199
  stats = cb
@@ -212,7 +221,7 @@ def insights_bullet_wrapper(inputs):
212
  if inputs == "":
213
  raise InvalidArgumentError("Please provide a document id")
214
  document = "\n\n".join([st.session_state.documents[c].page_content for c in inputs])
215
- llm = ChatOpenAI(model="gpt-4-turbo-preview", temperature=0)
216
  with get_openai_callback() as cb:
217
  insights = insights_bullet_chain(llm).invoke({"paper": document})
218
  stats = cb
@@ -231,7 +240,7 @@ def auto_qa_chain_wrapper(inputs):
231
  if inputs == "":
232
  raise InvalidArgumentError("Please provide a document id")
233
  document = st.session_state.documents[inputs].page_content
234
- llm = ChatOpenAI(model="gpt-4-turbo-preview", temperature=0)
235
  auto_qa_conversation = []
236
  with get_openai_callback() as cb:
237
  auto_qa_response = auto_qa_chain(llm).invoke({"paper": document})
@@ -288,10 +297,20 @@ def boot(command_center, formating_functions):
288
  st.error(e)
289
 
290
 
 
 
 
 
 
 
 
 
291
  if __name__ == "__main__":
292
  all_commands = [
 
293
  ("/add-papers", list, process_documents_wrapper),
294
  ("/library", None, index_documents_wrapper),
 
295
  ("/session-expense", None, calculate_cost_wrapper),
296
  ("/export", None, download_conversation_wrapper),
297
  ("/help-me", None, lambda x: (welcome_message, "identity")),
 
20
  from insights_bullet_chain import insights_bullet_chain
21
  from synopsis_chain import synopsis_chain
22
  from custom_exceptions import InvalidArgumentError, InvalidCommandError
23
+ from openai_configuration import openai_parser
24
 
25
  st.set_page_config(layout="wide")
 
26
 
27
 
28
  welcome_message = """
29
+ Hi I'm Agent Zeta, your AI assistant, dedicated to making your journey through machine learning research papers as insightful and interactive as possible.
30
+ Whether you're diving into the latest studies or brushing up on foundational papers, I'm here to help navigate, discuss, and analyze content with you.
31
 
32
  Here's a quick guide to getting started with me:
33
 
34
  | Command | Description |
35
  |---------|-------------|
36
+ | `/configure --key <api key> --model <model>` | Configure the OpenAI API key and model for our conversation. |
37
  | `/add-papers <list of urls>` | Upload and process documents for our conversation. |
38
  | `/library` | View an index of processed documents to easily navigate your research. |
39
+ | `/view-doc <document id>` | View the content of a specific document. |
40
  | `/session-expense` | Calculate the cost of our conversation, ensuring transparency in resource usage. |
41
  | `/export` | Download conversation data for your records or further analysis. |
42
  | `/auto-insight <document id>` | Automatically generate questions and answers for the paper. |
 
43
  | `/condense-summary <document id>` | Generate increasingly concise, entity-dense summaries of the paper. |
44
  | `/insight-bullets <list of document ids>` | Extract and summarize key insights, methods, results, and conclusions. |
45
  | `/paper-synopsis <document id>` | Generate a synopsis of the paper. |
46
+ | `/deep-dive [<list of document ids>] <query>` | Query me with a specific context. |
47
 
48
 
49
  <br>
 
73
  return (response, "dataframe")
74
 
75
 
76
def view_document_wrapper(inputs):
    """Display the raw page content of a processed document.

    Parameters
    ----------
    inputs : str
        A document id previously assigned by ``/add-papers``.

    Returns
    -------
    tuple[str, str]
        ``(document text, "identity")`` — the formatting key consumed by ``boot``.

    Raises
    ------
    InvalidArgumentError
        If no document id is given or the id is unknown.
    """
    # Validate like the sibling wrappers (e.g. chain_of_density_wrapper)
    # instead of letting a raw KeyError escape to the UI.
    if inputs == "":
        raise InvalidArgumentError("Please provide a document id")
    if inputs not in st.session_state.documents:
        raise InvalidArgumentError(f"Unknown document id: {inputs}")
    response = st.session_state.documents[inputs].page_content
    st.session_state.messages.append((f"/view-doc {inputs}", response, "identity"))
    return (response, "identity")
80
+
81
+
82
  def calculate_cost_wrapper(inputs=None):
83
  try:
84
  stats_df = pd.DataFrame(st.session_state.costing)
 
131
  def query_llm(inputs, relevant_docs):
132
  with get_openai_callback() as cb:
133
  response = (
134
+ qa_chain(ChatOpenAI(model=st.session_state.model, temperature=0))
135
  .invoke({"context": format_docs(relevant_docs), "question": inputs})
136
  .content
137
  )
 
183
  if inputs == "":
184
  raise InvalidArgumentError("Please provide a document id")
185
  document = st.session_state.documents[inputs].page_content
186
+ llm = ChatOpenAI(model=st.session_state.model, temperature=0)
187
  with get_openai_callback() as cb:
188
  summary = chain_of_density_chain(llm).invoke({"paper": document})
189
  stats = cb
 
202
  if inputs == "":
203
  raise InvalidArgumentError("Please provide a document id")
204
  document = st.session_state.documents[inputs].page_content
205
+ llm = ChatOpenAI(model=st.session_state.model, temperature=0)
206
  with get_openai_callback() as cb:
207
  summary = synopsis_chain(llm).invoke({"paper": document})
208
  stats = cb
 
221
  if inputs == "":
222
  raise InvalidArgumentError("Please provide a document id")
223
  document = "\n\n".join([st.session_state.documents[c].page_content for c in inputs])
224
+ llm = ChatOpenAI(model=st.session_state.model, temperature=0)
225
  with get_openai_callback() as cb:
226
  insights = insights_bullet_chain(llm).invoke({"paper": document})
227
  stats = cb
 
240
  if inputs == "":
241
  raise InvalidArgumentError("Please provide a document id")
242
  document = st.session_state.documents[inputs].page_content
243
+ llm = ChatOpenAI(model=st.session_state.model, temperature=0)
244
  auto_qa_conversation = []
245
  with get_openai_callback() as cb:
246
  auto_qa_response = auto_qa_chain(llm).invoke({"paper": document})
 
297
  st.error(e)
298
 
299
 
300
def configure_openai_wrapper(inputs):
    """Configure the OpenAI API key and model for this session.

    Parameters
    ----------
    inputs : str
        Raw argument string after ``/configure``, e.g.
        ``--key sk-... --model gpt-4-turbo-preview``.

    Returns
    -------
    tuple[str, str]
        ``(confirmation text, "identity")``.
    """
    args = openai_parser.parse_args(inputs.split())
    # Only overwrite the key when one was supplied; os.environ rejects
    # a None value with a TypeError.
    if args.key:
        os.environ["OPENAI_API_KEY"] = args.key
    st.session_state.model = args.model
    # SECURITY: never echo the raw API key back into the chat history —
    # str(args) would display it on screen and leak it into /export output.
    confirmation = (
        f"Configured model={args.model!r}, "
        f"api key {'updated' if args.key else 'unchanged'}"
    )
    st.session_state.messages.append(("/configure", confirmation, "identity"))
    return (confirmation, "identity")
306
+
307
+
308
  if __name__ == "__main__":
309
  all_commands = [
310
+ ("/configure", str, configure_openai_wrapper),
311
  ("/add-papers", list, process_documents_wrapper),
312
  ("/library", None, index_documents_wrapper),
313
+ ("/view-doc", str, view_document_wrapper),
314
  ("/session-expense", None, calculate_cost_wrapper),
315
  ("/export", None, download_conversation_wrapper),
316
  ("/help-me", None, lambda x: (welcome_message, "identity")),
chat_chains.py CHANGED
@@ -32,7 +32,6 @@ By following these guidelines, you ensure that users receive valuable, accurate,
32
  qa_prompt = ChatPromptTemplate.from_messages(
33
  [
34
  ("system", qa_system_prompt),
35
- # MessagesPlaceholder(variable_name="chat_history"),
36
  ("human", "{question}"),
37
  ]
38
  )
@@ -60,33 +59,40 @@ qa_chain = lambda llm: (
60
 
61
  def parse_model_response(input_string):
62
  parsed_data = {"answer": "", "citations": []}
63
- xml_matches = re.findall(r"<citations>.*?</citations>", input_string, re.DOTALL)
64
- if not xml_matches:
65
- parsed_data["answer"] = input_string
66
- return parsed_data
67
-
68
- outside_text_parts = []
69
- last_end_pos = 0
70
-
71
- for xml_string in xml_matches:
72
- match = re.search(re.escape(xml_string), input_string[last_end_pos:], re.DOTALL)
73
-
74
- if match:
75
- outside_text_parts.append(
76
- input_string[last_end_pos : match.start() + last_end_pos]
77
  )
78
- last_end_pos += match.end()
79
 
80
- root = ET.fromstring(xml_string)
 
 
 
 
 
 
81
 
82
- for citation in root.findall("citation"):
83
- source_id = citation.find("source_id").text
84
- quote = citation.find("quote").text
85
- parsed_data["citations"].append({"source_id": source_id, "quote": quote})
 
 
86
 
87
- outside_text_parts.append(input_string[last_end_pos:])
88
 
89
- parsed_data["answer"] = "".join(outside_text_parts)
 
 
90
 
91
  return parsed_data
92
 
 
32
# Prompt for grounded Q&A: the system instructions followed by the
# user's question (context is injected separately by the chain).
qa_prompt = ChatPromptTemplate.from_messages(
    [("system", qa_system_prompt), ("human", "{question}")]
)
 
59
 
60
def parse_model_response(input_string):
    """Split a model response into free text and structured citations.

    The model may embed zero or more ``<citations>...</citations>`` XML
    fragments; each holds ``<citation>`` elements with ``<source_id>``
    and ``<quote>`` children.

    Parameters
    ----------
    input_string : str
        Raw text returned by the model.

    Returns
    -------
    dict
        ``{"answer": str, "citations": [{"source_id": ..., "quote": ...}]}``.
        On any parse failure the whole input becomes the answer and the
        citations list is empty.
    """
    parsed_data = {"answer": "", "citations": []}
    try:
        # finditer gives match positions directly — no need to re-search
        # each escaped match string as the previous implementation did.
        matches = list(
            re.finditer(r"<citations>.*?</citations>", input_string, re.DOTALL)
        )
        if not matches:
            parsed_data["answer"] = input_string
            return parsed_data

        outside_text_parts = []
        last_end_pos = 0
        for match in matches:
            # Text between citation blocks is part of the answer.
            outside_text_parts.append(input_string[last_end_pos : match.start()])
            last_end_pos = match.end()

            root = ET.fromstring(match.group(0))
            for citation in root.findall("citation"):
                parsed_data["citations"].append(
                    {
                        "source_id": citation.find("source_id").text,
                        "quote": citation.find("quote").text,
                    }
                )

        outside_text_parts.append(input_string[last_end_pos:])
        parsed_data["answer"] = "".join(outside_text_parts)
    except Exception:
        # Malformed XML: fall back to the raw text, and discard any
        # partially collected citations so the result stays consistent
        # (the old code kept partial citations alongside the full answer).
        parsed_data["answer"] = input_string
        parsed_data["citations"] = []

    return parsed_data
98
 
openai_configuration.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
"""Argument parser for the `/configure` chat command.

Parses strings like ``--key sk-... --model gpt-4-turbo-preview`` into a
namespace with ``key`` and ``model`` attributes.
"""

import argparse

openai_parser = argparse.ArgumentParser(
    description="OpenAI Configuration",
    # Raise argparse.ArgumentError instead of calling sys.exit() on bad
    # input (Python 3.9+): exiting would terminate the hosting Streamlit
    # app instead of surfacing the error in the chat UI.
    exit_on_error=False,
)
openai_parser.add_argument("--key", type=str, help="OpenAI API Key")
openai_parser.add_argument(
    "--model", type=str, help="OpenAI Model", default="gpt-4-turbo-preview"
)