Ritvik19 committed on
Commit
bad08ae
·
verified ·
1 Parent(s): eed34fa

Various Improvements

Browse files
Files changed (3) hide show
  1. app.py +27 -8
  2. chat_chains.py +29 -23
  3. openai_configuration.py +7 -0
app.py CHANGED
@@ -20,27 +20,30 @@ from chain_of_density import chain_of_density_chain
20
  from insights_bullet_chain import insights_bullet_chain
21
  from synopsis_chain import synopsis_chain
22
  from custom_exceptions import InvalidArgumentError, InvalidCommandError
 
23
 
24
  st.set_page_config(layout="wide")
25
- os.environ["OPENAI_API_KEY"] = "sk-<REDACTED — leaked secret removed; revoke this key>"
26
 
27
 
28
  welcome_message = """
29
- Hi I'm Agent Zeta, your AI assistant, dedicated to making your journey through machine learning research papers as insightful and interactive as possible. Whether you're diving into the latest studies or brushing up on foundational papers, I'm here to help navigate, discuss, and analyze content with you.
 
30
 
31
  Here's a quick guide to getting started with me:
32
 
33
  | Command | Description |
34
  |---------|-------------|
 
35
  | `/add-papers <list of urls>` | Upload and process documents for our conversation. |
36
  | `/library` | View an index of processed documents to easily navigate your research. |
 
37
  | `/session-expense` | Calculate the cost of our conversation, ensuring transparency in resource usage. |
38
  | `/export` | Download conversation data for your records or further analysis. |
39
  | `/auto-insight <document id>` | Automatically generate questions and answers for the paper. |
40
- | `/deep-dive [<list of document ids>] <query>` | Query me with a specific context. |
41
  | `/condense-summary <document id>` | Generate increasingly concise, entity-dense summaries of the paper. |
42
  | `/insight-bullets <list of document ids>` | Extract and summarize key insights, methods, results, and conclusions. |
43
  | `/paper-synopsis <document id>` | Generate a synopsis of the paper. |
 
44
 
45
 
46
  <br>
@@ -70,6 +73,12 @@ def index_documents_wrapper(inputs=None):
70
  return (response, "dataframe")
71
 
72
 
 
 
 
 
 
 
73
  def calculate_cost_wrapper(inputs=None):
74
  try:
75
  stats_df = pd.DataFrame(st.session_state.costing)
@@ -122,7 +131,7 @@ def download_conversation_wrapper(inputs=None):
122
  def query_llm(inputs, relevant_docs):
123
  with get_openai_callback() as cb:
124
  response = (
125
- qa_chain(ChatOpenAI(model="gpt-4-0125-preview", temperature=0))
126
  .invoke({"context": format_docs(relevant_docs), "question": inputs})
127
  .content
128
  )
@@ -174,7 +183,7 @@ def chain_of_density_wrapper(inputs):
174
  if inputs == "":
175
  raise InvalidArgumentError("Please provide a document id")
176
  document = st.session_state.documents[inputs].page_content
177
- llm = ChatOpenAI(model="gpt-4-turbo-preview", temperature=0)
178
  with get_openai_callback() as cb:
179
  summary = chain_of_density_chain(llm).invoke({"paper": document})
180
  stats = cb
@@ -193,7 +202,7 @@ def synopsis_wrapper(inputs):
193
  if inputs == "":
194
  raise InvalidArgumentError("Please provide a document id")
195
  document = st.session_state.documents[inputs].page_content
196
- llm = ChatOpenAI(model="gpt-4-turbo-preview", temperature=0)
197
  with get_openai_callback() as cb:
198
  summary = synopsis_chain(llm).invoke({"paper": document})
199
  stats = cb
@@ -212,7 +221,7 @@ def insights_bullet_wrapper(inputs):
212
  if inputs == "":
213
  raise InvalidArgumentError("Please provide a document id")
214
  document = "\n\n".join([st.session_state.documents[c].page_content for c in inputs])
215
- llm = ChatOpenAI(model="gpt-4-turbo-preview", temperature=0)
216
  with get_openai_callback() as cb:
217
  insights = insights_bullet_chain(llm).invoke({"paper": document})
218
  stats = cb
@@ -231,7 +240,7 @@ def auto_qa_chain_wrapper(inputs):
231
  if inputs == "":
232
  raise InvalidArgumentError("Please provide a document id")
233
  document = st.session_state.documents[inputs].page_content
234
- llm = ChatOpenAI(model="gpt-4-turbo-preview", temperature=0)
235
  auto_qa_conversation = []
236
  with get_openai_callback() as cb:
237
  auto_qa_response = auto_qa_chain(llm).invoke({"paper": document})
@@ -288,10 +297,20 @@ def boot(command_center, formating_functions):
288
  st.error(e)
289
 
290
 
 
 
 
 
 
 
 
 
291
  if __name__ == "__main__":
292
  all_commands = [
 
293
  ("/add-papers", list, process_documents_wrapper),
294
  ("/library", None, index_documents_wrapper),
 
295
  ("/session-expense", None, calculate_cost_wrapper),
296
  ("/export", None, download_conversation_wrapper),
297
  ("/help-me", None, lambda x: (welcome_message, "identity")),
 
20
  from insights_bullet_chain import insights_bullet_chain
21
  from synopsis_chain import synopsis_chain
22
  from custom_exceptions import InvalidArgumentError, InvalidCommandError
23
+ from openai_configuration import openai_parser
24
 
25
  st.set_page_config(layout="wide")
 
26
 
27
 
28
  welcome_message = """
29
+ Hi I'm Agent Zeta, your AI assistant, dedicated to making your journey through machine learning research papers as insightful and interactive as possible.
30
+ Whether you're diving into the latest studies or brushing up on foundational papers, I'm here to help navigate, discuss, and analyze content with you.
31
 
32
  Here's a quick guide to getting started with me:
33
 
34
  | Command | Description |
35
  |---------|-------------|
36
+ | `/configure --key <api key> --model <model>` | Configure the OpenAI API key and model for our conversation. |
37
  | `/add-papers <list of urls>` | Upload and process documents for our conversation. |
38
  | `/library` | View an index of processed documents to easily navigate your research. |
39
+ | `/view-doc <document id>` | View the content of a specific document. |
40
  | `/session-expense` | Calculate the cost of our conversation, ensuring transparency in resource usage. |
41
  | `/export` | Download conversation data for your records or further analysis. |
42
  | `/auto-insight <document id>` | Automatically generate questions and answers for the paper. |
 
43
  | `/condense-summary <document id>` | Generate increasingly concise, entity-dense summaries of the paper. |
44
  | `/insight-bullets <list of document ids>` | Extract and summarize key insights, methods, results, and conclusions. |
45
  | `/paper-synopsis <document id>` | Generate a synopsis of the paper. |
46
+ | `/deep-dive [<list of document ids>] <query>` | Query me with a specific context. |
47
 
48
 
49
  <br>
 
73
  return (response, "dataframe")
74
 
75
 
76
def view_document_wrapper(inputs):
    """Display the raw page content of a processed document.

    Parameters
    ----------
    inputs : str
        A document id previously assigned by ``/add-papers``.

    Returns
    -------
    tuple[str, str]
        ``(document text, "identity")`` — the formatting key consumed by ``boot``.

    Raises
    ------
    InvalidArgumentError
        If no document id is given or the id is unknown.
    """
    # Validate like the sibling wrappers (e.g. chain_of_density_wrapper)
    # instead of letting a raw KeyError escape to the UI.
    if inputs == "":
        raise InvalidArgumentError("Please provide a document id")
    if inputs not in st.session_state.documents:
        raise InvalidArgumentError(f"Unknown document id: {inputs}")
    response = st.session_state.documents[inputs].page_content
    st.session_state.messages.append((f"/view-doc {inputs}", response, "identity"))
    return (response, "identity")
80
+
81
+
82
  def calculate_cost_wrapper(inputs=None):
83
  try:
84
  stats_df = pd.DataFrame(st.session_state.costing)
 
131
  def query_llm(inputs, relevant_docs):
132
  with get_openai_callback() as cb:
133
  response = (
134
+ qa_chain(ChatOpenAI(model=st.session_state.model, temperature=0))
135
  .invoke({"context": format_docs(relevant_docs), "question": inputs})
136
  .content
137
  )
 
183
  if inputs == "":
184
  raise InvalidArgumentError("Please provide a document id")
185
  document = st.session_state.documents[inputs].page_content
186
+ llm = ChatOpenAI(model=st.session_state.model, temperature=0)
187
  with get_openai_callback() as cb:
188
  summary = chain_of_density_chain(llm).invoke({"paper": document})
189
  stats = cb
 
202
  if inputs == "":
203
  raise InvalidArgumentError("Please provide a document id")
204
  document = st.session_state.documents[inputs].page_content
205
+ llm = ChatOpenAI(model=st.session_state.model, temperature=0)
206
  with get_openai_callback() as cb:
207
  summary = synopsis_chain(llm).invoke({"paper": document})
208
  stats = cb
 
221
  if inputs == "":
222
  raise InvalidArgumentError("Please provide a document id")
223
  document = "\n\n".join([st.session_state.documents[c].page_content for c in inputs])
224
+ llm = ChatOpenAI(model=st.session_state.model, temperature=0)
225
  with get_openai_callback() as cb:
226
  insights = insights_bullet_chain(llm).invoke({"paper": document})
227
  stats = cb
 
240
  if inputs == "":
241
  raise InvalidArgumentError("Please provide a document id")
242
  document = st.session_state.documents[inputs].page_content
243
+ llm = ChatOpenAI(model=st.session_state.model, temperature=0)
244
  auto_qa_conversation = []
245
  with get_openai_callback() as cb:
246
  auto_qa_response = auto_qa_chain(llm).invoke({"paper": document})
 
297
  st.error(e)
298
 
299
 
300
def configure_openai_wrapper(inputs):
    """Configure the OpenAI API key and model for this session.

    Parameters
    ----------
    inputs : str
        Raw argument string after ``/configure``, e.g.
        ``--key sk-... --model gpt-4-turbo-preview``.

    Returns
    -------
    tuple[str, str]
        ``(confirmation text, "identity")``.
    """
    args = openai_parser.parse_args(inputs.split())
    # Only overwrite the key when one was supplied; os.environ rejects
    # a None value with a TypeError.
    if args.key:
        os.environ["OPENAI_API_KEY"] = args.key
    st.session_state.model = args.model
    # SECURITY: never echo the raw API key back into the chat history —
    # str(args) would display it on screen and leak it into /export output.
    confirmation = (
        f"Configured model={args.model!r}, "
        f"api key {'updated' if args.key else 'unchanged'}"
    )
    st.session_state.messages.append(("/configure", confirmation, "identity"))
    return (confirmation, "identity")
306
+
307
+
308
  if __name__ == "__main__":
309
  all_commands = [
310
+ ("/configure", str, configure_openai_wrapper),
311
  ("/add-papers", list, process_documents_wrapper),
312
  ("/library", None, index_documents_wrapper),
313
+ ("/view-doc", str, view_document_wrapper),
314
  ("/session-expense", None, calculate_cost_wrapper),
315
  ("/export", None, download_conversation_wrapper),
316
  ("/help-me", None, lambda x: (welcome_message, "identity")),
chat_chains.py CHANGED
@@ -32,7 +32,6 @@ By following these guidelines, you ensure that users receive valuable, accurate,
32
  qa_prompt = ChatPromptTemplate.from_messages(
33
  [
34
  ("system", qa_system_prompt),
35
- # MessagesPlaceholder(variable_name="chat_history"),
36
  ("human", "{question}"),
37
  ]
38
  )
@@ -60,33 +59,40 @@ qa_chain = lambda llm: (
60
 
61
  def parse_model_response(input_string):
62
  parsed_data = {"answer": "", "citations": []}
63
- xml_matches = re.findall(r"<citations>.*?</citations>", input_string, re.DOTALL)
64
- if not xml_matches:
65
- parsed_data["answer"] = input_string
66
- return parsed_data
67
-
68
- outside_text_parts = []
69
- last_end_pos = 0
70
-
71
- for xml_string in xml_matches:
72
- match = re.search(re.escape(xml_string), input_string[last_end_pos:], re.DOTALL)
73
-
74
- if match:
75
- outside_text_parts.append(
76
- input_string[last_end_pos : match.start() + last_end_pos]
77
  )
78
- last_end_pos += match.end()
79
 
80
- root = ET.fromstring(xml_string)
 
 
 
 
 
 
81
 
82
- for citation in root.findall("citation"):
83
- source_id = citation.find("source_id").text
84
- quote = citation.find("quote").text
85
- parsed_data["citations"].append({"source_id": source_id, "quote": quote})
 
 
86
 
87
- outside_text_parts.append(input_string[last_end_pos:])
88
 
89
- parsed_data["answer"] = "".join(outside_text_parts)
 
 
90
 
91
  return parsed_data
92
 
 
32
# Prompt for grounded Q&A: the system instructions followed by the
# user's question (context is injected separately by the chain).
qa_prompt = ChatPromptTemplate.from_messages(
    [("system", qa_system_prompt), ("human", "{question}")]
)
 
59
 
60
def parse_model_response(input_string):
    """Split a model response into free text and structured citations.

    The model may embed zero or more ``<citations>...</citations>`` XML
    fragments; each holds ``<citation>`` elements with ``<source_id>``
    and ``<quote>`` children.

    Parameters
    ----------
    input_string : str
        Raw text returned by the model.

    Returns
    -------
    dict
        ``{"answer": str, "citations": [{"source_id": ..., "quote": ...}]}``.
        On any parse failure the whole input becomes the answer and the
        citations list is empty.
    """
    parsed_data = {"answer": "", "citations": []}
    try:
        # finditer gives match positions directly — no need to re-search
        # each escaped match string as the previous implementation did.
        matches = list(
            re.finditer(r"<citations>.*?</citations>", input_string, re.DOTALL)
        )
        if not matches:
            parsed_data["answer"] = input_string
            return parsed_data

        outside_text_parts = []
        last_end_pos = 0
        for match in matches:
            # Text between citation blocks is part of the answer.
            outside_text_parts.append(input_string[last_end_pos : match.start()])
            last_end_pos = match.end()

            root = ET.fromstring(match.group(0))
            for citation in root.findall("citation"):
                parsed_data["citations"].append(
                    {
                        "source_id": citation.find("source_id").text,
                        "quote": citation.find("quote").text,
                    }
                )

        outside_text_parts.append(input_string[last_end_pos:])
        parsed_data["answer"] = "".join(outside_text_parts)
    except Exception:
        # Malformed XML: fall back to the raw text, and discard any
        # partially collected citations so the result stays consistent
        # (the old code kept partial citations alongside the full answer).
        parsed_data["answer"] = input_string
        parsed_data["citations"] = []

    return parsed_data
98
 
openai_configuration.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
"""Argument parser for the `/configure` chat command.

Parses strings like ``--key sk-... --model gpt-4-turbo-preview`` into a
namespace with ``key`` and ``model`` attributes.
"""

import argparse

openai_parser = argparse.ArgumentParser(
    description="OpenAI Configuration",
    # Raise argparse.ArgumentError instead of calling sys.exit() on bad
    # input (Python 3.9+): exiting would terminate the hosting Streamlit
    # app instead of surfacing the error in the chat UI.
    exit_on_error=False,
)
openai_parser.add_argument("--key", type=str, help="OpenAI API Key")
openai_parser.add_argument(
    "--model", type=str, help="OpenAI Model", default="gpt-4-turbo-preview"
)