Commit 0a8b0d4 · committed by fahmiaziz98 · Parent(s): 26934b9

init

Files changed:
- apps/agent/constant.py +2 -2
- apps/agent/graph.py +1 -1
- apps/agent/multi_query_chain.py +39 -0
- apps/agent/tools.py +16 -11
apps/agent/constant.py
CHANGED
@@ -17,8 +17,8 @@ PROMPT = ChatPromptTemplate.from_messages(
     [
         (
             "system",
-            "You are a knowledgeable instructor. Your job is to help students learn
-            "Answer questions directly and clearly, as if you were explaining to
+            "You are a knowledgeable instructor. Your job is to help students learn Xano and WeWeb, the data for which is retrieved from a documentation site"
+            "Answer questions directly and clearly, as if you were explaining to students who need precise and structured guidance. Also add website sources."
             "If the answer doesn't fit the given context, just say I don't have the information for that."
         ),
         ("placeholder", "{messages}")
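For context, the two edited lines are adjacent string literals inside ChatPromptTemplate.from_messages, so Python concatenates them into a single system message (note the committed literals have no separating space between sentences), and the ("placeholder", "{messages}") entry is where the running conversation is injected. A minimal sketch of rendering such a prompt; the shortened system string and the sample question below are illustrative, not part of the commit:

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage

# Same structure as PROMPT in apps/agent/constant.py (system text shortened here).
PROMPT = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a knowledgeable instructor for Xano and WeWeb. "
            "If the answer doesn't fit the given context, just say I don't have the information for that.",
        ),
        ("placeholder", "{messages}"),  # expands to the conversation history at render time
    ]
)

# Render the template with a sample history (illustrative question).
prompt_value = PROMPT.invoke(
    {"messages": [HumanMessage(content="How do I connect WeWeb to a Xano backend?")]}
)
print(prompt_value.to_messages())  # [SystemMessage(...), HumanMessage(...)]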
apps/agent/graph.py
CHANGED
@@ -7,7 +7,7 @@ from langchain_core.messages import AIMessage, ToolMessage
 from langgraph.prebuilt import tools_condition
 
 from langchain_groq import ChatGroq
-from apps.agent.tools import tool_weweb, tool_xano
+from apps.agent.tools import tool_weweb, tool_xano
 from apps.agent.state import State, RequestAssistance
 from apps.agent.constant import PROMPT
 
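The removed and re-added import is textually identical here, so this hunk is most likely a whitespace-only touch. For readers trying to place tool_weweb and tool_xano, a common way such tools and the imported tools_condition get wired into a LangGraph graph is sketched below; this is an assumption about the shape of apps/agent/graph.py (node names, the chatbot function, and the State["messages"] key are illustrative), not the file's actual contents:

from langgraph.graph import StateGraph, START
from langgraph.prebuilt import ToolNode, tools_condition
from langchain_groq import ChatGroq

from apps.agent.tools import tool_weweb, tool_xano
from apps.agent.state import State
from apps.agent.constant import PROMPT, GROQ_API_KEY, MODEL_GROQ

# Bind both documentation tools to the chat model and prepend the system prompt.
llm = ChatGroq(model=MODEL_GROQ, groq_api_key=GROQ_API_KEY)
assistant = PROMPT | llm.bind_tools([tool_xano, tool_weweb])

def chatbot(state: State):
    # Assumes State carries the running message list under "messages".
    return {"messages": [assistant.invoke({"messages": state["messages"]})]}

builder = StateGraph(State)
builder.add_node("chatbot", chatbot)
builder.add_node("tools", ToolNode([tool_xano, tool_weweb]))
builder.add_edge(START, "chatbot")
# Route to the tool node whenever the model emits tool calls, otherwise end.
builder.add_conditional_edges("chatbot", tools_condition)
builder.add_edge("tools", "chatbot")
graph = builder.compile()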
apps/agent/multi_query_chain.py
ADDED
@@ -0,0 +1,39 @@
+from typing import List
+
+from langchain_core.output_parsers import BaseOutputParser
+from langchain_core.prompts import PromptTemplate
+from pydantic import BaseModel, Field
+from langchain_groq import ChatGroq
+from apps.agent.constant import GROQ_API_KEY, MODEL_GROQ
+
+# Output parser will split the LLM result into a list of queries
+class LineListOutputParser(BaseOutputParser[List[str]]):
+    """Output parser for a list of lines."""
+
+    def parse(self, text: str) -> List[str]:
+        lines = text.strip().split("\n")
+        return list(filter(None, lines))  # Remove empty lines
+
+output_parser = LineListOutputParser()
+llm = ChatGroq(model=MODEL_GROQ, groq_api_key=GROQ_API_KEY, temperature=0.1)
+
+template = """
+Your task is to generate 3 different search queries that aim to
+answer the user question from multiple perspectives. The user questions
+are focused on Large Language Models, Machine Learning, and related
+disciplines.
+Each query MUST tackle the question from a different viewpoint, we
+want to get a variety of RELEVANT search results.
+Provide these alternative questions separated by newlines.
+GENERATE ONLY QUERY! dont add explanation and word
+Original question: {question}
+"""
+
+QUERY_PROMPT = PromptTemplate(
+    input_variables=["question"],
+    template=template,
+)
+# llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
+
+# Chain
+llm_chain = QUERY_PROMPT | llm | output_parser
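The new module composes a small runnable chain: QUERY_PROMPT formats the user question, the ChatGroq model proposes three alternative phrasings, and LineListOutputParser splits the response into a list of non-empty lines. A minimal usage sketch, with an illustrative question and output:

from apps.agent.multi_query_chain import llm_chain

# The chain expects the "question" variable declared by QUERY_PROMPT.
queries = llm_chain.invoke(
    {"question": "How do I paginate records returned by a Xano API endpoint?"}
)

# LineListOutputParser drops blank lines, so this is a plain list of query strings,
# e.g. ["What pagination options does Xano offer?", "...", "..."] (illustrative output).
print(queries)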
apps/agent/tools.py
CHANGED
@@ -4,34 +4,39 @@ from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
 from langchain.retrievers import ContextualCompressionRetriever
 from langchain.retrievers.document_compressors import FlashrankRerank
 from langchain_core.tools import tool
+from langchain.retrievers.multi_query import MultiQueryRetriever
 
+from apps.agent.multi_query_chain import llm_chain
 from apps.agent.constant import INDEX_NAME_WEWEB, INDEX_NAME_XANO
 
-# os.environ["PINECONE_API_KEY"] = "a526d62f-ccca-40d6-859b-3d878c8d288b"
 
-embeddings = FastEmbedEmbeddings(model_name="
-compressor = FlashrankRerank()
+embeddings = FastEmbedEmbeddings(model_name="jinaai/jina-embeddings-v2-small-en")
+compressor = FlashrankRerank(model="ms-marco-MiniLM-L-12-v2")
 
-def
+def multiquery_retriever(index_name: str, embeddings, compressor) -> ContextualCompressionRetriever:
     vectorstore = Pinecone.from_existing_index(embedding=embeddings, index_name=index_name)
     retriever = vectorstore.as_retriever()
-
+    reranker_retriever = ContextualCompressionRetriever(
+        base_compressor=compressor, base_retriever=retriever
+    )
+    return MultiQueryRetriever(
+        retriever=reranker_retriever, llm_chain=llm_chain, parser_key="lines"
+    )
 
-
-
+retriever_xano = multiquery_retriever(INDEX_NAME_XANO, embeddings, compressor)
+retriever_weweb = multiquery_retriever(INDEX_NAME_WEWEB, embeddings, compressor)
 
 @tool
 def tool_xano(query: str):
     """
     Searches and returns excerpts from the Xano documentation
     """
-
-
+    return retriever_xano.get_relevant_documents(query)
+
 
 @tool
 def tool_weweb(query: str):
     """
     Searches and returns excerpts from the Weweb documentation
     """
-
-    return "\n\n".join([doc["page_content"] for doc in docs])
+    return retriever_weweb.get_relevant_documents(query)
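After this change, each @tool wraps one Pinecone index behind multi-query expansion (via llm_chain) and Flashrank reranking. A quick smoke-test sketch of calling the tools directly; the queries are illustrative, and get_relevant_documents is the retriever method this commit uses, while newer LangChain releases favor retriever.invoke:

from apps.agent.tools import tool_xano, tool_weweb

# Tools are runnables, so they can be invoked directly with their arguments.
xano_docs = tool_xano.invoke({"query": "How do I add authentication to a Xano API group?"})
weweb_docs = tool_weweb.invoke({"query": "How do I bind a collection to a WeWeb data grid?"})

# Each call returns the documents retrieved and reranked across the generated query variants.
print(len(xano_docs), len(weweb_docs))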