fahmiaziz98 committed
Commit 0a8b0d4 · 1 Parent(s): 26934b9
apps/agent/constant.py CHANGED
@@ -17,8 +17,8 @@ PROMPT = ChatPromptTemplate.from_messages(
     [
         (
             "system",
-            "You are a knowledgeable instructor. Your job is to help students learn a tool, the data for which is retrieved from a documentation site"
-            "Answer questions directly and clearly, as if you were explaining to a student who needs precise and structured guidance."
+            "You are a knowledgeable instructor. Your job is to help students learn Xano and WeWeb, the data for which is retrieved from a documentation site. "
+            "Answer questions directly and clearly, as if you were explaining to students who need precise and structured guidance. Also add website sources. "
             "If the answer doesn't fit the given context, just say I don't have the information for that."
         ),
         ("placeholder", "{messages}")
apps/agent/graph.py CHANGED
@@ -7,7 +7,7 @@ from langchain_core.messages import AIMessage, ToolMessage
 from langgraph.prebuilt import tools_condition
 
 from langchain_groq import ChatGroq
-from apps.agent.tools import tool_weweb, tool_xano  # if running the API, use this -> app.agent.tools
+from apps.agent.tools import tool_weweb, tool_xano
 from apps.agent.state import State, RequestAssistance
 from apps.agent.constant import PROMPT
 
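For context, a hypothetical sketch of how the re-enabled tool imports are typically wired into a LangGraph tool-calling loop. This hunk only shows the import change, so the node names, the shape of State, and the use of GROQ_API_KEY/MODEL_GROQ below are assumptions, and the real graph.py also handles RequestAssistance, which the sketch omits.

from langgraph.graph import StateGraph, START
from langgraph.prebuilt import ToolNode, tools_condition
from langchain_groq import ChatGroq

from apps.agent.tools import tool_weweb, tool_xano
from apps.agent.state import State
from apps.agent.constant import PROMPT, GROQ_API_KEY, MODEL_GROQ

llm = ChatGroq(model=MODEL_GROQ, groq_api_key=GROQ_API_KEY)
llm_with_tools = llm.bind_tools([tool_xano, tool_weweb])
chatbot_chain = PROMPT | llm_with_tools

def chatbot(state: State):
    # Assumes State carries a "messages" list, as LangGraph chat agents usually do.
    return {"messages": [chatbot_chain.invoke({"messages": state["messages"]})]}

graph_builder = StateGraph(State)
graph_builder.add_node("chatbot", chatbot)
graph_builder.add_node("tools", ToolNode([tool_xano, tool_weweb]))
graph_builder.add_edge(START, "chatbot")
graph_builder.add_conditional_edges("chatbot", tools_condition)  # route to "tools" on tool calls, else end
graph_builder.add_edge("tools", "chatbot")
graph = graph_builder.compile()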
 
apps/agent/multi_query_chain.py ADDED
@@ -0,0 +1,39 @@
+from typing import List
+
+from langchain_core.output_parsers import BaseOutputParser
+from langchain_core.prompts import PromptTemplate
+from pydantic import BaseModel, Field
+from langchain_groq import ChatGroq
+from apps.agent.constant import GROQ_API_KEY, MODEL_GROQ
+
+# Output parser will split the LLM result into a list of queries
+class LineListOutputParser(BaseOutputParser[List[str]]):
+    """Output parser for a list of lines."""
+
+    def parse(self, text: str) -> List[str]:
+        lines = text.strip().split("\n")
+        return list(filter(None, lines))  # Remove empty lines
+
+output_parser = LineListOutputParser()
+llm = ChatGroq(model=MODEL_GROQ, groq_api_key=GROQ_API_KEY, temperature=0.1)
+
+template = """
+Your task is to generate 3 different search queries that aim to
+answer the user question from multiple perspectives. The user questions
+are focused on Large Language Models, Machine Learning, and related
+disciplines.
+Each query MUST tackle the question from a different viewpoint, so we
+get a variety of RELEVANT search results.
+Provide these alternative questions separated by newlines.
+GENERATE ONLY THE QUERIES! Do not add any explanation or extra words.
+Original question: {question}
+"""
+
+QUERY_PROMPT = PromptTemplate(
+    input_variables=["question"],
+    template=template,
+)
+# llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
+
+# Chain: prompt -> Groq LLM -> line-list output parser
+llm_chain = QUERY_PROMPT | llm | output_parser
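A small, illustrative sanity check for the new chain (not in the commit). The question and the sample output are invented, and GROQ_API_KEY must be set for the call to succeed.

from apps.agent.multi_query_chain import llm_chain

# PromptTemplate -> ChatGroq -> LineListOutputParser, so the result is a
# plain list of alternative search queries, one per non-empty output line.
queries = llm_chain.invoke({"question": "How do I secure an API endpoint in Xano?"})
print(queries)
# e.g. (illustrative): ["Xano API endpoint authentication options",
#                       "How to require an auth token on Xano endpoints",
#                       "Restricting access to a Xano API group"]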
apps/agent/tools.py CHANGED
@@ -4,34 +4,39 @@ from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
 from langchain.retrievers import ContextualCompressionRetriever
 from langchain.retrievers.document_compressors import FlashrankRerank
 from langchain_core.tools import tool
+from langchain.retrievers.multi_query import MultiQueryRetriever
 
+from apps.agent.multi_query_chain import llm_chain
 from apps.agent.constant import INDEX_NAME_WEWEB, INDEX_NAME_XANO
 
-# os.environ["PINECONE_API_KEY"] = "a526d62f-ccca-40d6-859b-3d878c8d288b"
 
-embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")
-compressor = FlashrankRerank()
+embeddings = FastEmbedEmbeddings(model_name="jinaai/jina-embeddings-v2-small-en")
+compressor = FlashrankRerank(model="ms-marco-MiniLM-L-12-v2")
 
-def create_compressed_retriever(index_name: str, embeddings, compressor) -> ContextualCompressionRetriever:
+def multiquery_retriever(index_name: str, embeddings, compressor) -> MultiQueryRetriever:
     vectorstore = Pinecone.from_existing_index(embedding=embeddings, index_name=index_name)
     retriever = vectorstore.as_retriever()
-    return ContextualCompressionRetriever(base_compressor=compressor, base_retriever=retriever)
+    reranker_retriever = ContextualCompressionRetriever(
+        base_compressor=compressor, base_retriever=retriever
+    )
+    return MultiQueryRetriever(
+        retriever=reranker_retriever, llm_chain=llm_chain, parser_key="lines"
+    )
 
-reranker_xano = create_compressed_retriever(INDEX_NAME_XANO, embeddings, compressor)
-reranker_weweb = create_compressed_retriever(INDEX_NAME_WEWEB, embeddings, compressor)
+retriever_xano = multiquery_retriever(INDEX_NAME_XANO, embeddings, compressor)
+retriever_weweb = multiquery_retriever(INDEX_NAME_WEWEB, embeddings, compressor)
 
 @tool
 def tool_xano(query: str):
     """
     Searches and returns excerpts from the Xano documentation
     """
-    docs = reranker_xano.invoke(query)
-    return "\n\n".join([doc["page_content"] for doc in docs])
+    return retriever_xano.get_relevant_documents(query)
+
 
 @tool
 def tool_weweb(query: str):
     """
     Searches and returns excerpts from the Weweb documentation
     """
-    docs = reranker_weweb.invoke(query)
-    return "\n\n".join([doc["page_content"] for doc in docs])
+    return retriever_weweb.get_relevant_documents(query)
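A rough usage sketch for the reworked tools (not part of the commit), assuming PINECONE_API_KEY is set in the environment and the two Pinecone indexes behind INDEX_NAME_XANO and INDEX_NAME_WEWEB already exist. The query string is illustrative.

from apps.agent.tools import tool_xano

docs = tool_xano.invoke("How do I paginate query results in Xano?")
# Unlike the previous version, which joined page_content strings, the tool now
# returns the retriever's Document objects directly.
for doc in docs:
    print(doc.metadata.get("source", ""), doc.page_content[:200])

If the agent ends up needing plain text as tool output, the old "\n\n".join(...) step can be layered back on top of the new retrievers, using doc.page_content attribute access rather than the old dict-style doc["page_content"].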