Wintersmith committed on
Commit
b5836a7
·
verified ·
1 Parent(s): 6a83a88

Upload 4 files

Browse files
Files changed (4) hide show
  1. .env +1 -0
  2. app.py +48 -0
  3. main_class.py +94 -0
  4. requirements.txt +13 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
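+ OPENAI_API_KEY="<your-openai-api-key>"  # Never commit a real key: the key previously published here is exposed and must be revoked; supply the real value via an untracked .env or deployment secrets.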
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from main_class import PDFChatBot
5
+
6
# Load variables from a local .env file (if any) before reading configuration.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    # Fail fast with a clear message instead of passing None on to the chatbot.
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Define it in the environment or in a local .env file."
    )

# NOTE: a single PDFChatBot instance is created here and used by every event
# handler registered below.
pdf_chatbot = PDFChatBot(api_key)

with gr.Blocks(title="RAG chatbot", theme="Monochrome") as demo:

    def upload_file(file):
        """Return the uploaded file unchanged so the File component displays it."""
        return file

    gr.Markdown(
        """
        # Retrieval Augmented Generation app

        Use Langchain´s OpenAI agent with retrieval tool with a memory to chat with your pdf document.
        """
    )

    with gr.Column():

        with gr.Row():
            chat_history = gr.Chatbot(value=[], elem_id='chatbot', height=680)

        with gr.Row():

            with gr.Column(scale=1):
                file_output = gr.File()
                uploaded_pdf = gr.UploadButton("📁 Upload PDF", file_types=[".pdf"])
                uploaded_pdf.upload(upload_file, inputs=uploaded_pdf, outputs=file_output)

            with gr.Column(scale=2):
                text_input = gr.Textbox(
                    show_label=False,
                    placeholder="Type here to ask your PDF",
                    container=False)

            with gr.Column(scale=1):
                submit_button = gr.Button('Send')
                # First echo the user's message into the chat window, then (only if
                # that step succeeded) ask the agent and fill in the answer.
                submit_button.click(
                    pdf_chatbot.add_text,
                    inputs=[chat_history, text_input],
                    outputs=[chat_history],
                    queue=False,
                ).success(
                    pdf_chatbot.generate_response,
                    inputs=[chat_history, text_input, uploaded_pdf],
                    outputs=[chat_history, text_input],
                )

if __name__ == '__main__':
    # Login credentials come from the environment so they are never stored in
    # the repository. Refuse to start without them rather than launching an
    # unprotected app.
    auth_user = os.getenv("APP_USERNAME")
    auth_password = os.getenv("APP_PASSWORD")
    if not (auth_user and auth_password):
        raise RuntimeError(
            "APP_USERNAME and APP_PASSWORD must be set to protect the app with a login."
        )
    demo.queue()
    # The API key is read from .env, so that file must never be served.
    demo.launch(blocked_paths=[".env", "api_key.txt"], auth=(auth_user, auth_password))
main_class.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import PyPDFLoader
2
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
3
+ from langchain_openai import OpenAIEmbeddings
4
+ from langchain_community.vectorstores import FAISS
5
+ from langchain_openai import ChatOpenAI
6
+ from langchain.retrievers import ContextualCompressionRetriever
7
+ from langchain.retrievers.document_compressors import LLMChainExtractor
8
+ from langchain.tools.retriever import create_retriever_tool
9
+ from langchain import hub
10
+ from langchain.agents import AgentExecutor, create_openai_tools_agent
11
+ import os
12
+ import gradio as gr
13
+
14
+ # The Agent retriever is based on: https://python.langchain.com/docs/use_cases/question_answering/conversational_retrieval_agents?ref=blog.langchain.dev
15
+ # The chat history is based on: https://python.langchain.com/docs/use_cases/question_answering/chat_history
16
+ # Inspired by https://github.com/Niez-Gharbi/PDF-RAG-with-Llama2-and-Gradio/tree/master
17
+ # Inspired by https://github.com/mirabdullahyaser/Retrieval-Augmented-Generation-Engine-with-LangChain-and-Streamlit/tree/master
18
+
19
+ # Module-level read of the key. PDFChatBot does not use this variable (it takes the key as a constructor argument),
+ # and when this module is imported from app.py the read happens before app.py calls load_dotenv().
+ api_key = os.getenv("OPENAI_API_KEY")
20
+
21
class PDFChatBot:
    """Chat with an uploaded PDF through a LangChain OpenAI tools agent.

    The document is loaded, split into chunks, embedded into a FAISS vector
    store and exposed to the agent as a retriever tool. The agent is built
    lazily on the first question and rebuilt when a different file is passed.
    """

    def __init__(self, api_key, model_name="gpt-3.5-turbo-0125"):
        """Store the API key and create the chat model.

        Args:
            api_key: OpenAI API key used for the chat model and the embeddings.
            model_name: Name of the OpenAI chat model to use.
        """
        self.processed = False
        # Identifies the document the current agent was built for.
        self.processed_path = None
        self.final_agent = None
        self.chat_history = []
        self.api_key = api_key
        self.llm = ChatOpenAI(openai_api_key=self.api_key, temperature=0, model_name=model_name)

    def add_text(self, history, text):
        """Append the user's message to the Gradio chat history.

        Only needed for the Gradio front end.

        Args:
            history: Chat history shown in the UI; modified in place.
            text: Message typed by the user.

        Returns:
            The same history list with a new (text, '') entry appended.

        Raises:
            gr.Error: If the message is empty or contains only whitespace.
        """
        if not text or not text.strip():
            raise gr.Error("Please enter text.")
        history.append((text, ''))
        return history

    def load_document(self, file_name):
        """Load a PDF with PyPDFLoader and return the loaded documents."""
        loader = PyPDFLoader(file_name)
        return loader.load()

    def split_documents(self, raw_document):
        """Split loaded documents into overlapping chunks of about 1000 characters."""
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100,
            length_function=len,
            is_separator_regex=False,
            separators=["\n\n", "\n", " ", ""],
        )
        return text_splitter.split_documents(raw_document)

    def create_retriever(self, chunks, use_compression=False):
        """Embed the chunks into a FAISS vector store and return a retriever.

        Args:
            chunks: Document chunks to index.
            use_compression: When True, wrap the vector-store retriever in a
                ContextualCompressionRetriever that uses the chat model to
                extract the relevant parts of each hit. Defaults to False,
                which returns the plain vector-store retriever.

        Returns:
            The retriever to hand to the agent's retrieval tool.
        """
        embedding_func = OpenAIEmbeddings(openai_api_key=self.api_key)
        vectorstore = FAISS.from_documents(chunks, embedding_func)
        basic_retriever = vectorstore.as_retriever()
        if not use_compression:
            return basic_retriever
        # Build the compressor only when it is actually going to be used.
        compressor = LLMChainExtractor.from_llm(self.llm)
        return ContextualCompressionRetriever(base_compressor=compressor,
                                              base_retriever=basic_retriever)

    def create_agent(self, retriever):
        """Build the agent executor around a retriever tool and store it on the instance."""
        tool = create_retriever_tool(retriever,
                                     "search_document",
                                     "Searches and returns excerpts from the provided document.")
        tools = [tool]
        prompt = hub.pull("hwchase17/openai-tools-agent")
        agent = create_openai_tools_agent(self.llm, tools, prompt)
        self.final_agent = AgentExecutor(agent=agent, tools=tools)

    def process_file(self, file_name):
        """Run the full pipeline for one PDF: load, split, index, build the agent."""
        documents = self.load_document(file_name)
        chunks = self.split_documents(documents)
        retriever = self.create_retriever(chunks)
        self.create_agent(retriever)
        print("Files successfully processed")

    def generate_response(self, history, query, path):
        """Answer a question about the uploaded PDF and record the exchange.

        Args:
            history: Gradio chat history whose last entry holds the pending
                user message; that entry receives the answer.
            query: The user's question.
            path: The uploaded PDF as provided by the upload component.

        Returns:
            A tuple of the updated history and an empty string, which clears
            the input textbox.

        Raises:
            gr.Error: If no PDF has been uploaded yet.
        """
        if path is None:
            raise gr.Error("Please upload a PDF first.")
        # Compare documents by path; fall back to a `.name` attribute in case a
        # file wrapper is passed instead of a plain string.
        path_key = getattr(path, "name", path)
        if not self.processed or path_key != self.processed_path:
            self.process_file(path)
            self.processed = True
            self.processed_path = path_key
        result = self.final_agent.invoke({'input': query, 'chat_history': self.chat_history})['output']
        # Store role-tagged pairs so the prompt can tell user turns from model
        # turns; bare strings would lose the roles.
        self.chat_history.extend([("human", query), ("ai", result)])
        # Rebuild the last entry instead of mutating it, because it may be an
        # immutable tuple (that is what add_text appends).
        user_message, bot_message = history[-1]
        history[-1] = [user_message, (bot_message or "") + result]
        return history, ""
94
+
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ langchain
3
+ langchain-core
4
+ langchain-community
5
+ langchain-openai
6
+ langchain-text-splitters
7
+ langchainhub
8
+ gradio
9
+ gradio-client
10
+ faiss-cpu
11
+ openai
12
+ pypdf
13
+ python-dotenv