tdecae committed on
Commit
5ddd792
·
1 Parent(s): 553cbf6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -30
app.py CHANGED
@@ -8,47 +8,74 @@ from langchain.embeddings import OpenAIEmbeddings
8
  from langchain.indexes import VectorstoreIndexCreator
9
  from langchain.indexes.vectorstore import VectorStoreIndexWrapper
10
  from langchain.llms import OpenAI
 
11
 
12
  __import__('pysqlite3')
13
  import sys
14
  sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
15
 
16
  from langchain.vectorstores import Chroma
 
17
 
18
  os.environ["OPENAI_API_KEY"] = os.getenv("OPENAPIKEY")
19
 
20
- # Enable to save to disk & reuse the model (for repeated queries on the same data)
21
- PERSIST = False
22
-
23
- query = None
24
- if len(sys.argv) > 1:
25
- query = sys.argv[1]
26
-
27
- if PERSIST and os.path.exists("persist"):
28
- print("Reusing index...\n")
29
- vectorstore = Chroma(persist_directory="persist", embedding_function=OpenAIEmbeddings())
30
- index = VectorStoreIndexWrapper(vectorstore=vectorstore)
31
- else:
32
- loader = TextLoader("input/input_data.txt") # Use this line if you only need data.txt
33
- # loader = DirectoryLoader("data/")
34
- if PERSIST:
35
- index = VectorstoreIndexCreator(vectorstore_kwargs={"persist_directory":"persist"}).from_loaders([loader])
36
- else:
37
- index = VectorstoreIndexCreator().from_loaders([loader])
 
 
 
 
38
 
39
  chain = ConversationalRetrievalChain.from_llm(
40
- llm=ChatOpenAI(model="gpt-3.5-turbo"),
41
- retriever=index.vectorstore.as_retriever(search_kwargs={"k": 1}),
 
 
42
  )
43
 
44
  chat_history = []
45
- while True:
46
- if not query:
47
- query = input("Prompt: ")
48
- if query in ['quit', 'q', 'exit']:
49
- sys.exit()
50
- result = chain({"question": query, "chat_history": chat_history})
51
- print(result['answer'])
52
-
53
- chat_history.append((query, result['answer']))
54
- query = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  from langchain.indexes import VectorstoreIndexCreator
9
  from langchain.indexes.vectorstore import VectorStoreIndexWrapper
10
  from langchain.llms import OpenAI
11
+ from langchain.text_splitter import CharacterTextSplitter
12
 
13
  __import__('pysqlite3')
14
  import sys
15
  sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
16
 
17
  from langchain.vectorstores import Chroma
18
+ import gradio as gr
19
 
20
  os.environ["OPENAI_API_KEY"] = os.getenv("OPENAPIKEY")
21
 
22
+ docs = []
23
+
24
+ for f in os.listdir("./"):
25
+ if f.endswith(".pdf"):
26
+ pdf_path = "./" + f
27
+ loader = PyPDFLoader(pdf_path)
28
+ docs.extend(loader.load())
29
+ elif f.endswith('.docx') or f.endswith('.doc'):
30
+ doc_path = "./" + f
31
+ loader = Docx2txtLoader(doc_path)
32
+ docs.extend(loader.load())
33
+ elif f.endswith('.txt'):
34
+ text_path = "./" + f
35
+ loader = TextLoader(text_path)
36
+ docs.extend(loader.load())
37
+
38
+ splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
39
+ docs = splitter.split_documents(docs)
40
+
41
+ # Convert the document chunks to embeddings and save them to the vector store
42
+ vectorstore = Chroma.from_documents(docs, embedding=OpenAIEmbeddings(), persist_directory="./data")
43
+ vectorstore.persist()
44
 
45
  chain = ConversationalRetrievalChain.from_llm(
46
+ ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo'),
47
+ retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
48
+ return_source_documents=True,
49
+ verbose=False
50
  )
51
 
52
  chat_history = []
53
+
54
+ with gr.Blocks() as demo:
55
+ chatbot = gr.Chatbot([("", "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment relaged questions such as my previous experience, where i'm eligible to work, when I can start work, my most recent experience, what NLP skills I have, and much more!")],avatar_images=["./input/avatar/Guest.jpg","./input/avatar/Thierry Picture.jpg"])
56
+ msg = gr.Textbox()
57
+ clear = gr.Button("Clear")
58
+ chat_history = []
59
+
60
def user(query, chat_history):
    """Answer ``query`` through the retrieval chain and record the exchange.

    Args:
        query: Text submitted from the message textbox.
        chat_history: Current chatbot value, a list of two-item message
            entries. It is mutated in place: the new (query, answer) pair
            is appended to it.

    Returns:
        A pair ``(textbox_update, chat_history)``: a ``gr.update`` that
        resets the textbox value to an empty string, and the extended
        history for the chatbot component.
    """
    # The chain is given the history as plain tuples built from the first
    # two items of every entry.
    prior_turns = [(turn[0], turn[1]) for turn in chat_history]

    response = chain({"question": query, "chat_history": prior_turns})

    # Record the new exchange on the same list object that was passed in.
    chat_history.append((query, response["answer"]))

    return gr.update(value=""), chat_history
77
+
78
+ msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
79
+ clear.click(lambda: None, None, chatbot, queue=False)
80
+
81
+ demo.launch(debug=True)