alijaanai committed on
Commit 7458375 (verified)
1 Parent(s): 0967530

Upload 6 files
config.json ADDED
@@ -0,0 +1 @@
+ {"GROQ_API_KEY": "<YOUR_GROQ_API_KEY>"}
data/Ali_Jaan_CV.pdf ADDED
Binary file (112 kB).
 
data/Business Plan for TriMatrix Technologies PVT Ltd.pdf ADDED
Binary file (449 kB).
 
main.py ADDED
@@ -0,0 +1,81 @@
+ import os
+ import json
+
+ import streamlit as st
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_chroma import Chroma
+ from langchain_groq import ChatGroq
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains import ConversationalRetrievalChain
+
+ working_dir = os.path.dirname(os.path.abspath(__file__))
+ with open(f"{working_dir}/config.json") as config_file:
+     config_data = json.load(config_file)
+ GROQ_API_KEY = config_data["GROQ_API_KEY"]
+ os.environ["GROQ_API_KEY"] = GROQ_API_KEY
+
+
+ def setup_vectorstore():
+     persist_directory = f"{working_dir}/vector_db_dir"
+     # Reuse the same embedding model that vectorize_documents.py used to build the index.
+     embeddings = HuggingFaceEmbeddings()
+     vectorstore = Chroma(persist_directory=persist_directory,
+                          embedding_function=embeddings)
+     return vectorstore
+
+
+ def chat_chain(vectorstore):
+     llm = ChatGroq(model="llama-3.1-70b-versatile",
+                    temperature=0)
+     retriever = vectorstore.as_retriever()
+     memory = ConversationBufferMemory(
+         output_key="answer",
+         memory_key="chat_history",
+         return_messages=True
+     )
+     chain = ConversationalRetrievalChain.from_llm(
+         llm=llm,
+         retriever=retriever,
+         chain_type="stuff",
+         memory=memory,
+         verbose=True,
+         return_source_documents=True
+     )
+     return chain
+
+
+ st.set_page_config(
+     page_title="Multi Doc Chat",
+     page_icon="📚",
+     layout="centered"
+ )
+
+ st.title("📚 Multi Documents Chatbot")
+
+ if "chat_history" not in st.session_state:
+     st.session_state.chat_history = []
+
+ if "vectorstore" not in st.session_state:
+     st.session_state.vectorstore = setup_vectorstore()
+
+ if "conversational_chain" not in st.session_state:
+     st.session_state.conversational_chain = chat_chain(st.session_state.vectorstore)
+
+ # Replay the conversation so far on each Streamlit rerun.
+ for message in st.session_state.chat_history:
+     with st.chat_message(message["role"]):
+         st.markdown(message["content"])
+
+ user_input = st.chat_input("Ask AI...")
+
+ if user_input:
+     st.session_state.chat_history.append({"role": "user", "content": user_input})
+
+     with st.chat_message("user"):
+         st.markdown(user_input)
+
+     with st.chat_message("assistant"):
+         response = st.session_state.conversational_chain.invoke({"question": user_input})
+         assistant_response = response["answer"]
+         st.markdown(assistant_response)
+         st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
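ConversationalRetrievalChain first condenses the running chat history and the new question into a standalone query, retrieves matching chunks from the Chroma store, and then answers with the "stuff" prompt that packs all retrieved chunks into one context. The returned dict exposes the reply under "answer" (plus "source_documents", since return_source_documents=True). Once the vector_db_dir index exists (built by vectorize_documents.py below), the app starts with Streamlit's CLI:

streamlit run main.py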
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ streamlit==1.38.0
+ langchain-community==0.2.16
+ langchain-text-splitters==0.2.4
+ langchain-chroma==0.1.3
+ langchain-huggingface==0.0.3
+ langchain-groq==0.1.9
+ unstructured==0.15.0
+ unstructured[pdf]==0.15.0
+ nltk==3.8.1
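These pins all come from the LangChain 0.2.x line. Note that main.py also imports from the langchain package itself (langchain.memory, langchain.chains), which these pins appear to pull in only transitively; adding an explicit pin such as langchain==0.2.16 would make that dependency visible. Installing into a fresh virtual environment keeps the pins from clashing with other projects:

pip install -r requirements.txt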
vectorize_documents.py ADDED
@@ -0,0 +1,33 @@
+ from langchain_community.document_loaders import UnstructuredFileLoader
+ from langchain_community.document_loaders import DirectoryLoader
+ from langchain_text_splitters import CharacterTextSplitter
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_chroma import Chroma
+
+ # loading the embedding model
+ embeddings = HuggingFaceEmbeddings()
+
+
+ def main():
+     # Load every PDF under data/ with the unstructured parser.
+     loader = DirectoryLoader(path="data",
+                              glob="./*.pdf",
+                              loader_cls=UnstructuredFileLoader)
+     documents = loader.load()
+
+     text_splitter = CharacterTextSplitter(chunk_size=2000,
+                                           chunk_overlap=500)
+     text_chunks = text_splitter.split_documents(documents)
+
+     # Embed the chunks and persist them to the local Chroma index.
+     Chroma.from_documents(
+         documents=text_chunks,
+         embedding=embeddings,
+         persist_directory="vector_db_dir"
+     )
+     print("Documents Vectorized")
+
+
+ # Guard the pipeline so importing this module does not re-vectorize the PDFs.
+ if __name__ == "__main__":
+     main()