CosmoAI committed on
Commit
be40012
·
1 Parent(s): 2aa6002

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +161 -130
app.py CHANGED
@@ -1,165 +1,196 @@
1
- import streamlit as st
2
- from dotenv import load_dotenv
3
- from PyPDF2 import PdfReader
4
- from langchain.text_splitter import CharacterTextSplitter
5
- from langchain.embeddings import HuggingFaceEmbeddings
6
- from langchain.vectorstores import FAISS
7
- # from langchain.chat_models import ChatOpenAI
8
- from langchain.memory import ConversationBufferMemory
9
- from langchain.chains import ConversationalRetrievalChain
10
- from htmlTemplates import css, bot_template, user_template
11
- from langchain.llms import HuggingFaceHub
12
  import os
13
- # from transformers import T5Tokenizer, T5ForConditionalGeneration
14
- # from langchain.callbacks import get_openai_callback
15
-
16
- hub_token = os.environ["HUGGINGFACE_HUB_TOKEN"]
17
-
18
- def get_pdf_text(pdf_docs):
19
- text = ""
20
- for pdf in pdf_docs:
21
- pdf_reader = PdfReader(pdf)
22
- for page in pdf_reader.pages:
23
- text += page.extract_text()
24
- return text
25
 
 
 
26
 
27
- def get_text_chunks(text):
28
- text_splitter = CharacterTextSplitter(
29
- separator="\n",
30
- chunk_size=200,
31
- chunk_overlap=20,
32
- length_function=len
33
- )
34
- chunks = text_splitter.split_text(text)
35
- return chunks
36
 
 
 
 
 
 
37
 
38
- def get_vectorstore(text_chunks):
39
- # embeddings = OpenAIEmbeddings()
40
- # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
41
- embeddings = HuggingFaceEmbeddings()
42
- vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
43
- return vectorstore
44
 
 
 
 
45
 
46
- def get_conversation_chain(vectorstore):
47
- # llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k")
48
- # tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
49
- # model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
50
 
51
- llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-v0.1", huggingfacehub_api_token=hub_token, model_kwargs={"temperature":0.5, "max_length":20})
52
 
53
- memory = ConversationBufferMemory(
54
- memory_key='chat_history', return_messages=True)
55
- conversation_chain = ConversationalRetrievalChain.from_llm(
56
- llm=llm,
57
- retriever=vectorstore.as_retriever(),
58
- memory=memory
59
- )
60
- return conversation_chain
61
 
62
 
63
- def handle_userinput(user_question):
64
- response = st.session_state.conversation
65
- reply = response.run(user_question)
66
- st.write(reply)
67
- # st.session_state.chat_history = response['chat_history']
68
 
69
- # for i, message in enumerate(st.session_state.chat_history):
70
- # if i % 2 == 0:
71
- # st.write(user_template.replace(
72
- # "{{MSG}}", message.content), unsafe_allow_html=True)
73
- # else:
74
- # st.write(bot_template.replace(
75
- # "{{MSG}}", message.content), unsafe_allow_html=True)
76
 
77
 
78
- def main():
79
- load_dotenv()
80
- st.set_page_config(page_title="Chat with multiple PDFs",
81
- page_icon=":books:")
82
- st.write(css, unsafe_allow_html=True)
83
-
84
- if "conversation" not in st.session_state:
85
- st.session_state.conversation = None
86
- if "chat_history" not in st.session_state:
87
- st.session_state.chat_history = None
88
-
89
- st.header("Chat with multiple PDFs :books:")
90
- user_question = st.text_input("Ask a question about your documents:")
91
- if user_question:
92
- handle_userinput(user_question)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
- with st.sidebar:
95
- st.subheader("Your documents")
96
- pdf_docs = st.file_uploader(
97
- "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
98
- if st.button("Process"):
99
- if(len(pdf_docs) == 0):
100
- st.error("Please upload at least one PDF")
101
- else:
102
- with st.spinner("Processing"):
103
- # get pdf text
104
- raw_text = get_pdf_text(pdf_docs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
- # get the text chunks
107
- text_chunks = get_text_chunks(raw_text)
108
 
109
- # create vector store
110
- vectorstore = get_vectorstore(text_chunks)
 
111
 
112
- # create conversation chain
113
- st.session_state.conversation = get_conversation_chain(
114
- vectorstore)
115
-
116
- if __name__ == '__main__':
117
- main()
118
 
119
 
120
 
121
 
122
 
123
 
124
- # import os
125
- # import getpass
126
- # import streamlit as st
127
- # from langchain.document_loaders import PyPDFLoader
128
- # from langchain.text_splitter import RecursiveCharacterTextSplitter
129
- # from langchain.embeddings import HuggingFaceEmbeddings
130
- # from langchain.vectorstores import Chroma
131
- # from langchain import HuggingFaceHub
132
- # from langchain.chains import RetrievalQA
133
- # # __import__('pysqlite3')
134
- # # import sys
135
- # # sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
136
 
137
 
138
- # # load huggingface api key
139
- # hubtok = os.environ["HUGGINGFACE_HUB_TOKEN"]
140
 
141
- # # use streamlit file uploader to ask user for file
142
- # # file = st.file_uploader("Upload PDF")
143
 
144
 
145
- # path = "Geeta.pdf"
146
- # loader = PyPDFLoader(path)
147
- # pages = loader.load()
148
 
149
- # # st.write(pages)
150
 
151
- # splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
152
- # docs = splitter.split_documents(pages)
153
 
154
- # embeddings = HuggingFaceEmbeddings()
155
- # doc_search = Chroma.from_documents(docs, embeddings)
156
 
157
- # repo_id = "tiiuae/falcon-7b"
158
- # llm = HuggingFaceHub(repo_id=repo_id, huggingfacehub_api_token=hubtok, model_kwargs={'temperature': 0.2,'max_length': 1000})
159
 
160
- # from langchain.schema import retriever
161
- # retireval_chain = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=doc_search.as_retriever())
162
 
163
- # if query := st.chat_input("Enter a question: "):
164
- # with st.chat_message("assistant"):
165
- # st.write(retireval_chain.run(query))
 
1
+ import google.generativeai as palm
2
+ import streamlit as st
 
 
 
 
 
 
 
 
 
3
  import os
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ # Set your API key
6
+ palm.configure(api_key = os.environ['PALM_KEY'])
7
 
8
+ # Select the PaLM 2 model
9
+ model = 'models/text-bison-001'
 
 
 
 
 
 
 
10
 
11
+ # Generate text
12
+ if prompt := st.chat_input("Ask your query..."):
13
+ enprom = f"""Answer the below provided input in context to Bhagwad Geeta. Use the verses and chapters sentences as references to your answer with suggestions
14
+ coming from Bhagwad Geeta. Your answer to below input should only be in context to Bhagwad geeta only.\nInput= {prompt}"""
15
+ completion = palm.generate_text(model=model, prompt=enprom, temperature=0.5, max_output_tokens=800)
16
 
17
+ # response = palm.chat(messages=["Hello."])
18
+ # print(response.last) # 'Hello! What can I help you with?'
19
+ # response.reply("Can you tell me a joke?")
 
 
 
20
 
21
+ # Print the generated text
22
+ with st.chat_message("Assistant"):
23
+ st.write(completion.result)
24
 
 
 
 
 
25
 
 
26
 
 
 
 
 
 
 
 
 
27
 
28
 
 
 
 
 
 
29
 
 
 
 
 
 
 
 
30
 
31
 
32
+ # import streamlit as st
33
+ # from dotenv import load_dotenv
34
+ # from PyPDF2 import PdfReader
35
+ # from langchain.text_splitter import CharacterTextSplitter
36
+ # from langchain.embeddings import HuggingFaceEmbeddings
37
+ # from langchain.vectorstores import FAISS
38
+ # # from langchain.chat_models import ChatOpenAI
39
+ # from langchain.memory import ConversationBufferMemory
40
+ # from langchain.chains import ConversationalRetrievalChain
41
+ # from htmlTemplates import css, bot_template, user_template
42
+ # from langchain.llms import HuggingFaceHub
43
+ # import os
44
+ # # from transformers import T5Tokenizer, T5ForConditionalGeneration
45
+ # # from langchain.callbacks import get_openai_callback
46
+
47
+ # hub_token = os.environ["HUGGINGFACE_HUB_TOKEN"]
48
+
49
+ # def get_pdf_text(pdf_docs):
50
+ # text = ""
51
+ # for pdf in pdf_docs:
52
+ # pdf_reader = PdfReader(pdf)
53
+ # for page in pdf_reader.pages:
54
+ # text += page.extract_text()
55
+ # return text
56
+
57
+
58
+ # def get_text_chunks(text):
59
+ # text_splitter = CharacterTextSplitter(
60
+ # separator="\n",
61
+ # chunk_size=200,
62
+ # chunk_overlap=20,
63
+ # length_function=len
64
+ # )
65
+ # chunks = text_splitter.split_text(text)
66
+ # return chunks
67
+
68
+
69
+ # def get_vectorstore(text_chunks):
70
+ # # embeddings = OpenAIEmbeddings()
71
+ # # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
72
+ # embeddings = HuggingFaceEmbeddings()
73
+ # vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
74
+ # return vectorstore
75
+
76
+
77
+ # def get_conversation_chain(vectorstore):
78
+ # # llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k")
79
+ # # tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
80
+ # # model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
81
+
82
+ # llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-v0.1", huggingfacehub_api_token=hub_token, model_kwargs={"temperature":0.5, "max_length":20})
83
+
84
+ # memory = ConversationBufferMemory(
85
+ # memory_key='chat_history', return_messages=True)
86
+ # conversation_chain = ConversationalRetrievalChain.from_llm(
87
+ # llm=llm,
88
+ # retriever=vectorstore.as_retriever(),
89
+ # memory=memory
90
+ # )
91
+ # return conversation_chain
92
+
93
 
94
+ # def handle_userinput(user_question):
95
+ # response = st.session_state.conversation
96
+ # reply = response.run(user_question)
97
+ # st.write(reply)
98
+ # # st.session_state.chat_history = response['chat_history']
99
+
100
+ # # for i, message in enumerate(st.session_state.chat_history):
101
+ # # if i % 2 == 0:
102
+ # # st.write(user_template.replace(
103
+ # # "{{MSG}}", message.content), unsafe_allow_html=True)
104
+ # # else:
105
+ # # st.write(bot_template.replace(
106
+ # # "{{MSG}}", message.content), unsafe_allow_html=True)
107
+
108
+
109
+ # def main():
110
+ # load_dotenv()
111
+ # st.set_page_config(page_title="Chat with multiple PDFs",
112
+ # page_icon=":books:")
113
+ # st.write(css, unsafe_allow_html=True)
114
+
115
+ # if "conversation" not in st.session_state:
116
+ # st.session_state.conversation = None
117
+ # if "chat_history" not in st.session_state:
118
+ # st.session_state.chat_history = None
119
+
120
+ # st.header("Chat with multiple PDFs :books:")
121
+ # user_question = st.text_input("Ask a question about your documents:")
122
+ # if user_question:
123
+ # handle_userinput(user_question)
124
+
125
+ # with st.sidebar:
126
+ # st.subheader("Your documents")
127
+ # pdf_docs = st.file_uploader(
128
+ # "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
129
+ # if st.button("Process"):
130
+ # if(len(pdf_docs) == 0):
131
+ # st.error("Please upload at least one PDF")
132
+ # else:
133
+ # with st.spinner("Processing"):
134
+ # # get pdf text
135
+ # raw_text = get_pdf_text(pdf_docs)
136
+
137
+ # # get the text chunks
138
+ # text_chunks = get_text_chunks(raw_text)
139
 
140
+ # # create vector store
141
+ # vectorstore = get_vectorstore(text_chunks)
142
 
143
+ # # create conversation chain
144
+ # st.session_state.conversation = get_conversation_chain(
145
+ # vectorstore)
146
 
147
+ # if __name__ == '__main__':
148
+ # main()
 
 
 
 
149
 
150
 
151
 
152
 
153
 
154
 
155
+ # # import os
156
+ # # import getpass
157
+ # # import streamlit as st
158
+ # # from langchain.document_loaders import PyPDFLoader
159
+ # # from langchain.text_splitter import RecursiveCharacterTextSplitter
160
+ # # from langchain.embeddings import HuggingFaceEmbeddings
161
+ # # from langchain.vectorstores import Chroma
162
+ # # from langchain import HuggingFaceHub
163
+ # # from langchain.chains import RetrievalQA
164
+ # # # __import__('pysqlite3')
165
+ # # # import sys
166
+ # # # sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
167
 
168
 
169
+ # # # load huggingface api key
170
+ # # hubtok = os.environ["HUGGINGFACE_HUB_TOKEN"]
171
 
172
+ # # # use streamlit file uploader to ask user for file
173
+ # # # file = st.file_uploader("Upload PDF")
174
 
175
 
176
+ # # path = "Geeta.pdf"
177
+ # # loader = PyPDFLoader(path)
178
+ # # pages = loader.load()
179
 
180
+ # # # st.write(pages)
181
 
182
+ # # splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
183
+ # # docs = splitter.split_documents(pages)
184
 
185
+ # # embeddings = HuggingFaceEmbeddings()
186
+ # # doc_search = Chroma.from_documents(docs, embeddings)
187
 
188
+ # # repo_id = "tiiuae/falcon-7b"
189
+ # # llm = HuggingFaceHub(repo_id=repo_id, huggingfacehub_api_token=hubtok, model_kwargs={'temperature': 0.2,'max_length': 1000})
190
 
191
+ # # from langchain.schema import retriever
192
+ # # retireval_chain = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=doc_search.as_retriever())
193
 
194
+ # # if query := st.chat_input("Enter a question: "):
195
+ # # with st.chat_message("assistant"):
196
+ # # st.write(retireval_chain.run(query))