priyesh17 committed on
Commit
5c9280a
·
verified ·
1 Parent(s): 766f493

chat with pdf app

Browse files
Files changed (1) hide show
  1. app.py +78 -78
app.py CHANGED
@@ -1,79 +1,79 @@
1
- import streamlit as st
2
- import os
3
- from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
4
- from langchain_community.document_loaders import PyPDFLoader
5
- from langchain.text_splitter import RecursiveCharacterTextSplitter
6
- from langchain.chains.combine_documents import create_stuff_documents_chain
7
- from langchain_core.prompts import ChatPromptTemplate
8
- from langchain.chains import create_retrieval_chain
9
- from langchain_community.vectorstores import FAISS
10
-
11
- from dotenv import load_dotenv
12
- import tempfile
13
- import time
14
-
15
- load_dotenv()
16
-
17
- # load the Nvidia API key
18
- os.environ['NVIDIA_API_KEY'] = os.getenv('NVIDIA_API_KEY')
19
-
20
- llm = ChatNVIDIA(model="meta/llama3-70b-instruct")
21
-
22
- def vector_embedding(pdf_file):
23
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
24
- tmp_file.write(pdf_file.getvalue())
25
- tmp_file_path = tmp_file.name
26
-
27
- st.session_state.embeddings = NVIDIAEmbeddings()
28
- st.session_state.loader = PyPDFLoader(tmp_file_path)
29
- st.session_state.docs = st.session_state.loader.load()
30
- st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=50)
31
- st.session_state.final_documents = st.session_state.text_splitter.split_documents(st.session_state.docs)
32
- st.session_state.vectors = FAISS.from_documents(st.session_state.final_documents, st.session_state.embeddings)
33
-
34
- os.unlink(tmp_file_path)
35
-
36
- st.title("NVIDIA NIM Demo")
37
-
38
- prompt = ChatPromptTemplate.from_template(
39
- """
40
- Answer the questions based on the provided context only.
41
- Please provide the most accurate response based on the question
42
- <context>
43
- {context}
44
- </context>
45
- Question: {input}
46
- """
47
- )
48
-
49
- uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
50
-
51
- if uploaded_file is not None:
52
- if st.button("Process PDF"):
53
- with st.spinner("Processing PDF..."):
54
- vector_embedding(uploaded_file)
55
- st.success("FAISS Vector Store DB is ready using NvidiaEmbedding")
56
-
57
- prompt1 = st.text_input("Enter your question about the uploaded document")
58
-
59
- if prompt1 and 'vectors' in st.session_state:
60
- document_chain = create_stuff_documents_chain(llm, prompt)
61
- retriever = st.session_state.vectors.as_retriever()
62
- retrieval_chain = create_retrieval_chain(retriever, document_chain)
63
-
64
- with st.spinner("Generating answer..."):
65
- start = time.process_time()
66
- response = retrieval_chain.invoke({'input': prompt1})
67
- end = time.process_time()
68
-
69
- st.write("Answer:", response['answer'])
70
- st.write(f"Response time: {end - start:.2f} seconds")
71
-
72
- with st.expander("Document Similarity Search"):
73
- for i, doc in enumerate(response["context"]):
74
- st.write(f"Chunk {i + 1}:")
75
- st.write(doc.page_content)
76
- st.write("------------------------------------------")
77
- else:
78
- if prompt1:
79
  st.warning("Please upload and process a PDF document first.")
 
1
+ import streamlit as st
2
+ import os
3
+ from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
4
+ from langchain_community.document_loaders import PyPDFLoader
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.chains.combine_documents import create_stuff_documents_chain
7
+ from langchain_core.prompts import ChatPromptTemplate
8
+ from langchain.chains import create_retrieval_chain
9
+ from langchain_community.vectorstores import FAISS
10
+
11
+ from dotenv import load_dotenv
12
+ import tempfile
13
+ import time
14
+
15
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Validate the NVIDIA API key before using it. Assigning None to os.environ
# raises TypeError, so a missing key is reported to the user explicitly and
# the script run is halted instead of crashing with an opaque traceback.
nvidia_api_key = os.getenv('NVIDIA_API_KEY')
if not nvidia_api_key:
    st.error("NVIDIA_API_KEY is not set. Add it to your environment or .env file.")
    st.stop()
os.environ['NVIDIA_API_KEY'] = nvidia_api_key

# Chat model used to answer questions about the uploaded document.
llm = ChatNVIDIA(model="meta/llama3-70b-instruct")
21
+
22
def vector_embedding(pdf_file):
    """Index an uploaded PDF into a FAISS vector store kept in session state.

    The upload is written to a temporary ``.pdf`` file because the loader is
    constructed from a file path. The intermediate objects (embeddings,
    loader, raw documents, splitter, split chunks) and the resulting vector
    store are stored on ``st.session_state`` under the attributes
    ``embeddings``, ``loader``, ``docs``, ``text_splitter``,
    ``final_documents`` and ``vectors``.

    Args:
        pdf_file: Uploaded file object exposing ``getvalue()`` that returns
            the PDF content as bytes.

    Raises:
        Any exception raised while loading, splitting or embedding the
        document is propagated; the temporary file is removed regardless.
    """
    # delete=False keeps the file on disk after the handle is closed so the
    # loader can reopen it by path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_file_path = tmp_file.name

    try:
        st.session_state.embeddings = NVIDIAEmbeddings()
        st.session_state.loader = PyPDFLoader(tmp_file_path)
        st.session_state.docs = st.session_state.loader.load()
        st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=50)
        st.session_state.final_documents = st.session_state.text_splitter.split_documents(st.session_state.docs)
        st.session_state.vectors = FAISS.from_documents(st.session_state.final_documents, st.session_state.embeddings)
    finally:
        # Always remove the temporary copy, even when processing fails, so
        # failed uploads do not accumulate on disk.
        os.unlink(tmp_file_path)
35
+
36
st.title("Chat with PDF")

# Prompt text sent to the model: {context} and {input} are the template's two
# placeholders, and the instructions restrict the answer to the given context.
_QA_TEMPLATE = """
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question
<context>
{context}
</context>
Question: {input}
"""
prompt = ChatPromptTemplate.from_template(_QA_TEMPLATE)

uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

# The button is only rendered once a file has been chosen (short-circuit),
# and indexing runs only on the rerun triggered by clicking it.
if uploaded_file is not None and st.button("Process PDF"):
    with st.spinner("Processing PDF..."):
        vector_embedding(uploaded_file)
    st.success("FAISS Vector Store DB is ready using NvidiaEmbedding")

prompt1 = st.text_input("Enter your question about the uploaded document")
58
+
59
# Answer the question only once a document has been indexed into session state.
if prompt1 and 'vectors' in st.session_state:
    # Retrieval chain: fetch chunks from the vector store, then pass them to
    # the LLM through the prompt template.
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = st.session_state.vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    with st.spinner("Generating answer..."):
        # perf_counter() measures elapsed wall-clock time. process_time()
        # counts only CPU time of this process, so it would not include the
        # time spent waiting for the chain call to return.
        start = time.perf_counter()
        response = retrieval_chain.invoke({'input': prompt1})
        elapsed = time.perf_counter() - start

    st.write("Answer:", response['answer'])
    st.write(f"Response time: {elapsed:.2f} seconds")

    # Show the retrieved chunks that were supplied to the model as context.
    with st.expander("Document Similarity Search"):
        for chunk_number, doc in enumerate(response["context"], start=1):
            st.write(f"Chunk {chunk_number}:")
            st.write(doc.page_content)
            st.write("------------------------------------------")
elif prompt1:
    # A question was entered but no vector store exists yet.
    st.warning("Please upload and process a PDF document first.")