hail75 committed
Commit a007d8f · 1 Parent(s): 7351c15
app.py CHANGED
@@ -1,33 +1,97 @@
 import os
-
 import streamlit as st
 
-from langchain_openai import OpenAI
+from langchain_openai import OpenAIEmbeddings
+from langchain_openai.chat_models import ChatOpenAI
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_community.document_loaders.generic import GenericLoader
+from langchain_community.document_loaders.parsers import OpenAIWhisperParser
+from langchain_community.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
+from langchain_community.vectorstores import Chroma
 from langchain_core.prompts import ChatPromptTemplate
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import RetrievalQA
 
-
+st.set_page_config(page_title="Chat with your data", page_icon="🤖")
 st.title("Chat with your data")
+st.header("Add your data for RAG")
+
+data_type = st.radio("Choose the type of data to add:", ("Text", "PDF", "YouTube URL"))
+
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
+pages = None
 
-inp = st.text_input("Enter your prompt here")
+if data_type == "Text":
+    user_text = st.text_area("Enter text data")
+    if st.button("Add"):
+        pages = user_text
 
-llm = OpenAI(
+elif data_type == "PDF":
+    uploaded_pdf = st.file_uploader("Upload PDF", type="pdf")
+    if st.button("Add"):
+        loader = PyPDFLoader("docs/ttdn.pdf")
+        pages = loader.load()
+
+elif data_type == "YouTube URL":
+    youtube_url = st.text_input("Enter YouTube URL")
+    if st.button("Add"):
+        save_dir="docs/youtube"
+        loader = GenericLoader(
+            YoutubeAudioLoader([youtube_url], save_dir),
+            OpenAIWhisperParser()
+        )
+
+        pages = loader.load()
+
+llm = ChatOpenAI(
     api_key=os.environ.get("OPENAI_API_KEY"),
     temperature=0.2,
-    model='gpt-3.5-turbo-0125')
-
-# prompt = ChatPromptTemplate.from_messages(
-#     [
-#         (
-#             "system",
-#             "You are a helpful assistant that answer questions from user.",
-#         ),
-#         ("human", "{input}"),
-#     ]
-# )
-
-chain = llm
-if inp:
-    response = chain.invoke(input=inp)
-    st.write(response)
-
-
+    model='gpt-3.5-turbo')
+
+
+template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible.
+Context: {context}
+Question: {question}
+Helpful Answer:"""
+
+prompt = ChatPromptTemplate.from_template(template)
+
+if pages:
+    embedding = OpenAIEmbeddings()
+    if data_type == "Text":
+        texts = text_splitter.split_text(pages)
+        vectordb = Chroma.from_texts(
+            texts=texts,
+            embedding=embedding,
+            persist_directory='docs/chroma/'
+        )
+    else:
+        docs = text_splitter.split_documents(pages)
+
+        vectordb = Chroma.from_documents(
+            documents=docs,
+            embedding=embedding,
+            persist_directory='docs/chroma/'
+        )
+
+    qa_chain = RetrievalQA.from_chain_type(
+        llm,
+        retriever=vectordb.as_retriever(),
+        return_source_documents=True,
+        chain_type_kwargs={"prompt": prompt}
+    )
+
+    result = qa_chain.invoke({"query": "What is BSM Labs"})
+    st.write(result["result"])
+    # st.session_state.retriever = vectordb.as_retriever()
+
+# if "retriever" in st.session_state:
+#     user_query = st.chat_input("Ask a question")
+#     if user_query:
+#         chain = prompt | llm | parser
+#         response = chain.invoke(input={
+#             "context": st.session_state.retriever,
+#             "question": user_query
+#         })
+#         st.write(response)
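
In the committed PDF branch, the file returned by st.file_uploader is never used; PyPDFLoader is pointed at the hardcoded docs/ttdn.pdf. A minimal sketch of how the upload itself could be loaded instead (an assumption about the intended behavior; the temporary-file approach and the helper name are illustrative, not part of the commit):

    # Sketch: persist the Streamlit upload to disk so PyPDFLoader (which takes a file path) can read it.
    import tempfile
    from langchain_community.document_loaders import PyPDFLoader

    def load_uploaded_pdf(uploaded_pdf):
        # uploaded_pdf is the UploadedFile object returned by st.file_uploader
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(uploaded_pdf.getvalue())
            tmp_path = tmp.name
        # One Document per page, same shape as loader.load() in app.py
        return PyPDFLoader(tmp_path).load()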
docs/chroma/a94be33e-75ea-4e61-9699-3f0ab772f12a/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f18abd8c514282db82706e52b0a33ed659cd534e925a6f149deb7af9ce34bd8e
+size 6284000
docs/chroma/a94be33e-75ea-4e61-9699-3f0ab772f12a/header.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:effaa959ce2b30070fdafc2fe82096fc46e4ee7561b75920dd3ce43d09679b21
+size 100
docs/chroma/a94be33e-75ea-4e61-9699-3f0ab772f12a/length.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc19b1997119425765295aeab72d76faa6927d4f83985d328c26f20468d6cc76
+size 4000
docs/chroma/a94be33e-75ea-4e61-9699-3f0ab772f12a/link_lists.bin ADDED
File without changes
docs/chroma/chroma.sqlite3 ADDED
Binary file (479 kB)
 
docs/ttdn.pdf ADDED
Binary file (147 kB)
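
The commit also checks a pre-built Chroma store (docs/chroma/) and the sample PDF into the repo. If the goal is to reuse that index across app restarts instead of re-embedding on every "Add", it could presumably be reopened directly; a sketch, assuming the index was built with the same OpenAIEmbeddings model used in app.py:

    # Sketch: reopen the persisted Chroma index committed under docs/chroma/
    from langchain_openai import OpenAIEmbeddings
    from langchain_community.vectorstores import Chroma

    embedding = OpenAIEmbeddings()
    vectordb = Chroma(persist_directory="docs/chroma/", embedding_function=embedding)
    retriever = vectordb.as_retriever()  # usable with RetrievalQA.from_chain_type, as in app.py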
 
requirements.txt CHANGED
@@ -1,2 +1,7 @@
 langchain
-langchain_openai
+langchain_community
+langchain_openai
+pypdf
+yt_dlp
+pydub
+chromadb
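
These entries cover the Python dependencies; the YouTube path (yt_dlp, pydub, OpenAIWhisperParser) additionally relies on the ffmpeg binary for audio handling, which pip does not provide. On a Hugging Face Space that is typically supplied via a packages.txt file listing apt packages one per line (an assumption about this deployment; no packages.txt appears in this commit):

    ffmpeg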