Paul-Joshi committed on
Commit
cef4abb
·
verified ·
1 Parent(s): 4b928c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -30
app.py CHANGED
@@ -14,53 +14,30 @@ from langchain_core.prompts import ChatPromptTemplate
14
  from langchain_community.embeddings import HuggingFaceEmbeddings
15
  from langchain import hub
16
 
 
17
  def method_get_website_text(urls):
18
- # Convert string of URLs to list
19
-
20
  urls_list = urls.split("\n")
21
  docs = [WebBaseLoader(url).load() for url in urls_list]
22
  docs_list = [item for sublist in docs for item in sublist]
23
  return docs_list
24
 
25
-
26
  def method_get_text_chunks(text):
27
- #split the text into chunks
28
-
29
  #text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
30
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
31
  doc_splits = text_splitter.split_documents(text)
32
  return doc_splits
33
 
 
34
  def method_get_vectorstore(document_chunks):
35
- #convert text chunks into embeddings and store in vector database
36
-
37
  # create the open-source embedding function
38
- #embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
39
- embeddings = HuggingFaceEmbeddings()
40
 
41
  # create a vectorstore from the chunks
42
  vector_store = Chroma.from_documents(document_chunks, embeddings)
43
  return vector_store
44
 
45
- # def get_context_retriever_chain(vector_store,question):
46
- # # Initialize the retriever
47
- # retriever = vector_store.as_retriever()
48
-
49
- # # Define the RAG template and RAG prompt template
50
- # prompt = hub.pull("rlm/rag-prompt")
51
-
52
- # # Initialize the Hugging Face language model (LLM)
53
- # llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.2", model_kwargs={"temperature":0.6, "max_length":512})
54
-
55
- # # Construct the RAG pipeline
56
- # rag_chain = (
57
- # {"context": retriever, "question": RunnablePassthrough()}
58
- # | prompt
59
- # | llm
60
- # | StrOutputParser()
61
- # )
62
- # return rag_chain.invoke(str(question))
63
-
64
 
65
  def get_context_retriever_chain(vector_store,question):
66
  # Initialize the retriever
@@ -76,7 +53,7 @@ def get_context_retriever_chain(vector_store,question):
76
  after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
77
 
78
  # Initialize the Hugging Face language model (LLM)
79
- llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.2", model_kwargs={"temperature":0.6, "max_length":512})
80
 
81
  # Construct the RAG pipeline
82
  after_rag_chain = (
 
14
  from langchain_community.embeddings import HuggingFaceEmbeddings
15
  from langchain import hub
16
 
17
+ # Convert string of URLs to list
18
  def method_get_website_text(urls):
 
 
19
  urls_list = urls.split("\n")
20
  docs = [WebBaseLoader(url).load() for url in urls_list]
21
  docs_list = [item for sublist in docs for item in sublist]
22
  return docs_list
23
 
24
+ #split the text into chunks
25
  def method_get_text_chunks(text):
 
 
26
  #text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
27
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=7500, chunk_overlap=100)
28
  doc_splits = text_splitter.split_documents(text)
29
  return doc_splits
30
 
31
+ #convert text chunks into embeddings and store in vector database
32
  def method_get_vectorstore(document_chunks):
 
 
33
  # create the open-source embedding function
34
+ embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
35
+ #embeddings = HuggingFaceEmbeddings()
36
 
37
  # create a vectorstore from the chunks
38
  vector_store = Chroma.from_documents(document_chunks, embeddings)
39
  return vector_store
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  def get_context_retriever_chain(vector_store,question):
43
  # Initialize the retriever
 
53
  after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
54
 
55
  # Initialize the Hugging Face language model (LLM)
56
+ llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.2", model_kwargs={"temperature":0.6, "max_length":1024})
57
 
58
  # Construct the RAG pipeline
59
  after_rag_chain = (