Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -14,53 +14,30 @@ from langchain_core.prompts import ChatPromptTemplate
|
|
14 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
15 |
from langchain import hub
|
16 |
|
|
|
17 |
def method_get_website_text(urls):
|
18 |
-
# Convert string of URLs to list
|
19 |
-
|
20 |
urls_list = urls.split("\n")
|
21 |
docs = [WebBaseLoader(url).load() for url in urls_list]
|
22 |
docs_list = [item for sublist in docs for item in sublist]
|
23 |
return docs_list
|
24 |
|
25 |
-
|
26 |
def method_get_text_chunks(text):
|
27 |
-
#split the text into chunks
|
28 |
-
|
29 |
#text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
|
30 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=
|
31 |
doc_splits = text_splitter.split_documents(text)
|
32 |
return doc_splits
|
33 |
|
|
|
34 |
def method_get_vectorstore(document_chunks):
|
35 |
-
#convert text chunks into embeddings and store in vector database
|
36 |
-
|
37 |
# create the open-source embedding function
|
38 |
-
|
39 |
-
embeddings = HuggingFaceEmbeddings()
|
40 |
|
41 |
# create a vectorstore from the chunks
|
42 |
vector_store = Chroma.from_documents(document_chunks, embeddings)
|
43 |
return vector_store
|
44 |
|
45 |
-
# def get_context_retriever_chain(vector_store,question):
|
46 |
-
# # Initialize the retriever
|
47 |
-
# retriever = vector_store.as_retriever()
|
48 |
-
|
49 |
-
# # Define the RAG template and RAG prompt template
|
50 |
-
# prompt = hub.pull("rlm/rag-prompt")
|
51 |
-
|
52 |
-
# # Initialize the Hugging Face language model (LLM)
|
53 |
-
# llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.2", model_kwargs={"temperature":0.6, "max_length":512})
|
54 |
-
|
55 |
-
# # Construct the RAG pipeline
|
56 |
-
# rag_chain = (
|
57 |
-
# {"context": retriever, "question": RunnablePassthrough()}
|
58 |
-
# | prompt
|
59 |
-
# | llm
|
60 |
-
# | StrOutputParser()
|
61 |
-
# )
|
62 |
-
# return rag_chain.invoke(str(question))
|
63 |
-
|
64 |
|
65 |
def get_context_retriever_chain(vector_store,question):
|
66 |
# Initialize the retriever
|
@@ -76,7 +53,7 @@ def get_context_retriever_chain(vector_store,question):
|
|
76 |
after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
|
77 |
|
78 |
# Initialize the Hugging Face language model (LLM)
|
79 |
-
llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.2", model_kwargs={"temperature":0.6, "max_length":
|
80 |
|
81 |
# Construct the RAG pipeline
|
82 |
after_rag_chain = (
|
|
|
14 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
15 |
from langchain import hub
|
16 |
|
17 |
+
# Load documents from a newline-separated string of URLs and flatten them into one list
|
18 |
def method_get_website_text(urls):
|
|
|
|
|
19 |
urls_list = urls.split("\n")
|
20 |
docs = [WebBaseLoader(url).load() for url in urls_list]
|
21 |
docs_list = [item for sublist in docs for item in sublist]
|
22 |
return docs_list
|
23 |
|
24 |
+
# Split the loaded documents into chunks
|
25 |
def method_get_text_chunks(text):
|
|
|
|
|
26 |
#text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
|
27 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=7500, chunk_overlap=100)
|
28 |
doc_splits = text_splitter.split_documents(text)
|
29 |
return doc_splits
|
30 |
|
31 |
+
# Convert document chunks into embeddings and store them in a vector database
|
32 |
def method_get_vectorstore(document_chunks):
|
|
|
|
|
33 |
# create the open-source embedding function
|
34 |
+
embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
|
35 |
+
#embeddings = HuggingFaceEmbeddings()
|
36 |
|
37 |
# create a vectorstore from the chunks
|
38 |
vector_store = Chroma.from_documents(document_chunks, embeddings)
|
39 |
return vector_store
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
def get_context_retriever_chain(vector_store,question):
|
43 |
# Initialize the retriever
|
|
|
53 |
after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
|
54 |
|
55 |
# Initialize the Hugging Face language model (LLM)
|
56 |
+
llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.2", model_kwargs={"temperature":0.6, "max_length":1024})
|
57 |
|
58 |
# Construct the RAG pipeline
|
59 |
after_rag_chain = (
|