Update utils.py
Browse files
utils.py
CHANGED
@@ -29,10 +29,9 @@ from pygments.lexers import guess_lexer,get_lexer_by_name
|
|
29 |
from pygments.formatters import HtmlFormatter
|
30 |
|
31 |
from langchain.chains import LLMChain, RetrievalQA
|
32 |
-
from langchain_community.document_loaders import PyPDFLoader,
|
33 |
from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
|
34 |
-
from langchain.document_loaders
|
35 |
-
from langchain.document_loaders.parsers import OpenAIWhisperParser
|
36 |
from langchain.schema import AIMessage, HumanMessage
|
37 |
from langchain_community.llms import HuggingFaceHub
|
38 |
from langchain_community.llms import HuggingFaceTextGenInference
|
@@ -47,7 +46,6 @@ from langchain import hub
|
|
47 |
from langchain.output_parsers.openai_tools import PydanticToolsParser
|
48 |
from langchain.prompts import PromptTemplate
|
49 |
from langchain.schema import Document
|
50 |
-
from langchain_community.tools.tavily_search import TavilySearchResults
|
51 |
from langchain_community.vectorstores import Chroma
|
52 |
from langchain_core.messages import BaseMessage, FunctionMessage
|
53 |
from langchain_core.output_parsers import StrOutputParser
|
@@ -189,10 +187,10 @@ def document_loading_splitting():
|
|
189 |
#Chroma DB die splits ablegen - vektorisiert...
|
190 |
def document_storage_chroma(splits):
|
191 |
#OpenAi embeddings----------------------------------
|
192 |
-
Chroma.from_documents(documents = splits, embedding = OpenAIEmbeddings(disallowed_special = ()), persist_directory = PATH_WORK + CHROMA_DIR)
|
193 |
|
194 |
#HF embeddings--------------------------------------
|
195 |
-
|
196 |
|
197 |
|
198 |
############################################
|
|
|
29 |
from pygments.formatters import HtmlFormatter
|
30 |
|
31 |
from langchain.chains import LLMChain, RetrievalQA
|
32 |
+
from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader, DirectoryLoader
|
33 |
from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
|
34 |
+
from langchain.document_loaders import GenericLoader
|
|
|
35 |
from langchain.schema import AIMessage, HumanMessage
|
36 |
from langchain_community.llms import HuggingFaceHub
|
37 |
from langchain_community.llms import HuggingFaceTextGenInference
|
|
|
46 |
from langchain.output_parsers.openai_tools import PydanticToolsParser
|
47 |
from langchain.prompts import PromptTemplate
|
48 |
from langchain.schema import Document
|
|
|
49 |
from langchain_community.vectorstores import Chroma
|
50 |
from langchain_core.messages import BaseMessage, FunctionMessage
|
51 |
from langchain_core.output_parsers import StrOutputParser
|
|
|
187 |
#Chroma DB die splits ablegen - vektorisiert...
|
188 |
def document_storage_chroma(splits):
|
189 |
#OpenAi embeddings----------------------------------
|
190 |
+
#Chroma.from_documents(documents = splits, embedding = OpenAIEmbeddings(disallowed_special = ()), persist_directory = PATH_WORK + CHROMA_DIR)
|
191 |
|
192 |
#HF embeddings--------------------------------------
|
193 |
+
Chroma.from_documents(documents = splits, embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False}), persist_directory = PATH_WORK + CHROMA_DIR)
|
194 |
|
195 |
|
196 |
############################################
|