Update utils.py
Browse files
utils.py
CHANGED
@@ -35,7 +35,8 @@ from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordD
|
|
35 |
from langchain.schema import AIMessage, HumanMessage
|
36 |
from langchain_community.llms import HuggingFaceHub
|
37 |
from langchain_community.llms import HuggingFaceTextGenInference
|
38 |
-
from langchain_community.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings
|
|
|
39 |
from langchain_community.tools import DuckDuckGoSearchRun
|
40 |
from typing import Dict, TypedDict
|
41 |
from langchain_core.messages import BaseMessage
|
@@ -224,9 +225,9 @@ def document_storage_chroma(splits):
|
|
224 |
def document_retrieval_chroma(llm, prompt):
|
225 |
#HF embeddings -----------------------------------
|
226 |
#Alternative Embedding - für Vektorstore, um Ähnlichkeitsvektoren zu erzeugen - die ...InstructEmbedding ist sehr rechenaufwendig
|
227 |
-
embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
|
228 |
#etwas weniger rechenaufwendig:
|
229 |
-
|
230 |
|
231 |
#ChromaDb um die Embeddings zu speichern
|
232 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|
|
|
35 |
from langchain.schema import AIMessage, HumanMessage
|
36 |
from langchain_community.llms import HuggingFaceHub
|
37 |
from langchain_community.llms import HuggingFaceTextGenInference
|
38 |
+
#from langchain_community.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings
|
39 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
40 |
from langchain_community.tools import DuckDuckGoSearchRun
|
41 |
from typing import Dict, TypedDict
|
42 |
from langchain_core.messages import BaseMessage
|
|
|
225 |
def document_retrieval_chroma(llm, prompt):
|
226 |
#HF embeddings -----------------------------------
|
227 |
#Alternative Embedding - für Vektorstore, um Ähnlichkeitsvektoren zu erzeugen - die ...InstructEmbedding ist sehr rechenaufwendig
|
228 |
+
#embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
|
229 |
#etwas weniger rechenaufwendig:
|
230 |
+
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
|
231 |
|
232 |
#ChromaDb um die Embeddings zu speichern
|
233 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|