alexkueck committed on
Commit
73ae7b2
·
1 Parent(s): db7ad24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -13,7 +13,7 @@ from langchain.document_loaders.parsers import OpenAIWhisperParser
13
  from langchain.schema import AIMessage, HumanMessage
14
  from langchain.llms import HuggingFaceHub
15
  from langchain.llms import HuggingFaceTextGenInference
16
- from langchain.embeddings import HuggingFaceInstructEmbeddings
17
 
18
  from langchain.embeddings.openai import OpenAIEmbeddings
19
  from langchain.prompts import PromptTemplate
@@ -182,8 +182,11 @@ def document_storage_mongodb(splits):
182
  #dokumente in chroma db vektorisiert ablegen können - die Db dafür vorbereiten
183
  def document_retrieval_chroma(llm, prompt):
184
  #embeddings = OpenAIEmbeddings()
185
- #Alternative Embedding - für Vektorstore, um Ähnlichkeitsvektoren zu erzeugen
186
  embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
 
 
 
187
  db = Chroma(embedding_function = embeddings,
188
  persist_directory = PATH_WORK + CHROMA_DIR)
189
 
 
13
  from langchain.schema import AIMessage, HumanMessage
14
  from langchain.llms import HuggingFaceHub
15
  from langchain.llms import HuggingFaceTextGenInference
16
+ from langchain.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings
17
 
18
  from langchain.embeddings.openai import OpenAIEmbeddings
19
  from langchain.prompts import PromptTemplate
 
182
  #dokumente in chroma db vektorisiert ablegen können - die Db dafür vorbereiten
183
  def document_retrieval_chroma(llm, prompt):
184
  #embeddings = OpenAIEmbeddings()
185
+ #Alternative Embedding - für Vektorstore, um Ähnlichkeitsvektoren zu erzeugen - die ...InstructEmbedding ist sehr rechenaufwendig
186
  embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
187
+ #etwas weniger rechenaufwendig:
188
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
189
+
190
  db = Chroma(embedding_function = embeddings,
191
  persist_directory = PATH_WORK + CHROMA_DIR)
192