Spaces:
Runtime error
Runtime error
eljanmahammadli
committed on
Commit
·
80a07a7
1
Parent(s):
593bb22
decreased num max docs before retrieval
Browse files
- ai_generate.py +1 -3
- requirements.txt +2 -1
ai_generate.py
CHANGED
@@ -21,8 +21,6 @@ from langchain.prompts import ChatPromptTemplate
|
|
21 |
import re
|
22 |
import numpy as np
|
23 |
import torch
|
24 |
-
|
25 |
-
# pip install bm25s
|
26 |
import bm25s
|
27 |
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
|
28 |
from langchain.retrievers import ContextualCompressionRetriever
|
@@ -236,7 +234,7 @@ def create_db_with_langchain(path: list[str], url_content: dict, query: str):
|
|
236 |
print(f"### Total number of documents before bm25s: {len(all_docs)}")
|
237 |
|
238 |
# if the number of docs is too high, we need to reduce it
|
239 |
-
num_max_docs =
|
240 |
if len(all_docs) > num_max_docs:
|
241 |
docs_raw = [doc.page_content for doc in all_docs]
|
242 |
retriever = bm25s.BM25(corpus=docs_raw)
|
|
|
21 |
import re
|
22 |
import numpy as np
|
23 |
import torch
|
|
|
|
|
24 |
import bm25s
|
25 |
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
|
26 |
from langchain.retrievers import ContextualCompressionRetriever
|
|
|
234 |
print(f"### Total number of documents before bm25s: {len(all_docs)}")
|
235 |
|
236 |
# if the number of docs is too high, we need to reduce it
|
237 |
+
num_max_docs = 300
|
238 |
if len(all_docs) > num_max_docs:
|
239 |
docs_raw = [doc.page_content for doc in all_docs]
|
240 |
retriever = bm25s.BM25(corpus=docs_raw)
|
requirements.txt
CHANGED
@@ -24,4 +24,5 @@ langchain-google-genai
|
|
24 |
langchain-anthropic
|
25 |
langchain-openai
|
26 |
vertexai
|
27 |
-
html2text
|
|
|
|
24 |
langchain-anthropic
|
25 |
langchain-openai
|
26 |
vertexai
|
27 |
+
html2text
|
28 |
+
bm25s
|