eljanmahammadli committed on
Commit
80a07a7
·
1 Parent(s): 593bb22

increased num max docs before retrieval

Browse files
Files changed (2) hide show
  1. ai_generate.py +1 -3
  2. requirements.txt +2 -1
ai_generate.py CHANGED
@@ -21,8 +21,6 @@ from langchain.prompts import ChatPromptTemplate
21
  import re
22
  import numpy as np
23
  import torch
24
-
25
- # pip install bm25s
26
  import bm25s
27
  from langchain_community.cross_encoders import HuggingFaceCrossEncoder
28
  from langchain.retrievers import ContextualCompressionRetriever
@@ -236,7 +234,7 @@ def create_db_with_langchain(path: list[str], url_content: dict, query: str):
236
  print(f"### Total number of documents before bm25s: {len(all_docs)}")
237
 
238
  # if the number of docs is too high, we need to reduce it
239
- num_max_docs = 250
240
  if len(all_docs) > num_max_docs:
241
  docs_raw = [doc.page_content for doc in all_docs]
242
  retriever = bm25s.BM25(corpus=docs_raw)
 
21
  import re
22
  import numpy as np
23
  import torch
 
 
24
  import bm25s
25
  from langchain_community.cross_encoders import HuggingFaceCrossEncoder
26
  from langchain.retrievers import ContextualCompressionRetriever
 
234
  print(f"### Total number of documents before bm25s: {len(all_docs)}")
235
 
236
  # if the number of docs is too high, we need to reduce it
237
+ num_max_docs = 300
238
  if len(all_docs) > num_max_docs:
239
  docs_raw = [doc.page_content for doc in all_docs]
240
  retriever = bm25s.BM25(corpus=docs_raw)
requirements.txt CHANGED
@@ -24,4 +24,5 @@ langchain-google-genai
24
  langchain-anthropic
25
  langchain-openai
26
  vertexai
27
- html2text
 
 
24
  langchain-anthropic
25
  langchain-openai
26
  vertexai
27
+ html2text
28
+ bm25s