Spaces:

polygraf-ai
/

article_writer

Runtime error

eljanmahammadli committed on Sep 7, 2024

Commit

80a07a7

1 Parent(s): 593bb22

decreased num max docs before retrieval

Files changed (2) hide show

ai_generate.py CHANGED Viewed

@@ -21,8 +21,6 @@ from langchain.prompts import ChatPromptTemplate
 import re
 import numpy as np
 import torch
-# pip install bm25s
 import bm25s
 from langchain_community.cross_encoders import HuggingFaceCrossEncoder
 from langchain.retrievers import ContextualCompressionRetriever
@@ -236,7 +234,7 @@ def create_db_with_langchain(path: list[str], url_content: dict, query: str):
     print(f"### Total number of documents before bm25s: {len(all_docs)}")
     # if the number of docs is too high, we need to reduce it
-    num_max_docs = 250
     if len(all_docs) > num_max_docs:
         docs_raw = [doc.page_content for doc in all_docs]
         retriever = bm25s.BM25(corpus=docs_raw)

 import re
 import numpy as np
 import torch
 import bm25s
 from langchain_community.cross_encoders import HuggingFaceCrossEncoder
 from langchain.retrievers import ContextualCompressionRetriever
     print(f"### Total number of documents before bm25s: {len(all_docs)}")
     # if the number of docs is too high, we need to reduce it
+    num_max_docs = 300
     if len(all_docs) > num_max_docs:
         docs_raw = [doc.page_content for doc in all_docs]
         retriever = bm25s.BM25(corpus=docs_raw)

requirements.txt CHANGED Viewed

@@ -24,4 +24,5 @@ langchain-google-genai
 langchain-anthropic
 langchain-openai
 vertexai
-html2text

 langchain-anthropic
 langchain-openai
 vertexai
+html2text
+bm25s