|
from crawl4ai.llmtxt import AsyncLLMTextManager |
|
from crawl4ai.async_logger import AsyncLogger |
|
from pathlib import Path |
|
import asyncio |
|
|
|
async def main():
    """Exercise AsyncLLMTextManager end to end against the local docs folder.

    Steps, in order:

    1. Resolve ``local/_docs/llm.txt`` relative to the parent of this
       script's directory and create it (with parents) if it is missing.
    2. Build an ``AsyncLLMTextManager`` over that folder and print the name
       of every ``*.md`` file found directly inside it.
    3. Await ``generate_index_files`` with both ``force_generate_facts`` and
       ``clear_bm25_cache`` set to ``False``.
    4. Run each sample query through ``manager.search`` and print the result
       length plus a one-line preview of its first 200 characters.

    Returns:
        None. All output is written to stdout.
    """
    # Anchor on this file's resolved location so the script behaves the same
    # regardless of the current working directory.
    script_path = Path(__file__).resolve()
    docs_dir = script_path.parent.parent / "local/_docs/llm.txt"
    docs_dir.mkdir(parents=True, exist_ok=True)

    manager = AsyncLLMTextManager(docs_dir, AsyncLogger(), batch_size=2)

    print("\nAvailable files:")
    for md_path in docs_dir.glob("*.md"):
        print(f"- {md_path.name}")

    print("\nGenerating index files...")
    await manager.generate_index_files(
        force_generate_facts=False,
        clear_bm25_cache=False,
    )

    sample_queries = (
        "How is using the `arun_many` method?",
    )

    print("\nTesting search functionality:")
    for question in sample_queries:
        print(f"\nQuery: {question}")
        found = manager.search(question, top_k=2)
        print(f"Results length: {len(found)} characters")

        if not found:
            print("No results found")
            continue

        # Flatten newlines so the preview stays on a single console line.
        preview = found[:200].replace("\n", " ")
        print("First 200 chars of results:", preview, "...")
|
|
|
# Only run the demo when this file is executed as a script; importing it
# should not start an event loop.
if __name__ == "__main__":
    asyncio.run(main())