Spaces:
Sleeping
Sleeping
cleanup
Browse files
- app.py +18 -8
- cfg.py +2 -29
- rtd_scraper/tutorial/spiders/docs_spider.py +0 -1
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import logging
|
2 |
from typing import Optional, Tuple
|
3 |
|
4 |
import gradio as gr
|
@@ -6,17 +6,26 @@ import pandas as pd
|
|
6 |
from buster.completers import Completion
|
7 |
from buster.utils import extract_zip
|
8 |
|
|
|
9 |
import cfg
|
10 |
from cfg import setup_buster
|
11 |
|
12 |
-
# Create a handler to control where log messages go (e.g., console, file)
|
13 |
-
handler = (
|
14 |
-
logging.StreamHandler()
|
15 |
-
) # Console output, you can change it to a file handler if needed
|
16 |
|
17 |
-
#
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
# Typehint for chatbot history
|
22 |
ChatHistory = list[list[Optional[str], Optional[str]]]
|
@@ -108,6 +117,7 @@ with demo:
|
|
108 |
examples=[
|
109 |
"How can I install the library?",
|
110 |
"What dependencies are required?",
|
|
|
111 |
],
|
112 |
inputs=question,
|
113 |
)
|
|
|
1 |
+
import os
|
2 |
from typing import Optional, Tuple
|
3 |
|
4 |
import gradio as gr
|
|
|
6 |
from buster.completers import Completion
|
7 |
from buster.utils import extract_zip
|
8 |
|
9 |
+
from rtd_scraper.scrape_rtd import scrape_rtd
|
10 |
import cfg
|
11 |
from cfg import setup_buster
|
12 |
|
|
|
|
|
|
|
|
|
13 |
|
14 |
+
# Check if an openai key is set as an env. variable
|
15 |
+
if os.getenv("OPENAI_API_KEY") is None:
|
16 |
+
print(
|
17 |
+
"Warning: No openai key detected. You can set it with 'export OPENAI_API_KEY=sk-...'."
|
18 |
+
)
|
19 |
+
|
20 |
+
|
21 |
+
homepage_url = os.getenv("RTD_URL", "https://orion.readthedocs.io/")
|
22 |
+
target_version = os.getenv("RTD_VERSION", "en/stable")
|
23 |
+
|
24 |
+
# scrape and embed content from readthedocs website
|
25 |
+
scrape_rtd(
|
26 |
+
homepage_url=homepage_url, save_directory="outputs/", target_version=target_version
|
27 |
+
)
|
28 |
+
|
29 |
|
30 |
# Typehint for chatbot history
|
31 |
ChatHistory = list[list[Optional[str], Optional[str]]]
|
|
|
117 |
examples=[
|
118 |
"How can I install the library?",
|
119 |
"What dependencies are required?",
|
120 |
+
"Give a brief overview of the library."
|
121 |
],
|
122 |
inputs=question,
|
123 |
)
|
cfg.py
CHANGED
@@ -1,6 +1,3 @@
|
|
1 |
-
import os
|
2 |
-
import logging
|
3 |
-
|
4 |
from buster.busterbot import Buster, BusterConfig
|
5 |
from buster.completers import ChatGPTCompleter, DocumentAnswerer
|
6 |
from buster.formatters.documents import DocumentsFormatterJSON
|
@@ -11,29 +8,6 @@ from buster.validators import QuestionAnswerValidator, Validator
|
|
11 |
|
12 |
from rtd_scraper.scrape_rtd import scrape_rtd
|
13 |
|
14 |
-
# Set the root logger's level to INFO
|
15 |
-
logging.basicConfig(level=logging.INFO)
|
16 |
-
|
17 |
-
# Check if an openai key is set as an env. variable
|
18 |
-
if os.getenv("OPENAI_API_KEY") is None:
|
19 |
-
print(
|
20 |
-
"Warning: No openai key detected. You can set it with 'export OPENAI_API_KEY=sk-...'."
|
21 |
-
)
|
22 |
-
|
23 |
-
homepage_url = os.getenv("RTD_URL", "https://orion.readthedocs.io/")
|
24 |
-
target_version = os.getenv("RTD_VERSION", "en/stable")
|
25 |
-
|
26 |
-
# scrape and embed content from readthedocs website
|
27 |
-
scrape_rtd(
|
28 |
-
homepage_url=homepage_url, save_directory="outputs/", target_version=target_version
|
29 |
-
)
|
30 |
-
|
31 |
-
# Disable logging for third-party libraries at DEBUG level
|
32 |
-
for name in logging.root.manager.loggerDict:
|
33 |
-
logger = logging.getLogger(name)
|
34 |
-
logger.setLevel(logging.INFO)
|
35 |
-
|
36 |
-
|
37 |
buster_cfg = BusterConfig(
|
38 |
validator_cfg={
|
39 |
"unknown_response_templates": [
|
@@ -43,15 +17,14 @@ buster_cfg = BusterConfig(
|
|
43 |
"embedding_model": "text-embedding-ada-002",
|
44 |
"use_reranking": True,
|
45 |
"invalid_question_response": "This question does not seem relevant to my current knowledge.",
|
46 |
-
"check_question_prompt": """You are an chatbot answering questions on
|
47 |
|
48 |
Your job is to determine wether or not a question is valid, and should be answered.
|
49 |
-
More general questions are not considered valid, even if you might know the response.
|
50 |
A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.
|
51 |
|
52 |
For example:
|
53 |
|
54 |
-
Q:
|
55 |
true
|
56 |
|
57 |
Q: What is the meaning of life?
|
|
|
|
|
|
|
|
|
1 |
from buster.busterbot import Buster, BusterConfig
|
2 |
from buster.completers import ChatGPTCompleter, DocumentAnswerer
|
3 |
from buster.formatters.documents import DocumentsFormatterJSON
|
|
|
8 |
|
9 |
from rtd_scraper.scrape_rtd import scrape_rtd
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
buster_cfg = BusterConfig(
|
12 |
validator_cfg={
|
13 |
"unknown_response_templates": [
|
|
|
17 |
"embedding_model": "text-embedding-ada-002",
|
18 |
"use_reranking": True,
|
19 |
"invalid_question_response": "This question does not seem relevant to my current knowledge.",
|
20 |
+
"check_question_prompt": """You are an chatbot answering questions on python libraries.
|
21 |
|
22 |
Your job is to determine wether or not a question is valid, and should be answered.
|
|
|
23 |
A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.
|
24 |
|
25 |
For example:
|
26 |
|
27 |
+
Q: How can I install the library?
|
28 |
true
|
29 |
|
30 |
Q: What is the meaning of life?
|
rtd_scraper/tutorial/spiders/docs_spider.py
CHANGED
@@ -62,7 +62,6 @@ class DocsSpider(scrapy.Spider):
|
|
62 |
filepath = self.base_dir / parsed_uri.netloc / parsed_uri.path.strip("/")
|
63 |
filepath.parent.mkdir(parents=True, exist_ok=True)
|
64 |
|
65 |
-
print(f"{filepath=}")
|
66 |
with open(filepath, "wb") as f:
|
67 |
f.write(response.body)
|
68 |
|
|
|
62 |
filepath = self.base_dir / parsed_uri.netloc / parsed_uri.path.strip("/")
|
63 |
filepath.parent.mkdir(parents=True, exist_ok=True)
|
64 |
|
|
|
65 |
with open(filepath, "wb") as f:
|
66 |
f.write(response.body)
|
67 |
|