Spaces: Runtime error

eljanmahammadli committed · Commit bf91121 · Parent(s): 8f26ea6

enable ai model selection and api key

Files changed:
- app.py (+13 -10)
- google_search.py (+9 -5)
app.py CHANGED
@@ -331,7 +331,7 @@ def generate_article(
     ai_model: str,
     content_string: str,
     url_content: str = None,
-
+    api_key: str = None,
     pdf_file_input: list[str] = None,
     generated_article: str = None,
     user_comments: str = None,
@@ -363,11 +363,15 @@ def generate_article(
 
     print("Generated Prompt...\n", prompt)
     article = generate(
-        prompt,
-        topic,
-        ai_model,
-        url_content,
-        pdf_file_input,
+        prompt=prompt,
+        topic=topic,
+        model=ai_model,
+        url_content=url_content,
+        path=pdf_file_input,
+        temperature=1,
+        max_length=2048,
+        api_key=api_key,
+        sys_message="",
     )
 
     return clean_text(article)
@@ -428,8 +432,6 @@ def generate_and_format(
     references,
     num_examples,
     conclusion_type,
-    # ai_model,
-    # api_key,
     google_search_check,
     year_from,
     month_from,
@@ -441,12 +443,13 @@ def generate_and_format(
     include_sites,
     exclude_sites,
     pdf_file_input,
+    ai_model="OpenAI GPT 4o",
+    api_key=None,
     generated_article: str = None,
     user_comments: str = None,
 ):
     content_string = ""
     url_content = None
-    ai_model = "OpenAI GPT 4o"
     if google_search_check:
         date_from = build_date(year_from, month_from, day_from)
         date_to = build_date(year_to, month_to, day_to)
@@ -485,7 +488,7 @@ def generate_and_format(
         ai_model,
         content_string,
         url_content,
-
+        api_key,
         pdf_file_input,
         generated_article,
         user_comments,
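The call to `generate` now passes every argument by keyword, which makes the new `api_key` and the explicit generation settings (`temperature=1`, `max_length=2048`) visible at the call site. `generate` itself is not part of this diff; the following is a minimal sketch of a signature that would be compatible with the new call, where every default and comment is an assumption inferred from the keyword arguments in app.py:

```python
# Hypothetical sketch of a generate() signature compatible with the new call
# site. The real implementation lives outside this diff; names and defaults
# below are assumptions, not the repo's actual code.
def generate(
    prompt: str,
    topic: str,
    model: str,                  # e.g. "OpenAI GPT 4o"
    url_content: str = None,     # scraped search-result text, when enabled
    path: list[str] = None,      # uploaded PDF file paths
    temperature: float = 1,
    max_length: int = 2048,
    api_key: str = None,         # user-supplied key; fallback when None is assumed
    sys_message: str = "",
) -> str:
    ...
```

Moving `ai_model` and `api_key` into `generate_and_format` as defaulted trailing parameters, instead of hard-coding the model inside the body, is what lets the UI forward the user's model choice and key without breaking existing positional callers.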
google_search.py CHANGED
@@ -10,6 +10,12 @@ import requests
 
 load_dotenv()
 
+API_KEY = os.environ.get("GOOGLE_SEARCH_API_KEY")
+CSE_KEY = os.environ.get("GOOGLE_SEARCH_CSE_ID")
+
+# Number of pages to scrape
+NUM_PAGES = 10
+
 # load html2text and set up configs
 h2t = html2text.HTML2Text()
 h2t.bodywidth = 0  # No wrapping
@@ -39,13 +45,12 @@ def build_results_beautifulsoup(url_list):
     print(f"Scraping processing time: {scraping_time:.2f} seconds")
 
     result_content = {}
-    num_pages = 10
     count = 0
 
     print("Starting to process each URL...")
     for url, soup in zip(url_list, soups):
-        if count >= num_pages:
-            print(f"Reached the limit of {num_pages} pages. Stopping processing.")
+        if count >= NUM_PAGES:
+            print(f"Reached the limit of {NUM_PAGES} pages. Stopping processing.")
             break
 
         if soup:
@@ -68,10 +73,9 @@ def build_results_extractor(url_list):
     try:
         endpoint = "https://extractorapi.com/api/v1/extractor"
         result_content = {}
-        num_pages = 3
         count = 0
         for url in url_list:
-            if count >= num_pages:
+            if count >= NUM_PAGES:
                 break
             params = {"apikey": os.environ.get("EXTRACTOR_API_KEY"), "url": url}
             r = requests.get(endpoint, params=params)
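The module now reads the Google credentials once at import time and shares a single `NUM_PAGES` limit across both scrapers; note that `build_results_extractor` previously stopped after 3 URLs and now inherits the global limit of 10. The diff does not show where `API_KEY` and `CSE_KEY` are consumed; the sketch below is a hypothetical query helper against the public Google Custom Search JSON API (the endpoint and the `key`/`cx`/`q`/`num` parameters are Google's documented API; the function itself is not from this repo):

```python
import os

import requests
from dotenv import load_dotenv

load_dotenv()

API_KEY = os.environ.get("GOOGLE_SEARCH_API_KEY")
CSE_KEY = os.environ.get("GOOGLE_SEARCH_CSE_ID")


def search_urls(query: str, num_results: int = 10) -> list[str]:
    """Hypothetical helper: return result URLs for a Custom Search query."""
    resp = requests.get(
        "https://www.googleapis.com/customsearch/v1",
        params={"key": API_KEY, "cx": CSE_KEY, "q": query, "num": num_results},
        timeout=30,
    )
    resp.raise_for_status()
    # The API returns at most 10 items per request.
    return [item["link"] for item in resp.json().get("items", [])]
```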