eljanmahammadli committed · Commit bf91121 · 1 Parent(s): 8f26ea6

enable ai model selection and api key

Files changed (2)
  1. app.py +13 -10
  2. google_search.py +9 -5
app.py CHANGED
@@ -331,7 +331,7 @@ def generate_article(
     ai_model: str,
     content_string: str,
     url_content: str = None,
-    # api_key: str = None,
+    api_key: str = None,
     pdf_file_input: list[str] = None,
     generated_article: str = None,
     user_comments: str = None,
@@ -363,11 +363,15 @@ def generate_article(
 
     print("Generated Prompt...\n", prompt)
     article = generate(
-        prompt,
-        topic,
-        ai_model,
-        url_content,
-        pdf_file_input,  # api_key
+        prompt=prompt,
+        topic=topic,
+        model=ai_model,
+        url_content=url_content,
+        path=pdf_file_input,
+        temperature=1,
+        max_length=2048,
+        api_key=api_key,
+        sys_message="",
     )
 
     return clean_text(article)
@@ -428,8 +432,6 @@ def generate_and_format(
     references,
     num_examples,
     conclusion_type,
-    # ai_model,
-    # api_key,
     google_search_check,
     year_from,
     month_from,
@@ -441,12 +443,13 @@ def generate_and_format(
     include_sites,
     exclude_sites,
     pdf_file_input,
+    ai_model="OpenAI GPT 4o",
+    api_key=None,
     generated_article: str = None,
     user_comments: str = None,
 ):
     content_string = ""
     url_content = None
-    ai_model = "OpenAI GPT 4o"
     if google_search_check:
         date_from = build_date(year_from, month_from, day_from)
         date_to = build_date(year_to, month_to, day_to)
@@ -485,7 +488,7 @@
     ai_model,
     content_string,
     url_content,
-    # api_key,
+    api_key,
     pdf_file_input,
     generated_article,
     user_comments,
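
The updated call in generate_article() passes everything to generate() by keyword, including the user-supplied api_key and a sys_message. generate() itself is defined outside this diff, so the sketch below only illustrates a signature that would accept that call; the parameter types, defaults, and the environment-key fallback are assumptions rather than the project's actual implementation.

import os

# Hypothetical sketch of a generate() signature compatible with the keyword
# call in generate_article(); not the repository's real implementation.
def generate(
    prompt: str,
    topic: str,
    model: str,
    url_content: str = None,
    path: list[str] = None,
    temperature: float = 1.0,
    max_length: int = 2048,
    api_key: str = None,
    sys_message: str = "",
) -> str:
    # Prefer an explicitly supplied key; fall back to the environment
    # (assumed behaviour — the commit only shows api_key being threaded through).
    key = api_key or os.environ.get("OPENAI_API_KEY", "")
    messages = [
        {"role": "system", "content": sys_message},
        {"role": "user", "content": prompt},
    ]
    # Model dispatch (e.g. "OpenAI GPT 4o") would go here.
    return f"[{model} | topic: {topic} | messages: {len(messages)} | key set: {bool(key)}]"
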
google_search.py CHANGED
@@ -10,6 +10,12 @@ import requests
 
 load_dotenv()
 
+API_KEY = os.environ.get("GOOGLE_SEARCH_API_KEY")
+CSE_KEY = os.environ.get("GOOGLE_SEARCH_CSE_ID")
+
+# Number of pages to scrape
+NUM_PAGES = 10
+
 # load html2text and set up configs
 h2t = html2text.HTML2Text()
 h2t.bodywidth = 0  # No wrapping
@@ -39,13 +45,12 @@ def build_results_beautifulsoup(url_list):
     print(f"Scraping processing time: {scraping_time:.2f} seconds")
 
     result_content = {}
-    num_pages = 10
     count = 0
 
     print("Starting to process each URL...")
     for url, soup in zip(url_list, soups):
-        if count >= num_pages:
-            print(f"Reached the limit of {num_pages} pages. Stopping processing.")
+        if count >= NUM_PAGES:
+            print(f"Reached the limit of {NUM_PAGES} pages. Stopping processing.")
             break
 
         if soup:
@@ -68,10 +73,9 @@ def build_results_extractor(url_list):
     try:
         endpoint = "https://extractorapi.com/api/v1/extractor"
         result_content = {}
-        num_pages = 3
         count = 0
         for url in url_list:
-            if count >= num_pages:
+            if count >= NUM_PAGES:
                 break
             params = {"apikey": os.environ.get("EXTRACTOR_API_KEY"), "url": url}
             r = requests.get(endpoint, params=params)
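
The new module-level constants read the Google Programmable Search credentials once at import time, and NUM_PAGES replaces the two hard-coded per-function limits (previously 10 and 3). The commit does not show where API_KEY and CSE_KEY are consumed; the helper below is only an illustrative sketch of how they could drive the Google Custom Search JSON API to produce the url_list that the scrapers consume — the function name and its parameters are assumptions.

import os
import requests

API_KEY = os.environ.get("GOOGLE_SEARCH_API_KEY")
CSE_KEY = os.environ.get("GOOGLE_SEARCH_CSE_ID")
NUM_PAGES = 10

# Illustrative helper (not part of this commit): query the Google Custom Search
# JSON API with the module-level credentials and return up to NUM_PAGES result URLs.
def google_search_urls(query: str) -> list[str]:
    response = requests.get(
        "https://www.googleapis.com/customsearch/v1",
        params={"key": API_KEY, "cx": CSE_KEY, "q": query, "num": min(NUM_PAGES, 10)},
        timeout=30,
    )
    response.raise_for_status()
    items = response.json().get("items", [])
    return [item["link"] for item in items]
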