Spaces: Runtime error

eljanmahammadli committed · Commit bf91121 · Parent(s): 8f26ea6

enable ai model selection and api key

Files changed:
- app.py (+13 -10)
- google_search.py (+9 -5)
app.py CHANGED
@@ -331,7 +331,7 @@ def generate_article(
     ai_model: str,
     content_string: str,
     url_content: str = None,
-
+    api_key: str = None,
     pdf_file_input: list[str] = None,
     generated_article: str = None,
     user_comments: str = None,
@@ -363,11 +363,15 @@ def generate_article(
 
     print("Generated Prompt...\n", prompt)
     article = generate(
-        prompt,
-        topic,
-        ai_model,
-        url_content,
-        pdf_file_input,
+        prompt=prompt,
+        topic=topic,
+        model=ai_model,
+        url_content=url_content,
+        path=pdf_file_input,
+        temperature=1,
+        max_length=2048,
+        api_key=api_key,
+        sys_message="",
     )
 
     return clean_text(article)
@@ -428,8 +432,6 @@ def generate_and_format(
     references,
     num_examples,
     conclusion_type,
-    # ai_model,
-    # api_key,
     google_search_check,
     year_from,
     month_from,
@@ -441,12 +443,13 @@ def generate_and_format(
     include_sites,
     exclude_sites,
     pdf_file_input,
+    ai_model="OpenAI GPT 4o",
+    api_key=None,
     generated_article: str = None,
     user_comments: str = None,
 ):
     content_string = ""
     url_content = None
-    ai_model = "OpenAI GPT 4o"
     if google_search_check:
         date_from = build_date(year_from, month_from, day_from)
         date_to = build_date(year_to, month_to, day_to)
@@ -485,7 +488,7 @@ def generate_and_format(
         ai_model,
         content_string,
         url_content,
-
+        api_key,
         pdf_file_input,
         generated_article,
         user_comments,
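The call to `generate` now passes every argument by keyword, which makes the new `api_key` and the explicit generation settings (`temperature=1`, `max_length=2048`) visible at the call site. `generate` itself is not part of this diff; the following is a minimal sketch of a signature that would be compatible with the new call, where every default and comment is an assumption inferred from the keyword arguments in app.py:

```python
# Hypothetical sketch of a generate() signature compatible with the new call
# site. The real implementation lives outside this diff; names and defaults
# below are assumptions, not the repo's actual code.
def generate(
    prompt: str,
    topic: str,
    model: str,                  # e.g. "OpenAI GPT 4o"
    url_content: str = None,     # scraped search-result text, when enabled
    path: list[str] = None,      # uploaded PDF file paths
    temperature: float = 1,
    max_length: int = 2048,
    api_key: str = None,         # user-supplied key; fallback when None is assumed
    sys_message: str = "",
) -> str:
    ...
```

Moving `ai_model` and `api_key` into `generate_and_format` as defaulted trailing parameters, instead of hard-coding the model inside the body, is what lets the UI forward the user's model choice and key without breaking existing positional callers.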
google_search.py CHANGED
@@ -10,6 +10,12 @@ import requests
 
 load_dotenv()
 
+API_KEY = os.environ.get("GOOGLE_SEARCH_API_KEY")
+CSE_KEY = os.environ.get("GOOGLE_SEARCH_CSE_ID")
+
+# Number of pages to scrape
+NUM_PAGES = 10
+
 # load html2text and set up configs
 h2t = html2text.HTML2Text()
 h2t.bodywidth = 0  # No wrapping
@@ -39,13 +45,12 @@ def build_results_beautifulsoup(url_list):
     print(f"Scraping processing time: {scraping_time:.2f} seconds")
 
     result_content = {}
-    num_pages = 10
     count = 0
 
     print("Starting to process each URL...")
     for url, soup in zip(url_list, soups):
-        if count >= num_pages:
-            print(f"Reached the limit of {num_pages} pages. Stopping processing.")
+        if count >= NUM_PAGES:
+            print(f"Reached the limit of {NUM_PAGES} pages. Stopping processing.")
             break
 
         if soup:
@@ -68,10 +73,9 @@ def build_results_extractor(url_list):
     try:
         endpoint = "https://extractorapi.com/api/v1/extractor"
         result_content = {}
-        num_pages = 3
         count = 0
         for url in url_list:
-            if count >= num_pages:
+            if count >= NUM_PAGES:
                 break
             params = {"apikey": os.environ.get("EXTRACTOR_API_KEY"), "url": url}
             r = requests.get(endpoint, params=params)
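The module now reads the Google credentials once at import time and shares a single `NUM_PAGES` limit across both scrapers; note that `build_results_extractor` previously stopped after 3 URLs and now inherits the global limit of 10. The diff does not show where `API_KEY` and `CSE_KEY` are consumed; the sketch below is a hypothetical query helper against the public Google Custom Search JSON API (the endpoint and the `key`/`cx`/`q`/`num` parameters are Google's documented API; the function itself is not from this repo):

```python
import os

import requests
from dotenv import load_dotenv

load_dotenv()

API_KEY = os.environ.get("GOOGLE_SEARCH_API_KEY")
CSE_KEY = os.environ.get("GOOGLE_SEARCH_CSE_ID")


def search_urls(query: str, num_results: int = 10) -> list[str]:
    """Hypothetical helper: return result URLs for a Custom Search query."""
    resp = requests.get(
        "https://www.googleapis.com/customsearch/v1",
        params={"key": API_KEY, "cx": CSE_KEY, "q": query, "num": num_results},
        timeout=30,
    )
    resp.raise_for_status()
    # The API returns at most 10 items per request.
    return [item["link"] for item in resp.json().get("items", [])]
```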