Spaces:
Runtime error
Runtime error
Make Google search optional (checkbox), add role/occupation, switch to domains to include
Browse files- app.py +41 -26
- plagiarism.py +18 -9
app.py
CHANGED
@@ -219,11 +219,9 @@ def ai_check(text: str, option: str):
|
|
219 |
|
220 |
|
221 |
def generate_prompt(settings: Dict[str, str]) -> str:
|
222 |
-
content_string = "\n".join(
|
223 |
-
f"{url.strip()}: \n{content.strip()[:2000]}" for url, content in settings["sources"].items()
|
224 |
-
)
|
225 |
-
|
226 |
prompt = f"""
|
|
|
|
|
227 |
Write a {settings['article_length']} words (around) {settings['format']} on {settings['topic']}.
|
228 |
|
229 |
Style and Tone:
|
@@ -244,9 +242,7 @@ def generate_prompt(settings: Dict[str, str]) -> str:
|
|
244 |
- End with a {settings['conclusion_type']} conclusion
|
245 |
- Add a "References" section at the end with at least 3 credible sources, formatted as [1], [2], etc.
|
246 |
- Do not make any headline, title bold.
|
247 |
-
|
248 |
-
Use the content here from the URLs I've found for you:
|
249 |
-
{content_string}
|
250 |
|
251 |
Ensure proper paragraph breaks for better readability.
|
252 |
Avoid any references to artificial intelligence, language models, or the fact that this is generated by an AI, and do not mention something like here is the article etc.
|
@@ -255,11 +251,9 @@ def generate_prompt(settings: Dict[str, str]) -> str:
|
|
255 |
|
256 |
|
257 |
def regenerate_prompt(settings: Dict[str, str]) -> str:
|
258 |
-
content_string = "\n".join(
|
259 |
-
f"{url.strip()}: \n{content.strip()[:2000]}" for url, content in settings["sources"].items()
|
260 |
-
)
|
261 |
-
|
262 |
prompt = f"""
|
|
|
|
|
263 |
"{settings['generated_article']}"
|
264 |
|
265 |
Edit the given text based on user comments.
|
@@ -269,8 +263,7 @@ def regenerate_prompt(settings: Dict[str, str]) -> str:
|
|
269 |
- The original content should not be changed. Make minor modifications based on user comments above.
|
270 |
- Keep the references the same as the given text in the same format.
|
271 |
- Do not make any headline, title bold.
|
272 |
-
|
273 |
-
{content_string}
|
274 |
|
275 |
Ensure proper paragraph breaks for better readability.
|
276 |
Avoid any references to artificial intelligence, language models, or the fact that this is generated by an AI, and do not mention something like here is the article etc.
|
@@ -279,6 +272,7 @@ def regenerate_prompt(settings: Dict[str, str]) -> str:
|
|
279 |
|
280 |
|
281 |
def generate_article(
|
|
|
282 |
topic: str,
|
283 |
keywords: str,
|
284 |
article_length: str,
|
@@ -292,15 +286,13 @@ def generate_article(
|
|
292 |
num_examples: str,
|
293 |
conclusion_type: str,
|
294 |
ai_model: str,
|
295 |
-
|
296 |
-
domains_to_skip,
|
297 |
api_key: str = None,
|
298 |
generated_article: str = None,
|
299 |
user_comments: str = None,
|
300 |
) -> str:
|
301 |
-
|
302 |
-
url_content = google_search(topic, sorted_date, domains_to_skip)
|
303 |
settings = {
|
|
|
304 |
"topic": topic,
|
305 |
"keywords": [k.strip() for k in keywords.split(",")],
|
306 |
"article_length": article_length,
|
@@ -313,7 +305,7 @@ def generate_article(
|
|
313 |
"references": [r.strip() for r in references.split(",")],
|
314 |
"num_examples": num_examples,
|
315 |
"conclusion_type": conclusion_type,
|
316 |
-
"sources":
|
317 |
"generated_article": generated_article,
|
318 |
"user_comments": user_comments,
|
319 |
}
|
@@ -379,7 +371,11 @@ def format_references(text: str) -> str:
|
|
379 |
in_references = False
|
380 |
|
381 |
for line in lines:
|
382 |
-
if
|
|
|
|
|
|
|
|
|
383 |
in_references = True
|
384 |
continue
|
385 |
if in_references:
|
@@ -396,6 +392,7 @@ def format_references(text: str) -> str:
|
|
396 |
|
397 |
|
398 |
def generate_and_format(
|
|
|
399 |
topic,
|
400 |
keywords,
|
401 |
article_length,
|
@@ -410,20 +407,29 @@ def generate_and_format(
|
|
410 |
conclusion_type,
|
411 |
ai_model,
|
412 |
api_key,
|
|
|
413 |
year_from,
|
414 |
month_from,
|
415 |
day_from,
|
416 |
year_to,
|
417 |
month_to,
|
418 |
day_to,
|
419 |
-
|
420 |
generated_article: str = None,
|
421 |
user_comments: str = None,
|
422 |
):
|
423 |
date_from = build_date(year_from, month_from, day_from)
|
424 |
date_to = build_date(year_to, month_to, day_to)
|
425 |
sorted_date = f"date:r:{date_from}:{date_to}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
426 |
article = generate_article(
|
|
|
427 |
topic,
|
428 |
keywords,
|
429 |
article_length,
|
@@ -437,9 +443,8 @@ def generate_and_format(
|
|
437 |
num_examples,
|
438 |
conclusion_type,
|
439 |
ai_model,
|
|
|
440 |
api_key,
|
441 |
-
sorted_date,
|
442 |
-
domains_to_skip,
|
443 |
generated_article,
|
444 |
user_comments,
|
445 |
)
|
@@ -465,6 +470,7 @@ def create_interface():
|
|
465 |
with gr.Column(scale=2):
|
466 |
with gr.Group():
|
467 |
gr.Markdown("## Article Configuration", elem_classes="text-xl mb-4")
|
|
|
468 |
input_topic = gr.Textbox(
|
469 |
label="Topic",
|
470 |
placeholder="Enter the main topic of your article",
|
@@ -585,6 +591,10 @@ def create_interface():
|
|
585 |
)
|
586 |
gr.Markdown("# Search Options", elem_classes="text-center text-3xl mb-6")
|
587 |
with gr.Group():
|
|
|
|
|
|
|
|
|
588 |
with gr.Row():
|
589 |
month_from = gr.Dropdown(
|
590 |
choices=months,
|
@@ -606,10 +616,11 @@ def create_interface():
|
|
606 |
year_to = gr.Textbox(label="To Year", value=d1[2])
|
607 |
|
608 |
with gr.Row():
|
609 |
-
|
610 |
domain_list,
|
|
|
611 |
multiselect=True,
|
612 |
-
label="
|
613 |
)
|
614 |
|
615 |
with gr.Group():
|
@@ -691,6 +702,7 @@ def create_interface():
|
|
691 |
generate_btn.click(
|
692 |
fn=generate_and_format,
|
693 |
inputs=[
|
|
|
694 |
input_topic,
|
695 |
input_keywords,
|
696 |
input_length,
|
@@ -705,13 +717,14 @@ def create_interface():
|
|
705 |
input_conclusion,
|
706 |
ai_generator,
|
707 |
input_api,
|
|
|
708 |
year_from,
|
709 |
month_from,
|
710 |
day_from,
|
711 |
year_to,
|
712 |
month_to,
|
713 |
day_to,
|
714 |
-
|
715 |
],
|
716 |
outputs=[output_article],
|
717 |
)
|
@@ -719,6 +732,7 @@ def create_interface():
|
|
719 |
regenerate_btn.click(
|
720 |
fn=generate_and_format,
|
721 |
inputs=[
|
|
|
722 |
input_topic,
|
723 |
input_keywords,
|
724 |
input_length,
|
@@ -733,13 +747,14 @@ def create_interface():
|
|
733 |
input_conclusion,
|
734 |
ai_generator,
|
735 |
input_api,
|
|
|
736 |
year_from,
|
737 |
month_from,
|
738 |
day_from,
|
739 |
year_to,
|
740 |
month_to,
|
741 |
day_to,
|
742 |
-
|
743 |
output_article,
|
744 |
ai_comments,
|
745 |
],
|
|
|
219 |
|
220 |
|
221 |
def generate_prompt(settings: Dict[str, str]) -> str:
|
|
|
|
|
|
|
|
|
222 |
prompt = f"""
|
223 |
+
I am a {settings['role']}
|
224 |
+
|
225 |
Write a {settings['article_length']} words (around) {settings['format']} on {settings['topic']}.
|
226 |
|
227 |
Style and Tone:
|
|
|
242 |
- End with a {settings['conclusion_type']} conclusion
|
243 |
- Add a "References" section at the end with at least 3 credible sources, formatted as [1], [2], etc.
|
244 |
- Do not make any headline, title bold.
|
245 |
+
{settings['sources']}
|
|
|
|
|
246 |
|
247 |
Ensure proper paragraph breaks for better readability.
|
248 |
Avoid any references to artificial intelligence, language models, or the fact that this is generated by an AI, and do not mention something like here is the article etc.
|
|
|
251 |
|
252 |
|
253 |
def regenerate_prompt(settings: Dict[str, str]) -> str:
|
|
|
|
|
|
|
|
|
254 |
prompt = f"""
|
255 |
+
I am a {settings['role']}
|
256 |
+
|
257 |
"{settings['generated_article']}"
|
258 |
|
259 |
Edit the given text based on user comments.
|
|
|
263 |
- The original content should not be changed. Make minor modifications based on user comments above.
|
264 |
- Keep the references the same as the given text in the same format.
|
265 |
- Do not make any headline, title bold.
|
266 |
+
{settings['sources']}
|
|
|
267 |
|
268 |
Ensure proper paragraph breaks for better readability.
|
269 |
Avoid any references to artificial intelligence, language models, or the fact that this is generated by an AI, and do not mention something like here is the article etc.
|
|
|
272 |
|
273 |
|
274 |
def generate_article(
|
275 |
+
input_role: str,
|
276 |
topic: str,
|
277 |
keywords: str,
|
278 |
article_length: str,
|
|
|
286 |
num_examples: str,
|
287 |
conclusion_type: str,
|
288 |
ai_model: str,
|
289 |
+
content_string: str,
|
|
|
290 |
api_key: str = None,
|
291 |
generated_article: str = None,
|
292 |
user_comments: str = None,
|
293 |
) -> str:
|
|
|
|
|
294 |
settings = {
|
295 |
+
"role": input_role,
|
296 |
"topic": topic,
|
297 |
"keywords": [k.strip() for k in keywords.split(",")],
|
298 |
"article_length": article_length,
|
|
|
305 |
"references": [r.strip() for r in references.split(",")],
|
306 |
"num_examples": num_examples,
|
307 |
"conclusion_type": conclusion_type,
|
308 |
+
"sources": content_string,
|
309 |
"generated_article": generated_article,
|
310 |
"user_comments": user_comments,
|
311 |
}
|
|
|
371 |
in_references = False
|
372 |
|
373 |
for line in lines:
|
374 |
+
if (
|
375 |
+
line.strip().lower() == "references"
|
376 |
+
or line.strip().lower() == "references:"
|
377 |
+
or line.strip().lower().startswith("references:")
|
378 |
+
):
|
379 |
in_references = True
|
380 |
continue
|
381 |
if in_references:
|
|
|
392 |
|
393 |
|
394 |
def generate_and_format(
|
395 |
+
input_role,
|
396 |
topic,
|
397 |
keywords,
|
398 |
article_length,
|
|
|
407 |
conclusion_type,
|
408 |
ai_model,
|
409 |
api_key,
|
410 |
+
google_search_check,
|
411 |
year_from,
|
412 |
month_from,
|
413 |
day_from,
|
414 |
year_to,
|
415 |
month_to,
|
416 |
day_to,
|
417 |
+
domains_to_include,
|
418 |
generated_article: str = None,
|
419 |
user_comments: str = None,
|
420 |
):
|
421 |
date_from = build_date(year_from, month_from, day_from)
|
422 |
date_to = build_date(year_to, month_to, day_to)
|
423 |
sorted_date = f"date:r:{date_from}:{date_to}"
|
424 |
+
content_string = ""
|
425 |
+
if google_search_check:
|
426 |
+
url_content = google_search(topic, sorted_date, domains_to_include)
|
427 |
+
content_string = "\n".join(
|
428 |
+
f"{url.strip()}: \n{content.strip()[:2000]}" for url, content in url_content.items()
|
429 |
+
)
|
430 |
+
content_string = "Use the trusted information here from the URLs I've found for you:\n" + content_string
|
431 |
article = generate_article(
|
432 |
+
input_role,
|
433 |
topic,
|
434 |
keywords,
|
435 |
article_length,
|
|
|
443 |
num_examples,
|
444 |
conclusion_type,
|
445 |
ai_model,
|
446 |
+
content_string,
|
447 |
api_key,
|
|
|
|
|
448 |
generated_article,
|
449 |
user_comments,
|
450 |
)
|
|
|
470 |
with gr.Column(scale=2):
|
471 |
with gr.Group():
|
472 |
gr.Markdown("## Article Configuration", elem_classes="text-xl mb-4")
|
473 |
+
input_role = gr.Textbox(label="I am a", placeholder="Enter your role", value="Student")
|
474 |
input_topic = gr.Textbox(
|
475 |
label="Topic",
|
476 |
placeholder="Enter the main topic of your article",
|
|
|
591 |
)
|
592 |
gr.Markdown("# Search Options", elem_classes="text-center text-3xl mb-6")
|
593 |
with gr.Group():
|
594 |
+
with gr.Row():
|
595 |
+
google_search_check = gr.Checkbox(
|
596 |
+
label="Enable Google Search For Recent Sources", value=True
|
597 |
+
)
|
598 |
with gr.Row():
|
599 |
month_from = gr.Dropdown(
|
600 |
choices=months,
|
|
|
616 |
year_to = gr.Textbox(label="To Year", value=d1[2])
|
617 |
|
618 |
with gr.Row():
|
619 |
+
domains_to_include = gr.Dropdown(
|
620 |
domain_list,
|
621 |
+
value=domain_list,
|
622 |
multiselect=True,
|
623 |
+
label="Domains To Include",
|
624 |
)
|
625 |
|
626 |
with gr.Group():
|
|
|
702 |
generate_btn.click(
|
703 |
fn=generate_and_format,
|
704 |
inputs=[
|
705 |
+
input_role,
|
706 |
input_topic,
|
707 |
input_keywords,
|
708 |
input_length,
|
|
|
717 |
input_conclusion,
|
718 |
ai_generator,
|
719 |
input_api,
|
720 |
+
google_search_check,
|
721 |
year_from,
|
722 |
month_from,
|
723 |
day_from,
|
724 |
year_to,
|
725 |
month_to,
|
726 |
day_to,
|
727 |
+
domains_to_include,
|
728 |
],
|
729 |
outputs=[output_article],
|
730 |
)
|
|
|
732 |
regenerate_btn.click(
|
733 |
fn=generate_and_format,
|
734 |
inputs=[
|
735 |
+
input_role,
|
736 |
input_topic,
|
737 |
input_keywords,
|
738 |
input_length,
|
|
|
747 |
input_conclusion,
|
748 |
ai_generator,
|
749 |
input_api,
|
750 |
+
google_search_check,
|
751 |
year_from,
|
752 |
month_from,
|
753 |
day_from,
|
754 |
year_to,
|
755 |
month_to,
|
756 |
day_to,
|
757 |
+
domains_to_include,
|
758 |
output_article,
|
759 |
ai_comments,
|
760 |
],
|
plagiarism.py
CHANGED
@@ -61,10 +61,18 @@ async def parallel_scrap(urls):
|
|
61 |
return results
|
62 |
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
def google_search_urls(
|
65 |
text,
|
66 |
sorted_date,
|
67 |
-
|
68 |
api_key,
|
69 |
cse_id,
|
70 |
**kwargs,
|
@@ -75,7 +83,9 @@ def google_search_urls(
|
|
75 |
if "items" in results and len(results["items"]) > 0:
|
76 |
for count, link in enumerate(results["items"]):
|
77 |
# skip user selected domains
|
78 |
-
if (
|
|
|
|
|
79 |
continue
|
80 |
url = link["link"]
|
81 |
if url not in url_list:
|
@@ -84,25 +94,24 @@ def google_search_urls(
|
|
84 |
|
85 |
|
86 |
def google_search(
|
87 |
-
|
88 |
sorted_date,
|
89 |
-
|
90 |
):
|
91 |
# api_key = "AIzaSyCLyCCpOPLZWuptuPAPSg8cUIZhdEMVf6g"
|
92 |
-
api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
|
93 |
-
|
94 |
# api_key = "AIzaSyCS1WQDMl1IMjaXtwSd_2rA195-Yc4psQE"
|
95 |
# api_key = "AIzaSyCB61O70B8AC3l5Kk3KMoLb6DN37B7nqIk"
|
96 |
# api_key = "AIzaSyCg1IbevcTAXAPYeYreps6wYWDbU0Kz8tg"
|
97 |
# api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
|
98 |
cse_id = "851813e81162b4ed4"
|
99 |
-
|
100 |
# get list of URLS to check
|
101 |
start_time = time.perf_counter()
|
102 |
url_list = google_search_urls(
|
103 |
-
|
104 |
sorted_date,
|
105 |
-
|
106 |
api_key,
|
107 |
cse_id,
|
108 |
)
|
|
|
61 |
return results
|
62 |
|
63 |
|
64 |
+
def scrap(urls):
    """Fetch every URL in ``urls`` one after another over a single shared client.

    Args:
        urls: Iterable of URLs; each is passed to ``get_url_data`` in turn.

    Returns:
        A list holding one ``get_url_data`` result per URL, in input order.
    """
    # The context manager closes the client (and its connection pool) even if
    # a fetch raises; the original left the client open.
    with httpx.Client() as client:
        return [get_url_data(url=url, client=client) for url in urls]
|
70 |
+
|
71 |
+
|
72 |
def google_search_urls(
|
73 |
text,
|
74 |
sorted_date,
|
75 |
+
domains_to_include,
|
76 |
api_key,
|
77 |
cse_id,
|
78 |
**kwargs,
|
|
|
83 |
if "items" in results and len(results["items"]) > 0:
|
84 |
for count, link in enumerate(results["items"]):
|
85 |
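# skip results whose URL matches none of the user-selected domains to include (every result is skipped when domains_to_include is None)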
|
86 |
+
if (domains_to_include is None) or not any(
|
87 |
+
("." + domain) in link["link"] for domain in domains_to_include
|
88 |
+
):
|
89 |
continue
|
90 |
url = link["link"]
|
91 |
if url not in url_list:
|
|
|
94 |
|
95 |
|
96 |
def google_search(
|
97 |
+
topic,
|
98 |
sorted_date,
|
99 |
+
domains_to_include,
|
100 |
):
|
101 |
# api_key = "AIzaSyCLyCCpOPLZWuptuPAPSg8cUIZhdEMVf6g"
|
102 |
+
# api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
|
103 |
+
api_key = "AIzaSyCLyCCpOPLZWuptuPAPSg8cUIZhdEMVf6g"
|
104 |
# api_key = "AIzaSyCS1WQDMl1IMjaXtwSd_2rA195-Yc4psQE"
|
105 |
# api_key = "AIzaSyCB61O70B8AC3l5Kk3KMoLb6DN37B7nqIk"
|
106 |
# api_key = "AIzaSyCg1IbevcTAXAPYeYreps6wYWDbU0Kz8tg"
|
107 |
# api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
|
108 |
cse_id = "851813e81162b4ed4"
|
|
|
109 |
# get list of URLS to check
|
110 |
start_time = time.perf_counter()
|
111 |
url_list = google_search_urls(
|
112 |
+
topic,
|
113 |
sorted_date,
|
114 |
+
domains_to_include,
|
115 |
api_key,
|
116 |
cse_id,
|
117 |
)
|