Spaces:
Sleeping
Sleeping
""" | |
utils.py - Utility functions for the project. | |
""" | |
import logging | |
import re | |
# Tokens that show up frequently in the scraped dataset, mapped to the neutral
# placeholder that should appear in their place.
_POSTPROCESS_REPLACEMENTS = {
    "ENA": "COMPANY",
    "Enron": "COMPANY",
    "Enron Corporation": "COMPANY",
    "Sony Pictures Entertainment": "COMPANY",
    "Columbia Pictures": "COMPANY",
    "Sony": "COMPANY",
    "Columbia": "COMPANY",
    "Hillary": "Jane",
    "Clinton": "Smith",
    "Amy": "Jane",
    "Sara": "Jane",
    "Harambe": "Jane",
    "Pascal": "PERSON",
}

# Each token is matched both as written and fully lowercased.
_POSTPROCESS_LOOKUP = {
    variant: placeholder
    for token, placeholder in _POSTPROCESS_REPLACEMENTS.items()
    for variant in (token, token.lower())
}

# Longest alternatives come first so "Enron Corporation" wins over "Enron";
# the \b anchors keep short tokens such as "ena" from matching inside
# unrelated words such as "arena".
_POSTPROCESS_PATTERN = re.compile(
    r"\b(?:"
    + "|".join(
        re.escape(variant)
        for variant in sorted(_POSTPROCESS_LOOKUP, key=len, reverse=True)
    )
    + r")\b"
)


def postprocess(text: str) -> str:
    """
    postprocess - remove common values in scraped dataset

    Known tokens are replaced with neutral placeholders in a single pass.
    A token is matched only as a whole word, either exactly as listed or
    fully lowercased; longer tokens take precedence over shorter ones that
    they contain.

    Args:
        text (str): the text to postprocess

    Returns:
        str: the text with every known token replaced by its placeholder
    """
    return _POSTPROCESS_PATTERN.sub(
        lambda match: _POSTPROCESS_LOOKUP[match.group(0)], text
    )
def clear(text, verbose=False, **kwargs):
    """
    clear - return an empty string, for use with buttons

    Args:
        text: the current text; only used for the optional log message
        verbose (bool, optional): log the text being cleared. Defaults to False.
        **kwargs: accepted and ignored

    Returns:
        str: always the empty string
    """
    if not verbose:
        return ""
    logging.info(f"Clearing text: {text}")
    return ""
def make_email_link(
    subject: str = "Email subject - This was generated by Postbot",
    link_text: str = "click to open in your email client",
    body: str = None,
    tag_placeholder: str = "PLACEHOLDER",
):
    """
    make_email_link - generate an email link

    The subject and body are percent-encoded before being placed in the
    ``mailto:`` URL, so spaces, quotes, ``&``, ``#`` and line breaks cannot
    break the surrounding ``href`` attribute or split the query string.

    Args:
        subject (str, optional): the subject of the email. Defaults to "Email subject - This was generated by Postbot".
        link_text (str, optional): the text of the link, inserted into the HTML as-is. Defaults to "click to open in your email client".
        body (str, optional): the body of the email. Defaults to None, in which case a stub body is used.
        tag_placeholder (str, optional): the text substituted for each HTML-like tag found in the body. Defaults to "PLACEHOLDER".

    Returns:
        str: the email link, in the form of an html link
    """
    # Local import so this function does not depend on module-level imports
    # beyond the `re` it already used.
    from urllib.parse import quote

    if body is None:
        body = "hmm - no body. replace me"
    # replace anything that looks like an HTML tag (<...>) in the body
    body = re.sub(r"<[^>]*>", tag_placeholder, body)
    # safe="" also encodes "/" so nothing reserved survives in the query values
    encoded_subject = quote(subject, safe="")
    encoded_body = quote(body, safe="")
    return (
        f'<a href="mailto:%20?subject={encoded_subject}&body={encoded_body}">'
        f"{link_text}</a>"
    )