Spaces:
Sleeping
Sleeping
""" | |
utils.py - Utility functions for the project. | |
""" | |
def postprocess(text: str) -> str:
    """
    postprocess - replace common names found in the scraped dataset

    Each key of the replacement table, in its exact spelling and in its
    all-lowercase spelling, is swapped for its placeholder value. Only
    whole-word occurrences are replaced, so a short key such as "ena" no
    longer corrupts unrelated words like "arena" or "subpoena".

    Args:
        text (str): the text to postprocess

    Returns:
        str: the text with every whole-word occurrence of a known name
            replaced by its placeholder
    """
    # Function-scope import: keeps this block self-contained.
    import re

    replacements = {
        "ENA": "<COMPANY>",
        "Enron": "<COMPANY>",
        "Sony": "<COMPANY>",
        "Columbia": "<COMPANY>",
        "Hilary": "John",
        "Clinton": "Smith",
        "Amy": "Jane",
        "Pascal": "<PERSON>",
    }

    # Map both the exact-case and the lowercase spelling to the placeholder.
    lookup = {}
    for name, placeholder in replacements.items():
        lookup[name] = placeholder
        lookup[name.lower()] = placeholder

    # The lookarounds require that the match is not glued to another word
    # character on either side, i.e. it is a whole word. A single pass also
    # guarantees that inserted placeholders are never re-scanned.
    alternatives = "|".join(re.escape(name) for name in lookup)
    pattern = re.compile(r"(?<!\w)(?:" + alternatives + r")(?!\w)")

    return pattern.sub(lambda match: lookup[match.group(0)], text)