Spaces:
Running
Running
File size: 646 Bytes
0601dad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
"""
utils.py - Utility functions for the project.
"""
def postprocess(text: str) -> str:
    """
    postprocess - replace common names in the scraped dataset with placeholders

    Each key of the replacement table is matched both as written and in
    lowercase, and only as a whole word, so ordinary words that merely
    contain a key (e.g. "subpoena", "penalty", "dreamy") are left intact.

    Args:
        text (str): the text to postprocess

    Returns:
        str: the text with every whole-word occurrence of a known name
            replaced by its placeholder
    """
    # Local import keeps the function self-contained; it is cached by Python
    # after the first call.
    import re

    replacements = {
        "ENA": "<COMPANY>",
        "Enron": "<COMPANY>",
        "Sony": "<COMPANY>",
        "Columbia": "<COMPANY>",
        "Hilary": "John",
        "Clinton": "Smith",
        "Amy": "Jane",
        "Pascal": "<PERSON>",
    }

    # Accept each name as written and in lowercase; both spellings map to the
    # same placeholder.
    lookup = {}
    for name, placeholder in replacements.items():
        lookup[name] = placeholder
        lookup[name.lower()] = placeholder

    # \b on both sides restricts matches to whole words. Plain substring
    # replacement would rewrite fragments of unrelated words, e.g. the
    # lowercase "ena" inside "subpoena" or "amy" inside "dreamy".
    pattern = r"\b(?:" + "|".join(re.escape(name) for name in lookup) + r")\b"

    # A single pass also guarantees that inserted placeholders are never
    # re-scanned by a later replacement.
    return re.sub(pattern, lambda match: lookup[match.group(0)], text)
|