oliverguhr
committed on
Commit
·
7589fe5
1
Parent(s):
89ac337
changed paper link to the correct document
Browse files
README.md
CHANGED
@@ -49,8 +49,8 @@ class SentimentModel():
|
|
49 |
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
50 |
|
51 |
self.clean_chars = re.compile(r'[^A-Za-züöäÖÜÄß ]', re.MULTILINE)
|
52 |
-
self.clean_http_urls = re.compile(r'https
|
53 |
-
self.clean_at_mentions = re.compile(r'
|
54 |
|
55 |
def predict_sentiment(self, texts: List[str])-> List[str]:
|
56 |
texts = [self.clean_text(text) for text in texts]
|
@@ -70,7 +70,8 @@ class SentimentModel():
|
|
70 |
return text.replace("0"," null").replace("1"," eins").replace("2"," zwei").replace("3"," drei").replace("4"," vier").replace("5"," fünf").replace("6"," sechs").replace("7"," sieben").replace("8"," acht").replace("9"," neun")
|
71 |
|
72 |
def clean_text(self,text: str)-> str:
|
73 |
-
text = text.replace("\
|
|
|
74 |
text = self.clean_http_urls.sub('',text)
|
75 |
text = self.clean_at_mentions.sub('',text)
|
76 |
text = self.replace_numbers(text)
|
@@ -118,7 +119,7 @@ For feedback and questions contact me view mail or Twitter [@oliverguhr](https:/
|
|
118 |
address = {Marseille, France},
|
119 |
publisher = {European Language Resources Association},
|
120 |
pages = {1620--1625},
|
121 |
-
url = {https://www.aclweb.org/anthology/2020.lrec-1.
|
122 |
}
|
123 |
```
|
124 |
|
|
|
49 |
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
50 |
|
51 |
self.clean_chars = re.compile(r'[^A-Za-züöäÖÜÄß ]', re.MULTILINE)
|
52 |
+
self.clean_http_urls = re.compile(r'https*\\S+', re.MULTILINE)
|
53 |
+
self.clean_at_mentions = re.compile(r'@\\S+', re.MULTILINE)
|
54 |
|
55 |
def predict_sentiment(self, texts: List[str])-> List[str]:
|
56 |
texts = [self.clean_text(text) for text in texts]
|
|
|
70 |
return text.replace("0"," null").replace("1"," eins").replace("2"," zwei").replace("3"," drei").replace("4"," vier").replace("5"," fünf").replace("6"," sechs").replace("7"," sieben").replace("8"," acht").replace("9"," neun")
|
71 |
|
72 |
def clean_text(self,text: str)-> str:
|
73 |
+
text = text.replace("\
|
74 |
+
", " ")
|
75 |
text = self.clean_http_urls.sub('',text)
|
76 |
text = self.clean_at_mentions.sub('',text)
|
77 |
text = self.replace_numbers(text)
|
|
|
119 |
address = {Marseille, France},
|
120 |
publisher = {European Language Resources Association},
|
121 |
pages = {1620--1625},
|
122 |
+
url = {https://www.aclweb.org/anthology/2020.lrec-1.202}
|
123 |
}
|
124 |
```
|
125 |
|