Update utils.py
Browse files
utils.py
CHANGED
@@ -144,13 +144,13 @@ def normalise_prompt (prompt):
|
|
144 |
# Remove punctuation: keep only tokens that are purely alphanumeric
|
145 |
tokens = [word for word in tokens if word.isalnum()]
|
146 |
# Stop-word removal
|
147 |
-
nltk.download('stopwords')
|
148 |
-
stop_words = set(stopwords.words('english'))
|
149 |
-
tokens = [word for word in tokens if not word in stop_words]
|
150 |
# 5. Lemmatization: reduce words to their base form so texts can be compared more easily
|
151 |
-
nltk.download('wordnet')
|
152 |
-
lemmatizer = WordNetLemmatizer()
|
153 |
-
tokens = [lemmatizer.lemmatize(word) for word in tokens]
|
154 |
# 6. Handling Special Characters (Remove non-alphanumeric characters)
|
155 |
tokens = [re.sub(r'\W+', '', word) for word in tokens]
|
156 |
# 7. Spell Check (optional, using a library like pyspellchecker)
|
|
|
144 |
# Remove punctuation: keep only tokens that are purely alphanumeric
|
145 |
tokens = [word for word in tokens if word.isalnum()]
|
146 |
# Stop-word removal
|
147 |
+
#nltk.download('stopwords')
|
148 |
+
#stop_words = set(stopwords.words('english'))
|
149 |
+
#tokens = [word for word in tokens if not word in stop_words]
|
150 |
# 5. Lemmatization: reduce words to their base form so texts can be compared more easily
|
151 |
+
#nltk.download('wordnet')
|
152 |
+
#lemmatizer = WordNetLemmatizer()
|
153 |
+
#tokens = [lemmatizer.lemmatize(word) for word in tokens]
|
154 |
# 6. Handling Special Characters (Remove non-alphanumeric characters)
|
155 |
tokens = [re.sub(r'\W+', '', word) for word in tokens]
|
156 |
# 7. Spell Check (optional, using a library like pyspellchecker)
|