alexkueck committed on
Commit
8937e12
·
1 Parent(s): 226612a

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +6 -6
utils.py CHANGED
@@ -144,13 +144,13 @@ def normalise_prompt (prompt):
144
  #Punktuierung entfernen
145
  tokens = [word for word in tokens if word.isalnum()]
146
  # Stop Word Entfernung
147
- nltk.download('stopwords')
148
- stop_words = set(stopwords.words('english'))
149
- tokens = [word for word in tokens if not word in stop_words]
150
  # 5. Lemmatisierung: Worte in Grundform bringen, um Text besser vergleichen zu können
151
- nltk.download('wordnet')
152
- lemmatizer = WordNetLemmatizer()
153
- tokens = [lemmatizer.lemmatize(word) for word in tokens]
154
  # 6. Handling Special Characters (Remove non-alphanumeric characters)
155
  tokens = [re.sub(r'\W+', '', word) for word in tokens]
156
  # 7. Spell Check (optional, using a library like pyspellchecker)
 
144
  #Punktuierung entfernen
145
  tokens = [word for word in tokens if word.isalnum()]
146
  # Stop Word Entfernung
147
+ #nltk.download('stopwords')
148
+ #stop_words = set(stopwords.words('english'))
149
+ #tokens = [word for word in tokens if not word in stop_words]
150
  # 5. Lemmatisierung: Worte in Grundform bringen, um Text besser vergleichen zu können
151
+ #nltk.download('wordnet')
152
+ #lemmatizer = WordNetLemmatizer()
153
+ #tokens = [lemmatizer.lemmatize(word) for word in tokens]
154
  # 6. Handling Special Characters (Remove non-alphanumeric characters)
155
  tokens = [re.sub(r'\W+', '', word) for word in tokens]
156
  # 7. Spell Check (optional, using a library like pyspellchecker)