Spaces:
Runtime error
Runtime error
import re | |
from nltk.corpus.reader import pickle | |
import pandas as pd | |
import numpy as np | |
from nltk.corpus import stopwords | |
from nltk.stem import SnowballStemmer | |
def clean_text(text): | |
stop_words = set(stopwords.words("english")) | |
# english_stopwords = stopwords.words("english") | |
english_stemmer = SnowballStemmer("english") | |
text = text.replace('', '') # Remove | |
text = re.sub(r'[^\w]', ' ', text) # Remove symbols | |
text = re.sub(r'[ ]{2,}', ' ', text) # Remove extra spaces | |
text = re.sub(r'[ \t]+$', '', text) # Remove trailing white spaces | |
tokens = [] | |
for token in text.split(): | |
if token not in stop_words: | |
token = english_stemmer.stem(token) | |
tokens.append(token) | |
return " ".join(tokens) | |
def preprocess_pipeline(text): | |
return clean_text(text) | |
def vectorizer(text): | |
count_vectorizer = pickle.load(open("vectorizers/count_vectorizer.pkl", "rb")) | |
return count_vectorizer.transform(text) | |