Spaces:

srivarshan
/

argumentation-quality-analyzer

Runtime error

srivarshan committed on Dec 18, 2022

Commit

a90b4b9

1 Parent(s): 876b82d

Add text preprocessing

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,11 +1,13 @@
 import gradio as gr
 from model import CustomModel
 import os
 # Copy ./nltk_data to /home/user/nltk_data with a shell `cp -r`. The exit
 # status returned by os.system is not checked, so a failed copy goes unnoticed.
 # NOTE(review): presumably this is where NLTK looks for its corpora — confirm.
 os.system("cp -r ./nltk_data/ /home/user/nltk_data")
 def analyze(text):
     """Construct a ``CustomModel`` and hand the input text back untouched.

     Args:
         text: The string received from the Gradio text input.

     Returns:
         The same ``text`` object that was passed in.
     """
     classifier = CustomModel()  # built on every call; not consulted yet
     return text
 # Gradio interface: one text input is passed to analyze() and its return
 # value is shown in one text output.
 app = gr.Interface(fn=analyze, inputs="text", outputs="text")

 import gradio as gr
 from model import CustomModel
+from preprocess import preprocess_pipeline
 import os
 # Copy ./nltk_data to /home/user/nltk_data with a shell `cp -r`. The exit
 # status returned by os.system is not checked, so a failed copy goes unnoticed.
 # NOTE(review): presumably this is where NLTK looks for its corpora — confirm.
 os.system("cp -r ./nltk_data/ /home/user/nltk_data")
 def analyze(text):
     """Preprocess the submitted text and return the cleaned string.

     ``preprocess_pipeline`` maps ``clean_text`` over the elements of its
     argument, so the single input string is wrapped in a one-element list.
     Passing the bare string would clean it character by character and return
     a list instead of text for the Gradio ``"text"`` output.

     Args:
         text: The string received from the Gradio text input.

     Returns:
         The cleaned version of ``text`` as a single string.
     """
     # Not consulted yet; kept so any work done by the constructor still runs.
     model = CustomModel()
     cleaned_documents = preprocess_pipeline([text])
     return cleaned_documents[0]
 # Gradio interface: one text input is passed to analyze() and its return
 # value is shown in one text output.
 app = gr.Interface(fn=analyze, inputs="text", outputs="text")

preprocess.py ADDED Viewed

+import re
+from nltk.corpus import stopwords
+from nltk.stem import SnowballStemmer
def clean_text(text):
    """Normalise one document: strip symbols, drop stop words, stem the rest.

    Args:
        text: Raw input string. ``None`` or an empty string yields ``""``.

    Returns:
        The surviving tokens, Snowball-stemmed and joined by single spaces.
    """
    if not text:
        return ""

    stop_words = set(stopwords.words("english"))
    english_stemmer = SnowballStemmer("english")

    # Every non-word character becomes a separator. split() then collapses
    # runs of whitespace and ignores leading/trailing blanks, so no separate
    # space-squeezing or trailing-whitespace passes are needed.
    words = re.sub(r"\W", " ", text).split()

    # The NLTK stop-word list is lower-case, so the membership test has to be
    # case-insensitive; otherwise capitalised stop words such as "The" pass
    # the filter and come out of the stemmer as "the".
    return " ".join(
        english_stemmer.stem(word)
        for word in words
        if word.lower() not in stop_words
    )
def preprocess_pipeline(text):
    """Run ``clean_text`` over one or more documents.

    Args:
        text: Either a single string or an iterable of strings. A bare string
            is treated as one document rather than being iterated character
            by character.

    Returns:
        A list of cleaned strings, one per input document, in input order.
    """
    documents = [text] if isinstance(text, str) else text
    return [clean_text(document) for document in documents]