spam-classifier / spam_classifier.py
panchajanya1999's picture
Add initial application files
c5bd3e2 verified
raw
history blame
2.6 kB
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gradio as gr
# import string
import string
# import countvectorizer
from sklearn.feature_extraction.text import CountVectorizer
# import train_test_split
from sklearn.model_selection import train_test_split
# import multinomial naive bayes
from sklearn.naive_bayes import MultinomialNB
# Names of the two classes found in the 'label' column. Not referenced
# anywhere else in this file as shown.
features = ['spam', 'ham']
# Load the tab-separated corpus. Column names are supplied explicitly, so
# each row is read as a (label, message) pair.
df = pd.read_csv(
    'dataset/spam.tsv',
    sep='\t',
    names=['label', 'message'],
)
# Translation table that deletes every character in string.punctuation.
# Built once at import time so each call is a single pass over the text.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def remove_punctuation(text: str) -> str:
    """Return *text* with all ASCII punctuation characters removed.

    Only the characters listed in ``string.punctuation`` are stripped;
    whitespace, letters, digits and non-ASCII symbols are left untouched.

    Args:
        text: The message to clean.

    Returns:
        A new string without punctuation (empty if *text* is empty or
        consists solely of punctuation).
    """
    return text.translate(_PUNCTUATION_TABLE)
# Strip punctuation from every message before any features are derived.
df['message'] = df['message'].apply(remove_punctuation)
# Store the character count of each cleaned message in a 'length' column.
df['length'] = df['message'].apply(len)

# Inputs (cleaned message text) and targets (labels) as arrays.
X, y = df['message'].values, df['label'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Bag-of-words vectorizer that drops English stop words. The vocabulary is
# learned from the training split only; the test split is merely transformed
# with that same vocabulary.
CV = CountVectorizer(stop_words='english')
X_train_CV = CV.fit_transform(X_train)
X_test_CV = CV.transform(X_test)

# Create the multinomial Naive Bayes classifier and fit it on the training
# counts (fit() returns the estimator itself).
NB = MultinomialNB().fit(X_train_CV, y_train)

# Predicted labels for the held-out split. No accuracy metric is computed
# from them in this file as shown.
y_pred = NB.predict(X_test_CV)
# Classify a single message and describe the result in plain text.
def predict_spam(message):
    """Return a human-readable spam/ham verdict for *message*.

    The message is cleaned with ``remove_punctuation`` before vectorizing,
    because the classifier was fitted on messages cleaned the same way.
    Skipping that step makes tokens such as "don't" vectorize differently
    at prediction time than they did during training.

    Args:
        message: The text to classify. ``None`` is treated as an empty
            message.

    Returns:
        ``'This is a ham message'`` when the model predicts ``'ham'``,
        otherwise ``'This is a spam message'``.
    """
    cleaned = remove_punctuation(message or '')
    vectorized = CV.transform([cleaned])
    # predict() yields one label per input row; exactly one row was passed,
    # so take that single label instead of truth-testing the whole array.
    label = NB.predict(vectorized)[0]
    if label == 'ham':
        return 'This is a ham message'
    return 'This is a spam message'
# Gradio UI: one text input routed through predict_spam, one text output
# showing the verdict, plus two clickable example messages.
iface = gr.Interface(
    fn=predict_spam,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Enter a message to check if it is spam or ham",
        label="Message",
        info="Enter a message",
    ),
    outputs=gr.Textbox(
        lines=2,
        # Fixed user-facing typo: "enetered" -> "entered".
        info="Check if the entered message is spam or ham",
        label="Prediction",
        placeholder="Output will be here..",
    ),
    title="Spam Classifier",
    description="Enter a message to check if it is spam or ham",
    allow_flagging='never',
    examples=[
        ['Hey, how are you doing?'],
        ['Congratulations! You have won a free trip to Dubai!'],
    ],
)
# Launch the interface. This runs at module top level, so it also happens
# when the file is imported.
iface.launch()