Spaces:

panchajanya1999
/

spam-classifier

Runtime error

App Files Files Community

spam-classifier / spam_classifier.py

panchajanya1999

Add initial application files

c5bd3e2 verified almost 2 years ago

raw

history blame

2.6 kB

	# import libraries
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import gradio as gr

	# import string
	import string

	# import countvectorizer
	from sklearn.feature_extraction.text import CountVectorizer

	# import train_test_split
	from sklearn.model_selection import train_test_split

	# import multinomial naive bayes
	from sklearn.naive_bayes import MultinomialNB

	# names of the two message classes (not referenced again in the visible code)
	features = ['spam', 'ham']

	# load the tab-separated dataset; because column names are supplied
	# explicitly, the first line of the file is read as data, not as a header
	df = pd.read_csv(
	    'dataset/spam.tsv',
	    sep='\t',
	    names=['label', 'message'],
	)

	# translation table that deletes every ASCII punctuation character;
	# built once here so each call is a single C-level pass over the text
	_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


	# write a function to remove punctuation from messages
	def remove_punctuation(text):
	    """Return *text* with all ``string.punctuation`` characters removed.

	    Only the ASCII punctuation characters listed in ``string.punctuation``
	    are dropped; letters, digits, whitespace and non-ASCII symbols are kept
	    unchanged and in their original order.

	    Args:
	        text: The string to clean.

	    Returns:
	        A new string without punctuation (empty if *text* is empty or
	        consists solely of punctuation).
	    """
	    return text.translate(_PUNCTUATION_TABLE)

	# strip punctuation from every message, overwriting the 'message' column
	df['message'] = df['message'].apply(remove_punctuation)

	# store the character length of each punctuation-free message in a new
	# 'length' column (not used again in the visible code)
	df['length'] = df['message'].apply(len)

	# create the bag-of-words vectorizer with English stop-word filtering;
	# it is only constructed here and fitted further below
	CV = CountVectorizer(stop_words='english')

	# assign the contents of each 'message' to X and 'label' to y
	X = df['message'].values
	y = df['label'].values

	# split the dataset into train and test: 20% of the rows are held out,
	# with a fixed random_state so the split is the same on every run
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

	# fit the countvectorizer on the training messages only and build their
	# token-count matrix
	X_train_CV = CV.fit_transform(X_train)

	# encode the test messages with the already-fitted vectorizer
	# (transform only -- the vectorizer is not re-fitted on test data)
	X_test_CV = CV.transform(X_test)

	# create an instance of the classifier
	NB = MultinomialNB()

	# fit the classifier to the training data
	NB.fit(X_train_CV, y_train)

	# predict labels for the held-out test messages; note that no accuracy
	# score is computed from y_pred in the visible code
	y_pred = NB.predict(X_test_CV)

	# write a function that will take a string as input and return the prediction
	def predict_spam(message):
	    """Classify *message* as ham or spam and return a display string.

	    The text is passed through ``remove_punctuation`` before vectorising so
	    that it is preprocessed the same way as the training messages, which
	    were stripped of punctuation before ``CV`` was fitted.

	    Args:
	        message: Raw text of the message to classify.

	    Returns:
	        ``'This is a ham message'`` when the classifier predicts ``'ham'``,
	        otherwise ``'This is a spam message'``.
	    """
	    vectorized = CV.transform([remove_punctuation(message)])
	    # predict() returns one label per input row; exactly one row was
	    # passed, so compare that single label rather than the whole array
	    label = NB.predict(vectorized)[0]
	    if label == 'ham':
	        return 'This is a ham message'
	    return 'This is a spam message'

	# build the web UI: one text input wired to predict_spam, one text output,
	# two clickable example messages, and flagging switched off
	iface = gr.Interface(
	    fn=predict_spam,
	    inputs=gr.Textbox(
	        lines=2,
	        placeholder="Enter a message to check if it is spam or ham",
	        label="Message",
	        info="Enter a message",
	    ),
	    outputs=gr.Textbox(
	        lines=2,
	        info="Check if the enetered message is spam or ham",
	        label="Prediction",
	        placeholder="Output will be here..",
	    ),
	    title="Spam Classifier",
	    description="Enter a message to check if it is spam or ham",
	    allow_flagging='never',
	    examples=[
	        ['Hey, how are you doing?'],
	        ['Congratulations! You have won a free trip to Dubai!'],
	    ],
	)

	# start serving the interface
	iface.launch()