{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [
{ "cell_type": "markdown", "metadata": { "id": "LnPbntVRnfvV" }, "source": [ "Importing the Dependencies" ] },
{ "cell_type": "code", "metadata": { "id": "-71UtHzNVWjB" }, "source": [
  "import os\n",
  "\n",
  "import numpy as np\n",
  "import pandas as pd\n",
  "from sklearn.model_selection import train_test_split\n",
  "from sklearn import svm\n",
  "from sklearn.metrics import accuracy_score"
], "execution_count": null, "outputs": [] },
{ "cell_type": "markdown", "metadata": { "id": "bmfOfG8joBBy" }, "source": [ "Data Collection and Analysis\n", "\n", "PIMA Diabetes Dataset" ] },
{ "cell_type": "code", "metadata": { "id": "Xpw6Mj_pn_TL" }, "source": [
  "# loading the diabetes dataset to a pandas DataFrame\n",
  "# The location defaults to the original hard-coded path; set the DIABETES_CSV\n",
  "# environment variable to run the notebook against a copy stored elsewhere.\n",
  "# `or` (rather than a .get default) also falls back when the variable is set but empty.\n",
  "DATA_PATH = os.environ.get('DIABETES_CSV') or '/content/diabetes.csv'\n",
  "diabetes_dataset = pd.read_csv(DATA_PATH)"
], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "-tjO09ncovoh", "outputId": "0f5f8129-eb57-4ba0-f329-312bba4aae27" }, "source": [
  "# printing the first 5 rows of the dataset\n",
  "diabetes_dataset.head()"
], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
061487235033.60.627501
11856629026.60.351310
28183640023.30.672321
318966239428.10.167210
40137403516843.12.288331
\n", "
" ], "text/plain": [ " Pregnancies Glucose BloodPressure ... DiabetesPedigreeFunction Age Outcome\n", "0 6 148 72 ... 0.627 50 1\n", "1 1 85 66 ... 0.351 31 0\n", "2 8 183 64 ... 0.672 32 1\n", "3 1 89 66 ... 0.167 21 0\n", "4 0 137 40 ... 2.288 33 1\n", "\n", "[5 rows x 9 columns]" ] }, "metadata": {}, "execution_count": 3 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lynParo6pEMB", "outputId": "ab7d817a-1f20-46d0-d504-833efb433f7d" }, "source": [ "# number of rows and columns in this dataset, as a (rows, columns) tuple\n", "diabetes_dataset.shape" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(768, 9)" ] }, "metadata": {}, "execution_count": 4 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 300 }, "id": "3NDJOlrEpmoL", "outputId": "12af9f8e-b5fb-4f7f-a4bb-f5df64cce508" }, "source": [ "# getting the statistical measures of the data (count, mean, std, min, quartiles, max per column)\n", "# NOTE(review): the stored summary shows a minimum of 0 for Glucose, BloodPressure,\n", "# SkinThickness, Insulin and BMI - presumably zeros stand for missing measurements;\n", "# confirm before treating them as real values.\n", "diabetes_dataset.describe()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
count768.000000768.000000768.000000768.000000768.000000768.000000768.000000768.000000768.000000
mean3.845052120.89453169.10546920.53645879.79947931.9925780.47187633.2408850.348958
std3.36957831.97261819.35580715.952218115.2440027.8841600.33132911.7602320.476951
min0.0000000.0000000.0000000.0000000.0000000.0000000.07800021.0000000.000000
25%1.00000099.00000062.0000000.0000000.00000027.3000000.24375024.0000000.000000
50%3.000000117.00000072.00000023.00000030.50000032.0000000.37250029.0000000.000000
75%6.000000140.25000080.00000032.000000127.25000036.6000000.62625041.0000001.000000
max17.000000199.000000122.00000099.000000846.00000067.1000002.42000081.0000001.000000
\n", "
" ], "text/plain": [ " Pregnancies Glucose ... Age Outcome\n", "count 768.000000 768.000000 ... 768.000000 768.000000\n", "mean 3.845052 120.894531 ... 33.240885 0.348958\n", "std 3.369578 31.972618 ... 11.760232 0.476951\n", "min 0.000000 0.000000 ... 21.000000 0.000000\n", "25% 1.000000 99.000000 ... 24.000000 0.000000\n", "50% 3.000000 117.000000 ... 29.000000 0.000000\n", "75% 6.000000 140.250000 ... 41.000000 1.000000\n", "max 17.000000 199.000000 ... 81.000000 1.000000\n", "\n", "[8 rows x 9 columns]" ] }, "metadata": {}, "execution_count": 5 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "LrpHzaGpp5dQ", "outputId": "916953df-2cee-43a9-cc80-2e58fe6b43d2" }, "source": [ "# number of records per Outcome label; the stored output shows the classes are\n", "# imbalanced (500 vs 268), which is why the later split is stratified\n", "diabetes_dataset['Outcome'].value_counts()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 500\n", "1 268\n", "Name: Outcome, dtype: int64" ] }, "metadata": {}, "execution_count": 6 } ] }, { "cell_type": "markdown", "metadata": { "id": "cB1qRaNcqeh5" }, "source": [ "0 --> Non-Diabetic\n", "\n", "1 --> Diabetic" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "id": "I6MWR0k_qSCK", "outputId": "47b23d5c-8973-4868-8582-b0fa95bfed46" }, "source": [ "# mean of every feature, computed separately for each Outcome label\n", "diabetes_dataset.groupby('Outcome').mean()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAge
Outcome
03.298000109.98000068.18400019.66400068.79200030.3042000.42973431.190000
14.865672141.25746370.82462722.164179100.33582135.1425370.55050037.067164
\n", "
" ], "text/plain": [ " Pregnancies Glucose ... DiabetesPedigreeFunction Age\n", "Outcome ... \n", "0 3.298000 109.980000 ... 0.429734 31.190000\n", "1 4.865672 141.257463 ... 0.550500 37.067164\n", "\n", "[2 rows x 8 columns]" ] }, "metadata": {}, "execution_count": 7 } ] },
{ "cell_type": "code", "metadata": { "id": "RoDW7l9mqqHZ" }, "source": [
  "# split the frame into the feature matrix (X) and the target labels (Y)\n",
  "# `columns=` already names the axis, so no separate axis argument is needed\n",
  "X = diabetes_dataset.drop(columns=['Outcome'])\n",
  "Y = diabetes_dataset.loc[:, 'Outcome']"
], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "3eiRW9M9raMm", "outputId": "552c0851-90ec-4068-812d-c848224be8a7" }, "source": [
  "# show the feature matrix (every column except Outcome)\n",
  "print(X)"
], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ " Pregnancies Glucose BloodPressure ... BMI DiabetesPedigreeFunction Age\n", "0 6 148 72 ... 33.6 0.627 50\n", "1 1 85 66 ... 26.6 0.351 31\n", "2 8 183 64 ... 23.3 0.672 32\n", "3 1 89 66 ... 28.1 0.167 21\n", "4 0 137 40 ... 43.1 2.288 33\n", ".. ... ... ... ... ... ... ...\n", "763 10 101 76 ... 32.9 0.171 63\n", "764 2 122 70 ... 36.8 0.340 27\n", "765 5 121 72 ... 26.2 0.245 30\n", "766 1 126 60 ... 30.1 0.349 47\n", "767 1 93 70 ... 
30.4 0.315 23\n", "\n", "[768 rows x 8 columns]\n" ] } ] },
{ "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AoxgTJAMrcCl", "outputId": "d6f83516-18e5-41ca-c6ce-4495bdf733cb" }, "source": [
  "# show the label column (0 = non-diabetic, 1 = diabetic)\n",
  "print(Y)"
], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0 1\n", "1 0\n", "2 1\n", "3 0\n", "4 1\n", " ..\n", "763 0\n", "764 0\n", "765 0\n", "766 1\n", "767 0\n", "Name: Outcome, Length: 768, dtype: int64\n" ] } ] },
{ "cell_type": "markdown", "metadata": { "id": "gHciEFkxsoQP" }, "source": [ "Train Test Split" ] },
{ "cell_type": "code", "metadata": { "id": "AEfKGj_yslvD" }, "source": [
  "# hold out 20% of the rows for testing; stratify=Y keeps the class ratio the same\n",
  "# in both parts and random_state makes the split reproducible\n",
  "X_train, X_test, Y_train, Y_test = train_test_split(\n",
  "    X, Y, test_size=0.2, stratify=Y, random_state=2\n",
  ")"
], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "DR05T-o0t3FQ", "outputId": "2b7c195d-58d7-4c4d-803d-34e09791b07a" }, "source": [
  "# sanity check: full, training and test feature-matrix shapes\n",
  "print(X.shape, X_train.shape, X_test.shape)"
], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "(768, 8) (614, 8) (154, 8)\n" ] } ] },
{ "cell_type": "markdown", "metadata": { "id": "ElJ3tkOtuC_n" }, "source": [ "Training the Model" ] },
{ "cell_type": "code", "metadata": { "id": "5szLWHlNt9xc" }, "source": [
  "# support vector classifier with a linear kernel\n",
  "# NOTE(review): the features are used unscaled - consider standardising them;\n",
  "# confirm the effect on accuracy before changing this\n",
  "classifier = svm.SVC(kernel='linear')"
], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ncJWY_7suPAb", "outputId": "1a8fc42b-37a5-4e59-d52a-5dd5e09560e8" }, "source": [
  "# training the support vector Machine Classifier\n",
  "classifier.fit(X_train, Y_train)"
], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "SVC(kernel='linear')" ] }, "metadata": {}, "execution_count": 14 } ] },
{ "cell_type": "markdown", "metadata": { "id": "UV4-CAfquiyP" }, "source": [ "Model Evaluation" ] },
{ "cell_type": "markdown", "metadata": { "id": "yhAjGPJWunXa" }, "source": [ "Accuracy Score" ] },
{ "cell_type": "code", "metadata": { "id": "fJLEPQK7ueXp" }, "source": [
  "# accuracy score on the training data\n",
  "X_train_prediction = classifier.predict(X_train)\n",
  "# accuracy_score takes (y_true, y_pred), in that order\n",
  "training_data_accuracy = accuracy_score(Y_train, X_train_prediction)"
], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "mmJ22qhVvNwj", "outputId": "1b1c3d32-b9f2-40c0-89ed-5d59b674cdfe" }, "source": [
  "print('Accuracy score of the training data : ', training_data_accuracy)"
], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Accuracy score of the training data : 0.7833876221498371\n" ] } ] },
{ "cell_type": "code", "metadata": { "id": "G2CICFMEvcCl" }, "source": [
  "# accuracy score on the test data\n",
  "X_test_prediction = classifier.predict(X_test)\n",
  "# accuracy_score takes (y_true, y_pred), in that order\n",
  "test_data_accuracy = accuracy_score(Y_test, X_test_prediction)"
], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "i2GcW_t_vz7C", "outputId": "a65c3281-1621-4c8f-b57f-bbf0bc81d129" }, "source": [
  "print('Accuracy score of the test data : ', test_data_accuracy)"
], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Accuracy score of the test data : 0.7727272727272727\n" ] } ] },
{ "cell_type": "markdown", "metadata": { "id": "gq8ZX1xpwPF5" }, "source": [ "Making a Predictive System" ] },
{ "cell_type": "code", "metadata": {}, "source": [
  "def predict_diabetes(model, input_data):\n",
  "    \"\"\"Predict the Outcome label for a single patient record.\n",
  "\n",
  "    Parameters\n",
  "    ----------\n",
  "    model : fitted estimator exposing ``predict`` (the SVC trained above, or a\n",
  "        copy of it restored from disk).\n",
  "    input_data : sequence of feature values, in the same column order as the\n",
  "        training features.\n",
  "\n",
  "    Returns\n",
  "    -------\n",
  "    The array returned by ``model.predict`` for that one record.\n",
  "    \"\"\"\n",
  "    # reshape to (1, n_features) as we are predicting for one instance\n",
  "    features = np.asarray(input_data).reshape(1, -1)\n",
  "    # The model was fitted on a DataFrame, so hand the same column names back;\n",
  "    # a bare array triggers sklearn's 'X does not have valid feature names' warning.\n",
  "    feature_names = getattr(model, 'feature_names_in_', None)\n",
  "    if feature_names is not None:\n",
  "        features = pd.DataFrame(features, columns=feature_names)\n",
  "    return model.predict(features)\n",
  "\n",
  "\n",
  "def describe_prediction(label):\n",
  "    \"\"\"Translate a predicted Outcome label (0 or 1) into a readable message.\"\"\"\n",
  "    if label == 0:\n",
  "        return 'The person is not diabetic'\n",
  "    return 'The person is diabetic'"
], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "U-ULRe4yv5tH", "outputId": "63b3fd00-f094-4642-b45e-3eb21331c3df" }, "source": [
  "input_data = (5,166,72,19,175,25.8,0.587,51)\n",
  "\n",
  "prediction = predict_diabetes(classifier, input_data)\n",
  "print(prediction)\n",
  "print(describe_prediction(prediction[0]))"
], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[1]\n", "The person is diabetic\n" ] } ] },
{ "cell_type": "markdown", "metadata": { "id": "vgL6wblpQUtX" }, "source": [ "Saving the trained model" ] },
{ "cell_type": "code", "metadata": { "id": "Nn60MdxByjgz" }, "source": [ "import pickle" ], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "id": "cWzPQs4mQZN_" }, "source": [
  "filename = 'trained_model.sav'\n",
  "# the context manager flushes and closes the file before it is read back\n",
  "with open(filename, 'wb') as model_file:\n",
  "    pickle.dump(classifier, model_file)"
], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "id": "Wk1T2sMcQ6_U" }, "source": [
  "# loading the saved model\n",
  "# NOTE: only unpickle files you created or trust - pickle.load can run arbitrary code\n",
  "with open(filename, 'rb') as model_file:\n",
  "    loaded_model = pickle.load(model_file)"
], "execution_count": null, "outputs": [] },
{ "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Bd5OpxHnRPyy", "outputId": "abd39207-0fea-4c68-e91b-710244c8e73d" }, "source": [
  "# same record as before, this time scored by the model restored from disk\n",
  "input_data = (5,166,72,19,175,25.8,0.587,51)\n",
  "\n",
  "prediction = predict_diabetes(loaded_model, input_data)\n",
  "print(prediction)\n",
  "print(describe_prediction(prediction[0]))"
], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[1]\n", "The person is diabetic\n" ] } ] }
] }