{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "d4c303ef", "metadata": {}, "outputs": [], "source": [ "import os\n", "import warnings\n", "\n", "warnings.simplefilter(\"ignore\")\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import xgboost as xgb\n", "from sklearn.metrics import classification_report, confusion_matrix\n", "from sklearn.model_selection import GroupShuffleSplit, train_test_split\n", "from sklearn.preprocessing import LabelEncoder\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "4e15af5f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GENDERAGESMOKINGYELLOW_FINGERSANXIETYPEER_PRESSURECHRONIC DISEASEFATIGUEALLERGYWHEEZINGALCOHOL CONSUMINGCOUGHINGSHORTNESS OF BREATHSWALLOWING DIFFICULTYCHEST PAINLUNG_CANCER
0M691221121222222YES
1M742111222111222YES
2F591112121212212NO
3M632221111121122NO
4F631211111212211NO
\n", "
" ], "text/plain": [ " GENDER AGE SMOKING YELLOW_FINGERS ANXIETY PEER_PRESSURE \\\n", "0 M 69 1 2 2 1 \n", "1 M 74 2 1 1 1 \n", "2 F 59 1 1 1 2 \n", "3 M 63 2 2 2 1 \n", "4 F 63 1 2 1 1 \n", "\n", " CHRONIC DISEASE FATIGUE ALLERGY WHEEZING ALCOHOL CONSUMING COUGHING \\\n", "0 1 2 1 2 2 2 \n", "1 2 2 2 1 1 1 \n", "2 1 2 1 2 1 2 \n", "3 1 1 1 1 2 1 \n", "4 1 1 1 2 1 2 \n", "\n", " SHORTNESS OF BREATH SWALLOWING DIFFICULTY CHEST PAIN LUNG_CANCER \n", "0 2 2 2 YES \n", "1 2 2 2 YES \n", "2 2 1 2 NO \n", "3 1 2 2 NO \n", "4 2 1 1 NO " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Location of the survey CSV, relative to the notebook's working directory so the\n", "# analysis does not depend on one machine's folder layout. Edit DATA_PATH if the\n", "# file lives somewhere else.\n", "DATA_PATH = 'survey lung cancer.csv'\n", "\n", "lung_data = pd.read_csv(DATA_PATH)\n", "lung_data.head()" ] }, { "cell_type": "code", "execution_count": 3, "id": "9abe8af8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 309 entries, 0 to 308\n", "Data columns (total 16 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 GENDER 309 non-null object\n", " 1 AGE 309 non-null int64 \n", " 2 SMOKING 309 non-null int64 \n", " 3 YELLOW_FINGERS 309 non-null int64 \n", " 4 ANXIETY 309 non-null int64 \n", " 5 PEER_PRESSURE 309 non-null int64 \n", " 6 CHRONIC DISEASE 309 non-null int64 \n", " 7 FATIGUE 309 non-null int64 \n", " 8 ALLERGY 309 non-null int64 \n", " 9 WHEEZING 309 non-null int64 \n", " 10 ALCOHOL CONSUMING 309 non-null int64 \n", " 11 COUGHING 309 non-null int64 \n", " 12 SHORTNESS OF BREATH 309 non-null int64 \n", " 13 SWALLOWING DIFFICULTY 309 non-null int64 \n", " 14 CHEST PAIN 309 non-null int64 \n", " 15 LUNG_CANCER 309 non-null object\n", "dtypes: int64(14), object(2)\n", "memory usage: 38.8+ KB\n" ] } ], "source": [ "lung_data.info()" ] }, { "cell_type": "code", "execution_count": 4, "id": "3dbb3974", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 276 entries, 0 to 283\n", "Data columns 
(total 16 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 GENDER 276 non-null object\n", " 1 AGE 276 non-null int64 \n", " 2 SMOKING 276 non-null int64 \n", " 3 YELLOW_FINGERS 276 non-null int64 \n", " 4 ANXIETY 276 non-null int64 \n", " 5 PEER_PRESSURE 276 non-null int64 \n", " 6 CHRONIC DISEASE 276 non-null int64 \n", " 7 FATIGUE 276 non-null int64 \n", " 8 ALLERGY 276 non-null int64 \n", " 9 WHEEZING 276 non-null int64 \n", " 10 ALCOHOL CONSUMING 276 non-null int64 \n", " 11 COUGHING 276 non-null int64 \n", " 12 SHORTNESS OF BREATH 276 non-null int64 \n", " 13 SWALLOWING DIFFICULTY 276 non-null int64 \n", " 14 CHEST PAIN 276 non-null int64 \n", " 15 LUNG_CANCER 276 non-null object\n", "dtypes: int64(14), object(2)\n", "memory usage: 36.7+ KB\n" ] } ], "source": [ "lung_data = lung_data.drop_duplicates()\n", "lung_data.info()" ] }, { "cell_type": "code", "execution_count": 5, "id": "2c09b012", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GENDERSMOKINGYELLOW_FINGERSANXIETYPEER_PRESSURECHRONIC DISEASEFATIGUEALLERGYWHEEZINGALCOHOL CONSUMINGCOUGHINGSHORTNESS OF BREATHSWALLOWING DIFFICULTYCHEST PAINLUNG_CANCER
0101100101111111
1110001110001111
2000010101011010
3111100000100110
4001000001011000
\n", "
" ], "text/plain": [ " GENDER SMOKING YELLOW_FINGERS ANXIETY PEER_PRESSURE CHRONIC DISEASE \\\n", "0 1 0 1 1 0 0 \n", "1 1 1 0 0 0 1 \n", "2 0 0 0 0 1 0 \n", "3 1 1 1 1 0 0 \n", "4 0 0 1 0 0 0 \n", "\n", " FATIGUE ALLERGY WHEEZING ALCOHOL CONSUMING COUGHING SHORTNESS OF BREATH \\\n", "0 1 0 1 1 1 1 \n", "1 1 1 0 0 0 1 \n", "2 1 0 1 0 1 1 \n", "3 0 0 0 1 0 0 \n", "4 0 0 1 0 1 1 \n", "\n", " SWALLOWING DIFFICULTY CHEST PAIN LUNG_CANCER \n", "0 1 1 1 \n", "1 1 1 1 \n", "2 0 1 0 \n", "3 1 1 0 \n", "4 0 0 0 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Label-encode every column except AGE to integer codes, then store the codes\n", "# with pandas' category dtype. The encoder is refit for each column in turn.\n", "encoder = LabelEncoder()\n", "non_age = lung_data.drop(columns=['AGE'])\n", "categorical = non_age.assign(\n", "    **{name: encoder.fit_transform(non_age[name]) for name in non_age.columns}\n", ").astype(\"category\")\n", "categorical.head()" ] }, { "cell_type": "code", "execution_count": 6, "id": "b15e78ca", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GENDERSMOKINGYELLOW_FINGERSANXIETYPEER_PRESSURECHRONIC DISEASEFATIGUEALLERGYWHEEZINGALCOHOL CONSUMINGCOUGHINGSHORTNESS OF BREATHSWALLOWING DIFFICULTYCHEST PAINLUNG_CANCERAGE
010110010111111169
111000111000111174
200001010101101059
311110000010011063
400100000101100063
\n", "
" ], "text/plain": [ " GENDER SMOKING YELLOW_FINGERS ANXIETY PEER_PRESSURE CHRONIC DISEASE \\\n", "0 1 0 1 1 0 0 \n", "1 1 1 0 0 0 1 \n", "2 0 0 0 0 1 0 \n", "3 1 1 1 1 0 0 \n", "4 0 0 1 0 0 0 \n", "\n", " FATIGUE ALLERGY WHEEZING ALCOHOL CONSUMING COUGHING SHORTNESS OF BREATH \\\n", "0 1 0 1 1 1 1 \n", "1 1 1 0 0 0 1 \n", "2 1 0 1 0 1 1 \n", "3 0 0 0 1 0 0 \n", "4 0 0 1 0 1 1 \n", "\n", " SWALLOWING DIFFICULTY CHEST PAIN LUNG_CANCER AGE \n", "0 1 1 1 69 \n", "1 1 1 1 74 \n", "2 0 1 0 59 \n", "3 1 1 0 63 \n", "4 0 0 0 63 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lung_data = pd.concat([categorical, lung_data['AGE']], axis = 1)\n", "lung_data.head()" ] }, { "cell_type": "code", "execution_count": 7, "id": "8925150b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 238\n", "0 38\n", "Name: LUNG_CANCER, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lung_data.LUNG_CANCER.value_counts()" ] }, { "cell_type": "code", "execution_count": 8, "id": "c992c376", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 1000 entries, 183 to 87\n", "Data columns (total 16 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 GENDER 1000 non-null category\n", " 1 SMOKING 1000 non-null category\n", " 2 YELLOW_FINGERS 1000 non-null category\n", " 3 ANXIETY 1000 non-null category\n", " 4 PEER_PRESSURE 1000 non-null category\n", " 5 CHRONIC DISEASE 1000 non-null category\n", " 6 FATIGUE 1000 non-null category\n", " 7 ALLERGY 1000 non-null category\n", " 8 WHEEZING 1000 non-null category\n", " 9 ALCOHOL CONSUMING 1000 non-null category\n", " 10 COUGHING 1000 non-null category\n", " 11 SHORTNESS OF BREATH 1000 non-null category\n", " 12 SWALLOWING DIFFICULTY 1000 non-null category\n", " 13 CHEST PAIN 1000 non-null category\n", " 14 LUNG_CANCER 1000 non-null category\n", " 15 AGE 
1000 non-null int64 \n", "dtypes: category(15), int64(1)\n", "memory usage: 32.1 KB\n" ] } ], "source": [ "# Balance the classes: resample each class with replacement to 500 rows.\n", "# random_state pins the draw so Restart & Run All reproduces the same frame.\n", "# NOTE(review): after this cell one patient (one index label) can occur many times,\n", "# so any later train/test split must keep all copies of a label on the same side;\n", "# otherwise identical rows sit in both sets and the test scores are inflated.\n", "class_0 = lung_data[lung_data['LUNG_CANCER'] == 0]\n", "class_1 = lung_data[lung_data['LUNG_CANCER'] == 1]\n", "class_1 = class_1.sample(n = 500, replace = True, random_state = 42)\n", "class_0 = class_0.sample(n = 500, replace = True, random_state = 42)\n", "lung_data = pd.concat([class_0, class_1], axis = 0)\n", "lung_data.info()" ] }, { "cell_type": "code", "execution_count": 9, "id": "ebc06c8e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 500\n", "1 500\n", "Name: LUNG_CANCER, dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lung_data['LUNG_CANCER'].value_counts()" ] }, { "cell_type": "code", "execution_count": 10, "id": "b2ca517b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GENDERSMOKINGYELLOW_FINGERSANXIETYPEER_PRESSURECHRONIC DISEASEFATIGUEALLERGYWHEEZINGALCOHOL CONSUMINGCOUGHINGSHORTNESS OF BREATHSWALLOWING DIFFICULTYCHEST PAINLUNG_CANCERAGE
18301000110000100071
400100000101100063
3700000100100110056
1411000001111001069
801010010000000068
\n", "
" ], "text/plain": [ " GENDER SMOKING YELLOW_FINGERS ANXIETY PEER_PRESSURE CHRONIC DISEASE \\\n", "183 0 1 0 0 0 1 \n", "4 0 0 1 0 0 0 \n", "37 0 0 0 0 0 1 \n", "14 1 1 0 0 0 0 \n", "8 0 1 0 1 0 0 \n", "\n", " FATIGUE ALLERGY WHEEZING ALCOHOL CONSUMING COUGHING SHORTNESS OF BREATH \\\n", "183 1 0 0 0 0 1 \n", "4 0 0 1 0 1 1 \n", "37 0 0 1 0 0 1 \n", "14 0 1 1 1 1 0 \n", "8 1 0 0 0 0 0 \n", "\n", " SWALLOWING DIFFICULTY CHEST PAIN LUNG_CANCER AGE \n", "183 0 0 0 71 \n", "4 0 0 0 63 \n", "37 1 0 0 56 \n", "14 0 1 0 69 \n", "8 0 0 0 68 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lung_data.head()" ] }, { "cell_type": "code", "execution_count": 11, "id": "62696544", "metadata": {}, "outputs": [], "source": [ "X = lung_data.drop('LUNG_CANCER', axis =1)\n", "y = lung_data.LUNG_CANCER" ] }, { "cell_type": "code", "execution_count": 12, "id": "6e4572a6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 80\n", "0 70\n", "Name: LUNG_CANCER, dtype: int64\n" ] } ], "source": [ "# The frame was resampled with replacement earlier, so one patient (one index label)\n", "# can appear many times. A plain random row split would put copies of the same\n", "# patient in both train and test and inflate the scores, so split by index label:\n", "# every copy of a patient lands on exactly one side.\n", "# test_size is therefore the share of distinct patients, not of rows.\n", "splitter = GroupShuffleSplit(n_splits=1, test_size=0.15, random_state=42)\n", "train_pos, test_pos = next(splitter.split(X, y, groups=X.index))\n", "\n", "X_train, X_test = X.iloc[train_pos], X.iloc[test_pos]\n", "y_train, y_test = y.iloc[train_pos], y.iloc[test_pos]\n", "\n", "# No patient may be shared between the two sets.\n", "assert set(X_train.index).isdisjoint(X_test.index)\n", "\n", "print(y_test.value_counts())" ] }, { "cell_type": "code", "execution_count": 13, "id": "1b243af1", "metadata": {}, "outputs": [], "source": [ "categorical_columns = ['GENDER', 'SMOKING', 'YELLOW_FINGERS', 'ANXIETY', 'PEER_PRESSURE',\n", " 'CHRONIC DISEASE', 'FATIGUE ', 'ALLERGY ', 'WHEEZING',\n", " 'ALCOHOL CONSUMING', 'COUGHING', 'SHORTNESS OF BREATH',\n", " 'SWALLOWING DIFFICULTY', 'CHEST PAIN',\n", "]" ] }, { "cell_type": "code", "execution_count": 14, "id": "d0a038fd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.95 1.00 0.97 70\n", " 1 1.00 0.95 0.97 80\n", "\n", " accuracy 0.97 150\n", " macro avg 0.97 0.97 0.97 150\n", "weighted avg 0.97 0.97 0.97 150\n", "\n" ] }, { "data": { "text/plain": [ "array([[70, 0],\n", " [ 
4, 76]], dtype=int64)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Probability of class 1 at or above which a patient is labelled positive.\n", "DECISION_THRESHOLD = 0.99\n", "\n", "# create DMatrix for training and test sets with categorical features enabled\n", "X_train = X_train.astype({col: \"category\" for col in categorical_columns})\n", "\n", "dtrain = xgb.DMatrix(X_train, label=y_train, enable_categorical=True)\n", "dtest = xgb.DMatrix(X_test, enable_categorical=True)\n", "\n", "# set parameters for XGBoost classifier\n", "# 'eta' and 'learning_rate' are two names for the same setting, so it is given once.\n", "params = {\n", "    'objective': 'binary:logistic',\n", "    'max_depth': 3,\n", "    'eta': 1,\n", "    'nthread': 3,\n", "    'eval_metric': 'auc',\n", "}\n", "\n", "# train model\n", "model = xgb.train(params, dtrain, num_boost_round=100)\n", "# make predictions on test data\n", "y_pred = model.predict(dtest)\n", "\n", "# convert probabilities to binary predictions\n", "y_pred_binary = [1 if p >= DECISION_THRESHOLD else 0 for p in y_pred]\n", "\n", "# evaluate model performance\n", "print(classification_report(y_test, y_pred_binary))\n", "confusion_matrix(y_test, y_pred_binary)" ] }, { "cell_type": "markdown", "id": "d74abb37", "metadata": {}, "source": [ "#### Let's assume we want to make a prediction for a new incoming patient" ] }, { "cell_type": "code", "execution_count": 16, "id": "cd579274", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GENDERSMOKINGYELLOW_FINGERSANXIETYPEER_PRESSURECHRONIC DISEASEFATIGUEALLERGYWHEEZINGALCOHOL CONSUMINGCOUGHINGSHORTNESS OF BREATHSWALLOWING DIFFICULTYCHEST PAINAGE
01001000000100125
\n", "
" ], "text/plain": [ " GENDER SMOKING YELLOW_FINGERS ANXIETY PEER_PRESSURE CHRONIC DISEASE \\\n", "0 1 0 0 1 0 0 \n", "\n", " FATIGUE ALLERGY WHEEZING ALCOHOL CONSUMING COUGHING \\\n", "0 0 0 0 0 1 \n", "\n", " SHORTNESS OF BREATH SWALLOWING DIFFICULTY CHEST PAIN AGE \n", "0 0 0 1 25 " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "input_dict = {'GENDER': 1, 'SMOKING': 0, 'YELLOW_FINGERS':0,\n", "              'ANXIETY': 1, 'PEER_PRESSURE': 0,\n", "              'CHRONIC DISEASE': 0, 'FATIGUE ': 0,\n", "              'ALLERGY ': 0, 'WHEEZING': 0,\n", "              'ALCOHOL CONSUMING': 0, 'COUGHING': 1,\n", "              'SHORTNESS OF BREATH': 0,\n", "              'SWALLOWING DIFFICULTY': 0,\n", "              'CHEST PAIN': 1, 'AGE': 25}\n", "input_df = pd.DataFrame.from_dict([input_dict])\n", "# astype returns a new frame, so the result has to be assigned back.\n", "# The training dtypes are re-used so each column carries the same category set the\n", "# model was trained on; casting a one-row frame with \"category\" would only know the\n", "# single value it holds and every code would collapse to 0.\n", "input_df = input_df.astype({col: X_train[col].dtype for col in categorical_columns})\n", "input_df" ] }, { "cell_type": "code", "execution_count": 17, "id": "0f6dc7eb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 1 entries, 0 to 0\n", "Data columns (total 15 columns):\n", " # Column Non-Null Count Dtype\n", "--- ------ -------------- -----\n", " 0 GENDER 1 non-null int64\n", " 1 SMOKING 1 non-null int64\n", " 2 YELLOW_FINGERS 1 non-null int64\n", " 3 ANXIETY 1 non-null int64\n", " 4 PEER_PRESSURE 1 non-null int64\n", " 5 CHRONIC DISEASE 1 non-null int64\n", " 6 FATIGUE 1 non-null int64\n", " 7 ALLERGY 1 non-null int64\n", " 8 WHEEZING 1 non-null int64\n", " 9 ALCOHOL CONSUMING 1 non-null int64\n", " 10 COUGHING 1 non-null int64\n", " 11 SHORTNESS OF BREATH 1 non-null int64\n", " 12 SWALLOWING DIFFICULTY 1 non-null int64\n", " 13 CHEST PAIN 1 non-null int64\n", " 14 AGE 1 non-null int64\n", "dtypes: int64(15)\n", "memory usage: 248.0 bytes\n" ] } ], "source": [ "input_df.info()" ] }, { "cell_type": "code", "execution_count": 18, "id": "afb5ec1e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.20313403], dtype=float32)" ] }, "execution_count": 18, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ "# Build the matrix the same way as for training so the categorical columns are\n", "# passed as categorical features. A separate name keeps the test-set matrix\n", "# (dtest) from being overwritten.\n", "dpatient = xgb.DMatrix(input_df, enable_categorical=True)\n", "\n", "prediction = model.predict(dpatient)\n", "prediction" ] }, { "cell_type": "code", "execution_count": null, "id": "91746991", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "728ee97e", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 19, "id": "fff3df16", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7860\n", "Running on public URL: https://de43ecf59e54e5afd9.gradio.live\n", "\n", "This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces\n" ] }, { "data": { "text/plain": [] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import gradio as gr\n", "\n", "# Define the Gradio input and output interfaces\n", "inputs = [\n", " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Gender\"),\n", " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you smoke?\"),\n", " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have Yellow Fingers\"),\n", " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have Anxiety\"),\n", " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you get influenced by Peer Pressure\"),\n", " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have any Chronic Disease\"),\n", " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have Fatigue\"),\n", " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have an Allergy\"),\n", "
gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you experience Wheezing\"),\n", " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you drink alcohol\"),\n", " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Are you Coughing\"),\n", " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have Shortness of Breath\"),\n", " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have Swallowing Difficulty\"),\n", " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have Chest Pain\"),\n", " gr.inputs.Number(label='Age')\n", "]\n", "\n", "output = gr.outputs.Label(num_top_classes=2)\n", "\n", "# Define the predict function\n", "def predict(gender, smoking, yellow_fingers, anxiety, peer_pressure,\n", "            chronic_disease, fatigue, allergy, wheezing, alcohol_consuming,\n", "            coughing, shortness_of_breath, swallowing_difficulty, chest_pain,\n", "            age):\n", "    \"\"\"Return the screening message for one patient entered in the form.\n", "\n", "    Each argument is one form value, in the order of the `inputs` list: the\n", "    dropdowns offer the choices '0' and '1', and `age` comes from the number field.\n", "    The values are placed in a one-row frame under the training column names,\n", "    scored with the trained `model`, and the positive message is returned when\n", "    the predicted probability is at least 0.99.\n", "    \"\"\"\n", "    input_dict = {'GENDER': gender, 'SMOKING': smoking, 'YELLOW_FINGERS': yellow_fingers,\n", "                  'ANXIETY': anxiety, 'PEER_PRESSURE': peer_pressure,\n", "                  'CHRONIC DISEASE': chronic_disease, 'FATIGUE ': fatigue,\n", "                  'ALLERGY ': allergy, 'WHEEZING': wheezing,\n", "                  'ALCOHOL CONSUMING': alcohol_consuming, 'COUGHING': coughing,\n", "                  'SHORTNESS OF BREATH': shortness_of_breath,\n", "                  'SWALLOWING DIFFICULTY': swallowing_difficulty,\n", "                  'CHEST PAIN': chest_pain, 'AGE': age}\n", "    # Coerce every form value to int before any categorical cast.\n", "    input_df = pd.DataFrame.from_dict([input_dict]).astype(\"int\")\n", "    # Re-use the training dtypes so each categorical column carries the same\n", "    # category set the model was trained on; a one-row \"category\" cast would only\n", "    # know the single value it holds.\n", "    input_df = input_df.astype({col: X_train[col].dtype for col in categorical_columns})\n", "\n", "    dtest = xgb.DMatrix(input_df, enable_categorical=True)\n", "\n", "    # predict returns a one-element array; take the scalar so the comparison\n", "    # below is a plain bool.\n", "    probability = float(model.predict(dtest)[0])\n", "\n", "    # Same cut-off and same inclusive comparison (>= 0.99) as the evaluation cell.\n", "    if probability >= 0.99:\n", "        return \"You have Lung Cancer, you might want to see the Doctor.\"\n", "    return \"You don't have Lung Cancer, Enjoy\u2764\"\n", "\n", "# Create and launch the interface\n", "interface = gr.Interface(fn=predict, inputs=inputs, outputs=output, \n", " title='Lung Cancer Prediction', description='Predicting lung cancer using XGBoost Classifier.\\nPlease 
Note:\\nFemale = 0, Male= 1\\nNo = 0, Yes = 1')\n", "\n", "# The login is read from the environment so no password is stored in the notebook;\n", "# set GRADIO_USER and GRADIO_PASSWORD before running. A missing variable raises\n", "# KeyError before the public share link is created.\n", "interface.launch(auth = (os.environ['GRADIO_USER'], os.environ['GRADIO_PASSWORD']), share = True)" ] }, { "cell_type": "code", "execution_count": null, "id": "de27fbe1", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 5 }