Elegbede committed on
Commit
959b76a
·
1 Parent(s): 4182cd7

Upload 4 files

Browse files
Files changed (4) hide show
  1. APP (2).ipynb +1317 -0
  2. app (2).py +74 -0
  3. model (1).pkl +3 -0
  4. requirements.txt +6 -0
APP (2).ipynb ADDED
@@ -0,0 +1,1317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "d4c303ef",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import warnings\n",
11
+ "warnings.simplefilter(\"ignore\")\n",
12
+ "import pandas as pd\n",
13
+ "import numpy as np\n",
14
+ "import matplotlib.pyplot as plt\n",
15
+ "import seaborn as sns\n",
16
+ "from sklearn.metrics import classification_report, confusion_matrix\n",
17
+ "from sklearn.model_selection import train_test_split\n",
18
+ "import xgboost as xgb\n",
19
+ "from sklearn.preprocessing import LabelEncoder\n"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 2,
25
+ "id": "4e15af5f",
26
+ "metadata": {},
27
+ "outputs": [
28
+ {
29
+ "data": {
30
+ "text/html": [
31
+ "<div>\n",
32
+ "<style scoped>\n",
33
+ " .dataframe tbody tr th:only-of-type {\n",
34
+ " vertical-align: middle;\n",
35
+ " }\n",
36
+ "\n",
37
+ " .dataframe tbody tr th {\n",
38
+ " vertical-align: top;\n",
39
+ " }\n",
40
+ "\n",
41
+ " .dataframe thead th {\n",
42
+ " text-align: right;\n",
43
+ " }\n",
44
+ "</style>\n",
45
+ "<table border=\"1\" class=\"dataframe\">\n",
46
+ " <thead>\n",
47
+ " <tr style=\"text-align: right;\">\n",
48
+ " <th></th>\n",
49
+ " <th>GENDER</th>\n",
50
+ " <th>AGE</th>\n",
51
+ " <th>SMOKING</th>\n",
52
+ " <th>YELLOW_FINGERS</th>\n",
53
+ " <th>ANXIETY</th>\n",
54
+ " <th>PEER_PRESSURE</th>\n",
55
+ " <th>CHRONIC DISEASE</th>\n",
56
+ " <th>FATIGUE</th>\n",
57
+ " <th>ALLERGY</th>\n",
58
+ " <th>WHEEZING</th>\n",
59
+ " <th>ALCOHOL CONSUMING</th>\n",
60
+ " <th>COUGHING</th>\n",
61
+ " <th>SHORTNESS OF BREATH</th>\n",
62
+ " <th>SWALLOWING DIFFICULTY</th>\n",
63
+ " <th>CHEST PAIN</th>\n",
64
+ " <th>LUNG_CANCER</th>\n",
65
+ " </tr>\n",
66
+ " </thead>\n",
67
+ " <tbody>\n",
68
+ " <tr>\n",
69
+ " <th>0</th>\n",
70
+ " <td>M</td>\n",
71
+ " <td>69</td>\n",
72
+ " <td>1</td>\n",
73
+ " <td>2</td>\n",
74
+ " <td>2</td>\n",
75
+ " <td>1</td>\n",
76
+ " <td>1</td>\n",
77
+ " <td>2</td>\n",
78
+ " <td>1</td>\n",
79
+ " <td>2</td>\n",
80
+ " <td>2</td>\n",
81
+ " <td>2</td>\n",
82
+ " <td>2</td>\n",
83
+ " <td>2</td>\n",
84
+ " <td>2</td>\n",
85
+ " <td>YES</td>\n",
86
+ " </tr>\n",
87
+ " <tr>\n",
88
+ " <th>1</th>\n",
89
+ " <td>M</td>\n",
90
+ " <td>74</td>\n",
91
+ " <td>2</td>\n",
92
+ " <td>1</td>\n",
93
+ " <td>1</td>\n",
94
+ " <td>1</td>\n",
95
+ " <td>2</td>\n",
96
+ " <td>2</td>\n",
97
+ " <td>2</td>\n",
98
+ " <td>1</td>\n",
99
+ " <td>1</td>\n",
100
+ " <td>1</td>\n",
101
+ " <td>2</td>\n",
102
+ " <td>2</td>\n",
103
+ " <td>2</td>\n",
104
+ " <td>YES</td>\n",
105
+ " </tr>\n",
106
+ " <tr>\n",
107
+ " <th>2</th>\n",
108
+ " <td>F</td>\n",
109
+ " <td>59</td>\n",
110
+ " <td>1</td>\n",
111
+ " <td>1</td>\n",
112
+ " <td>1</td>\n",
113
+ " <td>2</td>\n",
114
+ " <td>1</td>\n",
115
+ " <td>2</td>\n",
116
+ " <td>1</td>\n",
117
+ " <td>2</td>\n",
118
+ " <td>1</td>\n",
119
+ " <td>2</td>\n",
120
+ " <td>2</td>\n",
121
+ " <td>1</td>\n",
122
+ " <td>2</td>\n",
123
+ " <td>NO</td>\n",
124
+ " </tr>\n",
125
+ " <tr>\n",
126
+ " <th>3</th>\n",
127
+ " <td>M</td>\n",
128
+ " <td>63</td>\n",
129
+ " <td>2</td>\n",
130
+ " <td>2</td>\n",
131
+ " <td>2</td>\n",
132
+ " <td>1</td>\n",
133
+ " <td>1</td>\n",
134
+ " <td>1</td>\n",
135
+ " <td>1</td>\n",
136
+ " <td>1</td>\n",
137
+ " <td>2</td>\n",
138
+ " <td>1</td>\n",
139
+ " <td>1</td>\n",
140
+ " <td>2</td>\n",
141
+ " <td>2</td>\n",
142
+ " <td>NO</td>\n",
143
+ " </tr>\n",
144
+ " <tr>\n",
145
+ " <th>4</th>\n",
146
+ " <td>F</td>\n",
147
+ " <td>63</td>\n",
148
+ " <td>1</td>\n",
149
+ " <td>2</td>\n",
150
+ " <td>1</td>\n",
151
+ " <td>1</td>\n",
152
+ " <td>1</td>\n",
153
+ " <td>1</td>\n",
154
+ " <td>1</td>\n",
155
+ " <td>2</td>\n",
156
+ " <td>1</td>\n",
157
+ " <td>2</td>\n",
158
+ " <td>2</td>\n",
159
+ " <td>1</td>\n",
160
+ " <td>1</td>\n",
161
+ " <td>NO</td>\n",
162
+ " </tr>\n",
163
+ " </tbody>\n",
164
+ "</table>\n",
165
+ "</div>"
166
+ ],
167
+ "text/plain": [
168
+ " GENDER AGE SMOKING YELLOW_FINGERS ANXIETY PEER_PRESSURE \\\n",
169
+ "0 M 69 1 2 2 1 \n",
170
+ "1 M 74 2 1 1 1 \n",
171
+ "2 F 59 1 1 1 2 \n",
172
+ "3 M 63 2 2 2 1 \n",
173
+ "4 F 63 1 2 1 1 \n",
174
+ "\n",
175
+ " CHRONIC DISEASE FATIGUE ALLERGY WHEEZING ALCOHOL CONSUMING COUGHING \\\n",
176
+ "0 1 2 1 2 2 2 \n",
177
+ "1 2 2 2 1 1 1 \n",
178
+ "2 1 2 1 2 1 2 \n",
179
+ "3 1 1 1 1 2 1 \n",
180
+ "4 1 1 1 2 1 2 \n",
181
+ "\n",
182
+ " SHORTNESS OF BREATH SWALLOWING DIFFICULTY CHEST PAIN LUNG_CANCER \n",
183
+ "0 2 2 2 YES \n",
184
+ "1 2 2 2 YES \n",
185
+ "2 2 1 2 NO \n",
186
+ "3 1 2 2 NO \n",
187
+ "4 2 1 1 NO "
188
+ ]
189
+ },
190
+ "execution_count": 2,
191
+ "metadata": {},
192
+ "output_type": "execute_result"
193
+ }
194
+ ],
195
+ "source": [
196
+ "lung_data = pd.read_csv(r'C:\\Users\\elegb\\Desktop\\pdf\\survey lung cancer.csv')\n",
197
+ "lung_data.head()"
198
+ ]
199
+ },
200
+ {
201
+ "cell_type": "code",
202
+ "execution_count": 3,
203
+ "id": "9abe8af8",
204
+ "metadata": {},
205
+ "outputs": [
206
+ {
207
+ "name": "stdout",
208
+ "output_type": "stream",
209
+ "text": [
210
+ "<class 'pandas.core.frame.DataFrame'>\n",
211
+ "RangeIndex: 309 entries, 0 to 308\n",
212
+ "Data columns (total 16 columns):\n",
213
+ " # Column Non-Null Count Dtype \n",
214
+ "--- ------ -------------- ----- \n",
215
+ " 0 GENDER 309 non-null object\n",
216
+ " 1 AGE 309 non-null int64 \n",
217
+ " 2 SMOKING 309 non-null int64 \n",
218
+ " 3 YELLOW_FINGERS 309 non-null int64 \n",
219
+ " 4 ANXIETY 309 non-null int64 \n",
220
+ " 5 PEER_PRESSURE 309 non-null int64 \n",
221
+ " 6 CHRONIC DISEASE 309 non-null int64 \n",
222
+ " 7 FATIGUE 309 non-null int64 \n",
223
+ " 8 ALLERGY 309 non-null int64 \n",
224
+ " 9 WHEEZING 309 non-null int64 \n",
225
+ " 10 ALCOHOL CONSUMING 309 non-null int64 \n",
226
+ " 11 COUGHING 309 non-null int64 \n",
227
+ " 12 SHORTNESS OF BREATH 309 non-null int64 \n",
228
+ " 13 SWALLOWING DIFFICULTY 309 non-null int64 \n",
229
+ " 14 CHEST PAIN 309 non-null int64 \n",
230
+ " 15 LUNG_CANCER 309 non-null object\n",
231
+ "dtypes: int64(14), object(2)\n",
232
+ "memory usage: 38.8+ KB\n"
233
+ ]
234
+ }
235
+ ],
236
+ "source": [
237
+ "lung_data.info()"
238
+ ]
239
+ },
240
+ {
241
+ "cell_type": "code",
242
+ "execution_count": 4,
243
+ "id": "3dbb3974",
244
+ "metadata": {},
245
+ "outputs": [
246
+ {
247
+ "name": "stdout",
248
+ "output_type": "stream",
249
+ "text": [
250
+ "<class 'pandas.core.frame.DataFrame'>\n",
251
+ "Int64Index: 276 entries, 0 to 283\n",
252
+ "Data columns (total 16 columns):\n",
253
+ " # Column Non-Null Count Dtype \n",
254
+ "--- ------ -------------- ----- \n",
255
+ " 0 GENDER 276 non-null object\n",
256
+ " 1 AGE 276 non-null int64 \n",
257
+ " 2 SMOKING 276 non-null int64 \n",
258
+ " 3 YELLOW_FINGERS 276 non-null int64 \n",
259
+ " 4 ANXIETY 276 non-null int64 \n",
260
+ " 5 PEER_PRESSURE 276 non-null int64 \n",
261
+ " 6 CHRONIC DISEASE 276 non-null int64 \n",
262
+ " 7 FATIGUE 276 non-null int64 \n",
263
+ " 8 ALLERGY 276 non-null int64 \n",
264
+ " 9 WHEEZING 276 non-null int64 \n",
265
+ " 10 ALCOHOL CONSUMING 276 non-null int64 \n",
266
+ " 11 COUGHING 276 non-null int64 \n",
267
+ " 12 SHORTNESS OF BREATH 276 non-null int64 \n",
268
+ " 13 SWALLOWING DIFFICULTY 276 non-null int64 \n",
269
+ " 14 CHEST PAIN 276 non-null int64 \n",
270
+ " 15 LUNG_CANCER 276 non-null object\n",
271
+ "dtypes: int64(14), object(2)\n",
272
+ "memory usage: 36.7+ KB\n"
273
+ ]
274
+ }
275
+ ],
276
+ "source": [
277
+ "lung_data = lung_data.drop_duplicates()\n",
278
+ "lung_data.info()"
279
+ ]
280
+ },
281
+ {
282
+ "cell_type": "code",
283
+ "execution_count": 5,
284
+ "id": "2c09b012",
285
+ "metadata": {},
286
+ "outputs": [
287
+ {
288
+ "data": {
289
+ "text/html": [
290
+ "<div>\n",
291
+ "<style scoped>\n",
292
+ " .dataframe tbody tr th:only-of-type {\n",
293
+ " vertical-align: middle;\n",
294
+ " }\n",
295
+ "\n",
296
+ " .dataframe tbody tr th {\n",
297
+ " vertical-align: top;\n",
298
+ " }\n",
299
+ "\n",
300
+ " .dataframe thead th {\n",
301
+ " text-align: right;\n",
302
+ " }\n",
303
+ "</style>\n",
304
+ "<table border=\"1\" class=\"dataframe\">\n",
305
+ " <thead>\n",
306
+ " <tr style=\"text-align: right;\">\n",
307
+ " <th></th>\n",
308
+ " <th>GENDER</th>\n",
309
+ " <th>SMOKING</th>\n",
310
+ " <th>YELLOW_FINGERS</th>\n",
311
+ " <th>ANXIETY</th>\n",
312
+ " <th>PEER_PRESSURE</th>\n",
313
+ " <th>CHRONIC DISEASE</th>\n",
314
+ " <th>FATIGUE</th>\n",
315
+ " <th>ALLERGY</th>\n",
316
+ " <th>WHEEZING</th>\n",
317
+ " <th>ALCOHOL CONSUMING</th>\n",
318
+ " <th>COUGHING</th>\n",
319
+ " <th>SHORTNESS OF BREATH</th>\n",
320
+ " <th>SWALLOWING DIFFICULTY</th>\n",
321
+ " <th>CHEST PAIN</th>\n",
322
+ " <th>LUNG_CANCER</th>\n",
323
+ " </tr>\n",
324
+ " </thead>\n",
325
+ " <tbody>\n",
326
+ " <tr>\n",
327
+ " <th>0</th>\n",
328
+ " <td>1</td>\n",
329
+ " <td>0</td>\n",
330
+ " <td>1</td>\n",
331
+ " <td>1</td>\n",
332
+ " <td>0</td>\n",
333
+ " <td>0</td>\n",
334
+ " <td>1</td>\n",
335
+ " <td>0</td>\n",
336
+ " <td>1</td>\n",
337
+ " <td>1</td>\n",
338
+ " <td>1</td>\n",
339
+ " <td>1</td>\n",
340
+ " <td>1</td>\n",
341
+ " <td>1</td>\n",
342
+ " <td>1</td>\n",
343
+ " </tr>\n",
344
+ " <tr>\n",
345
+ " <th>1</th>\n",
346
+ " <td>1</td>\n",
347
+ " <td>1</td>\n",
348
+ " <td>0</td>\n",
349
+ " <td>0</td>\n",
350
+ " <td>0</td>\n",
351
+ " <td>1</td>\n",
352
+ " <td>1</td>\n",
353
+ " <td>1</td>\n",
354
+ " <td>0</td>\n",
355
+ " <td>0</td>\n",
356
+ " <td>0</td>\n",
357
+ " <td>1</td>\n",
358
+ " <td>1</td>\n",
359
+ " <td>1</td>\n",
360
+ " <td>1</td>\n",
361
+ " </tr>\n",
362
+ " <tr>\n",
363
+ " <th>2</th>\n",
364
+ " <td>0</td>\n",
365
+ " <td>0</td>\n",
366
+ " <td>0</td>\n",
367
+ " <td>0</td>\n",
368
+ " <td>1</td>\n",
369
+ " <td>0</td>\n",
370
+ " <td>1</td>\n",
371
+ " <td>0</td>\n",
372
+ " <td>1</td>\n",
373
+ " <td>0</td>\n",
374
+ " <td>1</td>\n",
375
+ " <td>1</td>\n",
376
+ " <td>0</td>\n",
377
+ " <td>1</td>\n",
378
+ " <td>0</td>\n",
379
+ " </tr>\n",
380
+ " <tr>\n",
381
+ " <th>3</th>\n",
382
+ " <td>1</td>\n",
383
+ " <td>1</td>\n",
384
+ " <td>1</td>\n",
385
+ " <td>1</td>\n",
386
+ " <td>0</td>\n",
387
+ " <td>0</td>\n",
388
+ " <td>0</td>\n",
389
+ " <td>0</td>\n",
390
+ " <td>0</td>\n",
391
+ " <td>1</td>\n",
392
+ " <td>0</td>\n",
393
+ " <td>0</td>\n",
394
+ " <td>1</td>\n",
395
+ " <td>1</td>\n",
396
+ " <td>0</td>\n",
397
+ " </tr>\n",
398
+ " <tr>\n",
399
+ " <th>4</th>\n",
400
+ " <td>0</td>\n",
401
+ " <td>0</td>\n",
402
+ " <td>1</td>\n",
403
+ " <td>0</td>\n",
404
+ " <td>0</td>\n",
405
+ " <td>0</td>\n",
406
+ " <td>0</td>\n",
407
+ " <td>0</td>\n",
408
+ " <td>1</td>\n",
409
+ " <td>0</td>\n",
410
+ " <td>1</td>\n",
411
+ " <td>1</td>\n",
412
+ " <td>0</td>\n",
413
+ " <td>0</td>\n",
414
+ " <td>0</td>\n",
415
+ " </tr>\n",
416
+ " </tbody>\n",
417
+ "</table>\n",
418
+ "</div>"
419
+ ],
420
+ "text/plain": [
421
+ " GENDER SMOKING YELLOW_FINGERS ANXIETY PEER_PRESSURE CHRONIC DISEASE \\\n",
422
+ "0 1 0 1 1 0 0 \n",
423
+ "1 1 1 0 0 0 1 \n",
424
+ "2 0 0 0 0 1 0 \n",
425
+ "3 1 1 1 1 0 0 \n",
426
+ "4 0 0 1 0 0 0 \n",
427
+ "\n",
428
+ " FATIGUE ALLERGY WHEEZING ALCOHOL CONSUMING COUGHING SHORTNESS OF BREATH \\\n",
429
+ "0 1 0 1 1 1 1 \n",
430
+ "1 1 1 0 0 0 1 \n",
431
+ "2 1 0 1 0 1 1 \n",
432
+ "3 0 0 0 1 0 0 \n",
433
+ "4 0 0 1 0 1 1 \n",
434
+ "\n",
435
+ " SWALLOWING DIFFICULTY CHEST PAIN LUNG_CANCER \n",
436
+ "0 1 1 1 \n",
437
+ "1 1 1 1 \n",
438
+ "2 0 1 0 \n",
439
+ "3 1 1 0 \n",
440
+ "4 0 0 0 "
441
+ ]
442
+ },
443
+ "execution_count": 5,
444
+ "metadata": {},
445
+ "output_type": "execute_result"
446
+ }
447
+ ],
448
+ "source": [
449
+ "categorical = lung_data.drop(['AGE'], axis = 1)\n",
450
+ "encoder = LabelEncoder()\n",
451
+ "for col in categorical.columns:\n",
452
+ " categorical[col] = encoder.fit_transform(categorical[col])\n",
453
+ "\n",
454
+ "categorical = categorical.astype(\"category\") \n",
455
+ "categorical.head()\n",
456
+ "\n"
457
+ ]
458
+ },
459
+ {
460
+ "cell_type": "code",
461
+ "execution_count": 6,
462
+ "id": "b15e78ca",
463
+ "metadata": {},
464
+ "outputs": [
465
+ {
466
+ "data": {
467
+ "text/html": [
468
+ "<div>\n",
469
+ "<style scoped>\n",
470
+ " .dataframe tbody tr th:only-of-type {\n",
471
+ " vertical-align: middle;\n",
472
+ " }\n",
473
+ "\n",
474
+ " .dataframe tbody tr th {\n",
475
+ " vertical-align: top;\n",
476
+ " }\n",
477
+ "\n",
478
+ " .dataframe thead th {\n",
479
+ " text-align: right;\n",
480
+ " }\n",
481
+ "</style>\n",
482
+ "<table border=\"1\" class=\"dataframe\">\n",
483
+ " <thead>\n",
484
+ " <tr style=\"text-align: right;\">\n",
485
+ " <th></th>\n",
486
+ " <th>GENDER</th>\n",
487
+ " <th>SMOKING</th>\n",
488
+ " <th>YELLOW_FINGERS</th>\n",
489
+ " <th>ANXIETY</th>\n",
490
+ " <th>PEER_PRESSURE</th>\n",
491
+ " <th>CHRONIC DISEASE</th>\n",
492
+ " <th>FATIGUE</th>\n",
493
+ " <th>ALLERGY</th>\n",
494
+ " <th>WHEEZING</th>\n",
495
+ " <th>ALCOHOL CONSUMING</th>\n",
496
+ " <th>COUGHING</th>\n",
497
+ " <th>SHORTNESS OF BREATH</th>\n",
498
+ " <th>SWALLOWING DIFFICULTY</th>\n",
499
+ " <th>CHEST PAIN</th>\n",
500
+ " <th>LUNG_CANCER</th>\n",
501
+ " <th>AGE</th>\n",
502
+ " </tr>\n",
503
+ " </thead>\n",
504
+ " <tbody>\n",
505
+ " <tr>\n",
506
+ " <th>0</th>\n",
507
+ " <td>1</td>\n",
508
+ " <td>0</td>\n",
509
+ " <td>1</td>\n",
510
+ " <td>1</td>\n",
511
+ " <td>0</td>\n",
512
+ " <td>0</td>\n",
513
+ " <td>1</td>\n",
514
+ " <td>0</td>\n",
515
+ " <td>1</td>\n",
516
+ " <td>1</td>\n",
517
+ " <td>1</td>\n",
518
+ " <td>1</td>\n",
519
+ " <td>1</td>\n",
520
+ " <td>1</td>\n",
521
+ " <td>1</td>\n",
522
+ " <td>69</td>\n",
523
+ " </tr>\n",
524
+ " <tr>\n",
525
+ " <th>1</th>\n",
526
+ " <td>1</td>\n",
527
+ " <td>1</td>\n",
528
+ " <td>0</td>\n",
529
+ " <td>0</td>\n",
530
+ " <td>0</td>\n",
531
+ " <td>1</td>\n",
532
+ " <td>1</td>\n",
533
+ " <td>1</td>\n",
534
+ " <td>0</td>\n",
535
+ " <td>0</td>\n",
536
+ " <td>0</td>\n",
537
+ " <td>1</td>\n",
538
+ " <td>1</td>\n",
539
+ " <td>1</td>\n",
540
+ " <td>1</td>\n",
541
+ " <td>74</td>\n",
542
+ " </tr>\n",
543
+ " <tr>\n",
544
+ " <th>2</th>\n",
545
+ " <td>0</td>\n",
546
+ " <td>0</td>\n",
547
+ " <td>0</td>\n",
548
+ " <td>0</td>\n",
549
+ " <td>1</td>\n",
550
+ " <td>0</td>\n",
551
+ " <td>1</td>\n",
552
+ " <td>0</td>\n",
553
+ " <td>1</td>\n",
554
+ " <td>0</td>\n",
555
+ " <td>1</td>\n",
556
+ " <td>1</td>\n",
557
+ " <td>0</td>\n",
558
+ " <td>1</td>\n",
559
+ " <td>0</td>\n",
560
+ " <td>59</td>\n",
561
+ " </tr>\n",
562
+ " <tr>\n",
563
+ " <th>3</th>\n",
564
+ " <td>1</td>\n",
565
+ " <td>1</td>\n",
566
+ " <td>1</td>\n",
567
+ " <td>1</td>\n",
568
+ " <td>0</td>\n",
569
+ " <td>0</td>\n",
570
+ " <td>0</td>\n",
571
+ " <td>0</td>\n",
572
+ " <td>0</td>\n",
573
+ " <td>1</td>\n",
574
+ " <td>0</td>\n",
575
+ " <td>0</td>\n",
576
+ " <td>1</td>\n",
577
+ " <td>1</td>\n",
578
+ " <td>0</td>\n",
579
+ " <td>63</td>\n",
580
+ " </tr>\n",
581
+ " <tr>\n",
582
+ " <th>4</th>\n",
583
+ " <td>0</td>\n",
584
+ " <td>0</td>\n",
585
+ " <td>1</td>\n",
586
+ " <td>0</td>\n",
587
+ " <td>0</td>\n",
588
+ " <td>0</td>\n",
589
+ " <td>0</td>\n",
590
+ " <td>0</td>\n",
591
+ " <td>1</td>\n",
592
+ " <td>0</td>\n",
593
+ " <td>1</td>\n",
594
+ " <td>1</td>\n",
595
+ " <td>0</td>\n",
596
+ " <td>0</td>\n",
597
+ " <td>0</td>\n",
598
+ " <td>63</td>\n",
599
+ " </tr>\n",
600
+ " </tbody>\n",
601
+ "</table>\n",
602
+ "</div>"
603
+ ],
604
+ "text/plain": [
605
+ " GENDER SMOKING YELLOW_FINGERS ANXIETY PEER_PRESSURE CHRONIC DISEASE \\\n",
606
+ "0 1 0 1 1 0 0 \n",
607
+ "1 1 1 0 0 0 1 \n",
608
+ "2 0 0 0 0 1 0 \n",
609
+ "3 1 1 1 1 0 0 \n",
610
+ "4 0 0 1 0 0 0 \n",
611
+ "\n",
612
+ " FATIGUE ALLERGY WHEEZING ALCOHOL CONSUMING COUGHING SHORTNESS OF BREATH \\\n",
613
+ "0 1 0 1 1 1 1 \n",
614
+ "1 1 1 0 0 0 1 \n",
615
+ "2 1 0 1 0 1 1 \n",
616
+ "3 0 0 0 1 0 0 \n",
617
+ "4 0 0 1 0 1 1 \n",
618
+ "\n",
619
+ " SWALLOWING DIFFICULTY CHEST PAIN LUNG_CANCER AGE \n",
620
+ "0 1 1 1 69 \n",
621
+ "1 1 1 1 74 \n",
622
+ "2 0 1 0 59 \n",
623
+ "3 1 1 0 63 \n",
624
+ "4 0 0 0 63 "
625
+ ]
626
+ },
627
+ "execution_count": 6,
628
+ "metadata": {},
629
+ "output_type": "execute_result"
630
+ }
631
+ ],
632
+ "source": [
633
+ "lung_data = pd.concat([categorical, lung_data['AGE']], axis = 1)\n",
634
+ "lung_data.head()"
635
+ ]
636
+ },
637
+ {
638
+ "cell_type": "code",
639
+ "execution_count": 7,
640
+ "id": "8925150b",
641
+ "metadata": {},
642
+ "outputs": [
643
+ {
644
+ "data": {
645
+ "text/plain": [
646
+ "1 238\n",
647
+ "0 38\n",
648
+ "Name: LUNG_CANCER, dtype: int64"
649
+ ]
650
+ },
651
+ "execution_count": 7,
652
+ "metadata": {},
653
+ "output_type": "execute_result"
654
+ }
655
+ ],
656
+ "source": [
657
+ "lung_data.LUNG_CANCER.value_counts()"
658
+ ]
659
+ },
660
+ {
661
+ "cell_type": "code",
662
+ "execution_count": 8,
663
+ "id": "c992c376",
664
+ "metadata": {},
665
+ "outputs": [
666
+ {
667
+ "name": "stdout",
668
+ "output_type": "stream",
669
+ "text": [
670
+ "<class 'pandas.core.frame.DataFrame'>\n",
671
+ "Int64Index: 1000 entries, 183 to 87\n",
672
+ "Data columns (total 16 columns):\n",
673
+ " # Column Non-Null Count Dtype \n",
674
+ "--- ------ -------------- ----- \n",
675
+ " 0 GENDER 1000 non-null category\n",
676
+ " 1 SMOKING 1000 non-null category\n",
677
+ " 2 YELLOW_FINGERS 1000 non-null category\n",
678
+ " 3 ANXIETY 1000 non-null category\n",
679
+ " 4 PEER_PRESSURE 1000 non-null category\n",
680
+ " 5 CHRONIC DISEASE 1000 non-null category\n",
681
+ " 6 FATIGUE 1000 non-null category\n",
682
+ " 7 ALLERGY 1000 non-null category\n",
683
+ " 8 WHEEZING 1000 non-null category\n",
684
+ " 9 ALCOHOL CONSUMING 1000 non-null category\n",
685
+ " 10 COUGHING 1000 non-null category\n",
686
+ " 11 SHORTNESS OF BREATH 1000 non-null category\n",
687
+ " 12 SWALLOWING DIFFICULTY 1000 non-null category\n",
688
+ " 13 CHEST PAIN 1000 non-null category\n",
689
+ " 14 LUNG_CANCER 1000 non-null category\n",
690
+ " 15 AGE 1000 non-null int64 \n",
691
+ "dtypes: category(15), int64(1)\n",
692
+ "memory usage: 32.1 KB\n"
693
+ ]
694
+ }
695
+ ],
696
+ "source": [
697
+ "class_0 = lung_data[lung_data['LUNG_CANCER'] == 0]\n",
698
+ "class_1 = lung_data[lung_data['LUNG_CANCER'] == 1]\n",
699
+ "class_1 = class_1.sample(n = 500, replace = True)\n",
700
+ "class_0 = class_0.sample(n = 500, replace = True)\n",
701
+ "lung_data = pd.concat([class_0, class_1], axis = 0)\n",
702
+ "lung_data.info()"
703
+ ]
704
+ },
705
+ {
706
+ "cell_type": "code",
707
+ "execution_count": 9,
708
+ "id": "ebc06c8e",
709
+ "metadata": {},
710
+ "outputs": [
711
+ {
712
+ "data": {
713
+ "text/plain": [
714
+ "0 500\n",
715
+ "1 500\n",
716
+ "Name: LUNG_CANCER, dtype: int64"
717
+ ]
718
+ },
719
+ "execution_count": 9,
720
+ "metadata": {},
721
+ "output_type": "execute_result"
722
+ }
723
+ ],
724
+ "source": [
725
+ "lung_data['LUNG_CANCER'].value_counts()"
726
+ ]
727
+ },
728
+ {
729
+ "cell_type": "code",
730
+ "execution_count": 10,
731
+ "id": "b2ca517b",
732
+ "metadata": {},
733
+ "outputs": [
734
+ {
735
+ "data": {
736
+ "text/html": [
737
+ "<div>\n",
738
+ "<style scoped>\n",
739
+ " .dataframe tbody tr th:only-of-type {\n",
740
+ " vertical-align: middle;\n",
741
+ " }\n",
742
+ "\n",
743
+ " .dataframe tbody tr th {\n",
744
+ " vertical-align: top;\n",
745
+ " }\n",
746
+ "\n",
747
+ " .dataframe thead th {\n",
748
+ " text-align: right;\n",
749
+ " }\n",
750
+ "</style>\n",
751
+ "<table border=\"1\" class=\"dataframe\">\n",
752
+ " <thead>\n",
753
+ " <tr style=\"text-align: right;\">\n",
754
+ " <th></th>\n",
755
+ " <th>GENDER</th>\n",
756
+ " <th>SMOKING</th>\n",
757
+ " <th>YELLOW_FINGERS</th>\n",
758
+ " <th>ANXIETY</th>\n",
759
+ " <th>PEER_PRESSURE</th>\n",
760
+ " <th>CHRONIC DISEASE</th>\n",
761
+ " <th>FATIGUE</th>\n",
762
+ " <th>ALLERGY</th>\n",
763
+ " <th>WHEEZING</th>\n",
764
+ " <th>ALCOHOL CONSUMING</th>\n",
765
+ " <th>COUGHING</th>\n",
766
+ " <th>SHORTNESS OF BREATH</th>\n",
767
+ " <th>SWALLOWING DIFFICULTY</th>\n",
768
+ " <th>CHEST PAIN</th>\n",
769
+ " <th>LUNG_CANCER</th>\n",
770
+ " <th>AGE</th>\n",
771
+ " </tr>\n",
772
+ " </thead>\n",
773
+ " <tbody>\n",
774
+ " <tr>\n",
775
+ " <th>183</th>\n",
776
+ " <td>0</td>\n",
777
+ " <td>1</td>\n",
778
+ " <td>0</td>\n",
779
+ " <td>0</td>\n",
780
+ " <td>0</td>\n",
781
+ " <td>1</td>\n",
782
+ " <td>1</td>\n",
783
+ " <td>0</td>\n",
784
+ " <td>0</td>\n",
785
+ " <td>0</td>\n",
786
+ " <td>0</td>\n",
787
+ " <td>1</td>\n",
788
+ " <td>0</td>\n",
789
+ " <td>0</td>\n",
790
+ " <td>0</td>\n",
791
+ " <td>71</td>\n",
792
+ " </tr>\n",
793
+ " <tr>\n",
794
+ " <th>4</th>\n",
795
+ " <td>0</td>\n",
796
+ " <td>0</td>\n",
797
+ " <td>1</td>\n",
798
+ " <td>0</td>\n",
799
+ " <td>0</td>\n",
800
+ " <td>0</td>\n",
801
+ " <td>0</td>\n",
802
+ " <td>0</td>\n",
803
+ " <td>1</td>\n",
804
+ " <td>0</td>\n",
805
+ " <td>1</td>\n",
806
+ " <td>1</td>\n",
807
+ " <td>0</td>\n",
808
+ " <td>0</td>\n",
809
+ " <td>0</td>\n",
810
+ " <td>63</td>\n",
811
+ " </tr>\n",
812
+ " <tr>\n",
813
+ " <th>37</th>\n",
814
+ " <td>0</td>\n",
815
+ " <td>0</td>\n",
816
+ " <td>0</td>\n",
817
+ " <td>0</td>\n",
818
+ " <td>0</td>\n",
819
+ " <td>1</td>\n",
820
+ " <td>0</td>\n",
821
+ " <td>0</td>\n",
822
+ " <td>1</td>\n",
823
+ " <td>0</td>\n",
824
+ " <td>0</td>\n",
825
+ " <td>1</td>\n",
826
+ " <td>1</td>\n",
827
+ " <td>0</td>\n",
828
+ " <td>0</td>\n",
829
+ " <td>56</td>\n",
830
+ " </tr>\n",
831
+ " <tr>\n",
832
+ " <th>14</th>\n",
833
+ " <td>1</td>\n",
834
+ " <td>1</td>\n",
835
+ " <td>0</td>\n",
836
+ " <td>0</td>\n",
837
+ " <td>0</td>\n",
838
+ " <td>0</td>\n",
839
+ " <td>0</td>\n",
840
+ " <td>1</td>\n",
841
+ " <td>1</td>\n",
842
+ " <td>1</td>\n",
843
+ " <td>1</td>\n",
844
+ " <td>0</td>\n",
845
+ " <td>0</td>\n",
846
+ " <td>1</td>\n",
847
+ " <td>0</td>\n",
848
+ " <td>69</td>\n",
849
+ " </tr>\n",
850
+ " <tr>\n",
851
+ " <th>8</th>\n",
852
+ " <td>0</td>\n",
853
+ " <td>1</td>\n",
854
+ " <td>0</td>\n",
855
+ " <td>1</td>\n",
856
+ " <td>0</td>\n",
857
+ " <td>0</td>\n",
858
+ " <td>1</td>\n",
859
+ " <td>0</td>\n",
860
+ " <td>0</td>\n",
861
+ " <td>0</td>\n",
862
+ " <td>0</td>\n",
863
+ " <td>0</td>\n",
864
+ " <td>0</td>\n",
865
+ " <td>0</td>\n",
866
+ " <td>0</td>\n",
867
+ " <td>68</td>\n",
868
+ " </tr>\n",
869
+ " </tbody>\n",
870
+ "</table>\n",
871
+ "</div>"
872
+ ],
873
+ "text/plain": [
874
+ " GENDER SMOKING YELLOW_FINGERS ANXIETY PEER_PRESSURE CHRONIC DISEASE \\\n",
875
+ "183 0 1 0 0 0 1 \n",
876
+ "4 0 0 1 0 0 0 \n",
877
+ "37 0 0 0 0 0 1 \n",
878
+ "14 1 1 0 0 0 0 \n",
879
+ "8 0 1 0 1 0 0 \n",
880
+ "\n",
881
+ " FATIGUE ALLERGY WHEEZING ALCOHOL CONSUMING COUGHING SHORTNESS OF BREATH \\\n",
882
+ "183 1 0 0 0 0 1 \n",
883
+ "4 0 0 1 0 1 1 \n",
884
+ "37 0 0 1 0 0 1 \n",
885
+ "14 0 1 1 1 1 0 \n",
886
+ "8 1 0 0 0 0 0 \n",
887
+ "\n",
888
+ " SWALLOWING DIFFICULTY CHEST PAIN LUNG_CANCER AGE \n",
889
+ "183 0 0 0 71 \n",
890
+ "4 0 0 0 63 \n",
891
+ "37 1 0 0 56 \n",
892
+ "14 0 1 0 69 \n",
893
+ "8 0 0 0 68 "
894
+ ]
895
+ },
896
+ "execution_count": 10,
897
+ "metadata": {},
898
+ "output_type": "execute_result"
899
+ }
900
+ ],
901
+ "source": [
902
+ "lung_data.head()"
903
+ ]
904
+ },
905
+ {
906
+ "cell_type": "code",
907
+ "execution_count": 11,
908
+ "id": "62696544",
909
+ "metadata": {},
910
+ "outputs": [],
911
+ "source": [
912
+ "X = lung_data.drop('LUNG_CANCER', axis =1)\n",
913
+ "y = lung_data.LUNG_CANCER"
914
+ ]
915
+ },
916
+ {
917
+ "cell_type": "code",
918
+ "execution_count": 12,
919
+ "id": "6e4572a6",
920
+ "metadata": {},
921
+ "outputs": [
922
+ {
923
+ "name": "stdout",
924
+ "output_type": "stream",
925
+ "text": [
926
+ "1 80\n",
927
+ "0 70\n",
928
+ "Name: LUNG_CANCER, dtype: int64\n"
929
+ ]
930
+ }
931
+ ],
932
+ "source": [
933
+ "X_train, X_test, y_train, y_test = train_test_split(X,y, test_size =0.15, random_state = 42)\n",
934
+ "\n",
935
+ "print(y_test.value_counts())"
936
+ ]
937
+ },
938
+ {
939
+ "cell_type": "code",
940
+ "execution_count": 13,
941
+ "id": "1b243af1",
942
+ "metadata": {},
943
+ "outputs": [],
944
+ "source": [
945
+ "categorical_columns = ['GENDER', 'SMOKING', 'YELLOW_FINGERS', 'ANXIETY', 'PEER_PRESSURE',\n",
946
+ " 'CHRONIC DISEASE', 'FATIGUE ', 'ALLERGY ', 'WHEEZING',\n",
947
+ " 'ALCOHOL CONSUMING', 'COUGHING', 'SHORTNESS OF BREATH',\n",
948
+ " 'SWALLOWING DIFFICULTY', 'CHEST PAIN',\n",
949
+ "]"
950
+ ]
951
+ },
952
+ {
953
+ "cell_type": "code",
954
+ "execution_count": 14,
955
+ "id": "d0a038fd",
956
+ "metadata": {},
957
+ "outputs": [
958
+ {
959
+ "name": "stdout",
960
+ "output_type": "stream",
961
+ "text": [
962
+ " precision recall f1-score support\n",
963
+ "\n",
964
+ " 0 0.95 1.00 0.97 70\n",
965
+ " 1 1.00 0.95 0.97 80\n",
966
+ "\n",
967
+ " accuracy 0.97 150\n",
968
+ " macro avg 0.97 0.97 0.97 150\n",
969
+ "weighted avg 0.97 0.97 0.97 150\n",
970
+ "\n"
971
+ ]
972
+ },
973
+ {
974
+ "data": {
975
+ "text/plain": [
976
+ "array([[70, 0],\n",
977
+ " [ 4, 76]], dtype=int64)"
978
+ ]
979
+ },
980
+ "execution_count": 14,
981
+ "metadata": {},
982
+ "output_type": "execute_result"
983
+ }
984
+ ],
985
+ "source": [
986
+ "# create DMatrix for training and test sets with categorical features enabled\n",
987
+ "X_train = X_train.astype({col: \"category\" for col in categorical_columns})\n",
988
+ "\n",
989
+ "dtrain = xgb.DMatrix(X_train , label=y_train, enable_categorical=True)\n",
990
+ "dtest = xgb.DMatrix(X_test, enable_categorical=True)\n",
991
+ "\n",
992
+ "# set parameters for XGBoost classifier\n",
993
+ "\n",
994
+ "params = {\n",
995
+ " 'objective': 'binary:logistic',\n",
996
+ " 'max_depth':3, \n",
997
+ " 'eta':1, \n",
998
+ " 'nthread': 3,\n",
999
+ " 'eval_metric': 'auc',\n",
1000
+ " 'learning_rate': 1\n",
1001
+ "}\n",
1002
+ "\n",
1003
+ "# train model\n",
1004
+ "model = xgb.train(params, dtrain, num_boost_round=100)\n",
1005
+ "# make predictions on test data\n",
1006
+ "y_pred = model.predict(dtest)\n",
1007
+ "\n",
1008
+ "# convert probabilities to binary predictions\n",
1009
+ "y_pred_binary = [1 if p >= 0.99 else 0 for p in y_pred]\n",
1010
+ "\n",
1011
+ "# evaluate model performance\n",
1012
+ "print(classification_report(y_test, y_pred_binary))\n",
1013
+ "confusion_matrix(y_test, y_pred_binary)"
1014
+ ]
1015
+ },
1016
+ {
1017
+ "cell_type": "markdown",
1018
+ "id": "d74abb37",
1019
+ "metadata": {},
1020
+ "source": [
1021
+ "#### Let's assume I want to make a prediction for a new patient coming in"
1022
+ ]
1023
+ },
1024
+ {
1025
+ "cell_type": "code",
1026
+ "execution_count": 16,
1027
+ "id": "cd579274",
1028
+ "metadata": {},
1029
+ "outputs": [
1030
+ {
1031
+ "data": {
1032
+ "text/html": [
1033
+ "<div>\n",
1034
+ "<style scoped>\n",
1035
+ " .dataframe tbody tr th:only-of-type {\n",
1036
+ " vertical-align: middle;\n",
1037
+ " }\n",
1038
+ "\n",
1039
+ " .dataframe tbody tr th {\n",
1040
+ " vertical-align: top;\n",
1041
+ " }\n",
1042
+ "\n",
1043
+ " .dataframe thead th {\n",
1044
+ " text-align: right;\n",
1045
+ " }\n",
1046
+ "</style>\n",
1047
+ "<table border=\"1\" class=\"dataframe\">\n",
1048
+ " <thead>\n",
1049
+ " <tr style=\"text-align: right;\">\n",
1050
+ " <th></th>\n",
1051
+ " <th>GENDER</th>\n",
1052
+ " <th>SMOKING</th>\n",
1053
+ " <th>YELLOW_FINGERS</th>\n",
1054
+ " <th>ANXIETY</th>\n",
1055
+ " <th>PEER_PRESSURE</th>\n",
1056
+ " <th>CHRONIC DISEASE</th>\n",
1057
+ " <th>FATIGUE</th>\n",
1058
+ " <th>ALLERGY</th>\n",
1059
+ " <th>WHEEZING</th>\n",
1060
+ " <th>ALCOHOL CONSUMING</th>\n",
1061
+ " <th>COUGHING</th>\n",
1062
+ " <th>SHORTNESS OF BREATH</th>\n",
1063
+ " <th>SWALLOWING DIFFICULTY</th>\n",
1064
+ " <th>CHEST PAIN</th>\n",
1065
+ " <th>AGE</th>\n",
1066
+ " </tr>\n",
1067
+ " </thead>\n",
1068
+ " <tbody>\n",
1069
+ " <tr>\n",
1070
+ " <th>0</th>\n",
1071
+ " <td>1</td>\n",
1072
+ " <td>0</td>\n",
1073
+ " <td>0</td>\n",
1074
+ " <td>1</td>\n",
1075
+ " <td>0</td>\n",
1076
+ " <td>0</td>\n",
1077
+ " <td>0</td>\n",
1078
+ " <td>0</td>\n",
1079
+ " <td>0</td>\n",
1080
+ " <td>0</td>\n",
1081
+ " <td>1</td>\n",
1082
+ " <td>0</td>\n",
1083
+ " <td>0</td>\n",
1084
+ " <td>1</td>\n",
1085
+ " <td>25</td>\n",
1086
+ " </tr>\n",
1087
+ " </tbody>\n",
1088
+ "</table>\n",
1089
+ "</div>"
1090
+ ],
1091
+ "text/plain": [
1092
+ " GENDER SMOKING YELLOW_FINGERS ANXIETY PEER_PRESSURE CHRONIC DISEASE \\\n",
1093
+ "0 1 0 0 1 0 0 \n",
1094
+ "\n",
1095
+ " FATIGUE ALLERGY WHEEZING ALCOHOL CONSUMING COUGHING \\\n",
1096
+ "0 0 0 0 0 1 \n",
1097
+ "\n",
1098
+ " SHORTNESS OF BREATH SWALLOWING DIFFICULTY CHEST PAIN AGE \n",
1099
+ "0 0 0 1 25 "
1100
+ ]
1101
+ },
1102
+ "execution_count": 16,
1103
+ "metadata": {},
1104
+ "output_type": "execute_result"
1105
+ }
1106
+ ],
1107
+ "source": [
1108
+ "input_dict = {'GENDER': 1, 'SMOKING': 0, 'YELLOW_FINGERS':0,\n",
1109
+ " 'ANXIETY': 1, 'PEER_PRESSURE': 0,\n",
1110
+ " 'CHRONIC DISEASE': 0, 'FATIGUE ': 0,\n",
1111
+ " 'ALLERGY ': 0, 'WHEEZING': 0,\n",
1112
+ " 'ALCOHOL CONSUMING': 0, 'COUGHING': 1,\n",
1113
+ " 'SHORTNESS OF BREATH': 0,\n",
1114
+ " 'SWALLOWING DIFFICULTY': 0,\n",
1115
+ " 'CHEST PAIN': 1, 'AGE': 25}\n",
1116
+ "input_df = pd.DataFrame.from_dict([input_dict])\n",
1117
+ "input_df.astype({col: \"category\" for col in categorical_columns})\n",
1118
+ "input_df"
1119
+ ]
1120
+ },
1121
+ {
1122
+ "cell_type": "code",
1123
+ "execution_count": 17,
1124
+ "id": "0f6dc7eb",
1125
+ "metadata": {},
1126
+ "outputs": [
1127
+ {
1128
+ "name": "stdout",
1129
+ "output_type": "stream",
1130
+ "text": [
1131
+ "<class 'pandas.core.frame.DataFrame'>\n",
1132
+ "RangeIndex: 1 entries, 0 to 0\n",
1133
+ "Data columns (total 15 columns):\n",
1134
+ " # Column Non-Null Count Dtype\n",
1135
+ "--- ------ -------------- -----\n",
1136
+ " 0 GENDER 1 non-null int64\n",
1137
+ " 1 SMOKING 1 non-null int64\n",
1138
+ " 2 YELLOW_FINGERS 1 non-null int64\n",
1139
+ " 3 ANXIETY 1 non-null int64\n",
1140
+ " 4 PEER_PRESSURE 1 non-null int64\n",
1141
+ " 5 CHRONIC DISEASE 1 non-null int64\n",
1142
+ " 6 FATIGUE 1 non-null int64\n",
1143
+ " 7 ALLERGY 1 non-null int64\n",
1144
+ " 8 WHEEZING 1 non-null int64\n",
1145
+ " 9 ALCOHOL CONSUMING 1 non-null int64\n",
1146
+ " 10 COUGHING 1 non-null int64\n",
1147
+ " 11 SHORTNESS OF BREATH 1 non-null int64\n",
1148
+ " 12 SWALLOWING DIFFICULTY 1 non-null int64\n",
1149
+ " 13 CHEST PAIN 1 non-null int64\n",
1150
+ " 14 AGE 1 non-null int64\n",
1151
+ "dtypes: int64(15)\n",
1152
+ "memory usage: 248.0 bytes\n"
1153
+ ]
1154
+ }
1155
+ ],
1156
+ "source": [
1157
+ "input_df.info()"
1158
+ ]
1159
+ },
1160
+ {
1161
+ "cell_type": "code",
1162
+ "execution_count": 18,
1163
+ "id": "afb5ec1e",
1164
+ "metadata": {},
1165
+ "outputs": [
1166
+ {
1167
+ "data": {
1168
+ "text/plain": [
1169
+ "array([0.20313403], dtype=float32)"
1170
+ ]
1171
+ },
1172
+ "execution_count": 18,
1173
+ "metadata": {},
1174
+ "output_type": "execute_result"
1175
+ }
1176
+ ],
1177
+ "source": [
1178
+ "categorical_columns = ['GENDER', 'SMOKING', 'YELLOW_FINGERS', 'ANXIETY', 'PEER_PRESSURE',\n",
1179
+ " 'CHRONIC DISEASE', 'FATIGUE ', 'ALLERGY ', 'WHEEZING',\n",
1180
+ " 'ALCOHOL CONSUMING', 'COUGHING', 'SHORTNESS OF BREATH',\n",
1181
+ " 'SWALLOWING DIFFICULTY', 'CHEST PAIN',\n",
1182
+ "]\n",
1183
+ "\n",
1184
+ "\n",
1185
+ "dtest = xgb.DMatrix(input_df)\n",
1186
+ "\n",
1187
+ "prediction = model.predict(dtest)\n",
1188
+ "prediction"
1189
+ ]
1190
+ },
1191
+ {
1192
+ "cell_type": "code",
1193
+ "execution_count": null,
1194
+ "id": "91746991",
1195
+ "metadata": {},
1196
+ "outputs": [],
1197
+ "source": []
1198
+ },
1199
+ {
1200
+ "cell_type": "code",
1201
+ "execution_count": null,
1202
+ "id": "728ee97e",
1203
+ "metadata": {},
1204
+ "outputs": [],
1205
+ "source": []
1206
+ },
1207
+ {
1208
+ "cell_type": "code",
1209
+ "execution_count": 19,
1210
+ "id": "fff3df16",
1211
+ "metadata": {},
1212
+ "outputs": [
1213
+ {
1214
+ "name": "stdout",
1215
+ "output_type": "stream",
1216
+ "text": [
1217
+ "Running on local URL: http://127.0.0.1:7860\n",
1218
+ "Running on public URL: https://de43ecf59e54e5afd9.gradio.live\n",
1219
+ "\n",
1220
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces\n"
1221
+ ]
1222
+ },
1223
+ {
1224
+ "data": {
1225
+ "text/plain": []
1226
+ },
1227
+ "execution_count": 19,
1228
+ "metadata": {},
1229
+ "output_type": "execute_result"
1230
+ }
1231
+ ],
1232
+ "source": [
1233
+ "import gradio as gr\n",
1234
+ "\n",
1235
+ "# Define the Gradio input and output interfaces\n",
1236
+ "inputs = [\n",
1237
+ " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Gender\"),\n",
1238
+ " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you smoke?\"),\n",
1239
+ " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have Yellow Fingers\"),\n",
1240
+ " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have Anxiety\"),\n",
1241
+ " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you get influenced by Peer Pressure\"),\n",
1242
+ " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have any Chronic Disease\"),\n",
1243
+ " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have Fatigue\"),\n",
1244
+ " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have an Allergy\"),\n",
1245
+ " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you experience Wheezing\"),\n",
1246
+ " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you drink alcohol\"),\n",
1247
+ " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Are you Coughing\"),\n",
1248
+ " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have Shortness of Breath\"),\n",
1249
+ " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have Swallowing Difficulty\"),\n",
1250
+ " gr.inputs.Dropdown(choices=[\"0\", \"1\"], label=\"Do you have Chest Pain\"),\n",
1251
+ " gr.inputs.Number(label='Age')\n",
1252
+ "]\n",
1253
+ "\n",
1254
+ "output = gr.outputs.Label(num_top_classes=2)\n",
1255
+ "\n",
1256
+ "# Define the predict function\n",
1257
+ "def predict(gender, smoking, yellow_fingers, anxiety, peer_pressure,\n",
1258
+ " chronic_disease, fatigue, allergy, wheezing, alcohol_consuming,\n",
1259
+ " coughing, shortness_of_breath, swallowing_difficulty, chest_pain,\n",
1260
+ " age):\n",
1261
+ " # Create a dataframe with the input values\n",
1262
+ " input_dict = {'GENDER': gender, 'SMOKING': smoking, 'YELLOW_FINGERS': yellow_fingers,\n",
1263
+ " 'ANXIETY': anxiety, 'PEER_PRESSURE': peer_pressure,\n",
1264
+ " 'CHRONIC DISEASE': chronic_disease, 'FATIGUE ': fatigue,\n",
1265
+ " 'ALLERGY ': allergy, 'WHEEZING': wheezing,\n",
1266
+ " 'ALCOHOL CONSUMING': alcohol_consuming, 'COUGHING': coughing,\n",
1267
+ " 'SHORTNESS OF BREATH': shortness_of_breath,\n",
1268
+ " 'SWALLOWING DIFFICULTY': swallowing_difficulty,\n",
1269
+ " 'CHEST PAIN': chest_pain, 'AGE': age}\n",
1270
+ " input_df = pd.DataFrame.from_dict([input_dict]).astype(\"int\")\n",
1271
+ " \n",
1272
+ " dtest = xgb.DMatrix(input_df)\n",
1273
+ " \n",
1274
+ " \n",
1275
+ " #make predictions\n",
1276
+ " prediction = model.predict(dtest)\n",
1277
+ " \n",
1278
+ " # Return prediction\n",
1279
+ " return \"You have Lung Cancer, you might want to see the Doctor.\" if prediction >0.99 else \"You don't have Lung Cancer, Enjoy❤\"\n",
1280
+ "\n",
1281
+ "# Create and launch the interface\n",
1282
+ "interface = gr.Interface(fn=predict, inputs=inputs, outputs=output, \n",
1283
+ " title='Lung Cancer Prediction', description='Predicting lung cancer using XGBoost Classifier.\\nPlease Note:\\nFemale = 0, Male= 1\\nNo = 0, Yes = 1')\n",
1284
+ "interface.launch(auth = ('user', 'atom'), share = True)"
1285
+ ]
1286
+ },
1287
+ {
1288
+ "cell_type": "code",
1289
+ "execution_count": null,
1290
+ "id": "de27fbe1",
1291
+ "metadata": {},
1292
+ "outputs": [],
1293
+ "source": []
1294
+ }
1295
+ ],
1296
+ "metadata": {
1297
+ "kernelspec": {
1298
+ "display_name": "Python 3 (ipykernel)",
1299
+ "language": "python",
1300
+ "name": "python3"
1301
+ },
1302
+ "language_info": {
1303
+ "codemirror_mode": {
1304
+ "name": "ipython",
1305
+ "version": 3
1306
+ },
1307
+ "file_extension": ".py",
1308
+ "mimetype": "text/x-python",
1309
+ "name": "python",
1310
+ "nbconvert_exporter": "python",
1311
+ "pygments_lexer": "ipython3",
1312
+ "version": "3.9.7"
1313
+ }
1314
+ },
1315
+ "nbformat": 4,
1316
+ "nbformat_minor": 5
1317
+ }
app (2).py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[1]:
5
+
6
+
7
+ import warnings
8
+ warnings.simplefilter("ignore")
9
+ import pandas as pd
10
+ import numpy as np
11
+ from sklearn.metrics import classification_report, confusion_matrix
12
+ from sklearn.model_selection import train_test_split
13
+ import xgboost as xgb
14
+ from sklearn.preprocessing import LabelEncoder
15
+ import joblib
16
+ import gradio as gr
17
+ import joblib
18
+
19
# Define the Gradio input and output components.
#
# The components are created from the top-level ``gr.Dropdown`` / ``gr.Number``
# / ``gr.Label`` classes.  The legacy ``gr.inputs`` and ``gr.outputs``
# namespaces used previously were deprecated in Gradio 3 and removed in
# Gradio 4, so they raise AttributeError on current releases.

# Every symptom question is answered with "0" (No / Female) or "1" (Yes / Male).
_BINARY_CHOICES = ("0", "1")

# Order matters: Gradio passes the values to ``predict`` positionally, so the
# labels below must stay in the same order as the ``predict`` parameters.
_DROPDOWN_LABELS = (
    "Gender",
    "Do you smoke?",
    "Do you have Yellow Fingers?",
    "Do you have Anxiety?",
    "Do you get influenced by Peer Pressure?",
    "Do you have any Chronic Disease?",
    "Do you have Fatigue?",
    "Do you have an Allergy?",
    "Do you experience Wheezing?",
    "Do you drink alcohol?",
    "Are you Coughing?",
    "Do you have Shortness of Breath?",
    "Do you have Swallowing Difficulty?",
    "Do you have Chest Pain?",
)

inputs = [
    # A fresh list per component so no two dropdowns share mutable state.
    gr.Dropdown(choices=list(_BINARY_CHOICES), label=label)
    for label in _DROPDOWN_LABELS
]
# Age is the only free-form numeric field and is the last ``predict`` argument.
inputs.append(gr.Number(label='What is your Age'))

output = gr.Label(num_top_classes=2)
39
+
40
+ # Define the predict function
41
# Path of the serialized model, relative to the working directory.
_MODEL_PATH = "model.pkl"

# Scores strictly above this value are reported as a positive screening.
# NOTE(review): 0.99 is an unusually strict cut-off -- confirm it is intended.
_POSITIVE_THRESHOLD = 0.99

# Cache of already-loaded models, keyed by path.
_loaded_models = {}


def _load_model(path=_MODEL_PATH):
    """Return the model stored at *path*, reading it from disk only once."""
    if path not in _loaded_models:
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code -- only ever ship a trusted model file.
        _loaded_models[path] = joblib.load(path)
    return _loaded_models[path]


def predict(gender, smoking, yellow_fingers, anxiety, peer_pressure,
            chronic_disease, fatigue, allergy, wheezing, alcohol_consuming,
            coughing, shortness_of_breath, swallowing_difficulty, chest_pain,
            age):
    """Score one questionnaire with the saved model and return a message.

    Args:
        gender, smoking, yellow_fingers, anxiety, peer_pressure,
        chronic_disease, fatigue, allergy, wheezing, alcohol_consuming,
        coughing, shortness_of_breath, swallowing_difficulty, chest_pain:
            Answers to the questionnaire; each must be convertible to ``int``
            (the UI describes them as 0 = No/Female, 1 = Yes/Male).
        age: The respondent's age; converted to ``int`` before scoring.

    Returns:
        A user-facing string: the "see the Doctor" message when the model
        score is above ``_POSITIVE_THRESHOLD``, otherwise the reassuring one.

    Raises:
        gr.Error: If any field was left empty (``None``).
    """
    # The trailing spaces in 'FATIGUE ' and 'ALLERGY ' are deliberate --
    # presumably they mirror the column names the model was trained on
    # (verify against the training data before "cleaning" them up).
    input_dict = {'GENDER': gender, 'SMOKING': smoking, 'YELLOW_FINGERS': yellow_fingers,
                  'ANXIETY': anxiety, 'PEER_PRESSURE': peer_pressure,
                  'CHRONIC DISEASE': chronic_disease, 'FATIGUE ': fatigue,
                  'ALLERGY ': allergy, 'WHEEZING': wheezing,
                  'ALCOHOL CONSUMING': alcohol_consuming, 'COUGHING': coughing,
                  'SHORTNESS OF BREATH': shortness_of_breath,
                  'SWALLOWING DIFFICULTY': swallowing_difficulty,
                  'CHEST PAIN': chest_pain, 'AGE': age}

    # An unanswered field arrives as None and would otherwise surface as an
    # opaque TypeError from ``astype``; report it to the user instead.
    missing = [name.strip() for name, value in input_dict.items() if value is None]
    if missing:
        raise gr.Error("Please provide a value for: " + ", ".join(missing))

    # Single-row frame; every column is cast to int before it is handed to XGBoost.
    input_df = pd.DataFrame([input_dict]).astype("int")
    dtest = xgb.DMatrix(input_df)

    # Make the prediction with the cached model instead of re-reading the
    # pickle from disk on every request.
    prediction = _load_model().predict(dtest)

    # The model returns one score per row; take the single row's score as a
    # plain float rather than relying on the truthiness of a size-1 array.
    score = float(prediction[0])

    if score > _POSITIVE_THRESHOLD:
        return "You exhibit symptoms of Lung cancer, you might want to see the Doctor for proper diagnosis ❤."
    return "You don't seem to have Lung Cancer, Enjoy and take good care of yourself❤"
66
+
67
# Wire ``predict`` to the input/output components and start the web app.
interface = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=output,
    title='Lung Cancer Prediction',
    # Adjacent literals are concatenated into the one description string;
    # it explains the 0/1 encoding used by the dropdowns.
    description=(
        'Predicting lung cancer using XGBoost Classifier.\n'
        'Please Note:\n'
        'Female = 0, Male= 1\n'
        'No = 0, Yes = 1'
    ),
    theme='darkhuggingface',
)

# Blocks here and serves the UI with Gradio's default launch settings.
interface.launch()
72
+
73
+
74
+
model (1).pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e75ee73561f5117ee2d88872d684452d80bd3b8c3d43f98e7a3c5258f800e84
3
+ size 91879
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ pandas>=1.3.4
2
+ numpy>=1.23.3
3
+ scikit-learn>=1.1.2
4
+ xgboost>=1.5.0
5
+ gradio>=3.20.1
6
+ joblib >=1.1.0