aman5614 committed on
Commit b108030 · verified · 1 Parent(s): 24ed534

Update app.py

Files changed (1)
  1. app.py +72 -86
app.py CHANGED
@@ -323,100 +323,86 @@ def evaluate_model(model, X_test, y_test, model_name):
  "macro_avg_f1": report["macro avg"]["f1-score"],
  "accuracy": accuracy_score(y_test, y_pred)
  }
-
- # Convert dictionary to dataframe
- df = pd.DataFrame(metrics, index=[model_name]).round(2)
-
- dt_evaluation = evaluate_model(best_dt, X_test, y_test, 'DT')
- dt_evaluation
- rf_base = RandomForestClassifier(random_state=0)
- param_grid_rf = {
+ # Convert dictionary to dataframe
+ df = pd.DataFrame(metrics, index=[model_name]).round(2)
+ dt_evaluation = evaluate_model(best_dt, X_test, y_test, 'DT')
+ dt_evaluation
+ rf_base = RandomForestClassifier(random_state=0)
+ param_grid_rf = {
  'n_estimators': [10, 30, 50, 70, 100],
  'criterion': ['gini', 'entropy'],
  'max_depth': [2, 3, 4],
  'min_samples_split': [2, 3, 4, 5],
  'min_samples_leaf': [1, 2, 3],
  'bootstrap': [True, False]
- }
- # Using the tune_clf_hyperparameters function to get the best estimator
- best_rf, best_rf_hyperparams = tune_clf_hyperparameters(rf_base, param_grid_rf, X_train, y_train)
- print('RF Optimal Hyperparameters: \n', best_rf_hyperparams)
-
- # Evaluate the optimized model on the train data
- print(classification_report(y_train, best_rf.predict(X_train)))
-
- # Evaluate the optimized model on the test data
- print(classification_report(y_test, best_rf.predict(X_test)))
-
- rf_evaluation = evaluate_model(best_rf, X_test, y_test, 'RF')
- rf_evaluation
-
- # Define the base KNN model and set up the pipeline with scaling
- knn_pipeline = Pipeline([
- ('scaler', StandardScaler()),
- ('knn', KNeighborsClassifier())
- ])
- # Hyperparameter grid for KNN
- knn_param_grid = {
- 'knn__n_neighbors': list(range(1, 12)),
- 'knn__weights': ['uniform', 'distance'],
- 'knn__p': [1, 2] # 1: Manhattan distance, 2: Euclidean distance
- }
- # Hyperparameter tuning for KNN
- best_knn, best_knn_hyperparams = tune_clf_hyperparameters(knn_pipeline, knn_param_grid, X_train, y_train)
- print('KNN Optimal Hyperparameters: \n', best_knn_hyperparams)
- # Evaluate the optimized model on the train data
- print(classification_report(y_train, best_knn.predict(X_train)))
- # Evaluate the optimized model on the test data
- print(classification_report(y_test, best_knn.predict(X_test)))
- knn_evaluation = evaluate_model(best_knn, X_test, y_test, 'KNN')
- knn_evaluation
-
- svm_pipeline = Pipeline([
+ }
+ # Using the tune_clf_hyperparameters function to get the best estimator
+ best_rf, best_rf_hyperparams = tune_clf_hyperparameters(rf_base, param_grid_rf, X_train, y_train)
+ print('RF Optimal Hyperparameters: \n', best_rf_hyperparams)
+ # Evaluate the optimized model on the train data
+ print(classification_report(y_train, best_rf.predict(X_train)))
+ # Evaluate the optimized model on the test data
+ print(classification_report(y_test, best_rf.predict(X_test)))
+ rf_evaluation = evaluate_model(best_rf, X_test, y_test, 'RF')
+ rf_evaluation
+ # Define the base KNN model and set up the pipeline with scaling
+ knn_pipeline = Pipeline([
+ ('scaler', StandardScaler()),
+ ('knn', KNeighborsClassifier())
+ ])
+ # Hyperparameter grid for KNN
+ knn_param_grid = {
+ 'knn__n_neighbors': list(range(1, 12)),
+ 'knn__weights': ['uniform', 'distance'],
+ 'knn__p': [1, 2] # 1: Manhattan distance, 2: Euclidean distance
+ }
+ # Hyperparameter tuning for KNN
+ best_knn, best_knn_hyperparams = tune_clf_hyperparameters(knn_pipeline, knn_param_grid, X_train, y_train)
+ print('KNN Optimal Hyperparameters: \n', best_knn_hyperparams)
+ # Evaluate the optimized model on the train data
+ print(classification_report(y_train, best_knn.predict(X_train)))
+ # Evaluate the optimized model on the test data
+ print(classification_report(y_test, best_knn.predict(X_test)))
+ knn_evaluation = evaluate_model(best_knn, X_test, y_test, 'KNN')
+ knn_evaluation
+ svm_pipeline = Pipeline([
  ('scaler', StandardScaler()),
  ('svm', SVC(probability=True))
- ])
-
- param_grid_svm = {
- 'svm__C': [0.0011, 0.005, 0.01, 0.05, 0.1, 1, 10, 20],
- 'svm__kernel': ['linear', 'rbf', 'poly'],
- 'svm__gamma': ['scale', 'auto', 0.1, 0.5, 1, 5],
- 'svm__degree': [2, 3, 4]
- }
- # Call the function for hyperparameter tuning
- best_svm, best_svm_hyperparams = tune_clf_hyperparameters(svm_pipeline, param_grid_svm, X_train, y_train)
- print('SVM Optimal Hyperparameters: \n', best_svm_hyperparams)
- # Evaluate the optimized model on the train data
- print(classification_report(y_train, best_svm.predict(X_train)))
- svm_evaluation = evaluate_model(best_svm, X_test, y_test, 'SVM')
- svm_evaluation
- # Concatenate the dataframes
- all_evaluations = [dt_evaluation, rf_evaluation, knn_evaluation, svm_evaluation]
- results = pd.concat(all_evaluations)
-
- # Sort by 'recall_1'
- results = results.sort_values(by='recall_1', ascending=False).round(2)
- results
- # Sort values based on 'recall_1'
- results.sort_values(by='recall_1', ascending=True, inplace=True)
- recall_1_scores = results['recall_1']
-
- # Plot the horizontal bar chart
- fig, ax = plt.subplots(figsize=(12, 7), dpi=70)
- ax.barh(results.index, recall_1_scores, color='red')
-
- # Annotate the values and indexes
- for i, (value, name) in enumerate(zip(recall_1_scores, results.index)):
- ax.text(value + 0.01, i, f"{value:.2f}", ha='left', va='center', fontweight='bold', color='red', fontsize=15)
- ax.text(0.1, i, name, ha='left', va='center', fontweight='bold', color='white', fontsize=25)
-
- # Remove yticks
- ax.set_yticks([])
-
- # Set x-axis limit
- ax.set_xlim([0, 1.2])
-
-
+ ])
+ param_grid_svm = {
+ 'svm__C': [0.0011, 0.005, 0.01, 0.05, 0.1, 1, 10, 20],
+ 'svm__kernel': ['linear', 'rbf', 'poly'],
+ 'svm__gamma': ['scale', 'auto', 0.1, 0.5, 1, 5],
+ 'svm__degree': [2, 3, 4]
+ }
+ # Call the function for hyperparameter tuning
+ best_svm, best_svm_hyperparams = tune_clf_hyperparameters(svm_pipeline, param_grid_svm, X_train, y_train)
+ print('SVM Optimal Hyperparameters: \n', best_svm_hyperparams)
+ # Evaluate the optimized model on the train data
+ print(classification_report(y_train, best_svm.predict(X_train)))
+ svm_evaluation = evaluate_model(best_svm, X_test, y_test, 'SVM')
+ svm_evaluation
+ # Concatenate the dataframes
+ all_evaluations = [dt_evaluation, rf_evaluation, knn_evaluation, svm_evaluation]
+ results = pd.concat(all_evaluations)
+
+ # Sort by 'recall_1'
+ results = results.sort_values(by='recall_1', ascending=False).round(2)
+ results
+ # Sort values based on 'recall_1'
+ results.sort_values(by='recall_1', ascending=True, inplace=True)
+ recall_1_scores = results['recall_1']
+ # Plot the horizontal bar chart
+ fig, ax = plt.subplots(figsize=(12, 7), dpi=70)
+ ax.barh(results.index, recall_1_scores, color='red')
+ # Annotate the values and indexes
+ for i, (value, name) in enumerate(zip(recall_1_scores, results.index)):
+ ax.text(value + 0.01, i, f"{value:.2f}", ha='left', va='center', fontweight='bold', color='red', fontsize=15)
+ ax.text(0.1, i, name, ha='left', va='center', fontweight='bold', color='white', fontsize=25)
+ # Remove yticks
+ ax.set_yticks([])
+ # Set x-axis limit
+ ax.set_xlim([0, 1.2])
  import gradio as gr
  import numpy as np
  from sklearn.ensemble import RandomForestClassifier
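
Two helpers that this hunk calls, tune_clf_hyperparameters and the rest of evaluate_model above the shown lines, are defined elsewhere in app.py and are not part of the diff. The sketch below is only a plausible reconstruction for readers following the change, not the file's actual code: the GridSearchCV settings (scoring='recall', cv=5) and the exact metric keys are assumptions, and since the hunk stops before evaluate_model's return, the sketch assumes the function returns the one-row DataFrame that pd.concat(all_evaluations) later stacks.

# Hedged sketch (not from the commit): plausible definitions of the two helpers
# the hunk relies on. The real app.py may differ.
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV

def tune_clf_hyperparameters(clf, param_grid, X_train, y_train, scoring='recall', cv=5):
    # Exhaustive grid search; returns the refit best estimator and its parameters,
    # matching the two-value unpacking used throughout the hunk.
    grid = GridSearchCV(clf, param_grid, scoring=scoring, cv=cv, n_jobs=-1)
    grid.fit(X_train, y_train)
    return grid.best_estimator_, grid.best_params_

def evaluate_model(model, X_test, y_test, model_name):
    # Builds a metrics dict like the one at the top of the hunk and returns it as a
    # one-row DataFrame so pd.concat can stack the DT/RF/KNN/SVM rows
    # (metric keys here assume binary 0/1 labels).
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    metrics = {
        "recall_1": report["1"]["recall"],
        "macro_avg_f1": report["macro avg"]["f1-score"],
        "accuracy": accuracy_score(y_test, y_pred),
    }
    return pd.DataFrame(metrics, index=[model_name]).round(2)

With definitions like these, calls such as tune_clf_hyperparameters(rf_base, param_grid_rf, X_train, y_train) and the later concatenation and recall_1-based ranking in the hunk run as written.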