Spaces:

aman5614
/

heart_disease_prediction

Sleeping

App Files Files Community

aman5614 committed on Nov 25, 2024

Commit

b108030

verified ·

1 Parent(s): 24ed534

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -86

app.py CHANGED Viewed

@@ -323,100 +323,86 @@ def evaluate_model(model, X_test, y_test, model_name):
         "macro_avg_f1": report["macro avg"]["f1-score"],
         "accuracy": accuracy_score(y_test, y_pred)
     }
-# Convert dictionary to dataframe
-df = pd.DataFrame(metrics, index=[model_name]).round(2)
-dt_evaluation = evaluate_model(best_dt, X_test, y_test, 'DT')
-dt_evaluation
-rf_base = RandomForestClassifier(random_state=0)
-param_grid_rf = {
     'n_estimators': [10, 30, 50, 70, 100],
     'criterion': ['gini', 'entropy'],
     'max_depth': [2, 3, 4],
     'min_samples_split': [2, 3, 4, 5],
     'min_samples_leaf': [1, 2, 3],
     'bootstrap': [True, False]
-}
-# Using the tune_clf_hyperparameters function to get the best estimator
-best_rf, best_rf_hyperparams = tune_clf_hyperparameters(rf_base, param_grid_rf, X_train, y_train)
-print('RF Optimal Hyperparameters: \n', best_rf_hyperparams)
-# Evaluate the optimized model on the train data
-print(classification_report(y_train, best_rf.predict(X_train)))
-# Evaluate the optimized model on the test data
-print(classification_report(y_test, best_rf.predict(X_test)))
-rf_evaluation = evaluate_model(best_rf, X_test, y_test, 'RF')
-rf_evaluation
-# Define the base KNN model and set up the pipeline with scaling
-knn_pipeline = Pipeline([
-    ('scaler', StandardScaler()),
-    ('knn', KNeighborsClassifier())
-])
-# Hyperparameter grid for KNN
-knn_param_grid = {
-    'knn__n_neighbors': list(range(1, 12)),
-    'knn__weights': ['uniform', 'distance'],
-    'knn__p': [1, 2]  # 1: Manhattan distance, 2: Euclidean distance
-}
-# Hyperparameter tuning for KNN
-best_knn, best_knn_hyperparams = tune_clf_hyperparameters(knn_pipeline, knn_param_grid, X_train, y_train)
-print('KNN Optimal Hyperparameters: \n', best_knn_hyperparams)
-# Evaluate the optimized model on the train data
-print(classification_report(y_train, best_knn.predict(X_train)))
-# Evaluate the optimized model on the test data
-print(classification_report(y_test, best_knn.predict(X_test)))
-knn_evaluation = evaluate_model(best_knn, X_test, y_test, 'KNN')
-knn_evaluation
-svm_pipeline = Pipeline([
     ('scaler', StandardScaler()),
     ('svm', SVC(probability=True))
-])
-param_grid_svm = {
-    'svm__C': [0.0011, 0.005, 0.01, 0.05, 0.1, 1, 10, 20],
-    'svm__kernel': ['linear', 'rbf', 'poly'],
-    'svm__gamma': ['scale', 'auto', 0.1, 0.5, 1, 5],
-    'svm__degree': [2, 3, 4]
-}
-# Call the function for hyperparameter tuning
-best_svm, best_svm_hyperparams = tune_clf_hyperparameters(svm_pipeline, param_grid_svm, X_train, y_train)
-print('SVM Optimal Hyperparameters: \n', best_svm_hyperparams)
-# Evaluate the optimized model on the train data
-print(classification_report(y_train, best_svm.predict(X_train)))
-svm_evaluation = evaluate_model(best_svm, X_test, y_test, 'SVM')
-svm_evaluation
-# Concatenate the dataframes
-all_evaluations = [dt_evaluation, rf_evaluation, knn_evaluation, svm_evaluation]
-results = pd.concat(all_evaluations)
-# Sort by 'recall_1'
-results = results.sort_values(by='recall_1', ascending=False).round(2)
-results
-# Sort values based on 'recall_1'
-results.sort_values(by='recall_1', ascending=True, inplace=True)
-recall_1_scores = results['recall_1']
-# Plot the horizontal bar chart
-fig, ax = plt.subplots(figsize=(12, 7), dpi=70)
-ax.barh(results.index, recall_1_scores, color='red')
-# Annotate the values and indexes
-for i, (value, name) in enumerate(zip(recall_1_scores, results.index)):
-    ax.text(value + 0.01, i, f"{value:.2f}", ha='left', va='center', fontweight='bold', color='red', fontsize=15)
-    ax.text(0.1, i, name, ha='left', va='center', fontweight='bold', color='white', fontsize=25)
-# Remove yticks
-ax.set_yticks([])
-# Set x-axis limit
-ax.set_xlim([0, 1.2])
 import gradio as gr
 import numpy as np
 from sklearn.ensemble import RandomForestClassifier

         "macro_avg_f1": report["macro avg"]["f1-score"],
         "accuracy": accuracy_score(y_test, y_pred)
     }
+    # Convert dictionary to dataframe
+    df = pd.DataFrame(metrics, index=[model_name]).round(2)
+    dt_evaluation = evaluate_model(best_dt, X_test, y_test, 'DT')
+    dt_evaluation
+    rf_base = RandomForestClassifier(random_state=0)
+    param_grid_rf = {
     'n_estimators': [10, 30, 50, 70, 100],
     'criterion': ['gini', 'entropy'],
     'max_depth': [2, 3, 4],
     'min_samples_split': [2, 3, 4, 5],
     'min_samples_leaf': [1, 2, 3],
     'bootstrap': [True, False]
+    }
+    # Using the tune_clf_hyperparameters function to get the best estimator
+    best_rf, best_rf_hyperparams = tune_clf_hyperparameters(rf_base, param_grid_rf, X_train, y_train)
+    print('RF Optimal Hyperparameters: \n', best_rf_hyperparams)
+    # Evaluate the optimized model on the train data
+    print(classification_report(y_train, best_rf.predict(X_train)))
+    # Evaluate the optimized model on the test data
+    print(classification_report(y_test, best_rf.predict(X_test)))
+    rf_evaluation = evaluate_model(best_rf, X_test, y_test, 'RF')
+    rf_evaluation
+    # Define the base KNN model and set up the pipeline with scaling
+    knn_pipeline = Pipeline([
+        ('scaler', StandardScaler()),
+        ('knn', KNeighborsClassifier())
+    ])
+    # Hyperparameter grid for KNN
+    knn_param_grid = {
+        'knn__n_neighbors': list(range(1, 12)),
+        'knn__weights': ['uniform', 'distance'],
+        'knn__p': [1, 2]  # 1: Manhattan distance, 2: Euclidean distance
+    }
+    # Hyperparameter tuning for KNN
+    best_knn, best_knn_hyperparams = tune_clf_hyperparameters(knn_pipeline, knn_param_grid, X_train, y_train)
+    print('KNN Optimal Hyperparameters: \n', best_knn_hyperparams)
+    # Evaluate the optimized model on the train data
+    print(classification_report(y_train, best_knn.predict(X_train)))
+    # Evaluate the optimized model on the test data
+    print(classification_report(y_test, best_knn.predict(X_test)))
+    knn_evaluation = evaluate_model(best_knn, X_test, y_test, 'KNN')
+    knn_evaluation
+    svm_pipeline = Pipeline([
     ('scaler', StandardScaler()),
     ('svm', SVC(probability=True))
+    ])
+    param_grid_svm = {
+        'svm__C': [0.0011, 0.005, 0.01, 0.05, 0.1, 1, 10, 20],
+        'svm__kernel': ['linear', 'rbf', 'poly'],
+        'svm__gamma': ['scale', 'auto', 0.1, 0.5, 1, 5],
+        'svm__degree': [2, 3, 4]
+    }
+    # Call the function for hyperparameter tuning
+    best_svm, best_svm_hyperparams = tune_clf_hyperparameters(svm_pipeline, param_grid_svm, X_train, y_train)
+    print('SVM Optimal Hyperparameters: \n', best_svm_hyperparams)
+    # Evaluate the optimized model on the train data
+    print(classification_report(y_train, best_svm.predict(X_train)))
+    svm_evaluation = evaluate_model(best_svm, X_test, y_test, 'SVM')
+    svm_evaluation
+    # Concatenate the dataframes
+    all_evaluations = [dt_evaluation, rf_evaluation, knn_evaluation, svm_evaluation]
+    results = pd.concat(all_evaluations)
+    # Sort by 'recall_1'
+    results = results.sort_values(by='recall_1', ascending=False).round(2)
+    results
+    # Sort values based on 'recall_1'
+    results.sort_values(by='recall_1', ascending=True, inplace=True)
+    recall_1_scores = results['recall_1']
+    # Plot the horizontal bar chart
+    fig, ax = plt.subplots(figsize=(12, 7), dpi=70)
+    ax.barh(results.index, recall_1_scores, color='red')
+    # Annotate the values and indexes
+    for i, (value, name) in enumerate(zip(recall_1_scores, results.index)):
+        ax.text(value + 0.01, i, f"{value:.2f}", ha='left', va='center', fontweight='bold', color='red', fontsize=15)
+        ax.text(0.1, i, name, ha='left', va='center', fontweight='bold', color='white', fontsize=25)
+        # Remove yticks
+        ax.set_yticks([])
+        # Set x-axis limit
+        ax.set_xlim([0, 1.2])
 import gradio as gr
 import numpy as np
 from sklearn.ensemble import RandomForestClassifier