Browse files
@@ -323,100 +323,86 @@ def evaluate_model(model, X_test, y_test, model_name):
323 |
"macro_avg_f1": report["macro avg"]["f1-score"],
324 |
"accuracy": accuracy_score(y_test, y_pred)
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
rf_base = RandomForestClassifier(random_state=0)
333 |
param_grid_rf = {
334 |
'n_estimators': [10, 30, 50, 70, 100],
335 |
'criterion': ['gini', 'entropy'],
336 |
'max_depth': [2, 3, 4],
337 |
'min_samples_split': [2, 3, 4, 5],
338 |
'min_samples_leaf': [1, 2, 3],
339 |
'bootstrap': [True, False]
340 |
341 |
# Using the tune_clf_hyperparameters function to get the best estimator
342 |
best_rf, best_rf_hyperparams = tune_clf_hyperparameters(rf_base, param_grid_rf, X_train, y_train)
343 |
print('RF Optimal Hyperparameters: \n', best_rf_hyperparams)
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
print(classification_report(y_test, best_knn.predict(X_test)))
372 |
knn_evaluation = evaluate_model(best_knn, X_test, y_test, 'KNN')
373 |
374 |
375 |
svm_pipeline = Pipeline([
376 |
('scaler', StandardScaler()),
377 |
('svm', SVC(probability=True))
378 |
379 |
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 |
408 |
409 |
410 |
411 |
412 |
413 |
# Remove yticks
414 |
415 |
416 |
# Set x-axis limit
417 |
ax.set_xlim([0, 1.2])
418 |
419 |
420 |
import gradio as gr
421 |
import numpy as np
422 |
from sklearn.ensemble import RandomForestClassifier
323 |
"macro_avg_f1": report["macro avg"]["f1-score"],
324 |
"accuracy": accuracy_score(y_test, y_pred)
325 |
326 |
# Convert dictionary to dataframe
327 |
df = pd.DataFrame(metrics, index=[model_name]).round(2)
328 |
dt_evaluation = evaluate_model(best_dt, X_test, y_test, 'DT')
329 |
# --- Random forest: tune, report, and record metrics ------------------------
# Base estimator with a fixed seed so the search is repeatable on the same data.
rf_base = RandomForestClassifier(random_state=0)

# Search space passed to tune_clf_hyperparameters (defined elsewhere in this
# file). max_depth tops out at 4, so only shallow trees are explored.
param_grid_rf = {
    'n_estimators': [10, 30, 50, 70, 100],
    'criterion': ['gini', 'entropy'],
    'max_depth': [2, 3, 4],
    'min_samples_split': [2, 3, 4, 5],
    'min_samples_leaf': [1, 2, 3],
    'bootstrap': [True, False],
}

# Using the tune_clf_hyperparameters function to get the best estimator.
# The call is unpacked into (fitted best estimator, its hyperparameters).
best_rf, best_rf_hyperparams = tune_clf_hyperparameters(rf_base, param_grid_rf, X_train, y_train)
print('RF Optimal Hyperparameters: \n', best_rf_hyperparams)

# Evaluate the optimized model on the train data
print(classification_report(y_train, best_rf.predict(X_train)))

# Evaluate the optimized model on the test data
print(classification_report(y_test, best_rf.predict(X_test)))

# Test-set metrics for the model comparison further down; labelled 'RF'.
rf_evaluation = evaluate_model(best_rf, X_test, y_test, 'RF')

# --- KNN: tune, report, and record metrics -----------------------------------
# Define the base KNN model and set up the pipeline with scaling.
# The scaler is a pipeline step, so it is fitted together with the classifier
# on whatever data the pipeline is fitted on.
knn_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier()),
])

# Hyperparameter grid for KNN. The 'knn__' prefix routes each parameter to the
# pipeline step named 'knn'.
knn_param_grid = {
    'knn__n_neighbors': list(range(1, 12)),
    'knn__weights': ['uniform', 'distance'],
    'knn__p': [1, 2],  # 1: Manhattan distance, 2: Euclidean distance
}

# Hyperparameter tuning for KNN.
# The call is unpacked into (fitted best estimator, its hyperparameters).
best_knn, best_knn_hyperparams = tune_clf_hyperparameters(knn_pipeline, knn_param_grid, X_train, y_train)
print('KNN Optimal Hyperparameters: \n', best_knn_hyperparams)

# Evaluate the optimized model on the train data
print(classification_report(y_train, best_knn.predict(X_train)))

# Evaluate the optimized model on the test data
print(classification_report(y_test, best_knn.predict(X_test)))

# Test-set metrics for the model comparison further down; labelled 'KNN'.
knn_evaluation = evaluate_model(best_knn, X_test, y_test, 'KNN')

# --- SVM: tune, report, and record metrics -----------------------------------
# Scaling + SVC in one pipeline. probability=True is set on the classifier so
# that predict_proba is available on the fitted model.
svm_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC(probability=True)),
])

# Hyperparameter grid for the SVM. The 'svm__' prefix routes each parameter to
# the pipeline step named 'svm'.
# NOTE(review): 0.0011 looks like a typo for 0.001 -- confirm before changing,
# since editing it alters the search grid.
param_grid_svm = {
    'svm__C': [0.0011, 0.005, 0.01, 0.05, 0.1, 1, 10, 20],
    'svm__kernel': ['linear', 'rbf', 'poly'],
    'svm__gamma': ['scale', 'auto', 0.1, 0.5, 1, 5],
    'svm__degree': [2, 3, 4],
}

# Call the function for hyperparameter tuning.
# The call is unpacked into (fitted best estimator, its hyperparameters).
best_svm, best_svm_hyperparams = tune_clf_hyperparameters(svm_pipeline, param_grid_svm, X_train, y_train)
print('SVM Optimal Hyperparameters: \n', best_svm_hyperparams)

# Evaluate the optimized model on the train data
print(classification_report(y_train, best_svm.predict(X_train)))

# Evaluate the optimized model on the test data (same train/test pair of
# reports that the RF and KNN sections print).
print(classification_report(y_test, best_svm.predict(X_test)))

# Test-set metrics for the model comparison further down; labelled 'SVM'.
svm_evaluation = evaluate_model(best_svm, X_test, y_test, 'SVM')

# --- Compare the four tuned models on class-1 recall -------------------------
# Concatenate the dataframes (one evaluate_model result per model).
all_evaluations = [dt_evaluation, rf_evaluation, knn_evaluation, svm_evaluation]
results = pd.concat(all_evaluations).round(2)

# Sort values based on 'recall_1'. A single ascending sort is enough: barh
# draws the first row at the bottom, so the best model ends up as the top bar.
results = results.sort_values(by='recall_1', ascending=True)
recall_1_scores = results['recall_1']

# Plot the horizontal bar chart
fig, ax = plt.subplots(figsize=(12, 7), dpi=70)
ax.barh(results.index, recall_1_scores, color='red')

# Annotate the values and indexes: the score just past the end of each bar,
# and the row label (presumably the model name) in white inside the bar.
for i, (value, name) in enumerate(zip(recall_1_scores, results.index)):
    ax.text(value + 0.01, i, f"{value:.2f}", ha='left', va='center', fontweight='bold', color='red', fontsize=15)
    ax.text(0.1, i, name, ha='left', va='center', fontweight='bold', color='white', fontsize=25)

# Remove yticks (the labels are already drawn inside the bars)
ax.set_yticks([])

# Set x-axis limit; the range extends past 1.0 to leave room for the score text
ax.set_xlim([0, 1.2])
406 |
import gradio as gr
407 |
import numpy as np
408 |
from sklearn.ensemble import RandomForestClassifier