Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -323,100 +323,86 @@ def evaluate_model(model, X_test, y_test, model_name):
|
|
323 |
"macro_avg_f1": report["macro avg"]["f1-score"],
|
324 |
"accuracy": accuracy_score(y_test, y_pred)
|
325 |
}
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
rf_base = RandomForestClassifier(random_state=0)
|
333 |
-
param_grid_rf = {
|
334 |
'n_estimators': [10, 30, 50, 70, 100],
|
335 |
'criterion': ['gini', 'entropy'],
|
336 |
'max_depth': [2, 3, 4],
|
337 |
'min_samples_split': [2, 3, 4, 5],
|
338 |
'min_samples_leaf': [1, 2, 3],
|
339 |
'bootstrap': [True, False]
|
340 |
-
}
|
341 |
-
# Using the tune_clf_hyperparameters function to get the best estimator
|
342 |
-
best_rf, best_rf_hyperparams = tune_clf_hyperparameters(rf_base, param_grid_rf, X_train, y_train)
|
343 |
-
print('RF Optimal Hyperparameters: \n', best_rf_hyperparams)
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
]
|
359 |
-
#
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
'
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
print(
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
print(classification_report(y_test, best_knn.predict(X_test)))
|
372 |
-
knn_evaluation = evaluate_model(best_knn, X_test, y_test, 'KNN')
|
373 |
-
knn_evaluation
|
374 |
-
|
375 |
-
svm_pipeline = Pipeline([
|
376 |
('scaler', StandardScaler()),
|
377 |
('svm', SVC(probability=True))
|
378 |
-
])
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
svm_evaluation
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
results
|
399 |
-
|
400 |
-
|
401 |
-
results
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
# Remove yticks
|
414 |
-
ax.set_yticks([])
|
415 |
-
|
416 |
-
# Set x-axis limit
|
417 |
-
ax.set_xlim([0, 1.2])
|
418 |
-
|
419 |
-
|
420 |
import gradio as gr
|
421 |
import numpy as np
|
422 |
from sklearn.ensemble import RandomForestClassifier
|
|
|
323 |
"macro_avg_f1": report["macro avg"]["f1-score"],
|
324 |
"accuracy": accuracy_score(y_test, y_pred)
|
325 |
}
|
326 |
+
# Convert dictionary to dataframe
|
327 |
+
df = pd.DataFrame(metrics, index=[model_name]).round(2)
|
328 |
+
dt_evaluation = evaluate_model(best_dt, X_test, y_test, 'DT')
|
329 |
+
dt_evaluation
|
330 |
+
rf_base = RandomForestClassifier(random_state=0)
|
331 |
+
param_grid_rf = {
|
|
|
|
|
332 |
'n_estimators': [10, 30, 50, 70, 100],
|
333 |
'criterion': ['gini', 'entropy'],
|
334 |
'max_depth': [2, 3, 4],
|
335 |
'min_samples_split': [2, 3, 4, 5],
|
336 |
'min_samples_leaf': [1, 2, 3],
|
337 |
'bootstrap': [True, False]
|
338 |
+
}
|
339 |
+
# Using the tune_clf_hyperparameters function to get the best estimator
|
340 |
+
best_rf, best_rf_hyperparams = tune_clf_hyperparameters(rf_base, param_grid_rf, X_train, y_train)
|
341 |
+
print('RF Optimal Hyperparameters: \n', best_rf_hyperparams)
|
342 |
+
# Evaluate the optimized model on the train data
|
343 |
+
print(classification_report(y_train, best_rf.predict(X_train)))
|
344 |
+
# Evaluate the optimized model on the test data
|
345 |
+
print(classification_report(y_test, best_rf.predict(X_test)))
|
346 |
+
rf_evaluation = evaluate_model(best_rf, X_test, y_test, 'RF')
|
347 |
+
rf_evaluation
|
348 |
+
# Define the base KNN model and set up the pipeline with scaling
|
349 |
+
knn_pipeline = Pipeline([
|
350 |
+
('scaler', StandardScaler()),
|
351 |
+
('knn', KNeighborsClassifier())
|
352 |
+
])
|
353 |
+
# Hyperparameter grid for KNN
|
354 |
+
knn_param_grid = {
|
355 |
+
'knn__n_neighbors': list(range(1, 12)),
|
356 |
+
'knn__weights': ['uniform', 'distance'],
|
357 |
+
'knn__p': [1, 2] # 1: Manhattan distance, 2: Euclidean distance
|
358 |
+
}
|
359 |
+
# Hyperparameter tuning for KNN
|
360 |
+
best_knn, best_knn_hyperparams = tune_clf_hyperparameters(knn_pipeline, knn_param_grid, X_train, y_train)
|
361 |
+
print('KNN Optimal Hyperparameters: \n', best_knn_hyperparams)
|
362 |
+
# Evaluate the optimized model on the train data
|
363 |
+
print(classification_report(y_train, best_knn.predict(X_train)))
|
364 |
+
# Evaluate the optimized model on the test data
|
365 |
+
print(classification_report(y_test, best_knn.predict(X_test)))
|
366 |
+
knn_evaluation = evaluate_model(best_knn, X_test, y_test, 'KNN')
|
367 |
+
knn_evaluation
|
368 |
+
svm_pipeline = Pipeline([
|
|
|
|
|
|
|
|
|
|
|
369 |
('scaler', StandardScaler()),
|
370 |
('svm', SVC(probability=True))
|
371 |
+
])
|
372 |
+
param_grid_svm = {
|
373 |
+
'svm__C': [0.0011, 0.005, 0.01, 0.05, 0.1, 1, 10, 20],
|
374 |
+
'svm__kernel': ['linear', 'rbf', 'poly'],
|
375 |
+
'svm__gamma': ['scale', 'auto', 0.1, 0.5, 1, 5],
|
376 |
+
'svm__degree': [2, 3, 4]
|
377 |
+
}
|
378 |
+
# Call the function for hyperparameter tuning
|
379 |
+
best_svm, best_svm_hyperparams = tune_clf_hyperparameters(svm_pipeline, param_grid_svm, X_train, y_train)
|
380 |
+
print('SVM Optimal Hyperparameters: \n', best_svm_hyperparams)
|
381 |
+
# Evaluate the optimized model on the train data
|
382 |
+
print(classification_report(y_train, best_svm.predict(X_train)))
|
383 |
+
svm_evaluation = evaluate_model(best_svm, X_test, y_test, 'SVM')
|
384 |
+
svm_evaluation
|
385 |
+
# Concatenate the dataframes
|
386 |
+
all_evaluations = [dt_evaluation, rf_evaluation, knn_evaluation, svm_evaluation]
|
387 |
+
results = pd.concat(all_evaluations)
|
388 |
+
|
389 |
+
# Sort by 'recall_1'
|
390 |
+
results = results.sort_values(by='recall_1', ascending=False).round(2)
|
391 |
+
results
|
392 |
+
# Sort values based on 'recall_1'
|
393 |
+
results.sort_values(by='recall_1', ascending=True, inplace=True)
|
394 |
+
recall_1_scores = results['recall_1']
|
395 |
+
# Plot the horizontal bar chart
|
396 |
+
fig, ax = plt.subplots(figsize=(12, 7), dpi=70)
|
397 |
+
ax.barh(results.index, recall_1_scores, color='red')
|
398 |
+
# Annotate the values and indexes
|
399 |
+
for i, (value, name) in enumerate(zip(recall_1_scores, results.index)):
|
400 |
+
ax.text(value + 0.01, i, f"{value:.2f}", ha='left', va='center', fontweight='bold', color='red', fontsize=15)
|
401 |
+
ax.text(0.1, i, name, ha='left', va='center', fontweight='bold', color='white', fontsize=25)
|
402 |
+
# Remove yticks
|
403 |
+
ax.set_yticks([])
|
404 |
+
# Set x-axis limit
|
405 |
+
ax.set_xlim([0, 1.2])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
406 |
import gradio as gr
|
407 |
import numpy as np
|
408 |
from sklearn.ensemble import RandomForestClassifier
|