HalteroXHunter committed · Commit 1a07572 · Parent(s): e368a57
include new metrics

Files changed: classification_evaluator.py (+34 -15)
classification_evaluator.py
CHANGED
@@ -1,6 +1,7 @@
 import evaluate
 from datasets import Features, Value
-from sklearn.metrics import accuracy_score
+from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
+
 
 _CITATION = """
 @article{scikit-learn,
@@ -17,13 +18,11 @@ _CITATION = """
 """
 
 _DESCRIPTION = """
-
-Accuracy
-
-
-
-FP: False positive
-FN: False negative
+This evaluator computes multiple classification metrics to assess the performance of a model. Metrics calculated include:
+- Accuracy: The proportion of correct predictions among the total number of cases processed. Computed as (TP + TN) / (TP + TN + FP + FN), where TP, TN, FP, and FN denote true positives, true negatives, false positives, and false negatives respectively.
+- Precision, Recall, and F1-Score: Evaluated for each class individually as well as macro (average across classes) and micro (aggregate contributions of all classes) averages.
+- Confusion Matrix: A matrix representing the classification accuracy for each class combination.
+
 """
 
 _KWARGS_DESCRIPTION = """
@@ -32,8 +31,12 @@ Args:
     references (`list` of `str`): Ground truth labels.
 
 Returns:
-
-
+Returns:
+    Dict containing:
+        accuracy (float): Proportion of correct predictions. Value ranges between 0 (worst) and 1 (best).
+        precision_macro (float), recall_macro (float), f1_macro (float): Macro averages of precision, recall, and F1-score respectively.
+        precision_micro (float), recall_micro (float), f1_micro (float): Micro averages of precision, recall, and F1-score respectively.
+        confusion_matrix (list of lists): 2D list representing the confusion matrix of the classification results.
 """
 
 
@@ -50,10 +53,26 @@ class ClassificationEvaluator(evaluate.Metric):
 
     def _compute(self, predictions, references):
 
+        accuracy = accuracy_score(references, predictions, normalize=True, sample_weight=None)
+
+        # Calculate macro and micro averages for precision, recall, and F1-score
+        precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
+            references, predictions, average='macro'
+        )
+        precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(
+            references, predictions, average='micro'
+        )
+
+        # Calculate the confusion matrix
+        conf_matrix = confusion_matrix(references, predictions)
+
         return {
-            "accuracy":
-
-
-
-            )
+            "accuracy": accuracy,
+            "precision_macro": float(precision_macro),
+            "recall_macro": float(recall_macro),
+            "f1_macro": float(f1_macro),
+            "precision_micro": float(precision_micro),
+            "recall_micro": float(recall_micro),
+            "f1_micro": float(f1_micro),
+            "confusion_matrix": conf_matrix.tolist()
        }
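
For a quick sanity check of what the new _compute body returns, here is a minimal, self-contained sketch that runs the same scikit-learn calls this commit introduces on made-up labels; the toy data and variable names below are illustrative and not part of the commit.

    from sklearn.metrics import (
        accuracy_score,
        confusion_matrix,
        precision_recall_fscore_support,
    )

    # Toy labels, invented purely for illustration.
    references = ["cat", "cat", "dog", "dog", "bird", "bird"]
    predictions = ["cat", "dog", "dog", "dog", "bird", "cat"]

    accuracy = accuracy_score(references, predictions)  # 4 correct out of 6

    # Macro averages treat every class equally; micro averages pool all
    # individual decisions across classes.
    p_mac, r_mac, f1_mac, _ = precision_recall_fscore_support(
        references, predictions, average="macro"
    )
    p_mic, r_mic, f1_mic, _ = precision_recall_fscore_support(
        references, predictions, average="micro"
    )

    conf_matrix = confusion_matrix(references, predictions)

    print({
        "accuracy": accuracy,
        "precision_macro": float(p_mac),
        "recall_macro": float(r_mac),
        "f1_macro": float(f1_mac),
        "precision_micro": float(p_mic),
        "recall_micro": float(r_mic),
        "f1_micro": float(f1_mic),
        "confusion_matrix": conf_matrix.tolist(),
    })

Because each sample carries exactly one label, the micro-averaged precision, recall, and F1 all coincide with accuracy, while the macro averages weight every class equally regardless of how many samples it has.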
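
Separately, and not shown anywhere on this page, a hedged sketch of how the updated evaluator might be invoked through the evaluate library; the Hub id below is a guess based on the file name and should be replaced with the Space's real path.

    import evaluate

    # Hypothetical Hub id, inferred from the file name; substitute the
    # actual path of this Space before running.
    clf = evaluate.load("HalteroXHunter/classification_evaluator")

    results = clf.compute(
        predictions=["positive", "negative", "positive"],
        references=["positive", "positive", "negative"],
    )

    # After this commit the result dict carries macro/micro precision, recall,
    # and F1 plus a confusion matrix alongside the original accuracy value.
    print(results["accuracy"], results["f1_macro"], results["confusion_matrix"])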
|