test upload
unigram.py +32 -21
unigram.py CHANGED
@@ -15,7 +15,8 @@
 
 import evaluate
 import datasets
-
+from collections import Counter
+import numpy as np
 
 # TODO: Add BibTeX citation
 _CITATION = """\
@@ -28,7 +29,7 @@ year={2020}
 
 # TODO: Add description of the module here
 _DESCRIPTION = """\
-This new module is designed to solve this great ML task and is crafted with a lot of care.
+This module calculates the unigram precision, recall, and f1 score.
 """
 
 
@@ -36,25 +37,20 @@ This new module is designed to solve this great ML task and is crafted with a lo
 _KWARGS_DESCRIPTION = """
 Calculates how good are predictions given some references, using certain scores
 Args:
-    predictions: list of predictions to score. Each predictions
-        should be a string with tokens separated by spaces.
-    references: list of reference for each prediction. Each
-        reference should be a string with tokens separated by spaces.
+    predictions: list of list of int (tokens)
+    references: list of list of int (tokens)
 Returns:
-    accuracy: description of the first score,
-    another_score: description of the second score,
+    f1: the unigram f1 score.
+    precision: the unigram precision.
+    recall: the unigram recall.
 Examples:
-    Examples should be written in doctest format, and should illustrate how
-    to use the function.
 
-    >>> my_new_module = evaluate.load("my_new_module")
-    >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
+    >>> my_new_module = evaluate.load("ckb/unigram")
+    >>> results = my_new_module.compute(references=[[0, 1]], predictions=[[0, 1]])
     >>> print(results)
     {'accuracy': 1.0}
 """
 
-# TODO: Define external resources urls if needed
-BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
 
 
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
@@ -81,15 +77,30 @@ class unigram(evaluate.Metric):
             reference_urls=["http://path.to.reference.url/new_module"]
         )
 
-    def _download_and_prepare(self, dl_manager):
-        """Optional: download external resources useful to compute the scores"""
-        # TODO: Download external resources if needed
-        pass
+
+    def _prec_recall_f1_score(self, pred_items, gold_items):
+        """
+        Compute precision, recall and f1 given a set of gold and prediction items.
+        :param pred_items: iterable of predicted values
+        :param gold_items: iterable of gold values
+        :return: np.array([precision, recall, f1])
+        """
+        common = Counter(gold_items) & Counter(pred_items)
+        num_same = sum(common.values())
+        if num_same == 0:
+            return np.array([0.0, 0.0, 0.0])
+        precision = 1.0 * num_same / len(pred_items)
+        recall = 1.0 * num_same / len(gold_items)
+        f1 = (2 * precision * recall) / (precision + recall)
+        return np.array([precision, recall, f1])
 
     def _compute(self, predictions, references):
         """Returns the scores"""
         # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
+        score = sum(self._prec_recall_f1_score(i, j) for i, j in zip(predictions, references)) / len(predictions)
         return {
-            "accuracy": accuracy,
-        }
+            "precision": score[0],
+            "recall": score[1],
+            "f1": score[2],
+
+        }
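
The core of the new code is the Counter-based multiset intersection: Counter(gold_items) & Counter(pred_items) keeps each token with the minimum of its two counts, so num_same counts overlapping unigrams with multiplicity. A minimal standalone sketch of that logic, using illustrative names that are not part of the module:

from collections import Counter

import numpy as np

def prec_recall_f1_score(pred_items, gold_items):
    # Multiset intersection: each token keeps min(count in gold, count in pred).
    common = Counter(gold_items) & Counter(pred_items)
    num_same = sum(common.values())  # overlapping unigrams, with multiplicity
    if num_same == 0:
        return np.array([0.0, 0.0, 0.0])
    precision = num_same / len(pred_items)  # overlap / predicted length
    recall = num_same / len(gold_items)     # overlap / reference length
    f1 = 2 * precision * recall / (precision + recall)
    return np.array([precision, recall, f1])

# Prediction [0, 1, 1] vs reference [0, 1, 2]: the intersection is {0: 1, 1: 1},
# so num_same = 2 and precision = recall = f1 = 2/3.
print(prec_recall_f1_score([0, 1, 1], [0, 1, 2]))  # [0.66666667 0.66666667 0.66666667]

Returning a numpy array rather than a plain tuple is what lets _compute average the per-example scores with sum(...) / len(predictions) and then index the mean as score[0], score[1], score[2].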
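
For end-to-end use, a hedged sketch, assuming the module is published on the Hub under the ckb/unigram id that the updated docstring references (a local path to unigram.py would load the same way):

import evaluate

# Assumption: "ckb/unigram" resolves on the Hugging Face Hub; the id comes
# from the docstring in the diff above and may not exist yet.
unigram = evaluate.load("ckb/unigram")

# One prediction/reference pair of token ids; two of the three unigrams overlap.
results = unigram.compute(predictions=[[0, 1, 1]], references=[[0, 1, 2]])
print(results)  # approximately {'precision': 0.667, 'recall': 0.667, 'f1': 0.667}

Note that compute() returns precision, recall, and f1 keys, so for the identical pair in the docstring example the output would be 1.0 for all three scores.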