ckb committed on
Commit 65fa5ba · 1 Parent(s): b3f9eae

test upload

Files changed (1)
  1. unigram.py +32 -21
unigram.py CHANGED
@@ -15,7 +15,8 @@
 
 import evaluate
 import datasets
-
+from collections import Counter
+import numpy as np
 
 # TODO: Add BibTeX citation
 _CITATION = """\
@@ -28,7 +29,7 @@ year={2020}
 
 # TODO: Add description of the module here
 _DESCRIPTION = """\
-This new module is designed to solve this great ML task and is crafted with a lot of care.
+This module calculates the unigram precision, recall, and f1 score.
 """
 
 
@@ -36,25 +37,20 @@ This new module is designed to solve this great ML task and is crafted with a lot of care.
 _KWARGS_DESCRIPTION = """
 Calculates how good are predictions given some references, using certain scores
 Args:
-    predictions: list of predictions to score. Each predictions
-        should be a string with tokens separated by spaces.
-    references: list of reference for each prediction. Each
-        reference should be a string with tokens separated by spaces.
+    predictions: list of lists of ints (tokens)
+    references: list of lists of ints (tokens)
 Returns:
-    accuracy: description of the first score,
-    another_score: description of the second score,
+    f1: the unigram f1 score.
+    precision: the unigram precision.
+    recall: the unigram recall.
 Examples:
-    Examples should be written in doctest format, and should illustrate how
-    to use the function.
 
-    >>> my_new_module = evaluate.load("my_new_module")
-    >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
+    >>> my_new_module = evaluate.load("ckb/unigram")
+    >>> results = my_new_module.compute(references=[[0, 1]], predictions=[[0, 1]])
     >>> print(results)
-    {'accuracy': 1.0}
+    {'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
 """
 
-# TODO: Define external resources urls if needed
-BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
 
 
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
@@ -81,15 +77,30 @@ class unigram(evaluate.Metric):
             reference_urls=["http://path.to.reference.url/new_module"]
         )
 
-    def _download_and_prepare(self, dl_manager):
-        """Optional: download external resources useful to compute the scores"""
-        # TODO: Download external resources if needed
-        pass
+
+    def _prec_recall_f1_score(self, pred_items, gold_items):
+        """
+        Compute precision, recall and f1 given a set of gold and prediction items.
+        :param pred_items: iterable of predicted values
+        :param gold_items: iterable of gold values
+        :return: array (p, r, f1) of precision, recall, f1
+        """
+        common = Counter(gold_items) & Counter(pred_items)
+        num_same = sum(common.values())
+        if num_same == 0:
+            return np.zeros(3)
+        precision = 1.0 * num_same / len(pred_items)
+        recall = 1.0 * num_same / len(gold_items)
+        f1 = (2 * precision * recall) / (precision + recall)
+        return np.array([precision, recall, f1])
 
     def _compute(self, predictions, references):
        """Returns the scores"""
        # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
+        score = sum(self._prec_recall_f1_score(i, j) for i, j in zip(predictions, references)) / len(predictions)
        return {
-            "accuracy": accuracy,
-        }
+            "precision": score[0],
+            "recall": score[1],
+            "f1": score[2],
+        }
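
For reference, below is a minimal standalone sketch of the unigram overlap computation that the new _prec_recall_f1_score / _compute pair performs: per-example precision, recall and f1 over the multiset of token ids, averaged across the corpus. The helper name unigram_prf and the example token ids are illustrative only, not part of the committed module.

from collections import Counter
import numpy as np

def unigram_prf(pred_tokens, gold_tokens):
    """Unigram (bag-of-tokens) precision, recall and f1 for one prediction/reference pair."""
    common = Counter(gold_tokens) & Counter(pred_tokens)  # multiset intersection
    num_same = sum(common.values())
    if num_same == 0:
        return np.zeros(3)
    precision = num_same / len(pred_tokens)
    recall = num_same / len(gold_tokens)
    f1 = 2 * precision * recall / (precision + recall)
    return np.array([precision, recall, f1])

# Corpus-level scores are the per-example scores averaged over the dataset,
# mirroring the sum(...) / len(predictions) in _compute above.
predictions = [[0, 1, 2], [3, 4]]  # hypothetical token ids
references = [[0, 1, 9], [3, 4]]
avg = sum(unigram_prf(p, r) for p, r in zip(predictions, references)) / len(predictions)
print({"precision": avg[0], "recall": avg[1], "f1": avg[2]})
# first pair overlaps on 2 of 3 tokens (p = r = f1 = 2/3), second is an exact match (1.0),
# so each averaged score is about 0.833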