MariaK committed on
Commit
e04bd55
·
1 Parent(s): 88e18ec

Updated with evaluation for ASR hands-on

Browse files
Files changed (1) hide show
  1. app.py +93 -53
app.py CHANGED
@@ -17,26 +17,66 @@ def pass_emoji(passed):
17
  api = HfApi()
18
 
19
 
20
- def get_user_audio_classification_models(hf_username):
21
  """
22
- List the user's Audio Classification models
23
  :param hf_username: User HF username
24
  """
25
-
26
- models = api.list_models(author=hf_username, filter=["audio-classification"])
27
  user_model_ids = [x.modelId for x in models]
28
- models_gtzan = []
29
 
 
 
 
 
 
 
 
 
 
 
30
  for model in user_model_ids:
31
  meta = get_metadata(model)
32
  if meta is None:
33
  continue
34
  try:
35
- if meta["datasets"] == ['marsyas/gtzan']:
36
- models_gtzan.append(model)
37
  except: continue
38
- return models_gtzan
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  def get_metadata(model_id):
42
  """
@@ -51,48 +91,34 @@ def get_metadata(model_id):
51
  return None
52
 
53
 
54
- def extract_accuracy(model_card_content):
55
  """
56
- Extract the accuracy value from the models' model card
57
  :param model_card_content: model card content
58
  """
59
  accuracy_pattern = r"Accuracy: (\d+\.\d+)"
60
- match = re.search(accuracy_pattern, model_card_content)
 
 
 
 
 
 
 
61
  if match:
62
- accuracy = match.group(1)
63
- return float(accuracy)
64
  else:
65
- return None
66
 
67
 
68
- def parse_metrics_accuracy(model_id):
69
  """
70
  Get model card and parse it
71
  :param model_id: model id
72
  """
73
- card = ModelCard.load(model_id)
74
- return extract_accuracy(card.content)
75
-
76
-
77
- def calculate_best_acc_result(user_model_ids):
78
- """
79
- Calculate the best results of a unit
80
- :param user_model_ids: RL models of a user
81
- """
82
-
83
- best_result = -100
84
- best_model = ""
85
-
86
- for model in user_model_ids:
87
- meta = get_metadata(model)
88
- if meta is None:
89
- continue
90
- accuracy = parse_metrics_accuracy(model)
91
- if accuracy > best_result:
92
- best_result = accuracy
93
- best_model = meta['model-index'][0]["name"]
94
-
95
- return best_result, best_model
96
 
97
 
98
  def certification(hf_username):
@@ -106,9 +132,9 @@ def certification(hf_username):
106
  "passed_": False
107
  },
108
  {
109
- "unit": "Unit 5: TBD",
110
- "task": "TBD",
111
- "baseline_metric": 0.99,
112
  "best_result": 0,
113
  "best_model_id": "",
114
  "passed_": False
@@ -133,17 +159,32 @@ def certification(hf_username):
133
 
134
  for unit in results_certification:
135
  unit["passed"] = pass_emoji(unit["passed_"])
136
- if unit["task"] == "audio-classification":
137
- user_models = get_user_audio_classification_models(hf_username)
138
- best_result, best_model_id = calculate_best_acc_result(user_models)
139
- unit["best_result"] = best_result
140
- unit["best_model_id"] = best_model_id
141
- if unit["best_result"] >= unit["baseline_metric"]:
142
- unit["passed_"] = True
143
- unit["passed"] = pass_emoji(unit["passed_"])
144
- else:
145
- # TBD for other units
146
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  print(results_certification)
149
 
@@ -151,7 +192,6 @@ def certification(hf_username):
151
  df = df[['passed', 'unit', 'task', 'baseline_metric', 'best_result', 'best_model_id']]
152
  return df
153
 
154
-
155
  with gr.Blocks() as demo:
156
  gr.Markdown(f"""
157
  # πŸ† Check your progress in the Audio Course πŸ†
 
17
  api = HfApi()
18
 
19
 
20
def get_user_models(hf_username, task):
    """
    List the user's models for a given task whose card metadata names the
    dataset expected for that task
    :param hf_username: User HF username
    :param task: Hub task tag, "audio-classification" or
        "automatic-speech-recognition"
    :return: list of model ids whose metadata "datasets" entry is exactly
        the expected dataset; empty list for an unsupported task
    """
    # Dataset that a model's metadata must list for each supported task.
    task_datasets = {
        "audio-classification": 'marsyas/gtzan',
        "automatic-speech-recognition": 'PolyAI/minds14',
    }
    dataset = task_datasets.get(task)
    if dataset is None:
        # Without this guard `dataset` would be unbound below and the
        # resulting NameError would be silently swallowed for every model.
        print("Unsupported task")
        return []

    models = api.list_models(author=hf_username, filter=[task])
    user_model_ids = [x.modelId for x in models]

    dataset_specific_models = []
    for model in user_model_ids:
        meta = get_metadata(model)
        if meta is None:
            continue
        try:
            datasets = meta["datasets"]
        except (KeyError, TypeError):
            # Metadata without a usable "datasets" entry: skip the model.
            continue
        if datasets == [dataset]:
            dataset_specific_models.append(model)
    return dataset_specific_models
47
 
48
def calculate_best_result(user_models, task):
    """
    Calculate the best results of a unit for a given task
    :param user_models: ids of the user's models to compare
    :param task: "audio-classification" (larger metric is better) or
        "automatic-speech-recognition" (smaller metric is better)
    :return: tuple (best_result, best_model); when no model yields a metric
        the starting sentinel (-100 or 100) and an empty name are returned
    :raises ValueError: if the task is not one of the supported tasks
    """
    if task == "audio-classification":
        best_result, larger_is_better = -100, True
    elif task == "automatic-speech-recognition":
        best_result, larger_is_better = 100, False
    else:
        # Fail with a clear message instead of an UnboundLocalError later on.
        raise ValueError(f"Unsupported task: {task!r}")

    best_model = ""

    for model in user_models:
        meta = get_metadata(model)
        if meta is None:
            continue
        metric = parse_metrics(model, task)
        if metric is None:
            # No metric found in this model card; comparing None with a
            # number would raise and discard the results of every other model.
            continue

        if larger_is_better:
            is_better = metric > best_result
        else:
            is_better = metric < best_result

        if is_better:
            best_result = metric
            best_model = meta['model-index'][0]["name"]

    return best_result, best_model
79
+
80
 
81
  def get_metadata(model_id):
82
  """
 
91
  return None
92
 
93
 
94
def extract_metric(model_card_content, task):
    """
    Extract the metric value from the models' model card
    :param model_card_content: model card content
    :param task: "audio-classification" (reads "Accuracy: <float>") or
        "automatic-speech-recognition" (reads "Wer: <float>")
    :return: the first matching value as a float, or None when the task is
        unsupported or the card contains no such metric
    """
    # One pattern per supported task; each captures a decimal number.
    patterns = {
        "audio-classification": r"Accuracy: (\d+\.\d+)",
        "automatic-speech-recognition": r"Wer: (\d+\.\d+)",
    }
    pattern = patterns.get(task)
    if pattern is None:
        # Unknown task: there is nothing to look for, so report "no metric"
        # rather than failing on an unbound pattern.
        return None

    found = re.search(pattern, model_card_content)
    if found is None:
        return None
    return float(found.group(1))
113
 
114
 
115
def parse_metrics(model, task):
    """
    Load a model's card and pull the task metric out of its text
    :param model: model id passed to ModelCard.load
    :param task: task name forwarded to extract_metric
    :return: whatever extract_metric returns for the card content
    """
    content = ModelCard.load(model).content
    return extract_metric(content, task)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
 
124
  def certification(hf_username):
 
132
  "passed_": False
133
  },
134
  {
135
+ "unit": "Unit 5: Automatic Speech Recognition",
136
+ "task": "automatic-speech-recognition",
137
+ "baseline_metric": 0.37,
138
  "best_result": 0,
139
  "best_model_id": "",
140
  "passed_": False
 
159
 
160
  for unit in results_certification:
161
  unit["passed"] = pass_emoji(unit["passed_"])
162
+
163
+ match unit["task"]:
164
+ case "audio-classification":
165
+ try:
166
+ user_ac_models = get_user_models(hf_username, task = "audio-classification")
167
+ best_result, best_model_id = calculate_best_result(user_ac_models, task = "audio-classification")
168
+ unit["best_result"] = best_result
169
+ unit["best_model_id"] = best_model_id
170
+ if unit["best_result"] >= unit["baseline_metric"]:
171
+ unit["passed_"] = True
172
+ unit["passed"] = pass_emoji(unit["passed_"])
173
+ except: print("Either no relevant models found, or no metrics in the model card for audio classificaiton")
174
+ case "automatic-speech-recognition":
175
+ try:
176
+ user_asr_models = get_user_models(hf_username, task = "automatic-speech-recognition")
177
+ best_result, best_model_id = calculate_best_result(user_asr_models, task = "automatic-speech-recognition")
178
+ unit["best_result"] = best_result
179
+ unit["best_model_id"] = best_model_id
180
+ if unit["best_result"] <= unit["baseline_metric"]:
181
+ unit["passed_"] = True
182
+ unit["passed"] = pass_emoji(unit["passed_"])
183
+ except: print("Either no relevant models found, or no metrics in the model card for automatic speech recognition")
184
+ case "TBD":
185
+ print("Evaluation for this unit is work in progress")
186
+ case _:
187
+ print("Unknown task")
188
 
189
  print(results_certification)
190
 
 
192
  df = df[['passed', 'unit', 'task', 'baseline_metric', 'best_result', 'best_model_id']]
193
  return df
194
 
 
195
  with gr.Blocks() as demo:
196
  gr.Markdown(f"""
197
  # πŸ† Check your progress in the Audio Course πŸ†