MariaK committed on
Commit
b3439f4
·
1 Parent(s): e04bd55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -32
app.py CHANGED
@@ -30,41 +30,48 @@ def get_user_models(hf_username, task):
30
  dataset = 'marsyas/gtzan'
31
  case "automatic-speech-recognition":
32
  dataset = 'PolyAI/minds14'
 
 
33
  case _:
34
  print("Unsupported task")
35
 
36
  dataset_specific_models = []
37
-
38
- for model in user_model_ids:
39
- meta = get_metadata(model)
40
- if meta is None:
 
 
 
 
 
 
 
 
41
  continue
42
- try:
43
- if meta["datasets"] == [dataset]:
44
- dataset_specific_models.append(model)
45
- except: continue
46
- return dataset_specific_models
47
 
48
  def calculate_best_result(user_models, task):
49
  """
50
  Calculate the best results of a unit for a given task
51
  :param user_model_ids: models of a user
52
  """
53
-
54
  best_model = ""
55
-
56
  if task == "audio-classification":
57
  best_result = -100
58
  larger_is_better = True
59
  elif task == "automatic-speech-recognition":
60
  best_result = 100
61
- larger_is_better = False
62
 
63
  for model in user_models:
64
  meta = get_metadata(model)
65
  if meta is None:
66
- continue
67
- metric = parse_metrics(model, task)
68
 
69
  if larger_is_better:
70
  if metric > best_result:
@@ -76,7 +83,7 @@ def calculate_best_result(user_models, task):
76
  best_model = meta['model-index'][0]["name"]
77
 
78
  return best_result, best_model
79
-
80
 
81
  def get_metadata(model_id):
82
  """
@@ -97,19 +104,19 @@ def extract_metric(model_card_content, task):
97
  :param model_card_content: model card content
98
  """
99
  accuracy_pattern = r"Accuracy: (\d+\.\d+)"
100
- wer_pattern = r"Wer: (\d+\.\d+)"
101
-
102
  if task == "audio-classification":
103
  pattern = accuracy_pattern
104
  elif task == "automatic-speech-recognition":
105
  pattern = wer_pattern
106
-
107
  match = re.search(pattern, model_card_content)
108
  if match:
109
  metric = match.group(1)
110
  return float(metric)
111
  else:
112
- return None
113
 
114
 
115
  def parse_metrics(model, task):
@@ -133,16 +140,16 @@ def certification(hf_username):
133
  },
134
  {
135
  "unit": "Unit 5: Automatic Speech Recognition",
136
- "task": "automatic-speech-recognition",
137
  "baseline_metric": 0.37,
138
  "best_result": 0,
139
  "best_model_id": "",
140
  "passed_": False
141
  },
142
  {
143
- "unit": "Unit 6: TBD",
144
- "task": "TBD",
145
- "baseline_metric": 0.99,
146
  "best_result": 0,
147
  "best_model_id": "",
148
  "passed_": False
@@ -155,7 +162,7 @@ def certification(hf_username):
155
  "best_model_id": "",
156
  "passed_": False
157
  },
158
- ]
159
 
160
  for unit in results_certification:
161
  unit["passed"] = pass_emoji(unit["passed_"])
@@ -167,31 +174,39 @@ def certification(hf_username):
167
  best_result, best_model_id = calculate_best_result(user_ac_models, task = "audio-classification")
168
  unit["best_result"] = best_result
169
  unit["best_model_id"] = best_model_id
170
- if unit["best_result"] >= unit["baseline_metric"]:
171
  unit["passed_"] = True
172
  unit["passed"] = pass_emoji(unit["passed_"])
173
- except: print("Either no relevant models found, or no metrics in the model card for audio classificaiton")
174
  case "automatic-speech-recognition":
175
  try:
176
  user_asr_models = get_user_models(hf_username, task = "automatic-speech-recognition")
177
  best_result, best_model_id = calculate_best_result(user_asr_models, task = "automatic-speech-recognition")
178
  unit["best_result"] = best_result
179
  unit["best_model_id"] = best_model_id
180
- if unit["best_result"] <= unit["baseline_metric"]:
 
 
 
 
 
 
 
 
 
181
  unit["passed_"] = True
182
  unit["passed"] = pass_emoji(unit["passed_"])
183
- except: print("Either no relevant models found, or no metrics in the model card for automatic speech recognition")
184
- case "TBD":
185
  print("Evaluation for this unit is work in progress")
186
  case _:
187
  print("Unknown task")
188
-
189
  print(results_certification)
190
-
191
  df = pd.DataFrame(results_certification)
192
  df = df[['passed', 'unit', 'task', 'baseline_metric', 'best_result', 'best_model_id']]
193
  return df
194
-
195
  with gr.Blocks() as demo:
196
  gr.Markdown(f"""
197
  # 🏆 Check your progress in the Audio Course 🏆
 
30
  dataset = 'marsyas/gtzan'
31
  case "automatic-speech-recognition":
32
  dataset = 'PolyAI/minds14'
33
+ case "text-to-speech":
34
+ dataset = ""
35
  case _:
36
  print("Unsupported task")
37
 
38
  dataset_specific_models = []
39
+
40
+ if dataset == "":
41
+ return user_model_ids
42
+ else:
43
+ for model in user_model_ids:
44
+ meta = get_metadata(model)
45
+ if meta is None:
46
+ continue
47
+ try:
48
+ if meta["datasets"] == [dataset]:
49
+ dataset_specific_models.append(model)
50
+ except:
51
  continue
52
+ return dataset_specific_models
53
+
 
 
 
54
 
55
  def calculate_best_result(user_models, task):
56
  """
57
  Calculate the best results of a unit for a given task
58
  :param user_model_ids: models of a user
59
  """
60
+
61
  best_model = ""
62
+
63
  if task == "audio-classification":
64
  best_result = -100
65
  larger_is_better = True
66
  elif task == "automatic-speech-recognition":
67
  best_result = 100
68
+ larger_is_better = False
69
 
70
  for model in user_models:
71
  meta = get_metadata(model)
72
  if meta is None:
73
+ continue
74
+ metric = parse_metrics(model, task)
75
 
76
  if larger_is_better:
77
  if metric > best_result:
 
83
  best_model = meta['model-index'][0]["name"]
84
 
85
  return best_result, best_model
86
+
87
 
88
  def get_metadata(model_id):
89
  """
 
104
  :param model_card_content: model card content
105
  """
106
  accuracy_pattern = r"Accuracy: (\d+\.\d+)"
107
+ wer_pattern = r"Wer: (\d+\.\d+)"
108
+
109
  if task == "audio-classification":
110
  pattern = accuracy_pattern
111
  elif task == "automatic-speech-recognition":
112
  pattern = wer_pattern
113
+
114
  match = re.search(pattern, model_card_content)
115
  if match:
116
  metric = match.group(1)
117
  return float(metric)
118
  else:
119
+ return None
120
 
121
 
122
  def parse_metrics(model, task):
 
140
  },
141
  {
142
  "unit": "Unit 5: Automatic Speech Recognition",
143
+ "task": "automatic-speech-recognition",
144
  "baseline_metric": 0.37,
145
  "best_result": 0,
146
  "best_model_id": "",
147
  "passed_": False
148
  },
149
  {
150
+ "unit": "Unit 6: Text-to-Speech",
151
+ "task": "text-to-speech",
152
+ "baseline_metric": 0,
153
  "best_result": 0,
154
  "best_model_id": "",
155
  "passed_": False
 
162
  "best_model_id": "",
163
  "passed_": False
164
  },
165
+ ]
166
 
167
  for unit in results_certification:
168
  unit["passed"] = pass_emoji(unit["passed_"])
 
174
  best_result, best_model_id = calculate_best_result(user_ac_models, task = "audio-classification")
175
  unit["best_result"] = best_result
176
  unit["best_model_id"] = best_model_id
177
+ if unit["best_result"] >= unit["baseline_metric"]:
178
  unit["passed_"] = True
179
  unit["passed"] = pass_emoji(unit["passed_"])
180
+ except: print("Either no relevant models found, or no metrics in the model card for audio classificaiton")
181
  case "automatic-speech-recognition":
182
  try:
183
  user_asr_models = get_user_models(hf_username, task = "automatic-speech-recognition")
184
  best_result, best_model_id = calculate_best_result(user_asr_models, task = "automatic-speech-recognition")
185
  unit["best_result"] = best_result
186
  unit["best_model_id"] = best_model_id
187
+ if unit["best_result"] <= unit["baseline_metric"]:
188
+ unit["passed_"] = True
189
+ unit["passed"] = pass_emoji(unit["passed_"])
190
+ except: print("Either no relevant models found, or no metrics in the model card for automatic speech recognition")
191
+ case "text-to-speech":
192
+ try:
193
+ user_tts_models = get_user_models(hf_username, task = "text-to-speech")
194
+ if user_tts_models:
195
+ unit["best_result"] = 0
196
+ unit["best_model_id"] = user_tts_models[0]
197
  unit["passed_"] = True
198
  unit["passed"] = pass_emoji(unit["passed_"])
199
+ except: print("Either no relevant models found, or no metrics in the model card for automatic speech recognition")
 
200
  print("Evaluation for this unit is work in progress")
201
  case _:
202
  print("Unknown task")
203
+
204
  print(results_certification)
205
+
206
  df = pd.DataFrame(results_certification)
207
  df = df[['passed', 'unit', 'task', 'baseline_metric', 'best_result', 'best_model_id']]
208
  return df
209
+
210
  with gr.Blocks() as demo:
211
  gr.Markdown(f"""
212
  # 🏆 Check your progress in the Audio Course 🏆