jun-han committed
Commit de0b412 · verified · 1 Parent(s): 7c57eda

Training checkpoint

Files changed (2)
  1. README.md +8 -6
  2. trainer_state.json +78 -14
README.md CHANGED
@@ -1,22 +1,24 @@
 ---
-base_model: openai/whisper-small
 library_name: transformers
+language:
+- en
 license: apache-2.0
+base_model: openai/whisper-small
-metrics:
-- wer
 tags:
 - generated_from_trainer
+metrics:
+- wer
 model-index:
-- name: Whisper-squeezeformer-v3
+- name: Whisper-squeezeformer-NSQU-whisper
   results: []
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-# Whisper-squeezeformer-v3
+# Whisper-squeezeformer-NSQU-whisper
 
-This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the None dataset.
+This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the LibriSpeech dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.1511
 - Wer: 6.8035
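
For context, a minimal usage sketch (not part of the commit): loading the checkpoint described by the updated card with the stock transformers Whisper classes. The repo id below is a guess based on the commit author and model name, and the Squeezeformer encoder may require this repo's own modeling code rather than `WhisperForConditionalGeneration`.

```python
# Minimal usage sketch, assuming the checkpoint loads with the stock Whisper
# classes; the Squeezeformer encoder may instead require this repo's own
# modeling code. The repo id is a guess from the commit author and model name.
from transformers import WhisperForConditionalGeneration, WhisperProcessor
import torchaudio

repo_id = "jun-han/Whisper-squeezeformer-NSQU-whisper"  # hypothetical repo id

processor = WhisperProcessor.from_pretrained(repo_id)
model = WhisperForConditionalGeneration.from_pretrained(repo_id)

# Whisper expects 16 kHz mono audio.
waveform, sr = torchaudio.load("sample.wav")
waveform = torchaudio.functional.resample(waveform, sr, 16000).mean(dim=0)

inputs = processor(waveform.numpy(), sampling_rate=16000, return_tensors="pt")
ids = model.generate(inputs.input_features)
print(processor.batch_decode(ids, skip_special_tokens=True)[0])
```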
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 7.149650030432136,
-  "best_model_checkpoint": "./Whisper-squeezeformer-v3\\checkpoint-20000",
-  "epoch": 8.0,
+  "best_metric": 6.75973828362751,
+  "best_model_checkpoint": "./Whisper-squeezeformer-v3\\checkpoint-27500",
+  "epoch": 12.0,
   "eval_steps": 2500,
-  "global_step": 20000,
+  "global_step": 30000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -137,19 +137,83 @@
       "step": 20000
     },
     {
-      "epoch": 8.0,
-      "step": 20000,
-      "total_flos": 1.38658480128e+20,
-      "train_loss": 0.047600341033935546,
-      "train_runtime": 48327.6236,
-      "train_samples_per_second": 8.277,
-      "train_steps_per_second": 0.414
+      "epoch": 9.0,
+      "grad_norm": 4.3119354248046875,
+      "learning_rate": 2.731636363636364e-06,
+      "loss": 0.2168,
+      "step": 22500
+    },
+    {
+      "epoch": 9.0,
+      "eval_loss": 0.15093587338924408,
+      "eval_runtime": 583.1378,
+      "eval_samples_per_second": 4.493,
+      "eval_steps_per_second": 0.562,
+      "eval_wer": 7.050745587340232,
+      "step": 22500
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 3.5476253032684326,
+      "learning_rate": 1.822909090909091e-06,
+      "loss": 0.1467,
+      "step": 25000
+    },
+    {
+      "epoch": 10.0,
+      "eval_loss": 0.1494235247373581,
+      "eval_runtime": 591.6025,
+      "eval_samples_per_second": 4.429,
+      "eval_steps_per_second": 0.554,
+      "eval_wer": 6.967057212416311,
+      "step": 25000
+    },
+    {
+      "epoch": 11.0,
+      "grad_norm": 3.3021419048309326,
+      "learning_rate": 9.141818181818182e-07,
+      "loss": 0.1113,
+      "step": 27500
+    },
+    {
+      "epoch": 11.0,
+      "eval_loss": 0.14934100210666656,
+      "eval_runtime": 584.5502,
+      "eval_samples_per_second": 4.482,
+      "eval_steps_per_second": 0.561,
+      "eval_wer": 6.75973828362751,
+      "step": 27500
+    },
+    {
+      "epoch": 12.0,
+      "grad_norm": 2.4910900592803955,
+      "learning_rate": 5.090909090909091e-09,
+      "loss": 0.0914,
+      "step": 30000
+    },
+    {
+      "epoch": 12.0,
+      "eval_loss": 0.15113578736782074,
+      "eval_runtime": 619.6762,
+      "eval_samples_per_second": 4.228,
+      "eval_steps_per_second": 0.529,
+      "eval_wer": 6.80348447961047,
+      "step": 30000
+    },
+    {
+      "epoch": 12.0,
+      "step": 30000,
+      "total_flos": 2.07987720192e+20,
+      "train_loss": 0.04717991434733073,
+      "train_runtime": 48062.4633,
+      "train_samples_per_second": 12.484,
+      "train_steps_per_second": 0.624
     }
   ],
   "logging_steps": 2500,
-  "max_steps": 20000,
+  "max_steps": 30000,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 8,
+  "num_train_epochs": 12,
   "save_steps": 2500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -163,7 +227,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.38658480128e+20,
+  "total_flos": 2.07987720192e+20,
   "train_batch_size": 20,
   "trial_name": null,
   "trial_params": null