jun-han committed on
Commit
577912f
·
verified ·
1 Parent(s): 6009fe8

Training checkpoint

Browse files
Files changed (2) hide show
  1. README.md +8 -6
  2. trainer_state.json +110 -14
README.md CHANGED
@@ -1,22 +1,24 @@
1
  ---
2
- base_model: openai/whisper-small
3
  library_name: transformers
 
 
4
  license: apache-2.0
5
- metrics:
6
- - wer
7
  tags:
8
  - generated_from_trainer
 
 
9
  model-index:
10
- - name: Whisper-squeezeformer-v3
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
- # Whisper-squeezeformer-v3
18
 
19
- This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.1297
22
  - Wer: 5.6204
 
1
  ---
 
2
  library_name: transformers
3
+ language:
4
+ - en
5
  license: apache-2.0
6
+ base_model: openai/whisper-small
 
7
  tags:
8
  - generated_from_trainer
9
+ metrics:
10
+ - wer
11
  model-index:
12
+ - name: Whisper-squeezeformer-NSQU-whisper
13
  results: []
14
  ---
15
 
16
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
  should probably proofread and complete it, then remove this comment. -->
18
 
19
+ # Whisper-squeezeformer-NSQU-whisper
20
 
21
+ This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the LibriSpeech dataset.
22
  It achieves the following results on the evaluation set:
23
  - Loss: 0.1297
24
  - Wer: 5.6204
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 6.75973828362751,
3
- "best_model_checkpoint": "./Whisper-squeezeformer-v3\\checkpoint-27500",
4
- "epoch": 12.0,
5
  "eval_steps": 2500,
6
- "global_step": 30000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -201,19 +201,115 @@
201
  "step": 30000
202
  },
203
  {
204
- "epoch": 12.0,
205
- "step": 30000,
206
- "total_flos": 2.07987720192e+20,
207
- "train_loss": 0.04717991434733073,
208
- "train_runtime": 48062.4633,
209
- "train_samples_per_second": 12.484,
210
- "train_steps_per_second": 0.624
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  }
212
  ],
213
  "logging_steps": 2500,
214
- "max_steps": 30000,
215
  "num_input_tokens_seen": 0,
216
- "num_train_epochs": 12,
217
  "save_steps": 2500,
218
  "stateful_callbacks": {
219
  "TrainerControl": {
@@ -227,7 +323,7 @@
227
  "attributes": {}
228
  }
229
  },
230
- "total_flos": 2.07987720192e+20,
231
  "train_batch_size": 20,
232
  "trial_name": null,
233
  "trial_params": null
 
1
  {
2
+ "best_metric": 5.599513085818624,
3
+ "best_model_checkpoint": "./Whisper-squeezeformer-v3\\checkpoint-40000",
4
+ "epoch": 18.0,
5
  "eval_steps": 2500,
6
+ "global_step": 45000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
201
  "step": 30000
202
  },
203
  {
204
+ "epoch": 13.0,
205
+ "grad_norm": 4.114705562591553,
206
+ "learning_rate": 2.004e-06,
207
+ "loss": 0.1946,
208
+ "step": 32500
209
+ },
210
+ {
211
+ "epoch": 13.0,
212
+ "eval_loss": 0.139073446393013,
213
+ "eval_runtime": 595.1979,
214
+ "eval_samples_per_second": 4.402,
215
+ "eval_steps_per_second": 0.551,
216
+ "eval_wer": 6.421180766889837,
217
+ "step": 32500
218
+ },
219
+ {
220
+ "epoch": 14.0,
221
+ "grad_norm": 4.109494686126709,
222
+ "learning_rate": 1.3378666666666667e-06,
223
+ "loss": 0.1425,
224
+ "step": 35000
225
+ },
226
+ {
227
+ "epoch": 14.0,
228
+ "eval_loss": 0.13689054548740387,
229
+ "eval_runtime": 633.0271,
230
+ "eval_samples_per_second": 4.139,
231
+ "eval_steps_per_second": 0.518,
232
+ "eval_wer": 5.87530432136336,
233
+ "step": 35000
234
+ },
235
+ {
236
+ "epoch": 15.0,
237
+ "grad_norm": 2.726956367492676,
238
+ "learning_rate": 6.712e-07,
239
+ "loss": 0.1145,
240
+ "step": 37500
241
+ },
242
+ {
243
+ "epoch": 15.0,
244
+ "eval_loss": 0.13682714104652405,
245
+ "eval_runtime": 601.9865,
246
+ "eval_samples_per_second": 4.352,
247
+ "eval_steps_per_second": 0.545,
248
+ "eval_wer": 5.753575776019477,
249
+ "step": 37500
250
+ },
251
+ {
252
+ "epoch": 16.0,
253
+ "grad_norm": 3.426710844039917,
254
+ "learning_rate": 1.1807058823529414e-06,
255
+ "loss": 0.1776,
256
+ "step": 40000
257
+ },
258
+ {
259
+ "epoch": 16.0,
260
+ "eval_loss": 0.13023081421852112,
261
+ "eval_runtime": 578.9166,
262
+ "eval_samples_per_second": 4.526,
263
+ "eval_steps_per_second": 0.567,
264
+ "eval_wer": 5.599513085818624,
265
+ "step": 40000
266
+ },
267
+ {
268
+ "epoch": 17.0,
269
+ "grad_norm": 3.3029887676239014,
270
+ "learning_rate": 5.927058823529412e-07,
271
+ "loss": 0.1416,
272
+ "step": 42500
273
+ },
274
+ {
275
+ "epoch": 17.0,
276
+ "eval_loss": 0.12984110414981842,
277
+ "eval_runtime": 624.3393,
278
+ "eval_samples_per_second": 4.196,
279
+ "eval_steps_per_second": 0.525,
280
+ "eval_wer": 5.620435179549604,
281
+ "step": 42500
282
+ },
283
+ {
284
+ "epoch": 18.0,
285
+ "grad_norm": 3.106375217437744,
286
+ "learning_rate": 4.705882352941177e-09,
287
+ "loss": 0.1239,
288
+ "step": 45000
289
+ },
290
+ {
291
+ "epoch": 18.0,
292
+ "eval_loss": 0.12974976003170013,
293
+ "eval_runtime": 580.9928,
294
+ "eval_samples_per_second": 4.51,
295
+ "eval_steps_per_second": 0.565,
296
+ "eval_wer": 5.620435179549604,
297
+ "step": 45000
298
+ },
299
+ {
300
+ "epoch": 18.0,
301
+ "step": 45000,
302
+ "total_flos": 3.11981580288e+20,
303
+ "train_loss": 0.024614295111762153,
304
+ "train_runtime": 36051.4679,
305
+ "train_samples_per_second": 24.964,
306
+ "train_steps_per_second": 1.248
307
  }
308
  ],
309
  "logging_steps": 2500,
310
+ "max_steps": 45000,
311
  "num_input_tokens_seen": 0,
312
+ "num_train_epochs": 18,
313
  "save_steps": 2500,
314
  "stateful_callbacks": {
315
  "TrainerControl": {
 
323
  "attributes": {}
324
  }
325
  },
326
+ "total_flos": 3.11981580288e+20,
327
  "train_batch_size": 20,
328
  "trial_name": null,
329
  "trial_params": null