jun-han committed on
Commit 7cc1079 · verified · 1 Parent(s): f9b3e67

Training checkpoint

Files changed (2)
  1. README.md +8 -6
  2. trainer_state.json +46 -14
README.md CHANGED
@@ -1,22 +1,24 @@
 ---
-base_model: openai/whisper-small
 library_name: transformers
+language:
+- en
 license: apache-2.0
-metrics:
-- wer
+base_model: openai/whisper-small
 tags:
 - generated_from_trainer
+metrics:
+- wer
 model-index:
-- name: Whisper-squeezeformer-v4
+- name: Whisper-squeezeformer-NSQU-whisper-sparse-A
   results: []
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-# Whisper-squeezeformer-v4
+# Whisper-squeezeformer-NSQU-whisper-sparse-A
 
-This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the None dataset.
+This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the LibriSpeech dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.1860
 - Wer: 9.1296
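The updated model card stops short of a usage example. Below is a minimal sketch, assuming the checkpoint is published under a repo id like `jun-han/Whisper-squeezeformer-NSQU-whisper-sparse-A` (hypothetical) and still loads through the stock Transformers ASR pipeline; a modified Squeezeformer/sparse encoder would likely need the author's own modeling code instead.

```python
# Minimal usage sketch. Assumptions: the hypothetical repo id below, and that
# the checkpoint remains loadable with the stock Whisper classes; a custom
# Squeezeformer encoder may require the author's modeling code.
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="jun-han/Whisper-squeezeformer-NSQU-whisper-sparse-A",  # hypothetical repo id
    chunk_length_s=30,  # Whisper operates on 30-second windows
)

# e.g. a 16 kHz LibriSpeech test-clean utterance
print(asr("sample.flac")["text"])
```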
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 9.591828971393792,
-  "best_model_checkpoint": "./Whisper-squeezeformer-v4\\checkpoint-30000",
-  "epoch": 12.0,
+  "best_metric": 9.103012781497261,
+  "best_model_checkpoint": "./Whisper-squeezeformer-v4\\checkpoint-33000",
+  "epoch": 14.4,
   "eval_steps": 3000,
-  "global_step": 30000,
+  "global_step": 36000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -169,19 +169,51 @@
       "step": 30000
     },
     {
-      "epoch": 12.0,
-      "step": 30000,
-      "total_flos": 2.07987720192e+20,
-      "train_loss": 0.043555545043945314,
-      "train_runtime": 76522.9244,
-      "train_samples_per_second": 7.841,
-      "train_steps_per_second": 0.392
+      "epoch": 13.2,
+      "grad_norm": 1.6552714109420776,
+      "learning_rate": 9.13939393939394e-07,
+      "loss": 0.0785,
+      "step": 33000
+    },
+    {
+      "epoch": 13.2,
+      "eval_loss": 0.18493999540805817,
+      "eval_runtime": 1318.1594,
+      "eval_samples_per_second": 1.988,
+      "eval_steps_per_second": 0.249,
+      "eval_wer": 9.103012781497261,
+      "step": 33000
+    },
+    {
+      "epoch": 14.4,
+      "grad_norm": 2.0688605308532715,
+      "learning_rate": 5.151515151515151e-09,
+      "loss": 0.0595,
+      "step": 36000
+    },
+    {
+      "epoch": 14.4,
+      "eval_loss": 0.1860339492559433,
+      "eval_runtime": 1345.7303,
+      "eval_samples_per_second": 1.947,
+      "eval_steps_per_second": 0.244,
+      "eval_wer": 9.129640900791236,
+      "step": 36000
+    },
+    {
+      "epoch": 14.4,
+      "step": 36000,
+      "total_flos": 2.495852642304e+20,
+      "train_loss": 0.011502729203965929,
+      "train_runtime": 50610.9609,
+      "train_samples_per_second": 14.226,
+      "train_steps_per_second": 0.711
     }
   ],
   "logging_steps": 3000,
-  "max_steps": 30000,
+  "max_steps": 36000,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 12,
+  "num_train_epochs": 15,
   "save_steps": 3000,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -195,7 +227,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.07987720192e+20,
+  "total_flos": 2.495852642304e+20,
   "train_batch_size": 20,
   "trial_name": null,
   "trial_params": null