jun-han committed on
Commit 7cc1079 · verified · 1 Parent(s): f9b3e67

Training checkpoint

Files changed (2)
  1. README.md +8 -6
  2. trainer_state.json +46 -14
README.md CHANGED
@@ -1,22 +1,24 @@
 ---
-base_model: openai/whisper-small
 library_name: transformers
+language:
+- en
 license: apache-2.0
-metrics:
-- wer
+base_model: openai/whisper-small
 tags:
 - generated_from_trainer
+metrics:
+- wer
 model-index:
-- name: Whisper-squeezeformer-v4
+- name: Whisper-squeezeformer-NSQU-whisper-sparse-A
   results: []
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-# Whisper-squeezeformer-v4
+# Whisper-squeezeformer-NSQU-whisper-sparse-A
 
-This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the None dataset.
+This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the LibriSpeech dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.1860
 - Wer: 9.1296
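The updated model card stops short of a usage example. Below is a minimal sketch, assuming the checkpoint is published under a repo id like `jun-han/Whisper-squeezeformer-NSQU-whisper-sparse-A` (hypothetical) and still loads through the stock Transformers ASR pipeline; a modified Squeezeformer/sparse encoder would likely need the author's own modeling code instead.

```python
# Minimal usage sketch. Assumptions: the hypothetical repo id below, and that
# the checkpoint remains loadable with the stock Whisper classes; a custom
# Squeezeformer encoder may require the author's modeling code.
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="jun-han/Whisper-squeezeformer-NSQU-whisper-sparse-A",  # hypothetical repo id
    chunk_length_s=30,  # Whisper operates on 30-second windows
)

# e.g. a 16 kHz LibriSpeech test-clean utterance
print(asr("sample.flac")["text"])
```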
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 9.591828971393792,
-  "best_model_checkpoint": "./Whisper-squeezeformer-v4\\checkpoint-30000",
-  "epoch": 12.0,
+  "best_metric": 9.103012781497261,
+  "best_model_checkpoint": "./Whisper-squeezeformer-v4\\checkpoint-33000",
+  "epoch": 14.4,
   "eval_steps": 3000,
-  "global_step": 30000,
+  "global_step": 36000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -169,19 +169,51 @@
       "step": 30000
     },
     {
-      "epoch": 12.0,
-      "step": 30000,
-      "total_flos": 2.07987720192e+20,
-      "train_loss": 0.043555545043945314,
-      "train_runtime": 76522.9244,
-      "train_samples_per_second": 7.841,
-      "train_steps_per_second": 0.392
+      "epoch": 13.2,
+      "grad_norm": 1.6552714109420776,
+      "learning_rate": 9.13939393939394e-07,
+      "loss": 0.0785,
+      "step": 33000
+    },
+    {
+      "epoch": 13.2,
+      "eval_loss": 0.18493999540805817,
+      "eval_runtime": 1318.1594,
+      "eval_samples_per_second": 1.988,
+      "eval_steps_per_second": 0.249,
+      "eval_wer": 9.103012781497261,
+      "step": 33000
+    },
+    {
+      "epoch": 14.4,
+      "grad_norm": 2.0688605308532715,
+      "learning_rate": 5.151515151515151e-09,
+      "loss": 0.0595,
+      "step": 36000
+    },
+    {
+      "epoch": 14.4,
+      "eval_loss": 0.1860339492559433,
+      "eval_runtime": 1345.7303,
+      "eval_samples_per_second": 1.947,
+      "eval_steps_per_second": 0.244,
+      "eval_wer": 9.129640900791236,
+      "step": 36000
+    },
+    {
+      "epoch": 14.4,
+      "step": 36000,
+      "total_flos": 2.495852642304e+20,
+      "train_loss": 0.011502729203965929,
+      "train_runtime": 50610.9609,
+      "train_samples_per_second": 14.226,
+      "train_steps_per_second": 0.711
     }
   ],
   "logging_steps": 3000,
-  "max_steps": 30000,
+  "max_steps": 36000,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 12,
+  "num_train_epochs": 15,
   "save_steps": 3000,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -195,7 +227,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.07987720192e+20,
+  "total_flos": 2.495852642304e+20,
   "train_batch_size": 20,
   "trial_name": null,
   "trial_params": null