jun-han
/

Whisper-squeezeformer-v3

@@ -1,22 +1,24 @@
 ---
-base_model: openai/whisper-small
 library_name: transformers
 license: apache-2.0
-metrics:
-- wer
 tags:
 - generated_from_trainer
 model-index:
-- name: Whisper-squeezeformer-v3
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# Whisper-squeezeformer-v3
-This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the None dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.1322
 - Wer: 5.6642

 ---
 library_name: transformers
+language:
+- en
 license: apache-2.0
+base_model: openai/whisper-small
 tags:
 - generated_from_trainer
+metrics:
+- wer
 model-index:
+- name: Whisper-squeezeformer-NSQU-whisper
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# Whisper-squeezeformer-NSQU-whisper
+This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the LibriSpeech dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.1322
 - Wer: 5.6642

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 5.599513085818624,
   "best_model_checkpoint": "./Whisper-squeezeformer-v3\\checkpoint-40000",
-  "epoch": 18.0,
   "eval_steps": 2500,
-  "global_step": 45000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -297,19 +297,51 @@
       "step": 45000
     },
     {
-      "epoch": 18.0,
-      "step": 45000,
-      "total_flos": 3.11981580288e+20,
-      "train_loss": 0.024614295111762153,
-      "train_runtime": 36051.4679,
-      "train_samples_per_second": 24.964,
-      "train_steps_per_second": 1.248
     }
   ],
   "logging_steps": 2500,
-  "max_steps": 45000,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 18,
   "save_steps": 2500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -323,7 +355,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.11981580288e+20,
   "train_batch_size": 20,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 5.599513085818624,
   "best_model_checkpoint": "./Whisper-squeezeformer-v3\\checkpoint-40000",
+  "epoch": 20.0,
   "eval_steps": 2500,
+  "global_step": 50000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "step": 45000
     },
     {
+      "epoch": 19.0,
+      "grad_norm": 4.747838973999023,
+      "learning_rate": 5.309473684210527e-07,
+      "loss": 0.3373,
+      "step": 47500
+    },
+    {
+      "epoch": 19.0,
+      "eval_loss": 0.13534972071647644,
+      "eval_runtime": 649.9396,
+      "eval_samples_per_second": 4.031,
+      "eval_steps_per_second": 0.505,
+      "eval_wer": 5.740261716372489,
+      "step": 47500
+    },
+    {
+      "epoch": 20.0,
+      "grad_norm": 5.997547626495361,
+      "learning_rate": 4.842105263157895e-09,
+      "loss": 0.2785,
+      "step": 50000
+    },
+    {
+      "epoch": 20.0,
+      "eval_loss": 0.13221722841262817,
+      "eval_runtime": 634.989,
+      "eval_samples_per_second": 4.126,
+      "eval_steps_per_second": 0.517,
+      "eval_wer": 5.6641813755325625,
+      "step": 50000
+    },
+    {
+      "epoch": 20.0,
+      "step": 50000,
+      "total_flos": 3.4664620032e+20,
+      "train_loss": 0.030787386474609376,
+      "train_runtime": 24493.4093,
+      "train_samples_per_second": 40.827,
+      "train_steps_per_second": 2.041
     }
   ],
   "logging_steps": 2500,
+  "max_steps": 50000,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
   "save_steps": 2500,
   "stateful_callbacks": {
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 3.4664620032e+20,
   "train_batch_size": 20,
   "trial_name": null,
   "trial_params": null