Saving weights and logs of step 40000 - epoch 0

Files changed (5) hide show

README.md CHANGED Viewed

@@ -42,13 +42,13 @@ The following hyperparameters were used during training:
 - per_device_train_batch_size: 6
 - total_train_batch_size_per_node: 48
 - total_train_batch_size: 48
-- total_optimization_steps: 100,000
-- starting_optimization_step: None
 - finishing_optimization_step: 100,000
 - num_train_dataset_workers: 32
 - num_hosts: 1
 - total_num_training_examples: 4,800,000
-- steps_per_epoch: 385
 - num_beams: None
 - weight_decay: 0.01
 - adam_beta1: 0.9
@@ -66,7 +66,7 @@ The following hyperparameters were used during training:
 | 10000 | 0.8720          | 0.3747     | 18.2181        | 5.2803         | 21.4096              | 5.6762               |
 | 20000 | 1.1365          | 0.2741     | 15.2926        | 4.6304         | 18.0851              | 5.0588               |
 | 30000 | 1.2561          | 0.2111     | 14.6277        | 4.0617         | 17.9521              | 4.5011               |
-| 40000 | 1.1817          | 0.2133     | 14.6277        | 4.3258         | 17.0213              | 4.6604               |
 ### Framework versions

 - per_device_train_batch_size: 6
 - total_train_batch_size_per_node: 48
 - total_train_batch_size: 48
+- total_optimization_steps: 60,000
+- starting_optimization_step: 40,000
 - finishing_optimization_step: 100,000
 - num_train_dataset_workers: 32
 - num_hosts: 1
 - total_num_training_examples: 4,800,000
+- steps_per_epoch: _To be computed after first epoch_
 - num_beams: None
 - weight_decay: 0.01
 - adam_beta1: 0.9
 | 10000 | 0.8720          | 0.3747     | 18.2181        | 5.2803         | 21.4096              | 5.6762               |
 | 20000 | 1.1365          | 0.2741     | 15.2926        | 4.6304         | 18.0851              | 5.0588               |
 | 30000 | 1.2561          | 0.2111     | 14.6277        | 4.0617         | 17.9521              | 4.5011               |
+| 40000 | 33.1032         | 10.4733    | 100.0          | 100.0          | 100.0                | 98.0681              |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "openai/whisper-large-v2",
   "activation_dropout": 0.1,
   "activation_function": "gelu",
   "apply_spec_augment": false,

 {
+  "_name_or_path": "../../../salmon-whisper-large-smj-lr5e-5",
   "activation_dropout": 0.1,
   "activation_function": "gelu",
   "apply_spec_augment": false,

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca30be2246e63bf1b7c8bdf3721fd3ae42fada57d77d87059febc157f4e1157d
 size 3086611993

 version https://git-lfs.github.com/spec/v1
+oid sha256:9130b500f49c88509e9e20e81bfa666f1f0ff9d15adedcf7e310fc3be40da74c
 size 3086611993

special_tokens_map.json CHANGED Viewed

@@ -122,7 +122,13 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<|endoftext|>",
   "unk_token": {
     "content": "<|endoftext|>",
     "lstrip": false,

     "rstrip": false,
     "single_word": false
   },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   "unk_token": {
     "content": "<|endoftext|>",
     "lstrip": false,

training_state.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f3505837a71e898a26c23b4de0a201238964810b00350ac23f7e53a786fe639
-size 1283

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4cabf1497c61279383985d7a10b34f0d544c11288253163f5ce25843e97db94
+size 1244