versae committed on
Commit
ce4cfee
·
1 Parent(s): 10fbc6d

Saving weights and logs of step 40000 - epoch 0

Browse files
README.md CHANGED
@@ -42,13 +42,13 @@ The following hyperparameters were used during training:
42
  - per_device_train_batch_size: 6
43
  - total_train_batch_size_per_node: 48
44
  - total_train_batch_size: 48
45
- - total_optimization_steps: 100,000
46
- - starting_optimization_step: None
47
  - finishing_optimization_step: 100,000
48
  - num_train_dataset_workers: 32
49
  - num_hosts: 1
50
  - total_num_training_examples: 4,800,000
51
- - steps_per_epoch: 385
52
  - num_beams: None
53
  - weight_decay: 0.01
54
  - adam_beta1: 0.9
@@ -66,7 +66,7 @@ The following hyperparameters were used during training:
66
  | 10000 | 0.8720 | 0.3747 | 18.2181 | 5.2803 | 21.4096 | 5.6762 |
67
  | 20000 | 1.1365 | 0.2741 | 15.2926 | 4.6304 | 18.0851 | 5.0588 |
68
  | 30000 | 1.2561 | 0.2111 | 14.6277 | 4.0617 | 17.9521 | 4.5011 |
69
- | 40000 | 1.1817 | 0.2133 | 14.6277 | 4.3258 | 17.0213 | 4.6604 |
70
 
71
 
72
  ### Framework versions
 
42
  - per_device_train_batch_size: 6
43
  - total_train_batch_size_per_node: 48
44
  - total_train_batch_size: 48
45
+ - total_optimization_steps: 60,000
46
+ - starting_optimization_step: 40,000
47
  - finishing_optimization_step: 100,000
48
  - num_train_dataset_workers: 32
49
  - num_hosts: 1
50
  - total_num_training_examples: 4,800,000
51
+ - steps_per_epoch: _To be computed after first epoch_
52
  - num_beams: None
53
  - weight_decay: 0.01
54
  - adam_beta1: 0.9
 
66
  | 10000 | 0.8720 | 0.3747 | 18.2181 | 5.2803 | 21.4096 | 5.6762 |
67
  | 20000 | 1.1365 | 0.2741 | 15.2926 | 4.6304 | 18.0851 | 5.0588 |
68
  | 30000 | 1.2561 | 0.2111 | 14.6277 | 4.0617 | 17.9521 | 4.5011 |
69
+ | 40000 | 33.1032 | 10.4733 | 100.0 | 100.0 | 100.0 | 98.0681 |
70
 
71
 
72
  ### Framework versions
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "openai/whisper-large-v2",
3
  "activation_dropout": 0.1,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
 
1
  {
2
+ "_name_or_path": "../../../salmon-whisper-large-smj-lr5e-5",
3
  "activation_dropout": 0.1,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca30be2246e63bf1b7c8bdf3721fd3ae42fada57d77d87059febc157f4e1157d
3
  size 3086611993
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9130b500f49c88509e9e20e81bfa666f1f0ff9d15adedcf7e310fc3be40da74c
3
  size 3086611993
special_tokens_map.json CHANGED
@@ -122,7 +122,13 @@
122
  "rstrip": false,
123
  "single_word": false
124
  },
125
- "pad_token": "<|endoftext|>",
 
 
 
 
 
 
126
  "unk_token": {
127
  "content": "<|endoftext|>",
128
  "lstrip": false,
 
122
  "rstrip": false,
123
  "single_word": false
124
  },
125
+ "pad_token": {
126
+ "content": "<|endoftext|>",
127
+ "lstrip": false,
128
+ "normalized": true,
129
+ "rstrip": false,
130
+ "single_word": false
131
+ },
132
  "unk_token": {
133
  "content": "<|endoftext|>",
134
  "lstrip": false,
training_state.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f3505837a71e898a26c23b4de0a201238964810b00350ac23f7e53a786fe639
3
- size 1283
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4cabf1497c61279383985d7a10b34f0d544c11288253163f5ce25843e97db94
3
+ size 1244