Saving weights and logs of step 60000 - epoch 51
Browse files
- README.md +2 -1
- flax_model.msgpack +1 -1
- training_state.bin +2 -2
README.md
CHANGED
@@ -48,7 +48,7 @@ The following hyperparameters were used during training:
|
|
48 |
- num_train_dataset_workers: 32
|
49 |
- num_hosts: 1
|
50 |
- total_num_training_examples: 4,800,000
|
51 |
-
- steps_per_epoch:
|
52 |
- num_beams: None
|
53 |
- weight_decay: 0.01
|
54 |
- adam_beta1: 0.9
|
@@ -68,6 +68,7 @@ The following hyperparameters were used during training:
|
|
68 |
| 30000 | 1.2561 | 0.2111 | 14.6277 | 4.0617 | 17.9521 | 4.5011 |
|
69 |
| 40000 | 33.1032 | 10.4733 | 100.0 | 100.0 | 100.0 | 98.0681 |
|
70 |
| 50000 | 3.0192 | 2.5972 | 100.7979 | 80.9301 | 101.3298 | 79.8447 |
|
|
|
71 |
|
72 |
|
73 |
### Framework versions
|
|
|
48 |
- num_train_dataset_workers: 32
|
49 |
- num_hosts: 1
|
50 |
- total_num_training_examples: 4,800,000
|
51 |
+
- steps_per_epoch: 1169
|
52 |
- num_beams: None
|
53 |
- weight_decay: 0.01
|
54 |
- adam_beta1: 0.9
|
|
|
68 |
| 30000 | 1.2561 | 0.2111 | 14.6277 | 4.0617 | 17.9521 | 4.5011 |
|
69 |
| 40000 | 33.1032 | 10.4733 | 100.0 | 100.0 | 100.0 | 98.0681 |
|
70 |
| 50000 | 3.0192 | 2.5972 | 100.7979 | 80.9301 | 101.3298 | 79.8447 |
|
71 |
+
| 60000 | 2.7909 | 2.0728 | 99.6011 | 79.8944 | 100.5319 | 78.8688 |
|
72 |
|
73 |
|
74 |
### Framework versions
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3086611993
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65f866e787146e24c4500d40746f42d281d07c90796777332e227095249f4df3
|
3 |
size 3086611993
|
training_state.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abd4aa3fe102d92fbff78f2fe68e0299155c085e4bea66874666465f600bfab3
|
3 |
+
size 1745
|