Training in progress, step 7000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 242041896
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bfeb044fce0ec07f38afd28ec6b0084f31572d37f96aa23ea2e011e868dcf81
|
3 |
size 242041896
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 484163514
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c39b1d290cc081d5297e8b98e30ca571f3bc439ccc2cab431a1404666f8aa3f7
|
3 |
size 484163514
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d6eae14e064a019e87309392259ece75c694ae13f8513610fd6013d08c7d940
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a8a931535331ccd5841024a74ae43f574a1035af856bc5d5dc42308c3d8be57
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -85,13 +85,29 @@
|
|
85 |
"learning_rate": 0.0008005524861878454,
|
86 |
"loss": 0.6089,
|
87 |
"step": 6500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
}
|
89 |
],
|
90 |
"logging_steps": 500,
|
91 |
"max_steps": 32580,
|
92 |
"num_train_epochs": 5,
|
93 |
"save_steps": 500,
|
94 |
-
"total_flos":
|
95 |
"trial_name": null,
|
96 |
"trial_params": null
|
97 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0742786985880908,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 7000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
85 |
"learning_rate": 0.0008005524861878454,
|
86 |
"loss": 0.6089,
|
87 |
"step": 6500
|
88 |
+
},
|
89 |
+
{
|
90 |
+
"epoch": 1.0,
|
91 |
+
"eval_bleu": 3.6822,
|
92 |
+
"eval_gen_len": 18.235,
|
93 |
+
"eval_loss": 0.506125271320343,
|
94 |
+
"eval_runtime": 27.1471,
|
95 |
+
"eval_samples_per_second": 36.836,
|
96 |
+
"eval_steps_per_second": 2.321,
|
97 |
+
"step": 6516
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"epoch": 1.07,
|
101 |
+
"learning_rate": 0.0007852056476365869,
|
102 |
+
"loss": 0.5657,
|
103 |
+
"step": 7000
|
104 |
}
|
105 |
],
|
106 |
"logging_steps": 500,
|
107 |
"max_steps": 32580,
|
108 |
"num_train_epochs": 5,
|
109 |
"save_steps": 500,
|
110 |
+
"total_flos": 3709993691381760.0,
|
111 |
"trial_name": null,
|
112 |
"trial_params": null
|
113 |
}
|