youdiniplays
commited on
Training in progress, step 23500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 242041896
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2af21bcb234416212ea1f5b026d11d179220f6fbb3a2641ccba3f02dde45c3dd
|
3 |
size 242041896
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 484163514
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25ef7d9bf21993e077d3c1f4582bfea1ebb7c01c47e5cc086a69cbc49cac3765
|
3 |
size 484163514
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:591801439d497d64b1d36c4d68d48d623a6198675bbf3bfc206f69a006b70528
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11f2f9e913d6dc22945b53e51010b8c3a78477d95df46ad1339d411474610a33
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 3.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -313,13 +313,19 @@
|
|
313 |
"learning_rate": 0.0002943216697360344,
|
314 |
"loss": 0.486,
|
315 |
"step": 23000
|
|
|
|
|
|
|
|
|
|
|
|
|
316 |
}
|
317 |
],
|
318 |
"logging_steps": 500,
|
319 |
"max_steps": 32580,
|
320 |
"num_train_epochs": 5,
|
321 |
"save_steps": 500,
|
322 |
-
"total_flos": 1.
|
323 |
"trial_name": null,
|
324 |
"trial_params": null
|
325 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.6065070595457334,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 23500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
313 |
"learning_rate": 0.0002943216697360344,
|
314 |
"loss": 0.486,
|
315 |
"step": 23000
|
316 |
+
},
|
317 |
+
{
|
318 |
+
"epoch": 3.61,
|
319 |
+
"learning_rate": 0.00027897483118477593,
|
320 |
+
"loss": 0.4932,
|
321 |
+
"step": 23500
|
322 |
}
|
323 |
],
|
324 |
"logging_steps": 500,
|
325 |
"max_steps": 32580,
|
326 |
"num_train_epochs": 5,
|
327 |
"save_steps": 500,
|
328 |
+
"total_flos": 1.2455218388140032e+16,
|
329 |
"trial_name": null,
|
330 |
"trial_params": null
|
331 |
}
|