gabrielaltay
commited on
Training in progress, step 14168, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500389884
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8de22d450769d69a96c63c76531a3fb7e01f6423c491441830387e9cb7ed916e
|
3 |
size 500389884
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1000900218
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f43ce093c117315e7acfb01d8346e842f77cf100d65506fdd1c745797e57ff5c
|
3 |
size 1000900218
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9faa9a63eb45764eaaacb970ffffa47beb58be3388211b399565fed7ba6f8ef
|
3 |
size 14512
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1c08e7ac0c6c146323ce2c3786f8affd631063335ef26e898e86831cecaafa1
|
3 |
size 14512
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a25ecdb576c7c60f2386731ab1defd8898d170e70fb4eb9008331e7fc07ff9c3
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -21259,6 +21259,3548 @@
|
|
21259 |
"learning_rate": 2.0003458668906565e-05,
|
21260 |
"loss": 5.5015,
|
21261 |
"step": 12144
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21262 |
}
|
21263 |
],
|
21264 |
"logging_steps": 4,
|
@@ -21266,7 +24808,7 @@
|
|
21266 |
"num_input_tokens_seen": 0,
|
21267 |
"num_train_epochs": 1,
|
21268 |
"save_steps": 2024,
|
21269 |
-
"total_flos": 5.
|
21270 |
"train_batch_size": 8,
|
21271 |
"trial_name": null,
|
21272 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7000345866890657,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 14168,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
21259 |
"learning_rate": 2.0003458668906565e-05,
|
21260 |
"loss": 5.5015,
|
21261 |
"step": 12144
|
21262 |
+
},
|
21263 |
+
{
|
21264 |
+
"epoch": 0.6,
|
21265 |
+
"grad_norm": 1.796938180923462,
|
21266 |
+
"learning_rate": 1.999357675774495e-05,
|
21267 |
+
"loss": 5.4519,
|
21268 |
+
"step": 12148
|
21269 |
+
},
|
21270 |
+
{
|
21271 |
+
"epoch": 0.6,
|
21272 |
+
"grad_norm": 2.2093305587768555,
|
21273 |
+
"learning_rate": 1.9983694846583332e-05,
|
21274 |
+
"loss": 5.6164,
|
21275 |
+
"step": 12152
|
21276 |
+
},
|
21277 |
+
{
|
21278 |
+
"epoch": 0.6,
|
21279 |
+
"grad_norm": 1.8350774049758911,
|
21280 |
+
"learning_rate": 1.997381293542171e-05,
|
21281 |
+
"loss": 5.583,
|
21282 |
+
"step": 12156
|
21283 |
+
},
|
21284 |
+
{
|
21285 |
+
"epoch": 0.6,
|
21286 |
+
"grad_norm": 1.8653216361999512,
|
21287 |
+
"learning_rate": 1.9963931024260093e-05,
|
21288 |
+
"loss": 5.656,
|
21289 |
+
"step": 12160
|
21290 |
+
},
|
21291 |
+
{
|
21292 |
+
"epoch": 0.6,
|
21293 |
+
"grad_norm": 1.7206474542617798,
|
21294 |
+
"learning_rate": 1.9954049113098475e-05,
|
21295 |
+
"loss": 5.5226,
|
21296 |
+
"step": 12164
|
21297 |
+
},
|
21298 |
+
{
|
21299 |
+
"epoch": 0.6,
|
21300 |
+
"grad_norm": 1.8878382444381714,
|
21301 |
+
"learning_rate": 1.9944167201936857e-05,
|
21302 |
+
"loss": 5.6363,
|
21303 |
+
"step": 12168
|
21304 |
+
},
|
21305 |
+
{
|
21306 |
+
"epoch": 0.6,
|
21307 |
+
"grad_norm": 2.0668506622314453,
|
21308 |
+
"learning_rate": 1.9934285290775236e-05,
|
21309 |
+
"loss": 5.5106,
|
21310 |
+
"step": 12172
|
21311 |
+
},
|
21312 |
+
{
|
21313 |
+
"epoch": 0.6,
|
21314 |
+
"grad_norm": 2.0027477741241455,
|
21315 |
+
"learning_rate": 1.9924403379613618e-05,
|
21316 |
+
"loss": 5.5217,
|
21317 |
+
"step": 12176
|
21318 |
+
},
|
21319 |
+
{
|
21320 |
+
"epoch": 0.6,
|
21321 |
+
"grad_norm": 2.1123950481414795,
|
21322 |
+
"learning_rate": 1.9914521468452e-05,
|
21323 |
+
"loss": 5.6275,
|
21324 |
+
"step": 12180
|
21325 |
+
},
|
21326 |
+
{
|
21327 |
+
"epoch": 0.6,
|
21328 |
+
"grad_norm": 2.1055514812469482,
|
21329 |
+
"learning_rate": 1.9904639557290382e-05,
|
21330 |
+
"loss": 5.6186,
|
21331 |
+
"step": 12184
|
21332 |
+
},
|
21333 |
+
{
|
21334 |
+
"epoch": 0.6,
|
21335 |
+
"grad_norm": 2.0140175819396973,
|
21336 |
+
"learning_rate": 1.989475764612876e-05,
|
21337 |
+
"loss": 5.5965,
|
21338 |
+
"step": 12188
|
21339 |
+
},
|
21340 |
+
{
|
21341 |
+
"epoch": 0.6,
|
21342 |
+
"grad_norm": 1.902254343032837,
|
21343 |
+
"learning_rate": 1.9884875734967142e-05,
|
21344 |
+
"loss": 5.5557,
|
21345 |
+
"step": 12192
|
21346 |
+
},
|
21347 |
+
{
|
21348 |
+
"epoch": 0.6,
|
21349 |
+
"grad_norm": 1.9510375261306763,
|
21350 |
+
"learning_rate": 1.9874993823805525e-05,
|
21351 |
+
"loss": 5.4804,
|
21352 |
+
"step": 12196
|
21353 |
+
},
|
21354 |
+
{
|
21355 |
+
"epoch": 0.6,
|
21356 |
+
"grad_norm": 1.950716495513916,
|
21357 |
+
"learning_rate": 1.9865111912643907e-05,
|
21358 |
+
"loss": 5.5887,
|
21359 |
+
"step": 12200
|
21360 |
+
},
|
21361 |
+
{
|
21362 |
+
"epoch": 0.6,
|
21363 |
+
"grad_norm": 2.0936696529388428,
|
21364 |
+
"learning_rate": 1.985523000148229e-05,
|
21365 |
+
"loss": 5.5463,
|
21366 |
+
"step": 12204
|
21367 |
+
},
|
21368 |
+
{
|
21369 |
+
"epoch": 0.6,
|
21370 |
+
"grad_norm": 1.6361807584762573,
|
21371 |
+
"learning_rate": 1.984534809032067e-05,
|
21372 |
+
"loss": 5.5516,
|
21373 |
+
"step": 12208
|
21374 |
+
},
|
21375 |
+
{
|
21376 |
+
"epoch": 0.6,
|
21377 |
+
"grad_norm": 2.1499600410461426,
|
21378 |
+
"learning_rate": 1.9835466179159053e-05,
|
21379 |
+
"loss": 5.5467,
|
21380 |
+
"step": 12212
|
21381 |
+
},
|
21382 |
+
{
|
21383 |
+
"epoch": 0.6,
|
21384 |
+
"grad_norm": 1.9551507234573364,
|
21385 |
+
"learning_rate": 1.982558426799743e-05,
|
21386 |
+
"loss": 5.549,
|
21387 |
+
"step": 12216
|
21388 |
+
},
|
21389 |
+
{
|
21390 |
+
"epoch": 0.6,
|
21391 |
+
"grad_norm": 1.9426565170288086,
|
21392 |
+
"learning_rate": 1.9815702356835814e-05,
|
21393 |
+
"loss": 5.5726,
|
21394 |
+
"step": 12220
|
21395 |
+
},
|
21396 |
+
{
|
21397 |
+
"epoch": 0.6,
|
21398 |
+
"grad_norm": 1.9303926229476929,
|
21399 |
+
"learning_rate": 1.9805820445674196e-05,
|
21400 |
+
"loss": 5.5571,
|
21401 |
+
"step": 12224
|
21402 |
+
},
|
21403 |
+
{
|
21404 |
+
"epoch": 0.6,
|
21405 |
+
"grad_norm": 1.859390139579773,
|
21406 |
+
"learning_rate": 1.9795938534512574e-05,
|
21407 |
+
"loss": 5.499,
|
21408 |
+
"step": 12228
|
21409 |
+
},
|
21410 |
+
{
|
21411 |
+
"epoch": 0.6,
|
21412 |
+
"grad_norm": 1.8791084289550781,
|
21413 |
+
"learning_rate": 1.9786056623350956e-05,
|
21414 |
+
"loss": 5.5617,
|
21415 |
+
"step": 12232
|
21416 |
+
},
|
21417 |
+
{
|
21418 |
+
"epoch": 0.6,
|
21419 |
+
"grad_norm": 2.1466195583343506,
|
21420 |
+
"learning_rate": 1.977617471218934e-05,
|
21421 |
+
"loss": 5.3845,
|
21422 |
+
"step": 12236
|
21423 |
+
},
|
21424 |
+
{
|
21425 |
+
"epoch": 0.6,
|
21426 |
+
"grad_norm": 1.7159942388534546,
|
21427 |
+
"learning_rate": 1.976629280102772e-05,
|
21428 |
+
"loss": 5.551,
|
21429 |
+
"step": 12240
|
21430 |
+
},
|
21431 |
+
{
|
21432 |
+
"epoch": 0.6,
|
21433 |
+
"grad_norm": 1.8822176456451416,
|
21434 |
+
"learning_rate": 1.97564108898661e-05,
|
21435 |
+
"loss": 5.4277,
|
21436 |
+
"step": 12244
|
21437 |
+
},
|
21438 |
+
{
|
21439 |
+
"epoch": 0.61,
|
21440 |
+
"grad_norm": 1.798642635345459,
|
21441 |
+
"learning_rate": 1.974652897870448e-05,
|
21442 |
+
"loss": 5.6463,
|
21443 |
+
"step": 12248
|
21444 |
+
},
|
21445 |
+
{
|
21446 |
+
"epoch": 0.61,
|
21447 |
+
"grad_norm": 1.9936386346817017,
|
21448 |
+
"learning_rate": 1.9736647067542863e-05,
|
21449 |
+
"loss": 5.6342,
|
21450 |
+
"step": 12252
|
21451 |
+
},
|
21452 |
+
{
|
21453 |
+
"epoch": 0.61,
|
21454 |
+
"grad_norm": 2.1285390853881836,
|
21455 |
+
"learning_rate": 1.9726765156381245e-05,
|
21456 |
+
"loss": 5.5005,
|
21457 |
+
"step": 12256
|
21458 |
+
},
|
21459 |
+
{
|
21460 |
+
"epoch": 0.61,
|
21461 |
+
"grad_norm": 1.88554048538208,
|
21462 |
+
"learning_rate": 1.9716883245219624e-05,
|
21463 |
+
"loss": 5.5671,
|
21464 |
+
"step": 12260
|
21465 |
+
},
|
21466 |
+
{
|
21467 |
+
"epoch": 0.61,
|
21468 |
+
"grad_norm": 2.1367435455322266,
|
21469 |
+
"learning_rate": 1.970700133405801e-05,
|
21470 |
+
"loss": 5.4724,
|
21471 |
+
"step": 12264
|
21472 |
+
},
|
21473 |
+
{
|
21474 |
+
"epoch": 0.61,
|
21475 |
+
"grad_norm": 1.9615224599838257,
|
21476 |
+
"learning_rate": 1.969711942289639e-05,
|
21477 |
+
"loss": 5.4797,
|
21478 |
+
"step": 12268
|
21479 |
+
},
|
21480 |
+
{
|
21481 |
+
"epoch": 0.61,
|
21482 |
+
"grad_norm": 2.0672008991241455,
|
21483 |
+
"learning_rate": 1.968723751173477e-05,
|
21484 |
+
"loss": 5.4903,
|
21485 |
+
"step": 12272
|
21486 |
+
},
|
21487 |
+
{
|
21488 |
+
"epoch": 0.61,
|
21489 |
+
"grad_norm": 1.8822599649429321,
|
21490 |
+
"learning_rate": 1.9677355600573152e-05,
|
21491 |
+
"loss": 5.4196,
|
21492 |
+
"step": 12276
|
21493 |
+
},
|
21494 |
+
{
|
21495 |
+
"epoch": 0.61,
|
21496 |
+
"grad_norm": 1.9987417459487915,
|
21497 |
+
"learning_rate": 1.9667473689411534e-05,
|
21498 |
+
"loss": 5.5575,
|
21499 |
+
"step": 12280
|
21500 |
+
},
|
21501 |
+
{
|
21502 |
+
"epoch": 0.61,
|
21503 |
+
"grad_norm": 1.8995615243911743,
|
21504 |
+
"learning_rate": 1.9657591778249916e-05,
|
21505 |
+
"loss": 5.5343,
|
21506 |
+
"step": 12284
|
21507 |
+
},
|
21508 |
+
{
|
21509 |
+
"epoch": 0.61,
|
21510 |
+
"grad_norm": 1.9003994464874268,
|
21511 |
+
"learning_rate": 1.9647709867088295e-05,
|
21512 |
+
"loss": 5.5341,
|
21513 |
+
"step": 12288
|
21514 |
+
},
|
21515 |
+
{
|
21516 |
+
"epoch": 0.61,
|
21517 |
+
"grad_norm": 1.9870491027832031,
|
21518 |
+
"learning_rate": 1.9637827955926677e-05,
|
21519 |
+
"loss": 5.5928,
|
21520 |
+
"step": 12292
|
21521 |
+
},
|
21522 |
+
{
|
21523 |
+
"epoch": 0.61,
|
21524 |
+
"grad_norm": 2.072319269180298,
|
21525 |
+
"learning_rate": 1.962794604476506e-05,
|
21526 |
+
"loss": 5.4821,
|
21527 |
+
"step": 12296
|
21528 |
+
},
|
21529 |
+
{
|
21530 |
+
"epoch": 0.61,
|
21531 |
+
"grad_norm": 1.9005805253982544,
|
21532 |
+
"learning_rate": 1.9618064133603438e-05,
|
21533 |
+
"loss": 5.5296,
|
21534 |
+
"step": 12300
|
21535 |
+
},
|
21536 |
+
{
|
21537 |
+
"epoch": 0.61,
|
21538 |
+
"grad_norm": 1.8590092658996582,
|
21539 |
+
"learning_rate": 1.960818222244182e-05,
|
21540 |
+
"loss": 5.568,
|
21541 |
+
"step": 12304
|
21542 |
+
},
|
21543 |
+
{
|
21544 |
+
"epoch": 0.61,
|
21545 |
+
"grad_norm": 2.030334949493408,
|
21546 |
+
"learning_rate": 1.9598300311280202e-05,
|
21547 |
+
"loss": 5.4239,
|
21548 |
+
"step": 12308
|
21549 |
+
},
|
21550 |
+
{
|
21551 |
+
"epoch": 0.61,
|
21552 |
+
"grad_norm": 1.9241563081741333,
|
21553 |
+
"learning_rate": 1.9588418400118584e-05,
|
21554 |
+
"loss": 5.5618,
|
21555 |
+
"step": 12312
|
21556 |
+
},
|
21557 |
+
{
|
21558 |
+
"epoch": 0.61,
|
21559 |
+
"grad_norm": 1.9295763969421387,
|
21560 |
+
"learning_rate": 1.9578536488956963e-05,
|
21561 |
+
"loss": 5.6011,
|
21562 |
+
"step": 12316
|
21563 |
+
},
|
21564 |
+
{
|
21565 |
+
"epoch": 0.61,
|
21566 |
+
"grad_norm": 1.9351352453231812,
|
21567 |
+
"learning_rate": 1.9568654577795348e-05,
|
21568 |
+
"loss": 5.5549,
|
21569 |
+
"step": 12320
|
21570 |
+
},
|
21571 |
+
{
|
21572 |
+
"epoch": 0.61,
|
21573 |
+
"grad_norm": 2.0557003021240234,
|
21574 |
+
"learning_rate": 1.955877266663373e-05,
|
21575 |
+
"loss": 5.531,
|
21576 |
+
"step": 12324
|
21577 |
+
},
|
21578 |
+
{
|
21579 |
+
"epoch": 0.61,
|
21580 |
+
"grad_norm": 2.0774848461151123,
|
21581 |
+
"learning_rate": 1.954889075547211e-05,
|
21582 |
+
"loss": 5.5031,
|
21583 |
+
"step": 12328
|
21584 |
+
},
|
21585 |
+
{
|
21586 |
+
"epoch": 0.61,
|
21587 |
+
"grad_norm": 1.9636493921279907,
|
21588 |
+
"learning_rate": 1.953900884431049e-05,
|
21589 |
+
"loss": 5.5733,
|
21590 |
+
"step": 12332
|
21591 |
+
},
|
21592 |
+
{
|
21593 |
+
"epoch": 0.61,
|
21594 |
+
"grad_norm": 2.006387710571289,
|
21595 |
+
"learning_rate": 1.9529126933148873e-05,
|
21596 |
+
"loss": 5.5184,
|
21597 |
+
"step": 12336
|
21598 |
+
},
|
21599 |
+
{
|
21600 |
+
"epoch": 0.61,
|
21601 |
+
"grad_norm": 1.8912975788116455,
|
21602 |
+
"learning_rate": 1.9519245021987255e-05,
|
21603 |
+
"loss": 5.4529,
|
21604 |
+
"step": 12340
|
21605 |
+
},
|
21606 |
+
{
|
21607 |
+
"epoch": 0.61,
|
21608 |
+
"grad_norm": 2.028090715408325,
|
21609 |
+
"learning_rate": 1.9509363110825634e-05,
|
21610 |
+
"loss": 5.4242,
|
21611 |
+
"step": 12344
|
21612 |
+
},
|
21613 |
+
{
|
21614 |
+
"epoch": 0.61,
|
21615 |
+
"grad_norm": 2.042482376098633,
|
21616 |
+
"learning_rate": 1.9499481199664016e-05,
|
21617 |
+
"loss": 5.5455,
|
21618 |
+
"step": 12348
|
21619 |
+
},
|
21620 |
+
{
|
21621 |
+
"epoch": 0.61,
|
21622 |
+
"grad_norm": 2.1823337078094482,
|
21623 |
+
"learning_rate": 1.9489599288502398e-05,
|
21624 |
+
"loss": 5.4165,
|
21625 |
+
"step": 12352
|
21626 |
+
},
|
21627 |
+
{
|
21628 |
+
"epoch": 0.61,
|
21629 |
+
"grad_norm": 1.8578866720199585,
|
21630 |
+
"learning_rate": 1.947971737734078e-05,
|
21631 |
+
"loss": 5.4873,
|
21632 |
+
"step": 12356
|
21633 |
+
},
|
21634 |
+
{
|
21635 |
+
"epoch": 0.61,
|
21636 |
+
"grad_norm": 1.964311957359314,
|
21637 |
+
"learning_rate": 1.946983546617916e-05,
|
21638 |
+
"loss": 5.5457,
|
21639 |
+
"step": 12360
|
21640 |
+
},
|
21641 |
+
{
|
21642 |
+
"epoch": 0.61,
|
21643 |
+
"grad_norm": 2.030364513397217,
|
21644 |
+
"learning_rate": 1.945995355501754e-05,
|
21645 |
+
"loss": 5.474,
|
21646 |
+
"step": 12364
|
21647 |
+
},
|
21648 |
+
{
|
21649 |
+
"epoch": 0.61,
|
21650 |
+
"grad_norm": 2.0638349056243896,
|
21651 |
+
"learning_rate": 1.9450071643855923e-05,
|
21652 |
+
"loss": 5.5748,
|
21653 |
+
"step": 12368
|
21654 |
+
},
|
21655 |
+
{
|
21656 |
+
"epoch": 0.61,
|
21657 |
+
"grad_norm": 2.002610445022583,
|
21658 |
+
"learning_rate": 1.94401897326943e-05,
|
21659 |
+
"loss": 5.546,
|
21660 |
+
"step": 12372
|
21661 |
+
},
|
21662 |
+
{
|
21663 |
+
"epoch": 0.61,
|
21664 |
+
"grad_norm": 1.8753662109375,
|
21665 |
+
"learning_rate": 1.9430307821532687e-05,
|
21666 |
+
"loss": 5.5053,
|
21667 |
+
"step": 12376
|
21668 |
+
},
|
21669 |
+
{
|
21670 |
+
"epoch": 0.61,
|
21671 |
+
"grad_norm": 2.1950864791870117,
|
21672 |
+
"learning_rate": 1.942042591037107e-05,
|
21673 |
+
"loss": 5.4716,
|
21674 |
+
"step": 12380
|
21675 |
+
},
|
21676 |
+
{
|
21677 |
+
"epoch": 0.61,
|
21678 |
+
"grad_norm": 2.067065715789795,
|
21679 |
+
"learning_rate": 1.9410543999209447e-05,
|
21680 |
+
"loss": 5.5028,
|
21681 |
+
"step": 12384
|
21682 |
+
},
|
21683 |
+
{
|
21684 |
+
"epoch": 0.61,
|
21685 |
+
"grad_norm": 2.0129029750823975,
|
21686 |
+
"learning_rate": 1.940066208804783e-05,
|
21687 |
+
"loss": 5.522,
|
21688 |
+
"step": 12388
|
21689 |
+
},
|
21690 |
+
{
|
21691 |
+
"epoch": 0.61,
|
21692 |
+
"grad_norm": 1.5361533164978027,
|
21693 |
+
"learning_rate": 1.939078017688621e-05,
|
21694 |
+
"loss": 5.4896,
|
21695 |
+
"step": 12392
|
21696 |
+
},
|
21697 |
+
{
|
21698 |
+
"epoch": 0.61,
|
21699 |
+
"grad_norm": 1.945295810699463,
|
21700 |
+
"learning_rate": 1.9380898265724594e-05,
|
21701 |
+
"loss": 5.5306,
|
21702 |
+
"step": 12396
|
21703 |
+
},
|
21704 |
+
{
|
21705 |
+
"epoch": 0.61,
|
21706 |
+
"grad_norm": 1.7914199829101562,
|
21707 |
+
"learning_rate": 1.9371016354562972e-05,
|
21708 |
+
"loss": 5.5514,
|
21709 |
+
"step": 12400
|
21710 |
+
},
|
21711 |
+
{
|
21712 |
+
"epoch": 0.61,
|
21713 |
+
"grad_norm": 2.061509609222412,
|
21714 |
+
"learning_rate": 1.9361134443401354e-05,
|
21715 |
+
"loss": 5.5803,
|
21716 |
+
"step": 12404
|
21717 |
+
},
|
21718 |
+
{
|
21719 |
+
"epoch": 0.61,
|
21720 |
+
"grad_norm": 1.9697644710540771,
|
21721 |
+
"learning_rate": 1.9351252532239736e-05,
|
21722 |
+
"loss": 5.4989,
|
21723 |
+
"step": 12408
|
21724 |
+
},
|
21725 |
+
{
|
21726 |
+
"epoch": 0.61,
|
21727 |
+
"grad_norm": 1.801199197769165,
|
21728 |
+
"learning_rate": 1.934137062107812e-05,
|
21729 |
+
"loss": 5.6458,
|
21730 |
+
"step": 12412
|
21731 |
+
},
|
21732 |
+
{
|
21733 |
+
"epoch": 0.61,
|
21734 |
+
"grad_norm": 1.985129475593567,
|
21735 |
+
"learning_rate": 1.9331488709916497e-05,
|
21736 |
+
"loss": 5.4291,
|
21737 |
+
"step": 12416
|
21738 |
+
},
|
21739 |
+
{
|
21740 |
+
"epoch": 0.61,
|
21741 |
+
"grad_norm": 1.8330814838409424,
|
21742 |
+
"learning_rate": 1.932160679875488e-05,
|
21743 |
+
"loss": 5.3954,
|
21744 |
+
"step": 12420
|
21745 |
+
},
|
21746 |
+
{
|
21747 |
+
"epoch": 0.61,
|
21748 |
+
"grad_norm": 2.2082693576812744,
|
21749 |
+
"learning_rate": 1.931172488759326e-05,
|
21750 |
+
"loss": 5.5882,
|
21751 |
+
"step": 12424
|
21752 |
+
},
|
21753 |
+
{
|
21754 |
+
"epoch": 0.61,
|
21755 |
+
"grad_norm": 1.6590445041656494,
|
21756 |
+
"learning_rate": 1.930184297643164e-05,
|
21757 |
+
"loss": 5.4728,
|
21758 |
+
"step": 12428
|
21759 |
+
},
|
21760 |
+
{
|
21761 |
+
"epoch": 0.61,
|
21762 |
+
"grad_norm": 2.0365848541259766,
|
21763 |
+
"learning_rate": 1.9291961065270022e-05,
|
21764 |
+
"loss": 5.5032,
|
21765 |
+
"step": 12432
|
21766 |
+
},
|
21767 |
+
{
|
21768 |
+
"epoch": 0.61,
|
21769 |
+
"grad_norm": 1.9523799419403076,
|
21770 |
+
"learning_rate": 1.9282079154108407e-05,
|
21771 |
+
"loss": 5.4974,
|
21772 |
+
"step": 12436
|
21773 |
+
},
|
21774 |
+
{
|
21775 |
+
"epoch": 0.61,
|
21776 |
+
"grad_norm": 1.9144923686981201,
|
21777 |
+
"learning_rate": 1.9272197242946786e-05,
|
21778 |
+
"loss": 5.5348,
|
21779 |
+
"step": 12440
|
21780 |
+
},
|
21781 |
+
{
|
21782 |
+
"epoch": 0.61,
|
21783 |
+
"grad_norm": 1.7671104669570923,
|
21784 |
+
"learning_rate": 1.9262315331785168e-05,
|
21785 |
+
"loss": 5.5292,
|
21786 |
+
"step": 12444
|
21787 |
+
},
|
21788 |
+
{
|
21789 |
+
"epoch": 0.62,
|
21790 |
+
"grad_norm": 2.0515549182891846,
|
21791 |
+
"learning_rate": 1.925243342062355e-05,
|
21792 |
+
"loss": 5.4845,
|
21793 |
+
"step": 12448
|
21794 |
+
},
|
21795 |
+
{
|
21796 |
+
"epoch": 0.62,
|
21797 |
+
"grad_norm": 1.9381266832351685,
|
21798 |
+
"learning_rate": 1.9242551509461932e-05,
|
21799 |
+
"loss": 5.5455,
|
21800 |
+
"step": 12452
|
21801 |
+
},
|
21802 |
+
{
|
21803 |
+
"epoch": 0.62,
|
21804 |
+
"grad_norm": 2.017817497253418,
|
21805 |
+
"learning_rate": 1.923266959830031e-05,
|
21806 |
+
"loss": 5.3844,
|
21807 |
+
"step": 12456
|
21808 |
+
},
|
21809 |
+
{
|
21810 |
+
"epoch": 0.62,
|
21811 |
+
"grad_norm": 1.8400537967681885,
|
21812 |
+
"learning_rate": 1.9222787687138693e-05,
|
21813 |
+
"loss": 5.4878,
|
21814 |
+
"step": 12460
|
21815 |
+
},
|
21816 |
+
{
|
21817 |
+
"epoch": 0.62,
|
21818 |
+
"grad_norm": 2.263641119003296,
|
21819 |
+
"learning_rate": 1.9212905775977075e-05,
|
21820 |
+
"loss": 5.5841,
|
21821 |
+
"step": 12464
|
21822 |
+
},
|
21823 |
+
{
|
21824 |
+
"epoch": 0.62,
|
21825 |
+
"grad_norm": 1.8722437620162964,
|
21826 |
+
"learning_rate": 1.9203023864815457e-05,
|
21827 |
+
"loss": 5.4782,
|
21828 |
+
"step": 12468
|
21829 |
+
},
|
21830 |
+
{
|
21831 |
+
"epoch": 0.62,
|
21832 |
+
"grad_norm": 2.020585060119629,
|
21833 |
+
"learning_rate": 1.9193141953653836e-05,
|
21834 |
+
"loss": 5.4668,
|
21835 |
+
"step": 12472
|
21836 |
+
},
|
21837 |
+
{
|
21838 |
+
"epoch": 0.62,
|
21839 |
+
"grad_norm": 1.7479088306427002,
|
21840 |
+
"learning_rate": 1.9183260042492218e-05,
|
21841 |
+
"loss": 5.5514,
|
21842 |
+
"step": 12476
|
21843 |
+
},
|
21844 |
+
{
|
21845 |
+
"epoch": 0.62,
|
21846 |
+
"grad_norm": 1.9156551361083984,
|
21847 |
+
"learning_rate": 1.91733781313306e-05,
|
21848 |
+
"loss": 5.4994,
|
21849 |
+
"step": 12480
|
21850 |
+
},
|
21851 |
+
{
|
21852 |
+
"epoch": 0.62,
|
21853 |
+
"grad_norm": 1.882408618927002,
|
21854 |
+
"learning_rate": 1.9163496220168982e-05,
|
21855 |
+
"loss": 5.4834,
|
21856 |
+
"step": 12484
|
21857 |
+
},
|
21858 |
+
{
|
21859 |
+
"epoch": 0.62,
|
21860 |
+
"grad_norm": 2.083282232284546,
|
21861 |
+
"learning_rate": 1.915361430900736e-05,
|
21862 |
+
"loss": 5.5116,
|
21863 |
+
"step": 12488
|
21864 |
+
},
|
21865 |
+
{
|
21866 |
+
"epoch": 0.62,
|
21867 |
+
"grad_norm": 1.9320555925369263,
|
21868 |
+
"learning_rate": 1.9143732397845746e-05,
|
21869 |
+
"loss": 5.5378,
|
21870 |
+
"step": 12492
|
21871 |
+
},
|
21872 |
+
{
|
21873 |
+
"epoch": 0.62,
|
21874 |
+
"grad_norm": 2.215940475463867,
|
21875 |
+
"learning_rate": 1.9133850486684128e-05,
|
21876 |
+
"loss": 5.4937,
|
21877 |
+
"step": 12496
|
21878 |
+
},
|
21879 |
+
{
|
21880 |
+
"epoch": 0.62,
|
21881 |
+
"grad_norm": 2.308119297027588,
|
21882 |
+
"learning_rate": 1.9123968575522507e-05,
|
21883 |
+
"loss": 5.555,
|
21884 |
+
"step": 12500
|
21885 |
+
},
|
21886 |
+
{
|
21887 |
+
"epoch": 0.62,
|
21888 |
+
"grad_norm": 2.178675413131714,
|
21889 |
+
"learning_rate": 1.911408666436089e-05,
|
21890 |
+
"loss": 5.487,
|
21891 |
+
"step": 12504
|
21892 |
+
},
|
21893 |
+
{
|
21894 |
+
"epoch": 0.62,
|
21895 |
+
"grad_norm": 2.004458427429199,
|
21896 |
+
"learning_rate": 1.910420475319927e-05,
|
21897 |
+
"loss": 5.5416,
|
21898 |
+
"step": 12508
|
21899 |
+
},
|
21900 |
+
{
|
21901 |
+
"epoch": 0.62,
|
21902 |
+
"grad_norm": 2.0435168743133545,
|
21903 |
+
"learning_rate": 1.909432284203765e-05,
|
21904 |
+
"loss": 5.4122,
|
21905 |
+
"step": 12512
|
21906 |
+
},
|
21907 |
+
{
|
21908 |
+
"epoch": 0.62,
|
21909 |
+
"grad_norm": 2.0281968116760254,
|
21910 |
+
"learning_rate": 1.9084440930876032e-05,
|
21911 |
+
"loss": 5.5032,
|
21912 |
+
"step": 12516
|
21913 |
+
},
|
21914 |
+
{
|
21915 |
+
"epoch": 0.62,
|
21916 |
+
"grad_norm": 1.9901241064071655,
|
21917 |
+
"learning_rate": 1.9074559019714414e-05,
|
21918 |
+
"loss": 5.5346,
|
21919 |
+
"step": 12520
|
21920 |
+
},
|
21921 |
+
{
|
21922 |
+
"epoch": 0.62,
|
21923 |
+
"grad_norm": 2.0608649253845215,
|
21924 |
+
"learning_rate": 1.9064677108552796e-05,
|
21925 |
+
"loss": 5.5318,
|
21926 |
+
"step": 12524
|
21927 |
+
},
|
21928 |
+
{
|
21929 |
+
"epoch": 0.62,
|
21930 |
+
"grad_norm": 2.1132655143737793,
|
21931 |
+
"learning_rate": 1.9054795197391175e-05,
|
21932 |
+
"loss": 5.5227,
|
21933 |
+
"step": 12528
|
21934 |
+
},
|
21935 |
+
{
|
21936 |
+
"epoch": 0.62,
|
21937 |
+
"grad_norm": 2.1006295680999756,
|
21938 |
+
"learning_rate": 1.9044913286229557e-05,
|
21939 |
+
"loss": 5.4704,
|
21940 |
+
"step": 12532
|
21941 |
+
},
|
21942 |
+
{
|
21943 |
+
"epoch": 0.62,
|
21944 |
+
"grad_norm": 1.8386894464492798,
|
21945 |
+
"learning_rate": 1.903503137506794e-05,
|
21946 |
+
"loss": 5.4159,
|
21947 |
+
"step": 12536
|
21948 |
+
},
|
21949 |
+
{
|
21950 |
+
"epoch": 0.62,
|
21951 |
+
"grad_norm": 1.9647696018218994,
|
21952 |
+
"learning_rate": 1.902514946390632e-05,
|
21953 |
+
"loss": 5.6805,
|
21954 |
+
"step": 12540
|
21955 |
+
},
|
21956 |
+
{
|
21957 |
+
"epoch": 0.62,
|
21958 |
+
"grad_norm": 2.1188244819641113,
|
21959 |
+
"learning_rate": 1.90152675527447e-05,
|
21960 |
+
"loss": 5.4004,
|
21961 |
+
"step": 12544
|
21962 |
+
},
|
21963 |
+
{
|
21964 |
+
"epoch": 0.62,
|
21965 |
+
"grad_norm": 2.0802998542785645,
|
21966 |
+
"learning_rate": 1.9005385641583085e-05,
|
21967 |
+
"loss": 5.4416,
|
21968 |
+
"step": 12548
|
21969 |
+
},
|
21970 |
+
{
|
21971 |
+
"epoch": 0.62,
|
21972 |
+
"grad_norm": 1.834084153175354,
|
21973 |
+
"learning_rate": 1.8995503730421467e-05,
|
21974 |
+
"loss": 5.6306,
|
21975 |
+
"step": 12552
|
21976 |
+
},
|
21977 |
+
{
|
21978 |
+
"epoch": 0.62,
|
21979 |
+
"grad_norm": 1.8112331628799438,
|
21980 |
+
"learning_rate": 1.8985621819259846e-05,
|
21981 |
+
"loss": 5.5333,
|
21982 |
+
"step": 12556
|
21983 |
+
},
|
21984 |
+
{
|
21985 |
+
"epoch": 0.62,
|
21986 |
+
"grad_norm": 1.899707317352295,
|
21987 |
+
"learning_rate": 1.8975739908098228e-05,
|
21988 |
+
"loss": 5.4976,
|
21989 |
+
"step": 12560
|
21990 |
+
},
|
21991 |
+
{
|
21992 |
+
"epoch": 0.62,
|
21993 |
+
"grad_norm": 2.298161506652832,
|
21994 |
+
"learning_rate": 1.896585799693661e-05,
|
21995 |
+
"loss": 5.594,
|
21996 |
+
"step": 12564
|
21997 |
+
},
|
21998 |
+
{
|
21999 |
+
"epoch": 0.62,
|
22000 |
+
"grad_norm": 2.173597574234009,
|
22001 |
+
"learning_rate": 1.8955976085774992e-05,
|
22002 |
+
"loss": 5.572,
|
22003 |
+
"step": 12568
|
22004 |
+
},
|
22005 |
+
{
|
22006 |
+
"epoch": 0.62,
|
22007 |
+
"grad_norm": 1.8348718881607056,
|
22008 |
+
"learning_rate": 1.894609417461337e-05,
|
22009 |
+
"loss": 5.4743,
|
22010 |
+
"step": 12572
|
22011 |
+
},
|
22012 |
+
{
|
22013 |
+
"epoch": 0.62,
|
22014 |
+
"grad_norm": 1.7967536449432373,
|
22015 |
+
"learning_rate": 1.8936212263451753e-05,
|
22016 |
+
"loss": 5.5196,
|
22017 |
+
"step": 12576
|
22018 |
+
},
|
22019 |
+
{
|
22020 |
+
"epoch": 0.62,
|
22021 |
+
"grad_norm": 1.9703553915023804,
|
22022 |
+
"learning_rate": 1.8926330352290135e-05,
|
22023 |
+
"loss": 5.532,
|
22024 |
+
"step": 12580
|
22025 |
+
},
|
22026 |
+
{
|
22027 |
+
"epoch": 0.62,
|
22028 |
+
"grad_norm": 1.7581743001937866,
|
22029 |
+
"learning_rate": 1.8916448441128513e-05,
|
22030 |
+
"loss": 5.5114,
|
22031 |
+
"step": 12584
|
22032 |
+
},
|
22033 |
+
{
|
22034 |
+
"epoch": 0.62,
|
22035 |
+
"grad_norm": 2.0711758136749268,
|
22036 |
+
"learning_rate": 1.8906566529966895e-05,
|
22037 |
+
"loss": 5.4094,
|
22038 |
+
"step": 12588
|
22039 |
+
},
|
22040 |
+
{
|
22041 |
+
"epoch": 0.62,
|
22042 |
+
"grad_norm": 1.7996923923492432,
|
22043 |
+
"learning_rate": 1.8896684618805277e-05,
|
22044 |
+
"loss": 5.5836,
|
22045 |
+
"step": 12592
|
22046 |
+
},
|
22047 |
+
{
|
22048 |
+
"epoch": 0.62,
|
22049 |
+
"grad_norm": 1.97800612449646,
|
22050 |
+
"learning_rate": 1.888680270764366e-05,
|
22051 |
+
"loss": 5.4898,
|
22052 |
+
"step": 12596
|
22053 |
+
},
|
22054 |
+
{
|
22055 |
+
"epoch": 0.62,
|
22056 |
+
"grad_norm": 1.7940218448638916,
|
22057 |
+
"learning_rate": 1.8876920796482038e-05,
|
22058 |
+
"loss": 5.6005,
|
22059 |
+
"step": 12600
|
22060 |
+
},
|
22061 |
+
{
|
22062 |
+
"epoch": 0.62,
|
22063 |
+
"grad_norm": 1.8298521041870117,
|
22064 |
+
"learning_rate": 1.886703888532042e-05,
|
22065 |
+
"loss": 5.55,
|
22066 |
+
"step": 12604
|
22067 |
+
},
|
22068 |
+
{
|
22069 |
+
"epoch": 0.62,
|
22070 |
+
"grad_norm": 2.040109872817993,
|
22071 |
+
"learning_rate": 1.8857156974158806e-05,
|
22072 |
+
"loss": 5.5685,
|
22073 |
+
"step": 12608
|
22074 |
+
},
|
22075 |
+
{
|
22076 |
+
"epoch": 0.62,
|
22077 |
+
"grad_norm": 1.8531662225723267,
|
22078 |
+
"learning_rate": 1.8847275062997184e-05,
|
22079 |
+
"loss": 5.5343,
|
22080 |
+
"step": 12612
|
22081 |
+
},
|
22082 |
+
{
|
22083 |
+
"epoch": 0.62,
|
22084 |
+
"grad_norm": 2.1842970848083496,
|
22085 |
+
"learning_rate": 1.8837393151835566e-05,
|
22086 |
+
"loss": 5.4704,
|
22087 |
+
"step": 12616
|
22088 |
+
},
|
22089 |
+
{
|
22090 |
+
"epoch": 0.62,
|
22091 |
+
"grad_norm": 1.876779556274414,
|
22092 |
+
"learning_rate": 1.882751124067395e-05,
|
22093 |
+
"loss": 5.5272,
|
22094 |
+
"step": 12620
|
22095 |
+
},
|
22096 |
+
{
|
22097 |
+
"epoch": 0.62,
|
22098 |
+
"grad_norm": 1.9100033044815063,
|
22099 |
+
"learning_rate": 1.881762932951233e-05,
|
22100 |
+
"loss": 5.4669,
|
22101 |
+
"step": 12624
|
22102 |
+
},
|
22103 |
+
{
|
22104 |
+
"epoch": 0.62,
|
22105 |
+
"grad_norm": 2.233772039413452,
|
22106 |
+
"learning_rate": 1.880774741835071e-05,
|
22107 |
+
"loss": 5.5786,
|
22108 |
+
"step": 12628
|
22109 |
+
},
|
22110 |
+
{
|
22111 |
+
"epoch": 0.62,
|
22112 |
+
"grad_norm": 2.021141767501831,
|
22113 |
+
"learning_rate": 1.879786550718909e-05,
|
22114 |
+
"loss": 5.5469,
|
22115 |
+
"step": 12632
|
22116 |
+
},
|
22117 |
+
{
|
22118 |
+
"epoch": 0.62,
|
22119 |
+
"grad_norm": 1.8748712539672852,
|
22120 |
+
"learning_rate": 1.8787983596027473e-05,
|
22121 |
+
"loss": 5.4186,
|
22122 |
+
"step": 12636
|
22123 |
+
},
|
22124 |
+
{
|
22125 |
+
"epoch": 0.62,
|
22126 |
+
"grad_norm": 2.0556745529174805,
|
22127 |
+
"learning_rate": 1.8778101684865855e-05,
|
22128 |
+
"loss": 5.6404,
|
22129 |
+
"step": 12640
|
22130 |
+
},
|
22131 |
+
{
|
22132 |
+
"epoch": 0.62,
|
22133 |
+
"grad_norm": 2.089085102081299,
|
22134 |
+
"learning_rate": 1.8768219773704234e-05,
|
22135 |
+
"loss": 5.6056,
|
22136 |
+
"step": 12644
|
22137 |
+
},
|
22138 |
+
{
|
22139 |
+
"epoch": 0.62,
|
22140 |
+
"grad_norm": 1.8434518575668335,
|
22141 |
+
"learning_rate": 1.8758337862542616e-05,
|
22142 |
+
"loss": 5.6488,
|
22143 |
+
"step": 12648
|
22144 |
+
},
|
22145 |
+
{
|
22146 |
+
"epoch": 0.63,
|
22147 |
+
"grad_norm": 2.003434658050537,
|
22148 |
+
"learning_rate": 1.8748455951380998e-05,
|
22149 |
+
"loss": 5.5057,
|
22150 |
+
"step": 12652
|
22151 |
+
},
|
22152 |
+
{
|
22153 |
+
"epoch": 0.63,
|
22154 |
+
"grad_norm": 2.292663335800171,
|
22155 |
+
"learning_rate": 1.8738574040219377e-05,
|
22156 |
+
"loss": 5.5022,
|
22157 |
+
"step": 12656
|
22158 |
+
},
|
22159 |
+
{
|
22160 |
+
"epoch": 0.63,
|
22161 |
+
"grad_norm": 1.9476063251495361,
|
22162 |
+
"learning_rate": 1.872869212905776e-05,
|
22163 |
+
"loss": 5.5844,
|
22164 |
+
"step": 12660
|
22165 |
+
},
|
22166 |
+
{
|
22167 |
+
"epoch": 0.63,
|
22168 |
+
"grad_norm": 2.138032913208008,
|
22169 |
+
"learning_rate": 1.8718810217896144e-05,
|
22170 |
+
"loss": 5.4383,
|
22171 |
+
"step": 12664
|
22172 |
+
},
|
22173 |
+
{
|
22174 |
+
"epoch": 0.63,
|
22175 |
+
"grad_norm": 1.8477308750152588,
|
22176 |
+
"learning_rate": 1.8708928306734523e-05,
|
22177 |
+
"loss": 5.5665,
|
22178 |
+
"step": 12668
|
22179 |
+
},
|
22180 |
+
{
|
22181 |
+
"epoch": 0.63,
|
22182 |
+
"grad_norm": 1.9181241989135742,
|
22183 |
+
"learning_rate": 1.8699046395572905e-05,
|
22184 |
+
"loss": 5.5661,
|
22185 |
+
"step": 12672
|
22186 |
+
},
|
22187 |
+
{
|
22188 |
+
"epoch": 0.63,
|
22189 |
+
"grad_norm": 1.928312063217163,
|
22190 |
+
"learning_rate": 1.8689164484411287e-05,
|
22191 |
+
"loss": 5.5366,
|
22192 |
+
"step": 12676
|
22193 |
+
},
|
22194 |
+
{
|
22195 |
+
"epoch": 0.63,
|
22196 |
+
"grad_norm": 2.210855484008789,
|
22197 |
+
"learning_rate": 1.867928257324967e-05,
|
22198 |
+
"loss": 5.5136,
|
22199 |
+
"step": 12680
|
22200 |
+
},
|
22201 |
+
{
|
22202 |
+
"epoch": 0.63,
|
22203 |
+
"grad_norm": 2.030755043029785,
|
22204 |
+
"learning_rate": 1.8669400662088048e-05,
|
22205 |
+
"loss": 5.4813,
|
22206 |
+
"step": 12684
|
22207 |
+
},
|
22208 |
+
{
|
22209 |
+
"epoch": 0.63,
|
22210 |
+
"grad_norm": 2.1717166900634766,
|
22211 |
+
"learning_rate": 1.865951875092643e-05,
|
22212 |
+
"loss": 5.579,
|
22213 |
+
"step": 12688
|
22214 |
+
},
|
22215 |
+
{
|
22216 |
+
"epoch": 0.63,
|
22217 |
+
"grad_norm": 2.068718671798706,
|
22218 |
+
"learning_rate": 1.8649636839764812e-05,
|
22219 |
+
"loss": 5.4381,
|
22220 |
+
"step": 12692
|
22221 |
+
},
|
22222 |
+
{
|
22223 |
+
"epoch": 0.63,
|
22224 |
+
"grad_norm": 2.0134997367858887,
|
22225 |
+
"learning_rate": 1.8639754928603194e-05,
|
22226 |
+
"loss": 5.6513,
|
22227 |
+
"step": 12696
|
22228 |
+
},
|
22229 |
+
{
|
22230 |
+
"epoch": 0.63,
|
22231 |
+
"grad_norm": 2.061288356781006,
|
22232 |
+
"learning_rate": 1.8629873017441573e-05,
|
22233 |
+
"loss": 5.4894,
|
22234 |
+
"step": 12700
|
22235 |
+
},
|
22236 |
+
{
|
22237 |
+
"epoch": 0.63,
|
22238 |
+
"grad_norm": 2.0297648906707764,
|
22239 |
+
"learning_rate": 1.8619991106279955e-05,
|
22240 |
+
"loss": 5.5485,
|
22241 |
+
"step": 12704
|
22242 |
+
},
|
22243 |
+
{
|
22244 |
+
"epoch": 0.63,
|
22245 |
+
"grad_norm": 2.0792784690856934,
|
22246 |
+
"learning_rate": 1.8610109195118337e-05,
|
22247 |
+
"loss": 5.5622,
|
22248 |
+
"step": 12708
|
22249 |
+
},
|
22250 |
+
{
|
22251 |
+
"epoch": 0.63,
|
22252 |
+
"grad_norm": 2.003371238708496,
|
22253 |
+
"learning_rate": 1.8600227283956715e-05,
|
22254 |
+
"loss": 5.5616,
|
22255 |
+
"step": 12712
|
22256 |
+
},
|
22257 |
+
{
|
22258 |
+
"epoch": 0.63,
|
22259 |
+
"grad_norm": 2.0913472175598145,
|
22260 |
+
"learning_rate": 1.8590345372795098e-05,
|
22261 |
+
"loss": 5.6751,
|
22262 |
+
"step": 12716
|
22263 |
+
},
|
22264 |
+
{
|
22265 |
+
"epoch": 0.63,
|
22266 |
+
"grad_norm": 1.9094600677490234,
|
22267 |
+
"learning_rate": 1.8580463461633483e-05,
|
22268 |
+
"loss": 5.5047,
|
22269 |
+
"step": 12720
|
22270 |
+
},
|
22271 |
+
{
|
22272 |
+
"epoch": 0.63,
|
22273 |
+
"grad_norm": 2.1928985118865967,
|
22274 |
+
"learning_rate": 1.857058155047186e-05,
|
22275 |
+
"loss": 5.6044,
|
22276 |
+
"step": 12724
|
22277 |
+
},
|
22278 |
+
{
|
22279 |
+
"epoch": 0.63,
|
22280 |
+
"grad_norm": 1.9102288484573364,
|
22281 |
+
"learning_rate": 1.8560699639310244e-05,
|
22282 |
+
"loss": 5.4843,
|
22283 |
+
"step": 12728
|
22284 |
+
},
|
22285 |
+
{
|
22286 |
+
"epoch": 0.63,
|
22287 |
+
"grad_norm": 1.908156156539917,
|
22288 |
+
"learning_rate": 1.8550817728148626e-05,
|
22289 |
+
"loss": 5.5217,
|
22290 |
+
"step": 12732
|
22291 |
+
},
|
22292 |
+
{
|
22293 |
+
"epoch": 0.63,
|
22294 |
+
"grad_norm": 1.8940017223358154,
|
22295 |
+
"learning_rate": 1.8540935816987008e-05,
|
22296 |
+
"loss": 5.5211,
|
22297 |
+
"step": 12736
|
22298 |
+
},
|
22299 |
+
{
|
22300 |
+
"epoch": 0.63,
|
22301 |
+
"grad_norm": 1.6733046770095825,
|
22302 |
+
"learning_rate": 1.8531053905825386e-05,
|
22303 |
+
"loss": 5.6507,
|
22304 |
+
"step": 12740
|
22305 |
+
},
|
22306 |
+
{
|
22307 |
+
"epoch": 0.63,
|
22308 |
+
"grad_norm": 1.9494881629943848,
|
22309 |
+
"learning_rate": 1.852117199466377e-05,
|
22310 |
+
"loss": 5.6028,
|
22311 |
+
"step": 12744
|
22312 |
+
},
|
22313 |
+
{
|
22314 |
+
"epoch": 0.63,
|
22315 |
+
"grad_norm": 2.361642360687256,
|
22316 |
+
"learning_rate": 1.851129008350215e-05,
|
22317 |
+
"loss": 5.5678,
|
22318 |
+
"step": 12748
|
22319 |
+
},
|
22320 |
+
{
|
22321 |
+
"epoch": 0.63,
|
22322 |
+
"grad_norm": 1.9810757637023926,
|
22323 |
+
"learning_rate": 1.8501408172340533e-05,
|
22324 |
+
"loss": 5.5055,
|
22325 |
+
"step": 12752
|
22326 |
+
},
|
22327 |
+
{
|
22328 |
+
"epoch": 0.63,
|
22329 |
+
"grad_norm": 2.196544885635376,
|
22330 |
+
"learning_rate": 1.849152626117891e-05,
|
22331 |
+
"loss": 5.4622,
|
22332 |
+
"step": 12756
|
22333 |
+
},
|
22334 |
+
{
|
22335 |
+
"epoch": 0.63,
|
22336 |
+
"grad_norm": 1.841874599456787,
|
22337 |
+
"learning_rate": 1.8481644350017293e-05,
|
22338 |
+
"loss": 5.525,
|
22339 |
+
"step": 12760
|
22340 |
+
},
|
22341 |
+
{
|
22342 |
+
"epoch": 0.63,
|
22343 |
+
"grad_norm": 1.982703685760498,
|
22344 |
+
"learning_rate": 1.8471762438855675e-05,
|
22345 |
+
"loss": 5.5797,
|
22346 |
+
"step": 12764
|
22347 |
+
},
|
22348 |
+
{
|
22349 |
+
"epoch": 0.63,
|
22350 |
+
"grad_norm": 2.193528413772583,
|
22351 |
+
"learning_rate": 1.8461880527694058e-05,
|
22352 |
+
"loss": 5.4744,
|
22353 |
+
"step": 12768
|
22354 |
+
},
|
22355 |
+
{
|
22356 |
+
"epoch": 0.63,
|
22357 |
+
"grad_norm": 1.6755714416503906,
|
22358 |
+
"learning_rate": 1.8451998616532436e-05,
|
22359 |
+
"loss": 5.4944,
|
22360 |
+
"step": 12772
|
22361 |
+
},
|
22362 |
+
{
|
22363 |
+
"epoch": 0.63,
|
22364 |
+
"grad_norm": 1.9214802980422974,
|
22365 |
+
"learning_rate": 1.8442116705370818e-05,
|
22366 |
+
"loss": 5.4525,
|
22367 |
+
"step": 12776
|
22368 |
+
},
|
22369 |
+
{
|
22370 |
+
"epoch": 0.63,
|
22371 |
+
"grad_norm": 2.1332216262817383,
|
22372 |
+
"learning_rate": 1.8432234794209204e-05,
|
22373 |
+
"loss": 5.4974,
|
22374 |
+
"step": 12780
|
22375 |
+
},
|
22376 |
+
{
|
22377 |
+
"epoch": 0.63,
|
22378 |
+
"grad_norm": 1.9983022212982178,
|
22379 |
+
"learning_rate": 1.8422352883047582e-05,
|
22380 |
+
"loss": 5.4737,
|
22381 |
+
"step": 12784
|
22382 |
+
},
|
22383 |
+
{
|
22384 |
+
"epoch": 0.63,
|
22385 |
+
"grad_norm": 2.090367555618286,
|
22386 |
+
"learning_rate": 1.8412470971885964e-05,
|
22387 |
+
"loss": 5.4413,
|
22388 |
+
"step": 12788
|
22389 |
+
},
|
22390 |
+
{
|
22391 |
+
"epoch": 0.63,
|
22392 |
+
"grad_norm": 1.903393030166626,
|
22393 |
+
"learning_rate": 1.8402589060724346e-05,
|
22394 |
+
"loss": 5.5338,
|
22395 |
+
"step": 12792
|
22396 |
+
},
|
22397 |
+
{
|
22398 |
+
"epoch": 0.63,
|
22399 |
+
"grad_norm": 1.8425928354263306,
|
22400 |
+
"learning_rate": 1.8392707149562725e-05,
|
22401 |
+
"loss": 5.5461,
|
22402 |
+
"step": 12796
|
22403 |
+
},
|
22404 |
+
{
|
22405 |
+
"epoch": 0.63,
|
22406 |
+
"grad_norm": 1.7911487817764282,
|
22407 |
+
"learning_rate": 1.8382825238401107e-05,
|
22408 |
+
"loss": 5.5144,
|
22409 |
+
"step": 12800
|
22410 |
+
},
|
22411 |
+
{
|
22412 |
+
"epoch": 0.63,
|
22413 |
+
"grad_norm": 1.9388378858566284,
|
22414 |
+
"learning_rate": 1.837294332723949e-05,
|
22415 |
+
"loss": 5.4777,
|
22416 |
+
"step": 12804
|
22417 |
+
},
|
22418 |
+
{
|
22419 |
+
"epoch": 0.63,
|
22420 |
+
"grad_norm": 1.9651098251342773,
|
22421 |
+
"learning_rate": 1.836306141607787e-05,
|
22422 |
+
"loss": 5.583,
|
22423 |
+
"step": 12808
|
22424 |
+
},
|
22425 |
+
{
|
22426 |
+
"epoch": 0.63,
|
22427 |
+
"grad_norm": 2.097846269607544,
|
22428 |
+
"learning_rate": 1.835317950491625e-05,
|
22429 |
+
"loss": 5.4835,
|
22430 |
+
"step": 12812
|
22431 |
+
},
|
22432 |
+
{
|
22433 |
+
"epoch": 0.63,
|
22434 |
+
"grad_norm": 1.8683522939682007,
|
22435 |
+
"learning_rate": 1.8343297593754632e-05,
|
22436 |
+
"loss": 5.542,
|
22437 |
+
"step": 12816
|
22438 |
+
},
|
22439 |
+
{
|
22440 |
+
"epoch": 0.63,
|
22441 |
+
"grad_norm": 1.9476234912872314,
|
22442 |
+
"learning_rate": 1.8333415682593014e-05,
|
22443 |
+
"loss": 5.5376,
|
22444 |
+
"step": 12820
|
22445 |
+
},
|
22446 |
+
{
|
22447 |
+
"epoch": 0.63,
|
22448 |
+
"grad_norm": 2.049328088760376,
|
22449 |
+
"learning_rate": 1.8323533771431396e-05,
|
22450 |
+
"loss": 5.4911,
|
22451 |
+
"step": 12824
|
22452 |
+
},
|
22453 |
+
{
|
22454 |
+
"epoch": 0.63,
|
22455 |
+
"grad_norm": 1.8876453638076782,
|
22456 |
+
"learning_rate": 1.8313651860269775e-05,
|
22457 |
+
"loss": 5.5226,
|
22458 |
+
"step": 12828
|
22459 |
+
},
|
22460 |
+
{
|
22461 |
+
"epoch": 0.63,
|
22462 |
+
"grad_norm": 1.9597487449645996,
|
22463 |
+
"learning_rate": 1.8303769949108157e-05,
|
22464 |
+
"loss": 5.4785,
|
22465 |
+
"step": 12832
|
22466 |
+
},
|
22467 |
+
{
|
22468 |
+
"epoch": 0.63,
|
22469 |
+
"grad_norm": 2.0029759407043457,
|
22470 |
+
"learning_rate": 1.8293888037946542e-05,
|
22471 |
+
"loss": 5.5109,
|
22472 |
+
"step": 12836
|
22473 |
+
},
|
22474 |
+
{
|
22475 |
+
"epoch": 0.63,
|
22476 |
+
"grad_norm": 1.9219965934753418,
|
22477 |
+
"learning_rate": 1.828400612678492e-05,
|
22478 |
+
"loss": 5.508,
|
22479 |
+
"step": 12840
|
22480 |
+
},
|
22481 |
+
{
|
22482 |
+
"epoch": 0.63,
|
22483 |
+
"grad_norm": 1.9963774681091309,
|
22484 |
+
"learning_rate": 1.8274124215623303e-05,
|
22485 |
+
"loss": 5.664,
|
22486 |
+
"step": 12844
|
22487 |
+
},
|
22488 |
+
{
|
22489 |
+
"epoch": 0.63,
|
22490 |
+
"grad_norm": 2.181628942489624,
|
22491 |
+
"learning_rate": 1.8264242304461685e-05,
|
22492 |
+
"loss": 5.4549,
|
22493 |
+
"step": 12848
|
22494 |
+
},
|
22495 |
+
{
|
22496 |
+
"epoch": 0.64,
|
22497 |
+
"grad_norm": 1.9105952978134155,
|
22498 |
+
"learning_rate": 1.8254360393300067e-05,
|
22499 |
+
"loss": 5.6288,
|
22500 |
+
"step": 12852
|
22501 |
+
},
|
22502 |
+
{
|
22503 |
+
"epoch": 0.64,
|
22504 |
+
"grad_norm": 1.8461229801177979,
|
22505 |
+
"learning_rate": 1.8244478482138446e-05,
|
22506 |
+
"loss": 5.5195,
|
22507 |
+
"step": 12856
|
22508 |
+
},
|
22509 |
+
{
|
22510 |
+
"epoch": 0.64,
|
22511 |
+
"grad_norm": 1.9446773529052734,
|
22512 |
+
"learning_rate": 1.8234596570976828e-05,
|
22513 |
+
"loss": 5.4313,
|
22514 |
+
"step": 12860
|
22515 |
+
},
|
22516 |
+
{
|
22517 |
+
"epoch": 0.64,
|
22518 |
+
"grad_norm": 2.007297992706299,
|
22519 |
+
"learning_rate": 1.822471465981521e-05,
|
22520 |
+
"loss": 5.6664,
|
22521 |
+
"step": 12864
|
22522 |
+
},
|
22523 |
+
{
|
22524 |
+
"epoch": 0.64,
|
22525 |
+
"grad_norm": 2.0537166595458984,
|
22526 |
+
"learning_rate": 1.821483274865359e-05,
|
22527 |
+
"loss": 5.5447,
|
22528 |
+
"step": 12868
|
22529 |
+
},
|
22530 |
+
{
|
22531 |
+
"epoch": 0.64,
|
22532 |
+
"grad_norm": 2.0639407634735107,
|
22533 |
+
"learning_rate": 1.820495083749197e-05,
|
22534 |
+
"loss": 5.4532,
|
22535 |
+
"step": 12872
|
22536 |
+
},
|
22537 |
+
{
|
22538 |
+
"epoch": 0.64,
|
22539 |
+
"grad_norm": 1.9469093084335327,
|
22540 |
+
"learning_rate": 1.8195068926330353e-05,
|
22541 |
+
"loss": 5.5461,
|
22542 |
+
"step": 12876
|
22543 |
+
},
|
22544 |
+
{
|
22545 |
+
"epoch": 0.64,
|
22546 |
+
"grad_norm": 1.766298770904541,
|
22547 |
+
"learning_rate": 1.8185187015168735e-05,
|
22548 |
+
"loss": 5.4775,
|
22549 |
+
"step": 12880
|
22550 |
+
},
|
22551 |
+
{
|
22552 |
+
"epoch": 0.64,
|
22553 |
+
"grad_norm": 1.7954472303390503,
|
22554 |
+
"learning_rate": 1.8175305104007114e-05,
|
22555 |
+
"loss": 5.5326,
|
22556 |
+
"step": 12884
|
22557 |
+
},
|
22558 |
+
{
|
22559 |
+
"epoch": 0.64,
|
22560 |
+
"grad_norm": 1.8361804485321045,
|
22561 |
+
"learning_rate": 1.8165423192845496e-05,
|
22562 |
+
"loss": 5.5217,
|
22563 |
+
"step": 12888
|
22564 |
+
},
|
22565 |
+
{
|
22566 |
+
"epoch": 0.64,
|
22567 |
+
"grad_norm": 2.2192041873931885,
|
22568 |
+
"learning_rate": 1.815554128168388e-05,
|
22569 |
+
"loss": 5.4166,
|
22570 |
+
"step": 12892
|
22571 |
+
},
|
22572 |
+
{
|
22573 |
+
"epoch": 0.64,
|
22574 |
+
"grad_norm": 2.092569351196289,
|
22575 |
+
"learning_rate": 1.814565937052226e-05,
|
22576 |
+
"loss": 5.5973,
|
22577 |
+
"step": 12896
|
22578 |
+
},
|
22579 |
+
{
|
22580 |
+
"epoch": 0.64,
|
22581 |
+
"grad_norm": 1.8529601097106934,
|
22582 |
+
"learning_rate": 1.8135777459360642e-05,
|
22583 |
+
"loss": 5.4088,
|
22584 |
+
"step": 12900
|
22585 |
+
},
|
22586 |
+
{
|
22587 |
+
"epoch": 0.64,
|
22588 |
+
"grad_norm": 1.7156609296798706,
|
22589 |
+
"learning_rate": 1.8125895548199024e-05,
|
22590 |
+
"loss": 5.5506,
|
22591 |
+
"step": 12904
|
22592 |
+
},
|
22593 |
+
{
|
22594 |
+
"epoch": 0.64,
|
22595 |
+
"grad_norm": 2.3277995586395264,
|
22596 |
+
"learning_rate": 1.8116013637037406e-05,
|
22597 |
+
"loss": 5.4852,
|
22598 |
+
"step": 12908
|
22599 |
+
},
|
22600 |
+
{
|
22601 |
+
"epoch": 0.64,
|
22602 |
+
"grad_norm": 2.039177894592285,
|
22603 |
+
"learning_rate": 1.8106131725875785e-05,
|
22604 |
+
"loss": 5.509,
|
22605 |
+
"step": 12912
|
22606 |
+
},
|
22607 |
+
{
|
22608 |
+
"epoch": 0.64,
|
22609 |
+
"grad_norm": 2.2541489601135254,
|
22610 |
+
"learning_rate": 1.8096249814714167e-05,
|
22611 |
+
"loss": 5.5551,
|
22612 |
+
"step": 12916
|
22613 |
+
},
|
22614 |
+
{
|
22615 |
+
"epoch": 0.64,
|
22616 |
+
"grad_norm": 1.9068487882614136,
|
22617 |
+
"learning_rate": 1.808636790355255e-05,
|
22618 |
+
"loss": 5.5223,
|
22619 |
+
"step": 12920
|
22620 |
+
},
|
22621 |
+
{
|
22622 |
+
"epoch": 0.64,
|
22623 |
+
"grad_norm": 1.8464723825454712,
|
22624 |
+
"learning_rate": 1.807648599239093e-05,
|
22625 |
+
"loss": 5.4688,
|
22626 |
+
"step": 12924
|
22627 |
+
},
|
22628 |
+
{
|
22629 |
+
"epoch": 0.64,
|
22630 |
+
"grad_norm": 1.8630855083465576,
|
22631 |
+
"learning_rate": 1.806660408122931e-05,
|
22632 |
+
"loss": 5.521,
|
22633 |
+
"step": 12928
|
22634 |
+
},
|
22635 |
+
{
|
22636 |
+
"epoch": 0.64,
|
22637 |
+
"grad_norm": 1.9354201555252075,
|
22638 |
+
"learning_rate": 1.805672217006769e-05,
|
22639 |
+
"loss": 5.5234,
|
22640 |
+
"step": 12932
|
22641 |
+
},
|
22642 |
+
{
|
22643 |
+
"epoch": 0.64,
|
22644 |
+
"grad_norm": 2.2278544902801514,
|
22645 |
+
"learning_rate": 1.8046840258906074e-05,
|
22646 |
+
"loss": 5.563,
|
22647 |
+
"step": 12936
|
22648 |
+
},
|
22649 |
+
{
|
22650 |
+
"epoch": 0.64,
|
22651 |
+
"grad_norm": 2.0102896690368652,
|
22652 |
+
"learning_rate": 1.8036958347744452e-05,
|
22653 |
+
"loss": 5.6071,
|
22654 |
+
"step": 12940
|
22655 |
+
},
|
22656 |
+
{
|
22657 |
+
"epoch": 0.64,
|
22658 |
+
"grad_norm": 2.12506103515625,
|
22659 |
+
"learning_rate": 1.8027076436582834e-05,
|
22660 |
+
"loss": 5.5518,
|
22661 |
+
"step": 12944
|
22662 |
+
},
|
22663 |
+
{
|
22664 |
+
"epoch": 0.64,
|
22665 |
+
"grad_norm": 2.134568929672241,
|
22666 |
+
"learning_rate": 1.8017194525421216e-05,
|
22667 |
+
"loss": 5.4289,
|
22668 |
+
"step": 12948
|
22669 |
+
},
|
22670 |
+
{
|
22671 |
+
"epoch": 0.64,
|
22672 |
+
"grad_norm": 1.866938829421997,
|
22673 |
+
"learning_rate": 1.80073126142596e-05,
|
22674 |
+
"loss": 5.4047,
|
22675 |
+
"step": 12952
|
22676 |
+
},
|
22677 |
+
{
|
22678 |
+
"epoch": 0.64,
|
22679 |
+
"grad_norm": 2.0489606857299805,
|
22680 |
+
"learning_rate": 1.799743070309798e-05,
|
22681 |
+
"loss": 5.4668,
|
22682 |
+
"step": 12956
|
22683 |
+
},
|
22684 |
+
{
|
22685 |
+
"epoch": 0.64,
|
22686 |
+
"grad_norm": 2.130350351333618,
|
22687 |
+
"learning_rate": 1.7987548791936363e-05,
|
22688 |
+
"loss": 5.599,
|
22689 |
+
"step": 12960
|
22690 |
+
},
|
22691 |
+
{
|
22692 |
+
"epoch": 0.64,
|
22693 |
+
"grad_norm": 2.1276466846466064,
|
22694 |
+
"learning_rate": 1.7977666880774745e-05,
|
22695 |
+
"loss": 5.3925,
|
22696 |
+
"step": 12964
|
22697 |
+
},
|
22698 |
+
{
|
22699 |
+
"epoch": 0.64,
|
22700 |
+
"grad_norm": 1.8620883226394653,
|
22701 |
+
"learning_rate": 1.7967784969613123e-05,
|
22702 |
+
"loss": 5.4918,
|
22703 |
+
"step": 12968
|
22704 |
+
},
|
22705 |
+
{
|
22706 |
+
"epoch": 0.64,
|
22707 |
+
"grad_norm": 1.7407227754592896,
|
22708 |
+
"learning_rate": 1.7957903058451505e-05,
|
22709 |
+
"loss": 5.5406,
|
22710 |
+
"step": 12972
|
22711 |
+
},
|
22712 |
+
{
|
22713 |
+
"epoch": 0.64,
|
22714 |
+
"grad_norm": 1.9221688508987427,
|
22715 |
+
"learning_rate": 1.7948021147289887e-05,
|
22716 |
+
"loss": 5.5111,
|
22717 |
+
"step": 12976
|
22718 |
+
},
|
22719 |
+
{
|
22720 |
+
"epoch": 0.64,
|
22721 |
+
"grad_norm": 2.0765016078948975,
|
22722 |
+
"learning_rate": 1.793813923612827e-05,
|
22723 |
+
"loss": 5.6063,
|
22724 |
+
"step": 12980
|
22725 |
+
},
|
22726 |
+
{
|
22727 |
+
"epoch": 0.64,
|
22728 |
+
"grad_norm": 2.040132999420166,
|
22729 |
+
"learning_rate": 1.7928257324966648e-05,
|
22730 |
+
"loss": 5.5904,
|
22731 |
+
"step": 12984
|
22732 |
+
},
|
22733 |
+
{
|
22734 |
+
"epoch": 0.64,
|
22735 |
+
"grad_norm": 1.8433319330215454,
|
22736 |
+
"learning_rate": 1.791837541380503e-05,
|
22737 |
+
"loss": 5.4806,
|
22738 |
+
"step": 12988
|
22739 |
+
},
|
22740 |
+
{
|
22741 |
+
"epoch": 0.64,
|
22742 |
+
"grad_norm": 2.065800428390503,
|
22743 |
+
"learning_rate": 1.7908493502643412e-05,
|
22744 |
+
"loss": 5.563,
|
22745 |
+
"step": 12992
|
22746 |
+
},
|
22747 |
+
{
|
22748 |
+
"epoch": 0.64,
|
22749 |
+
"grad_norm": 2.199831485748291,
|
22750 |
+
"learning_rate": 1.789861159148179e-05,
|
22751 |
+
"loss": 5.5233,
|
22752 |
+
"step": 12996
|
22753 |
+
},
|
22754 |
+
{
|
22755 |
+
"epoch": 0.64,
|
22756 |
+
"grad_norm": 1.8309836387634277,
|
22757 |
+
"learning_rate": 1.7888729680320173e-05,
|
22758 |
+
"loss": 5.4911,
|
22759 |
+
"step": 13000
|
22760 |
+
},
|
22761 |
+
{
|
22762 |
+
"epoch": 0.64,
|
22763 |
+
"grad_norm": 1.900347113609314,
|
22764 |
+
"learning_rate": 1.7878847769158555e-05,
|
22765 |
+
"loss": 5.5083,
|
22766 |
+
"step": 13004
|
22767 |
+
},
|
22768 |
+
{
|
22769 |
+
"epoch": 0.64,
|
22770 |
+
"grad_norm": 1.8298702239990234,
|
22771 |
+
"learning_rate": 1.786896585799694e-05,
|
22772 |
+
"loss": 5.5197,
|
22773 |
+
"step": 13008
|
22774 |
+
},
|
22775 |
+
{
|
22776 |
+
"epoch": 0.64,
|
22777 |
+
"grad_norm": 1.8966771364212036,
|
22778 |
+
"learning_rate": 1.785908394683532e-05,
|
22779 |
+
"loss": 5.4034,
|
22780 |
+
"step": 13012
|
22781 |
+
},
|
22782 |
+
{
|
22783 |
+
"epoch": 0.64,
|
22784 |
+
"grad_norm": 1.9861708879470825,
|
22785 |
+
"learning_rate": 1.78492020356737e-05,
|
22786 |
+
"loss": 5.3773,
|
22787 |
+
"step": 13016
|
22788 |
+
},
|
22789 |
+
{
|
22790 |
+
"epoch": 0.64,
|
22791 |
+
"grad_norm": 2.1633896827697754,
|
22792 |
+
"learning_rate": 1.7839320124512083e-05,
|
22793 |
+
"loss": 5.5735,
|
22794 |
+
"step": 13020
|
22795 |
+
},
|
22796 |
+
{
|
22797 |
+
"epoch": 0.64,
|
22798 |
+
"grad_norm": 1.8944169282913208,
|
22799 |
+
"learning_rate": 1.7829438213350462e-05,
|
22800 |
+
"loss": 5.4982,
|
22801 |
+
"step": 13024
|
22802 |
+
},
|
22803 |
+
{
|
22804 |
+
"epoch": 0.64,
|
22805 |
+
"grad_norm": 2.358996868133545,
|
22806 |
+
"learning_rate": 1.7819556302188844e-05,
|
22807 |
+
"loss": 5.5851,
|
22808 |
+
"step": 13028
|
22809 |
+
},
|
22810 |
+
{
|
22811 |
+
"epoch": 0.64,
|
22812 |
+
"grad_norm": 1.8002705574035645,
|
22813 |
+
"learning_rate": 1.7809674391027226e-05,
|
22814 |
+
"loss": 5.5903,
|
22815 |
+
"step": 13032
|
22816 |
+
},
|
22817 |
+
{
|
22818 |
+
"epoch": 0.64,
|
22819 |
+
"grad_norm": 2.174081325531006,
|
22820 |
+
"learning_rate": 1.7799792479865608e-05,
|
22821 |
+
"loss": 5.5327,
|
22822 |
+
"step": 13036
|
22823 |
+
},
|
22824 |
+
{
|
22825 |
+
"epoch": 0.64,
|
22826 |
+
"grad_norm": 2.115267515182495,
|
22827 |
+
"learning_rate": 1.7789910568703987e-05,
|
22828 |
+
"loss": 5.6085,
|
22829 |
+
"step": 13040
|
22830 |
+
},
|
22831 |
+
{
|
22832 |
+
"epoch": 0.64,
|
22833 |
+
"grad_norm": 2.197908878326416,
|
22834 |
+
"learning_rate": 1.778002865754237e-05,
|
22835 |
+
"loss": 5.5499,
|
22836 |
+
"step": 13044
|
22837 |
+
},
|
22838 |
+
{
|
22839 |
+
"epoch": 0.64,
|
22840 |
+
"grad_norm": 2.2714781761169434,
|
22841 |
+
"learning_rate": 1.777014674638075e-05,
|
22842 |
+
"loss": 5.6221,
|
22843 |
+
"step": 13048
|
22844 |
+
},
|
22845 |
+
{
|
22846 |
+
"epoch": 0.64,
|
22847 |
+
"grad_norm": 2.1368441581726074,
|
22848 |
+
"learning_rate": 1.7760264835219133e-05,
|
22849 |
+
"loss": 5.5613,
|
22850 |
+
"step": 13052
|
22851 |
+
},
|
22852 |
+
{
|
22853 |
+
"epoch": 0.65,
|
22854 |
+
"grad_norm": 2.0283219814300537,
|
22855 |
+
"learning_rate": 1.775038292405751e-05,
|
22856 |
+
"loss": 5.4734,
|
22857 |
+
"step": 13056
|
22858 |
+
},
|
22859 |
+
{
|
22860 |
+
"epoch": 0.65,
|
22861 |
+
"grad_norm": 1.7801271677017212,
|
22862 |
+
"learning_rate": 1.7740501012895894e-05,
|
22863 |
+
"loss": 5.4928,
|
22864 |
+
"step": 13060
|
22865 |
+
},
|
22866 |
+
{
|
22867 |
+
"epoch": 0.65,
|
22868 |
+
"grad_norm": 1.8141615390777588,
|
22869 |
+
"learning_rate": 1.7730619101734276e-05,
|
22870 |
+
"loss": 5.4943,
|
22871 |
+
"step": 13064
|
22872 |
+
},
|
22873 |
+
{
|
22874 |
+
"epoch": 0.65,
|
22875 |
+
"grad_norm": 1.9424062967300415,
|
22876 |
+
"learning_rate": 1.7720737190572658e-05,
|
22877 |
+
"loss": 5.4888,
|
22878 |
+
"step": 13068
|
22879 |
+
},
|
22880 |
+
{
|
22881 |
+
"epoch": 0.65,
|
22882 |
+
"grad_norm": 1.7956730127334595,
|
22883 |
+
"learning_rate": 1.771085527941104e-05,
|
22884 |
+
"loss": 5.3367,
|
22885 |
+
"step": 13072
|
22886 |
+
},
|
22887 |
+
{
|
22888 |
+
"epoch": 0.65,
|
22889 |
+
"grad_norm": 1.8769028186798096,
|
22890 |
+
"learning_rate": 1.7700973368249422e-05,
|
22891 |
+
"loss": 5.5365,
|
22892 |
+
"step": 13076
|
22893 |
+
},
|
22894 |
+
{
|
22895 |
+
"epoch": 0.65,
|
22896 |
+
"grad_norm": 1.8470765352249146,
|
22897 |
+
"learning_rate": 1.76910914570878e-05,
|
22898 |
+
"loss": 5.6281,
|
22899 |
+
"step": 13080
|
22900 |
+
},
|
22901 |
+
{
|
22902 |
+
"epoch": 0.65,
|
22903 |
+
"grad_norm": 1.9143494367599487,
|
22904 |
+
"learning_rate": 1.7681209545926183e-05,
|
22905 |
+
"loss": 5.403,
|
22906 |
+
"step": 13084
|
22907 |
+
},
|
22908 |
+
{
|
22909 |
+
"epoch": 0.65,
|
22910 |
+
"grad_norm": 1.8906290531158447,
|
22911 |
+
"learning_rate": 1.7671327634764565e-05,
|
22912 |
+
"loss": 5.4201,
|
22913 |
+
"step": 13088
|
22914 |
+
},
|
22915 |
+
{
|
22916 |
+
"epoch": 0.65,
|
22917 |
+
"grad_norm": 1.9759694337844849,
|
22918 |
+
"learning_rate": 1.7661445723602947e-05,
|
22919 |
+
"loss": 5.5,
|
22920 |
+
"step": 13092
|
22921 |
+
},
|
22922 |
+
{
|
22923 |
+
"epoch": 0.65,
|
22924 |
+
"grad_norm": 2.16597580909729,
|
22925 |
+
"learning_rate": 1.7651563812441325e-05,
|
22926 |
+
"loss": 5.4831,
|
22927 |
+
"step": 13096
|
22928 |
+
},
|
22929 |
+
{
|
22930 |
+
"epoch": 0.65,
|
22931 |
+
"grad_norm": 2.142273187637329,
|
22932 |
+
"learning_rate": 1.7641681901279708e-05,
|
22933 |
+
"loss": 5.5608,
|
22934 |
+
"step": 13100
|
22935 |
+
},
|
22936 |
+
{
|
22937 |
+
"epoch": 0.65,
|
22938 |
+
"grad_norm": 2.0143542289733887,
|
22939 |
+
"learning_rate": 1.763179999011809e-05,
|
22940 |
+
"loss": 5.4914,
|
22941 |
+
"step": 13104
|
22942 |
+
},
|
22943 |
+
{
|
22944 |
+
"epoch": 0.65,
|
22945 |
+
"grad_norm": 2.0241010189056396,
|
22946 |
+
"learning_rate": 1.762191807895647e-05,
|
22947 |
+
"loss": 5.5402,
|
22948 |
+
"step": 13108
|
22949 |
+
},
|
22950 |
+
{
|
22951 |
+
"epoch": 0.65,
|
22952 |
+
"grad_norm": 2.111691951751709,
|
22953 |
+
"learning_rate": 1.761203616779485e-05,
|
22954 |
+
"loss": 5.5417,
|
22955 |
+
"step": 13112
|
22956 |
+
},
|
22957 |
+
{
|
22958 |
+
"epoch": 0.65,
|
22959 |
+
"grad_norm": 2.0966546535491943,
|
22960 |
+
"learning_rate": 1.7602154256633232e-05,
|
22961 |
+
"loss": 5.4544,
|
22962 |
+
"step": 13116
|
22963 |
+
},
|
22964 |
+
{
|
22965 |
+
"epoch": 0.65,
|
22966 |
+
"grad_norm": 2.2063889503479004,
|
22967 |
+
"learning_rate": 1.7592272345471614e-05,
|
22968 |
+
"loss": 5.5447,
|
22969 |
+
"step": 13120
|
22970 |
+
},
|
22971 |
+
{
|
22972 |
+
"epoch": 0.65,
|
22973 |
+
"grad_norm": 2.0416488647460938,
|
22974 |
+
"learning_rate": 1.7582390434309997e-05,
|
22975 |
+
"loss": 5.56,
|
22976 |
+
"step": 13124
|
22977 |
+
},
|
22978 |
+
{
|
22979 |
+
"epoch": 0.65,
|
22980 |
+
"grad_norm": 2.1015825271606445,
|
22981 |
+
"learning_rate": 1.757250852314838e-05,
|
22982 |
+
"loss": 5.5632,
|
22983 |
+
"step": 13128
|
22984 |
+
},
|
22985 |
+
{
|
22986 |
+
"epoch": 0.65,
|
22987 |
+
"grad_norm": 2.154283046722412,
|
22988 |
+
"learning_rate": 1.756262661198676e-05,
|
22989 |
+
"loss": 5.4369,
|
22990 |
+
"step": 13132
|
22991 |
+
},
|
22992 |
+
{
|
22993 |
+
"epoch": 0.65,
|
22994 |
+
"grad_norm": 1.9613300561904907,
|
22995 |
+
"learning_rate": 1.7552744700825143e-05,
|
22996 |
+
"loss": 5.4493,
|
22997 |
+
"step": 13136
|
22998 |
+
},
|
22999 |
+
{
|
23000 |
+
"epoch": 0.65,
|
23001 |
+
"grad_norm": 1.936897873878479,
|
23002 |
+
"learning_rate": 1.754286278966352e-05,
|
23003 |
+
"loss": 5.6429,
|
23004 |
+
"step": 13140
|
23005 |
+
},
|
23006 |
+
{
|
23007 |
+
"epoch": 0.65,
|
23008 |
+
"grad_norm": 2.1174933910369873,
|
23009 |
+
"learning_rate": 1.7532980878501903e-05,
|
23010 |
+
"loss": 5.5222,
|
23011 |
+
"step": 13144
|
23012 |
+
},
|
23013 |
+
{
|
23014 |
+
"epoch": 0.65,
|
23015 |
+
"grad_norm": 1.8853952884674072,
|
23016 |
+
"learning_rate": 1.7523098967340285e-05,
|
23017 |
+
"loss": 5.5823,
|
23018 |
+
"step": 13148
|
23019 |
+
},
|
23020 |
+
{
|
23021 |
+
"epoch": 0.65,
|
23022 |
+
"grad_norm": 1.7702313661575317,
|
23023 |
+
"learning_rate": 1.7513217056178664e-05,
|
23024 |
+
"loss": 5.5205,
|
23025 |
+
"step": 13152
|
23026 |
+
},
|
23027 |
+
{
|
23028 |
+
"epoch": 0.65,
|
23029 |
+
"grad_norm": 1.8901150226593018,
|
23030 |
+
"learning_rate": 1.7503335145017046e-05,
|
23031 |
+
"loss": 5.5227,
|
23032 |
+
"step": 13156
|
23033 |
+
},
|
23034 |
+
{
|
23035 |
+
"epoch": 0.65,
|
23036 |
+
"grad_norm": 1.9898940324783325,
|
23037 |
+
"learning_rate": 1.7493453233855428e-05,
|
23038 |
+
"loss": 5.5064,
|
23039 |
+
"step": 13160
|
23040 |
+
},
|
23041 |
+
{
|
23042 |
+
"epoch": 0.65,
|
23043 |
+
"grad_norm": 1.8458938598632812,
|
23044 |
+
"learning_rate": 1.748357132269381e-05,
|
23045 |
+
"loss": 5.4032,
|
23046 |
+
"step": 13164
|
23047 |
+
},
|
23048 |
+
{
|
23049 |
+
"epoch": 0.65,
|
23050 |
+
"grad_norm": 2.1321139335632324,
|
23051 |
+
"learning_rate": 1.747368941153219e-05,
|
23052 |
+
"loss": 5.5491,
|
23053 |
+
"step": 13168
|
23054 |
+
},
|
23055 |
+
{
|
23056 |
+
"epoch": 0.65,
|
23057 |
+
"grad_norm": 2.055555820465088,
|
23058 |
+
"learning_rate": 1.746380750037057e-05,
|
23059 |
+
"loss": 5.4821,
|
23060 |
+
"step": 13172
|
23061 |
+
},
|
23062 |
+
{
|
23063 |
+
"epoch": 0.65,
|
23064 |
+
"grad_norm": 2.0164570808410645,
|
23065 |
+
"learning_rate": 1.7453925589208953e-05,
|
23066 |
+
"loss": 5.4772,
|
23067 |
+
"step": 13176
|
23068 |
+
},
|
23069 |
+
{
|
23070 |
+
"epoch": 0.65,
|
23071 |
+
"grad_norm": 1.8798964023590088,
|
23072 |
+
"learning_rate": 1.7444043678047335e-05,
|
23073 |
+
"loss": 5.6292,
|
23074 |
+
"step": 13180
|
23075 |
+
},
|
23076 |
+
{
|
23077 |
+
"epoch": 0.65,
|
23078 |
+
"grad_norm": 1.8820830583572388,
|
23079 |
+
"learning_rate": 1.7434161766885717e-05,
|
23080 |
+
"loss": 5.5503,
|
23081 |
+
"step": 13184
|
23082 |
+
},
|
23083 |
+
{
|
23084 |
+
"epoch": 0.65,
|
23085 |
+
"grad_norm": 1.768707513809204,
|
23086 |
+
"learning_rate": 1.74242798557241e-05,
|
23087 |
+
"loss": 5.5907,
|
23088 |
+
"step": 13188
|
23089 |
+
},
|
23090 |
+
{
|
23091 |
+
"epoch": 0.65,
|
23092 |
+
"grad_norm": 1.7819797992706299,
|
23093 |
+
"learning_rate": 1.741439794456248e-05,
|
23094 |
+
"loss": 5.3631,
|
23095 |
+
"step": 13192
|
23096 |
+
},
|
23097 |
+
{
|
23098 |
+
"epoch": 0.65,
|
23099 |
+
"grad_norm": 1.9888780117034912,
|
23100 |
+
"learning_rate": 1.740451603340086e-05,
|
23101 |
+
"loss": 5.5318,
|
23102 |
+
"step": 13196
|
23103 |
+
},
|
23104 |
+
{
|
23105 |
+
"epoch": 0.65,
|
23106 |
+
"grad_norm": 1.7733293771743774,
|
23107 |
+
"learning_rate": 1.7394634122239242e-05,
|
23108 |
+
"loss": 5.4794,
|
23109 |
+
"step": 13200
|
23110 |
+
},
|
23111 |
+
{
|
23112 |
+
"epoch": 0.65,
|
23113 |
+
"grad_norm": 2.08803653717041,
|
23114 |
+
"learning_rate": 1.7384752211077624e-05,
|
23115 |
+
"loss": 5.5392,
|
23116 |
+
"step": 13204
|
23117 |
+
},
|
23118 |
+
{
|
23119 |
+
"epoch": 0.65,
|
23120 |
+
"grad_norm": 1.7837083339691162,
|
23121 |
+
"learning_rate": 1.7374870299916006e-05,
|
23122 |
+
"loss": 5.5612,
|
23123 |
+
"step": 13208
|
23124 |
+
},
|
23125 |
+
{
|
23126 |
+
"epoch": 0.65,
|
23127 |
+
"grad_norm": 2.200601100921631,
|
23128 |
+
"learning_rate": 1.7364988388754385e-05,
|
23129 |
+
"loss": 5.4618,
|
23130 |
+
"step": 13212
|
23131 |
+
},
|
23132 |
+
{
|
23133 |
+
"epoch": 0.65,
|
23134 |
+
"grad_norm": 2.062946319580078,
|
23135 |
+
"learning_rate": 1.7355106477592767e-05,
|
23136 |
+
"loss": 5.4031,
|
23137 |
+
"step": 13216
|
23138 |
+
},
|
23139 |
+
{
|
23140 |
+
"epoch": 0.65,
|
23141 |
+
"grad_norm": 1.8929904699325562,
|
23142 |
+
"learning_rate": 1.734522456643115e-05,
|
23143 |
+
"loss": 5.6011,
|
23144 |
+
"step": 13220
|
23145 |
+
},
|
23146 |
+
{
|
23147 |
+
"epoch": 0.65,
|
23148 |
+
"grad_norm": 2.175036907196045,
|
23149 |
+
"learning_rate": 1.7335342655269528e-05,
|
23150 |
+
"loss": 5.5038,
|
23151 |
+
"step": 13224
|
23152 |
+
},
|
23153 |
+
{
|
23154 |
+
"epoch": 0.65,
|
23155 |
+
"grad_norm": 2.1107425689697266,
|
23156 |
+
"learning_rate": 1.732546074410791e-05,
|
23157 |
+
"loss": 5.4624,
|
23158 |
+
"step": 13228
|
23159 |
+
},
|
23160 |
+
{
|
23161 |
+
"epoch": 0.65,
|
23162 |
+
"grad_norm": 1.8881957530975342,
|
23163 |
+
"learning_rate": 1.7315578832946292e-05,
|
23164 |
+
"loss": 5.4579,
|
23165 |
+
"step": 13232
|
23166 |
+
},
|
23167 |
+
{
|
23168 |
+
"epoch": 0.65,
|
23169 |
+
"grad_norm": 1.9419997930526733,
|
23170 |
+
"learning_rate": 1.7305696921784674e-05,
|
23171 |
+
"loss": 5.5691,
|
23172 |
+
"step": 13236
|
23173 |
+
},
|
23174 |
+
{
|
23175 |
+
"epoch": 0.65,
|
23176 |
+
"grad_norm": 2.006504535675049,
|
23177 |
+
"learning_rate": 1.7295815010623056e-05,
|
23178 |
+
"loss": 5.604,
|
23179 |
+
"step": 13240
|
23180 |
+
},
|
23181 |
+
{
|
23182 |
+
"epoch": 0.65,
|
23183 |
+
"grad_norm": 1.9548697471618652,
|
23184 |
+
"learning_rate": 1.7285933099461438e-05,
|
23185 |
+
"loss": 5.5202,
|
23186 |
+
"step": 13244
|
23187 |
+
},
|
23188 |
+
{
|
23189 |
+
"epoch": 0.65,
|
23190 |
+
"grad_norm": 1.903361201286316,
|
23191 |
+
"learning_rate": 1.727605118829982e-05,
|
23192 |
+
"loss": 5.5459,
|
23193 |
+
"step": 13248
|
23194 |
+
},
|
23195 |
+
{
|
23196 |
+
"epoch": 0.65,
|
23197 |
+
"grad_norm": 1.9035142660140991,
|
23198 |
+
"learning_rate": 1.72661692771382e-05,
|
23199 |
+
"loss": 5.5067,
|
23200 |
+
"step": 13252
|
23201 |
+
},
|
23202 |
+
{
|
23203 |
+
"epoch": 0.65,
|
23204 |
+
"grad_norm": 1.9801918268203735,
|
23205 |
+
"learning_rate": 1.725628736597658e-05,
|
23206 |
+
"loss": 5.502,
|
23207 |
+
"step": 13256
|
23208 |
+
},
|
23209 |
+
{
|
23210 |
+
"epoch": 0.66,
|
23211 |
+
"grad_norm": 1.8996813297271729,
|
23212 |
+
"learning_rate": 1.7246405454814963e-05,
|
23213 |
+
"loss": 5.4709,
|
23214 |
+
"step": 13260
|
23215 |
+
},
|
23216 |
+
{
|
23217 |
+
"epoch": 0.66,
|
23218 |
+
"grad_norm": 1.903206467628479,
|
23219 |
+
"learning_rate": 1.7236523543653345e-05,
|
23220 |
+
"loss": 5.4252,
|
23221 |
+
"step": 13264
|
23222 |
+
},
|
23223 |
+
{
|
23224 |
+
"epoch": 0.66,
|
23225 |
+
"grad_norm": 2.280048370361328,
|
23226 |
+
"learning_rate": 1.7226641632491724e-05,
|
23227 |
+
"loss": 5.6236,
|
23228 |
+
"step": 13268
|
23229 |
+
},
|
23230 |
+
{
|
23231 |
+
"epoch": 0.66,
|
23232 |
+
"grad_norm": 1.916698932647705,
|
23233 |
+
"learning_rate": 1.7216759721330106e-05,
|
23234 |
+
"loss": 5.5417,
|
23235 |
+
"step": 13272
|
23236 |
+
},
|
23237 |
+
{
|
23238 |
+
"epoch": 0.66,
|
23239 |
+
"grad_norm": 1.8944514989852905,
|
23240 |
+
"learning_rate": 1.7206877810168488e-05,
|
23241 |
+
"loss": 5.5273,
|
23242 |
+
"step": 13276
|
23243 |
+
},
|
23244 |
+
{
|
23245 |
+
"epoch": 0.66,
|
23246 |
+
"grad_norm": 2.160426139831543,
|
23247 |
+
"learning_rate": 1.7196995899006866e-05,
|
23248 |
+
"loss": 5.4925,
|
23249 |
+
"step": 13280
|
23250 |
+
},
|
23251 |
+
{
|
23252 |
+
"epoch": 0.66,
|
23253 |
+
"grad_norm": 1.9661693572998047,
|
23254 |
+
"learning_rate": 1.718711398784525e-05,
|
23255 |
+
"loss": 5.5566,
|
23256 |
+
"step": 13284
|
23257 |
+
},
|
23258 |
+
{
|
23259 |
+
"epoch": 0.66,
|
23260 |
+
"grad_norm": 2.1579537391662598,
|
23261 |
+
"learning_rate": 1.717723207668363e-05,
|
23262 |
+
"loss": 5.5,
|
23263 |
+
"step": 13288
|
23264 |
+
},
|
23265 |
+
{
|
23266 |
+
"epoch": 0.66,
|
23267 |
+
"grad_norm": 1.8311907052993774,
|
23268 |
+
"learning_rate": 1.7167350165522013e-05,
|
23269 |
+
"loss": 5.4402,
|
23270 |
+
"step": 13292
|
23271 |
+
},
|
23272 |
+
{
|
23273 |
+
"epoch": 0.66,
|
23274 |
+
"grad_norm": 2.067732810974121,
|
23275 |
+
"learning_rate": 1.7157468254360395e-05,
|
23276 |
+
"loss": 5.5876,
|
23277 |
+
"step": 13296
|
23278 |
+
},
|
23279 |
+
{
|
23280 |
+
"epoch": 0.66,
|
23281 |
+
"grad_norm": 1.9030920267105103,
|
23282 |
+
"learning_rate": 1.7147586343198777e-05,
|
23283 |
+
"loss": 5.5544,
|
23284 |
+
"step": 13300
|
23285 |
+
},
|
23286 |
+
{
|
23287 |
+
"epoch": 0.66,
|
23288 |
+
"grad_norm": 1.9689793586730957,
|
23289 |
+
"learning_rate": 1.713770443203716e-05,
|
23290 |
+
"loss": 5.5876,
|
23291 |
+
"step": 13304
|
23292 |
+
},
|
23293 |
+
{
|
23294 |
+
"epoch": 0.66,
|
23295 |
+
"grad_norm": 2.319972038269043,
|
23296 |
+
"learning_rate": 1.7127822520875537e-05,
|
23297 |
+
"loss": 5.6179,
|
23298 |
+
"step": 13308
|
23299 |
+
},
|
23300 |
+
{
|
23301 |
+
"epoch": 0.66,
|
23302 |
+
"grad_norm": 2.05124568939209,
|
23303 |
+
"learning_rate": 1.711794060971392e-05,
|
23304 |
+
"loss": 5.4808,
|
23305 |
+
"step": 13312
|
23306 |
+
},
|
23307 |
+
{
|
23308 |
+
"epoch": 0.66,
|
23309 |
+
"grad_norm": 2.054259777069092,
|
23310 |
+
"learning_rate": 1.71080586985523e-05,
|
23311 |
+
"loss": 5.571,
|
23312 |
+
"step": 13316
|
23313 |
+
},
|
23314 |
+
{
|
23315 |
+
"epoch": 0.66,
|
23316 |
+
"grad_norm": 2.1697633266448975,
|
23317 |
+
"learning_rate": 1.7098176787390684e-05,
|
23318 |
+
"loss": 5.5687,
|
23319 |
+
"step": 13320
|
23320 |
+
},
|
23321 |
+
{
|
23322 |
+
"epoch": 0.66,
|
23323 |
+
"grad_norm": 2.158599853515625,
|
23324 |
+
"learning_rate": 1.7088294876229062e-05,
|
23325 |
+
"loss": 5.4535,
|
23326 |
+
"step": 13324
|
23327 |
+
},
|
23328 |
+
{
|
23329 |
+
"epoch": 0.66,
|
23330 |
+
"grad_norm": 2.263106107711792,
|
23331 |
+
"learning_rate": 1.7078412965067444e-05,
|
23332 |
+
"loss": 5.5662,
|
23333 |
+
"step": 13328
|
23334 |
+
},
|
23335 |
+
{
|
23336 |
+
"epoch": 0.66,
|
23337 |
+
"grad_norm": 1.9761734008789062,
|
23338 |
+
"learning_rate": 1.7068531053905826e-05,
|
23339 |
+
"loss": 5.4655,
|
23340 |
+
"step": 13332
|
23341 |
+
},
|
23342 |
+
{
|
23343 |
+
"epoch": 0.66,
|
23344 |
+
"grad_norm": 2.1491572856903076,
|
23345 |
+
"learning_rate": 1.705864914274421e-05,
|
23346 |
+
"loss": 5.5403,
|
23347 |
+
"step": 13336
|
23348 |
+
},
|
23349 |
+
{
|
23350 |
+
"epoch": 0.66,
|
23351 |
+
"grad_norm": 1.9614084959030151,
|
23352 |
+
"learning_rate": 1.7048767231582587e-05,
|
23353 |
+
"loss": 5.4883,
|
23354 |
+
"step": 13340
|
23355 |
+
},
|
23356 |
+
{
|
23357 |
+
"epoch": 0.66,
|
23358 |
+
"grad_norm": 2.1084208488464355,
|
23359 |
+
"learning_rate": 1.703888532042097e-05,
|
23360 |
+
"loss": 5.4915,
|
23361 |
+
"step": 13344
|
23362 |
+
},
|
23363 |
+
{
|
23364 |
+
"epoch": 0.66,
|
23365 |
+
"grad_norm": 1.9315608739852905,
|
23366 |
+
"learning_rate": 1.702900340925935e-05,
|
23367 |
+
"loss": 5.531,
|
23368 |
+
"step": 13348
|
23369 |
+
},
|
23370 |
+
{
|
23371 |
+
"epoch": 0.66,
|
23372 |
+
"grad_norm": 1.9300886392593384,
|
23373 |
+
"learning_rate": 1.7019121498097733e-05,
|
23374 |
+
"loss": 5.5294,
|
23375 |
+
"step": 13352
|
23376 |
+
},
|
23377 |
+
{
|
23378 |
+
"epoch": 0.66,
|
23379 |
+
"grad_norm": 1.9439579248428345,
|
23380 |
+
"learning_rate": 1.7009239586936115e-05,
|
23381 |
+
"loss": 5.5372,
|
23382 |
+
"step": 13356
|
23383 |
+
},
|
23384 |
+
{
|
23385 |
+
"epoch": 0.66,
|
23386 |
+
"grad_norm": 1.8347561359405518,
|
23387 |
+
"learning_rate": 1.6999357675774497e-05,
|
23388 |
+
"loss": 5.5304,
|
23389 |
+
"step": 13360
|
23390 |
+
},
|
23391 |
+
{
|
23392 |
+
"epoch": 0.66,
|
23393 |
+
"grad_norm": 1.8642383813858032,
|
23394 |
+
"learning_rate": 1.6989475764612876e-05,
|
23395 |
+
"loss": 5.3163,
|
23396 |
+
"step": 13364
|
23397 |
+
},
|
23398 |
+
{
|
23399 |
+
"epoch": 0.66,
|
23400 |
+
"grad_norm": 1.9651672840118408,
|
23401 |
+
"learning_rate": 1.6979593853451258e-05,
|
23402 |
+
"loss": 5.4271,
|
23403 |
+
"step": 13368
|
23404 |
+
},
|
23405 |
+
{
|
23406 |
+
"epoch": 0.66,
|
23407 |
+
"grad_norm": 2.146197557449341,
|
23408 |
+
"learning_rate": 1.696971194228964e-05,
|
23409 |
+
"loss": 5.6012,
|
23410 |
+
"step": 13372
|
23411 |
+
},
|
23412 |
+
{
|
23413 |
+
"epoch": 0.66,
|
23414 |
+
"grad_norm": 1.9717754125595093,
|
23415 |
+
"learning_rate": 1.6959830031128022e-05,
|
23416 |
+
"loss": 5.4703,
|
23417 |
+
"step": 13376
|
23418 |
+
},
|
23419 |
+
{
|
23420 |
+
"epoch": 0.66,
|
23421 |
+
"grad_norm": 1.8049124479293823,
|
23422 |
+
"learning_rate": 1.69499481199664e-05,
|
23423 |
+
"loss": 5.4398,
|
23424 |
+
"step": 13380
|
23425 |
+
},
|
23426 |
+
{
|
23427 |
+
"epoch": 0.66,
|
23428 |
+
"grad_norm": 1.8642454147338867,
|
23429 |
+
"learning_rate": 1.6940066208804783e-05,
|
23430 |
+
"loss": 5.5652,
|
23431 |
+
"step": 13384
|
23432 |
+
},
|
23433 |
+
{
|
23434 |
+
"epoch": 0.66,
|
23435 |
+
"grad_norm": 1.8307348489761353,
|
23436 |
+
"learning_rate": 1.6930184297643165e-05,
|
23437 |
+
"loss": 5.6079,
|
23438 |
+
"step": 13388
|
23439 |
+
},
|
23440 |
+
{
|
23441 |
+
"epoch": 0.66,
|
23442 |
+
"grad_norm": 1.8231866359710693,
|
23443 |
+
"learning_rate": 1.6920302386481547e-05,
|
23444 |
+
"loss": 5.4477,
|
23445 |
+
"step": 13392
|
23446 |
+
},
|
23447 |
+
{
|
23448 |
+
"epoch": 0.66,
|
23449 |
+
"grad_norm": 1.7777388095855713,
|
23450 |
+
"learning_rate": 1.6910420475319926e-05,
|
23451 |
+
"loss": 5.5014,
|
23452 |
+
"step": 13396
|
23453 |
+
},
|
23454 |
+
{
|
23455 |
+
"epoch": 0.66,
|
23456 |
+
"grad_norm": 2.1190943717956543,
|
23457 |
+
"learning_rate": 1.6900538564158308e-05,
|
23458 |
+
"loss": 5.512,
|
23459 |
+
"step": 13400
|
23460 |
+
},
|
23461 |
+
{
|
23462 |
+
"epoch": 0.66,
|
23463 |
+
"grad_norm": 1.7859139442443848,
|
23464 |
+
"learning_rate": 1.689065665299669e-05,
|
23465 |
+
"loss": 5.4176,
|
23466 |
+
"step": 13404
|
23467 |
+
},
|
23468 |
+
{
|
23469 |
+
"epoch": 0.66,
|
23470 |
+
"grad_norm": 1.937577486038208,
|
23471 |
+
"learning_rate": 1.6880774741835072e-05,
|
23472 |
+
"loss": 5.5242,
|
23473 |
+
"step": 13408
|
23474 |
+
},
|
23475 |
+
{
|
23476 |
+
"epoch": 0.66,
|
23477 |
+
"grad_norm": 1.9069881439208984,
|
23478 |
+
"learning_rate": 1.6870892830673454e-05,
|
23479 |
+
"loss": 5.5248,
|
23480 |
+
"step": 13412
|
23481 |
+
},
|
23482 |
+
{
|
23483 |
+
"epoch": 0.66,
|
23484 |
+
"grad_norm": 2.3434953689575195,
|
23485 |
+
"learning_rate": 1.6861010919511836e-05,
|
23486 |
+
"loss": 5.5887,
|
23487 |
+
"step": 13416
|
23488 |
+
},
|
23489 |
+
{
|
23490 |
+
"epoch": 0.66,
|
23491 |
+
"grad_norm": 2.0236103534698486,
|
23492 |
+
"learning_rate": 1.6851129008350218e-05,
|
23493 |
+
"loss": 5.5241,
|
23494 |
+
"step": 13420
|
23495 |
+
},
|
23496 |
+
{
|
23497 |
+
"epoch": 0.66,
|
23498 |
+
"grad_norm": 1.807458519935608,
|
23499 |
+
"learning_rate": 1.6841247097188597e-05,
|
23500 |
+
"loss": 5.5018,
|
23501 |
+
"step": 13424
|
23502 |
+
},
|
23503 |
+
{
|
23504 |
+
"epoch": 0.66,
|
23505 |
+
"grad_norm": 1.7654306888580322,
|
23506 |
+
"learning_rate": 1.683136518602698e-05,
|
23507 |
+
"loss": 5.5346,
|
23508 |
+
"step": 13428
|
23509 |
+
},
|
23510 |
+
{
|
23511 |
+
"epoch": 0.66,
|
23512 |
+
"grad_norm": 1.796311378479004,
|
23513 |
+
"learning_rate": 1.682148327486536e-05,
|
23514 |
+
"loss": 5.4943,
|
23515 |
+
"step": 13432
|
23516 |
+
},
|
23517 |
+
{
|
23518 |
+
"epoch": 0.66,
|
23519 |
+
"grad_norm": 2.1759448051452637,
|
23520 |
+
"learning_rate": 1.681160136370374e-05,
|
23521 |
+
"loss": 5.5745,
|
23522 |
+
"step": 13436
|
23523 |
+
},
|
23524 |
+
{
|
23525 |
+
"epoch": 0.66,
|
23526 |
+
"grad_norm": 1.9799151420593262,
|
23527 |
+
"learning_rate": 1.680171945254212e-05,
|
23528 |
+
"loss": 5.5721,
|
23529 |
+
"step": 13440
|
23530 |
+
},
|
23531 |
+
{
|
23532 |
+
"epoch": 0.66,
|
23533 |
+
"grad_norm": 1.8375133275985718,
|
23534 |
+
"learning_rate": 1.6791837541380504e-05,
|
23535 |
+
"loss": 5.5058,
|
23536 |
+
"step": 13444
|
23537 |
+
},
|
23538 |
+
{
|
23539 |
+
"epoch": 0.66,
|
23540 |
+
"grad_norm": 2.0697977542877197,
|
23541 |
+
"learning_rate": 1.6781955630218886e-05,
|
23542 |
+
"loss": 5.49,
|
23543 |
+
"step": 13448
|
23544 |
+
},
|
23545 |
+
{
|
23546 |
+
"epoch": 0.66,
|
23547 |
+
"grad_norm": 1.968781590461731,
|
23548 |
+
"learning_rate": 1.6772073719057264e-05,
|
23549 |
+
"loss": 5.414,
|
23550 |
+
"step": 13452
|
23551 |
+
},
|
23552 |
+
{
|
23553 |
+
"epoch": 0.66,
|
23554 |
+
"grad_norm": 2.182166814804077,
|
23555 |
+
"learning_rate": 1.6762191807895647e-05,
|
23556 |
+
"loss": 5.6226,
|
23557 |
+
"step": 13456
|
23558 |
+
},
|
23559 |
+
{
|
23560 |
+
"epoch": 0.67,
|
23561 |
+
"grad_norm": 2.110377073287964,
|
23562 |
+
"learning_rate": 1.675230989673403e-05,
|
23563 |
+
"loss": 5.5626,
|
23564 |
+
"step": 13460
|
23565 |
+
},
|
23566 |
+
{
|
23567 |
+
"epoch": 0.67,
|
23568 |
+
"grad_norm": 1.979601263999939,
|
23569 |
+
"learning_rate": 1.674242798557241e-05,
|
23570 |
+
"loss": 5.62,
|
23571 |
+
"step": 13464
|
23572 |
+
},
|
23573 |
+
{
|
23574 |
+
"epoch": 0.67,
|
23575 |
+
"grad_norm": 2.2129158973693848,
|
23576 |
+
"learning_rate": 1.6732546074410793e-05,
|
23577 |
+
"loss": 5.5565,
|
23578 |
+
"step": 13468
|
23579 |
+
},
|
23580 |
+
{
|
23581 |
+
"epoch": 0.67,
|
23582 |
+
"grad_norm": 2.0401570796966553,
|
23583 |
+
"learning_rate": 1.6722664163249175e-05,
|
23584 |
+
"loss": 5.4781,
|
23585 |
+
"step": 13472
|
23586 |
+
},
|
23587 |
+
{
|
23588 |
+
"epoch": 0.67,
|
23589 |
+
"grad_norm": 1.7625924348831177,
|
23590 |
+
"learning_rate": 1.6712782252087557e-05,
|
23591 |
+
"loss": 5.4104,
|
23592 |
+
"step": 13476
|
23593 |
+
},
|
23594 |
+
{
|
23595 |
+
"epoch": 0.67,
|
23596 |
+
"grad_norm": 1.8981072902679443,
|
23597 |
+
"learning_rate": 1.6702900340925936e-05,
|
23598 |
+
"loss": 5.5208,
|
23599 |
+
"step": 13480
|
23600 |
+
},
|
23601 |
+
{
|
23602 |
+
"epoch": 0.67,
|
23603 |
+
"grad_norm": 2.141097068786621,
|
23604 |
+
"learning_rate": 1.6693018429764318e-05,
|
23605 |
+
"loss": 5.505,
|
23606 |
+
"step": 13484
|
23607 |
+
},
|
23608 |
+
{
|
23609 |
+
"epoch": 0.67,
|
23610 |
+
"grad_norm": 2.0228660106658936,
|
23611 |
+
"learning_rate": 1.66831365186027e-05,
|
23612 |
+
"loss": 5.5653,
|
23613 |
+
"step": 13488
|
23614 |
+
},
|
23615 |
+
{
|
23616 |
+
"epoch": 0.67,
|
23617 |
+
"grad_norm": 1.9627779722213745,
|
23618 |
+
"learning_rate": 1.667325460744108e-05,
|
23619 |
+
"loss": 5.5383,
|
23620 |
+
"step": 13492
|
23621 |
+
},
|
23622 |
+
{
|
23623 |
+
"epoch": 0.67,
|
23624 |
+
"grad_norm": 1.80388605594635,
|
23625 |
+
"learning_rate": 1.666337269627946e-05,
|
23626 |
+
"loss": 5.4095,
|
23627 |
+
"step": 13496
|
23628 |
+
},
|
23629 |
+
{
|
23630 |
+
"epoch": 0.67,
|
23631 |
+
"grad_norm": 2.125562906265259,
|
23632 |
+
"learning_rate": 1.6653490785117842e-05,
|
23633 |
+
"loss": 5.5157,
|
23634 |
+
"step": 13500
|
23635 |
+
},
|
23636 |
+
{
|
23637 |
+
"epoch": 0.67,
|
23638 |
+
"grad_norm": 2.3643898963928223,
|
23639 |
+
"learning_rate": 1.6643608873956224e-05,
|
23640 |
+
"loss": 5.4778,
|
23641 |
+
"step": 13504
|
23642 |
+
},
|
23643 |
+
{
|
23644 |
+
"epoch": 0.67,
|
23645 |
+
"grad_norm": 1.9573434591293335,
|
23646 |
+
"learning_rate": 1.6633726962794603e-05,
|
23647 |
+
"loss": 5.3951,
|
23648 |
+
"step": 13508
|
23649 |
+
},
|
23650 |
+
{
|
23651 |
+
"epoch": 0.67,
|
23652 |
+
"grad_norm": 1.767749309539795,
|
23653 |
+
"learning_rate": 1.6623845051632985e-05,
|
23654 |
+
"loss": 5.5775,
|
23655 |
+
"step": 13512
|
23656 |
+
},
|
23657 |
+
{
|
23658 |
+
"epoch": 0.67,
|
23659 |
+
"grad_norm": 1.7242648601531982,
|
23660 |
+
"learning_rate": 1.6613963140471367e-05,
|
23661 |
+
"loss": 5.5681,
|
23662 |
+
"step": 13516
|
23663 |
+
},
|
23664 |
+
{
|
23665 |
+
"epoch": 0.67,
|
23666 |
+
"grad_norm": 2.098914384841919,
|
23667 |
+
"learning_rate": 1.660408122930975e-05,
|
23668 |
+
"loss": 5.4892,
|
23669 |
+
"step": 13520
|
23670 |
+
},
|
23671 |
+
{
|
23672 |
+
"epoch": 0.67,
|
23673 |
+
"grad_norm": 1.9471126794815063,
|
23674 |
+
"learning_rate": 1.659419931814813e-05,
|
23675 |
+
"loss": 5.5672,
|
23676 |
+
"step": 13524
|
23677 |
+
},
|
23678 |
+
{
|
23679 |
+
"epoch": 0.67,
|
23680 |
+
"grad_norm": 1.9518860578536987,
|
23681 |
+
"learning_rate": 1.6584317406986513e-05,
|
23682 |
+
"loss": 5.5318,
|
23683 |
+
"step": 13528
|
23684 |
+
},
|
23685 |
+
{
|
23686 |
+
"epoch": 0.67,
|
23687 |
+
"grad_norm": 1.9697927236557007,
|
23688 |
+
"learning_rate": 1.6574435495824896e-05,
|
23689 |
+
"loss": 5.5275,
|
23690 |
+
"step": 13532
|
23691 |
+
},
|
23692 |
+
{
|
23693 |
+
"epoch": 0.67,
|
23694 |
+
"grad_norm": 2.005502700805664,
|
23695 |
+
"learning_rate": 1.6564553584663274e-05,
|
23696 |
+
"loss": 5.6122,
|
23697 |
+
"step": 13536
|
23698 |
+
},
|
23699 |
+
{
|
23700 |
+
"epoch": 0.67,
|
23701 |
+
"grad_norm": 2.06354022026062,
|
23702 |
+
"learning_rate": 1.6554671673501656e-05,
|
23703 |
+
"loss": 5.3431,
|
23704 |
+
"step": 13540
|
23705 |
+
},
|
23706 |
+
{
|
23707 |
+
"epoch": 0.67,
|
23708 |
+
"grad_norm": 1.9697012901306152,
|
23709 |
+
"learning_rate": 1.6544789762340038e-05,
|
23710 |
+
"loss": 5.5376,
|
23711 |
+
"step": 13544
|
23712 |
+
},
|
23713 |
+
{
|
23714 |
+
"epoch": 0.67,
|
23715 |
+
"grad_norm": 1.9158987998962402,
|
23716 |
+
"learning_rate": 1.653490785117842e-05,
|
23717 |
+
"loss": 5.5176,
|
23718 |
+
"step": 13548
|
23719 |
+
},
|
23720 |
+
{
|
23721 |
+
"epoch": 0.67,
|
23722 |
+
"grad_norm": 2.0514771938323975,
|
23723 |
+
"learning_rate": 1.65250259400168e-05,
|
23724 |
+
"loss": 5.5724,
|
23725 |
+
"step": 13552
|
23726 |
+
},
|
23727 |
+
{
|
23728 |
+
"epoch": 0.67,
|
23729 |
+
"grad_norm": 1.8229410648345947,
|
23730 |
+
"learning_rate": 1.651514402885518e-05,
|
23731 |
+
"loss": 5.4933,
|
23732 |
+
"step": 13556
|
23733 |
+
},
|
23734 |
+
{
|
23735 |
+
"epoch": 0.67,
|
23736 |
+
"grad_norm": 1.6895649433135986,
|
23737 |
+
"learning_rate": 1.6505262117693563e-05,
|
23738 |
+
"loss": 5.5462,
|
23739 |
+
"step": 13560
|
23740 |
+
},
|
23741 |
+
{
|
23742 |
+
"epoch": 0.67,
|
23743 |
+
"grad_norm": 1.9636627435684204,
|
23744 |
+
"learning_rate": 1.6495380206531942e-05,
|
23745 |
+
"loss": 5.4918,
|
23746 |
+
"step": 13564
|
23747 |
+
},
|
23748 |
+
{
|
23749 |
+
"epoch": 0.67,
|
23750 |
+
"grad_norm": 1.728353500366211,
|
23751 |
+
"learning_rate": 1.6485498295370324e-05,
|
23752 |
+
"loss": 5.4126,
|
23753 |
+
"step": 13568
|
23754 |
+
},
|
23755 |
+
{
|
23756 |
+
"epoch": 0.67,
|
23757 |
+
"grad_norm": 1.8992080688476562,
|
23758 |
+
"learning_rate": 1.6475616384208706e-05,
|
23759 |
+
"loss": 5.4954,
|
23760 |
+
"step": 13572
|
23761 |
+
},
|
23762 |
+
{
|
23763 |
+
"epoch": 0.67,
|
23764 |
+
"grad_norm": 1.8971209526062012,
|
23765 |
+
"learning_rate": 1.6465734473047088e-05,
|
23766 |
+
"loss": 5.5205,
|
23767 |
+
"step": 13576
|
23768 |
+
},
|
23769 |
+
{
|
23770 |
+
"epoch": 0.67,
|
23771 |
+
"grad_norm": 1.9099971055984497,
|
23772 |
+
"learning_rate": 1.6455852561885467e-05,
|
23773 |
+
"loss": 5.5401,
|
23774 |
+
"step": 13580
|
23775 |
+
},
|
23776 |
+
{
|
23777 |
+
"epoch": 0.67,
|
23778 |
+
"grad_norm": 1.7198883295059204,
|
23779 |
+
"learning_rate": 1.6445970650723852e-05,
|
23780 |
+
"loss": 5.5187,
|
23781 |
+
"step": 13584
|
23782 |
+
},
|
23783 |
+
{
|
23784 |
+
"epoch": 0.67,
|
23785 |
+
"grad_norm": 1.854477882385254,
|
23786 |
+
"learning_rate": 1.6436088739562234e-05,
|
23787 |
+
"loss": 5.5558,
|
23788 |
+
"step": 13588
|
23789 |
+
},
|
23790 |
+
{
|
23791 |
+
"epoch": 0.67,
|
23792 |
+
"grad_norm": 1.9261908531188965,
|
23793 |
+
"learning_rate": 1.6426206828400613e-05,
|
23794 |
+
"loss": 5.5156,
|
23795 |
+
"step": 13592
|
23796 |
+
},
|
23797 |
+
{
|
23798 |
+
"epoch": 0.67,
|
23799 |
+
"grad_norm": 2.166208505630493,
|
23800 |
+
"learning_rate": 1.6416324917238995e-05,
|
23801 |
+
"loss": 5.5002,
|
23802 |
+
"step": 13596
|
23803 |
+
},
|
23804 |
+
{
|
23805 |
+
"epoch": 0.67,
|
23806 |
+
"grad_norm": 1.8934109210968018,
|
23807 |
+
"learning_rate": 1.6406443006077377e-05,
|
23808 |
+
"loss": 5.5092,
|
23809 |
+
"step": 13600
|
23810 |
+
},
|
23811 |
+
{
|
23812 |
+
"epoch": 0.67,
|
23813 |
+
"grad_norm": 1.830947756767273,
|
23814 |
+
"learning_rate": 1.639656109491576e-05,
|
23815 |
+
"loss": 5.5542,
|
23816 |
+
"step": 13604
|
23817 |
+
},
|
23818 |
+
{
|
23819 |
+
"epoch": 0.67,
|
23820 |
+
"grad_norm": 1.86097252368927,
|
23821 |
+
"learning_rate": 1.6386679183754138e-05,
|
23822 |
+
"loss": 5.502,
|
23823 |
+
"step": 13608
|
23824 |
+
},
|
23825 |
+
{
|
23826 |
+
"epoch": 0.67,
|
23827 |
+
"grad_norm": 1.9578551054000854,
|
23828 |
+
"learning_rate": 1.637679727259252e-05,
|
23829 |
+
"loss": 5.5351,
|
23830 |
+
"step": 13612
|
23831 |
+
},
|
23832 |
+
{
|
23833 |
+
"epoch": 0.67,
|
23834 |
+
"grad_norm": 1.9985218048095703,
|
23835 |
+
"learning_rate": 1.6366915361430902e-05,
|
23836 |
+
"loss": 5.403,
|
23837 |
+
"step": 13616
|
23838 |
+
},
|
23839 |
+
{
|
23840 |
+
"epoch": 0.67,
|
23841 |
+
"grad_norm": 2.346946954727173,
|
23842 |
+
"learning_rate": 1.6357033450269284e-05,
|
23843 |
+
"loss": 5.4314,
|
23844 |
+
"step": 13620
|
23845 |
+
},
|
23846 |
+
{
|
23847 |
+
"epoch": 0.67,
|
23848 |
+
"grad_norm": 1.9613852500915527,
|
23849 |
+
"learning_rate": 1.6347151539107663e-05,
|
23850 |
+
"loss": 5.4851,
|
23851 |
+
"step": 13624
|
23852 |
+
},
|
23853 |
+
{
|
23854 |
+
"epoch": 0.67,
|
23855 |
+
"grad_norm": 2.2844598293304443,
|
23856 |
+
"learning_rate": 1.6337269627946045e-05,
|
23857 |
+
"loss": 5.4646,
|
23858 |
+
"step": 13628
|
23859 |
+
},
|
23860 |
+
{
|
23861 |
+
"epoch": 0.67,
|
23862 |
+
"grad_norm": 2.0212109088897705,
|
23863 |
+
"learning_rate": 1.6327387716784427e-05,
|
23864 |
+
"loss": 5.5153,
|
23865 |
+
"step": 13632
|
23866 |
+
},
|
23867 |
+
{
|
23868 |
+
"epoch": 0.67,
|
23869 |
+
"grad_norm": 2.0245635509490967,
|
23870 |
+
"learning_rate": 1.6317505805622805e-05,
|
23871 |
+
"loss": 5.5516,
|
23872 |
+
"step": 13636
|
23873 |
+
},
|
23874 |
+
{
|
23875 |
+
"epoch": 0.67,
|
23876 |
+
"grad_norm": 2.051738977432251,
|
23877 |
+
"learning_rate": 1.630762389446119e-05,
|
23878 |
+
"loss": 5.6327,
|
23879 |
+
"step": 13640
|
23880 |
+
},
|
23881 |
+
{
|
23882 |
+
"epoch": 0.67,
|
23883 |
+
"grad_norm": 2.0657126903533936,
|
23884 |
+
"learning_rate": 1.6297741983299573e-05,
|
23885 |
+
"loss": 5.5882,
|
23886 |
+
"step": 13644
|
23887 |
+
},
|
23888 |
+
{
|
23889 |
+
"epoch": 0.67,
|
23890 |
+
"grad_norm": 1.979304313659668,
|
23891 |
+
"learning_rate": 1.628786007213795e-05,
|
23892 |
+
"loss": 5.3553,
|
23893 |
+
"step": 13648
|
23894 |
+
},
|
23895 |
+
{
|
23896 |
+
"epoch": 0.67,
|
23897 |
+
"grad_norm": 2.10304856300354,
|
23898 |
+
"learning_rate": 1.6277978160976334e-05,
|
23899 |
+
"loss": 5.5474,
|
23900 |
+
"step": 13652
|
23901 |
+
},
|
23902 |
+
{
|
23903 |
+
"epoch": 0.67,
|
23904 |
+
"grad_norm": 2.0482354164123535,
|
23905 |
+
"learning_rate": 1.6268096249814716e-05,
|
23906 |
+
"loss": 5.5004,
|
23907 |
+
"step": 13656
|
23908 |
+
},
|
23909 |
+
{
|
23910 |
+
"epoch": 0.67,
|
23911 |
+
"grad_norm": 2.0741443634033203,
|
23912 |
+
"learning_rate": 1.6258214338653098e-05,
|
23913 |
+
"loss": 5.4887,
|
23914 |
+
"step": 13660
|
23915 |
+
},
|
23916 |
+
{
|
23917 |
+
"epoch": 0.68,
|
23918 |
+
"grad_norm": 1.9850010871887207,
|
23919 |
+
"learning_rate": 1.6248332427491476e-05,
|
23920 |
+
"loss": 5.4449,
|
23921 |
+
"step": 13664
|
23922 |
+
},
|
23923 |
+
{
|
23924 |
+
"epoch": 0.68,
|
23925 |
+
"grad_norm": 2.456059217453003,
|
23926 |
+
"learning_rate": 1.623845051632986e-05,
|
23927 |
+
"loss": 5.4471,
|
23928 |
+
"step": 13668
|
23929 |
+
},
|
23930 |
+
{
|
23931 |
+
"epoch": 0.68,
|
23932 |
+
"grad_norm": 1.8275492191314697,
|
23933 |
+
"learning_rate": 1.622856860516824e-05,
|
23934 |
+
"loss": 5.5139,
|
23935 |
+
"step": 13672
|
23936 |
+
},
|
23937 |
+
{
|
23938 |
+
"epoch": 0.68,
|
23939 |
+
"grad_norm": 2.166975736618042,
|
23940 |
+
"learning_rate": 1.6218686694006623e-05,
|
23941 |
+
"loss": 5.5475,
|
23942 |
+
"step": 13676
|
23943 |
+
},
|
23944 |
+
{
|
23945 |
+
"epoch": 0.68,
|
23946 |
+
"grad_norm": 2.0723702907562256,
|
23947 |
+
"learning_rate": 1.6208804782845e-05,
|
23948 |
+
"loss": 5.6062,
|
23949 |
+
"step": 13680
|
23950 |
+
},
|
23951 |
+
{
|
23952 |
+
"epoch": 0.68,
|
23953 |
+
"grad_norm": 2.227518320083618,
|
23954 |
+
"learning_rate": 1.6198922871683383e-05,
|
23955 |
+
"loss": 5.4955,
|
23956 |
+
"step": 13684
|
23957 |
+
},
|
23958 |
+
{
|
23959 |
+
"epoch": 0.68,
|
23960 |
+
"grad_norm": 1.9052515029907227,
|
23961 |
+
"learning_rate": 1.6189040960521765e-05,
|
23962 |
+
"loss": 5.5553,
|
23963 |
+
"step": 13688
|
23964 |
+
},
|
23965 |
+
{
|
23966 |
+
"epoch": 0.68,
|
23967 |
+
"grad_norm": 1.945573329925537,
|
23968 |
+
"learning_rate": 1.6179159049360147e-05,
|
23969 |
+
"loss": 5.3597,
|
23970 |
+
"step": 13692
|
23971 |
+
},
|
23972 |
+
{
|
23973 |
+
"epoch": 0.68,
|
23974 |
+
"grad_norm": 1.874603271484375,
|
23975 |
+
"learning_rate": 1.616927713819853e-05,
|
23976 |
+
"loss": 5.391,
|
23977 |
+
"step": 13696
|
23978 |
+
},
|
23979 |
+
{
|
23980 |
+
"epoch": 0.68,
|
23981 |
+
"grad_norm": 2.339505672454834,
|
23982 |
+
"learning_rate": 1.615939522703691e-05,
|
23983 |
+
"loss": 5.4194,
|
23984 |
+
"step": 13700
|
23985 |
+
},
|
23986 |
+
{
|
23987 |
+
"epoch": 0.68,
|
23988 |
+
"grad_norm": 1.7062286138534546,
|
23989 |
+
"learning_rate": 1.6149513315875294e-05,
|
23990 |
+
"loss": 5.4586,
|
23991 |
+
"step": 13704
|
23992 |
+
},
|
23993 |
+
{
|
23994 |
+
"epoch": 0.68,
|
23995 |
+
"grad_norm": 1.8526628017425537,
|
23996 |
+
"learning_rate": 1.6139631404713672e-05,
|
23997 |
+
"loss": 5.3614,
|
23998 |
+
"step": 13708
|
23999 |
+
},
|
24000 |
+
{
|
24001 |
+
"epoch": 0.68,
|
24002 |
+
"grad_norm": 1.8179652690887451,
|
24003 |
+
"learning_rate": 1.6129749493552054e-05,
|
24004 |
+
"loss": 5.4977,
|
24005 |
+
"step": 13712
|
24006 |
+
},
|
24007 |
+
{
|
24008 |
+
"epoch": 0.68,
|
24009 |
+
"grad_norm": 1.889673113822937,
|
24010 |
+
"learning_rate": 1.6119867582390436e-05,
|
24011 |
+
"loss": 5.5591,
|
24012 |
+
"step": 13716
|
24013 |
+
},
|
24014 |
+
{
|
24015 |
+
"epoch": 0.68,
|
24016 |
+
"grad_norm": 2.039731740951538,
|
24017 |
+
"learning_rate": 1.6109985671228815e-05,
|
24018 |
+
"loss": 5.4809,
|
24019 |
+
"step": 13720
|
24020 |
+
},
|
24021 |
+
{
|
24022 |
+
"epoch": 0.68,
|
24023 |
+
"grad_norm": 2.3024516105651855,
|
24024 |
+
"learning_rate": 1.6100103760067197e-05,
|
24025 |
+
"loss": 5.4518,
|
24026 |
+
"step": 13724
|
24027 |
+
},
|
24028 |
+
{
|
24029 |
+
"epoch": 0.68,
|
24030 |
+
"grad_norm": 1.9836472272872925,
|
24031 |
+
"learning_rate": 1.609022184890558e-05,
|
24032 |
+
"loss": 5.4812,
|
24033 |
+
"step": 13728
|
24034 |
+
},
|
24035 |
+
{
|
24036 |
+
"epoch": 0.68,
|
24037 |
+
"grad_norm": 1.8883979320526123,
|
24038 |
+
"learning_rate": 1.608033993774396e-05,
|
24039 |
+
"loss": 5.4914,
|
24040 |
+
"step": 13732
|
24041 |
+
},
|
24042 |
+
{
|
24043 |
+
"epoch": 0.68,
|
24044 |
+
"grad_norm": 1.9850894212722778,
|
24045 |
+
"learning_rate": 1.607045802658234e-05,
|
24046 |
+
"loss": 5.6323,
|
24047 |
+
"step": 13736
|
24048 |
+
},
|
24049 |
+
{
|
24050 |
+
"epoch": 0.68,
|
24051 |
+
"grad_norm": 2.124187707901001,
|
24052 |
+
"learning_rate": 1.6060576115420722e-05,
|
24053 |
+
"loss": 5.5609,
|
24054 |
+
"step": 13740
|
24055 |
+
},
|
24056 |
+
{
|
24057 |
+
"epoch": 0.68,
|
24058 |
+
"grad_norm": 1.9509978294372559,
|
24059 |
+
"learning_rate": 1.6050694204259104e-05,
|
24060 |
+
"loss": 5.5234,
|
24061 |
+
"step": 13744
|
24062 |
+
},
|
24063 |
+
{
|
24064 |
+
"epoch": 0.68,
|
24065 |
+
"grad_norm": 1.9340142011642456,
|
24066 |
+
"learning_rate": 1.6040812293097486e-05,
|
24067 |
+
"loss": 5.491,
|
24068 |
+
"step": 13748
|
24069 |
+
},
|
24070 |
+
{
|
24071 |
+
"epoch": 0.68,
|
24072 |
+
"grad_norm": 2.0254647731781006,
|
24073 |
+
"learning_rate": 1.6030930381935865e-05,
|
24074 |
+
"loss": 5.4735,
|
24075 |
+
"step": 13752
|
24076 |
+
},
|
24077 |
+
{
|
24078 |
+
"epoch": 0.68,
|
24079 |
+
"grad_norm": 2.143260955810547,
|
24080 |
+
"learning_rate": 1.602104847077425e-05,
|
24081 |
+
"loss": 5.5477,
|
24082 |
+
"step": 13756
|
24083 |
+
},
|
24084 |
+
{
|
24085 |
+
"epoch": 0.68,
|
24086 |
+
"grad_norm": 1.6931935548782349,
|
24087 |
+
"learning_rate": 1.6011166559612632e-05,
|
24088 |
+
"loss": 5.5821,
|
24089 |
+
"step": 13760
|
24090 |
+
},
|
24091 |
+
{
|
24092 |
+
"epoch": 0.68,
|
24093 |
+
"grad_norm": 1.9342801570892334,
|
24094 |
+
"learning_rate": 1.600128464845101e-05,
|
24095 |
+
"loss": 5.4517,
|
24096 |
+
"step": 13764
|
24097 |
+
},
|
24098 |
+
{
|
24099 |
+
"epoch": 0.68,
|
24100 |
+
"grad_norm": 1.8290477991104126,
|
24101 |
+
"learning_rate": 1.5991402737289393e-05,
|
24102 |
+
"loss": 5.4992,
|
24103 |
+
"step": 13768
|
24104 |
+
},
|
24105 |
+
{
|
24106 |
+
"epoch": 0.68,
|
24107 |
+
"grad_norm": 2.020256757736206,
|
24108 |
+
"learning_rate": 1.5981520826127775e-05,
|
24109 |
+
"loss": 5.5175,
|
24110 |
+
"step": 13772
|
24111 |
+
},
|
24112 |
+
{
|
24113 |
+
"epoch": 0.68,
|
24114 |
+
"grad_norm": 2.106531858444214,
|
24115 |
+
"learning_rate": 1.5971638914966157e-05,
|
24116 |
+
"loss": 5.4482,
|
24117 |
+
"step": 13776
|
24118 |
+
},
|
24119 |
+
{
|
24120 |
+
"epoch": 0.68,
|
24121 |
+
"grad_norm": 1.9939652681350708,
|
24122 |
+
"learning_rate": 1.5961757003804536e-05,
|
24123 |
+
"loss": 5.4952,
|
24124 |
+
"step": 13780
|
24125 |
+
},
|
24126 |
+
{
|
24127 |
+
"epoch": 0.68,
|
24128 |
+
"grad_norm": 2.0970144271850586,
|
24129 |
+
"learning_rate": 1.5951875092642918e-05,
|
24130 |
+
"loss": 5.5438,
|
24131 |
+
"step": 13784
|
24132 |
+
},
|
24133 |
+
{
|
24134 |
+
"epoch": 0.68,
|
24135 |
+
"grad_norm": 1.9521361589431763,
|
24136 |
+
"learning_rate": 1.59419931814813e-05,
|
24137 |
+
"loss": 5.5225,
|
24138 |
+
"step": 13788
|
24139 |
+
},
|
24140 |
+
{
|
24141 |
+
"epoch": 0.68,
|
24142 |
+
"grad_norm": 2.0148961544036865,
|
24143 |
+
"learning_rate": 1.593211127031968e-05,
|
24144 |
+
"loss": 5.4785,
|
24145 |
+
"step": 13792
|
24146 |
+
},
|
24147 |
+
{
|
24148 |
+
"epoch": 0.68,
|
24149 |
+
"grad_norm": 2.1378774642944336,
|
24150 |
+
"learning_rate": 1.592222935915806e-05,
|
24151 |
+
"loss": 5.5553,
|
24152 |
+
"step": 13796
|
24153 |
+
},
|
24154 |
+
{
|
24155 |
+
"epoch": 0.68,
|
24156 |
+
"grad_norm": 2.312819719314575,
|
24157 |
+
"learning_rate": 1.5912347447996443e-05,
|
24158 |
+
"loss": 5.4033,
|
24159 |
+
"step": 13800
|
24160 |
+
},
|
24161 |
+
{
|
24162 |
+
"epoch": 0.68,
|
24163 |
+
"grad_norm": 2.2060418128967285,
|
24164 |
+
"learning_rate": 1.5902465536834825e-05,
|
24165 |
+
"loss": 5.4713,
|
24166 |
+
"step": 13804
|
24167 |
+
},
|
24168 |
+
{
|
24169 |
+
"epoch": 0.68,
|
24170 |
+
"grad_norm": 1.8953157663345337,
|
24171 |
+
"learning_rate": 1.5892583625673203e-05,
|
24172 |
+
"loss": 5.5035,
|
24173 |
+
"step": 13808
|
24174 |
+
},
|
24175 |
+
{
|
24176 |
+
"epoch": 0.68,
|
24177 |
+
"grad_norm": 1.7217421531677246,
|
24178 |
+
"learning_rate": 1.588270171451159e-05,
|
24179 |
+
"loss": 5.6139,
|
24180 |
+
"step": 13812
|
24181 |
+
},
|
24182 |
+
{
|
24183 |
+
"epoch": 0.68,
|
24184 |
+
"grad_norm": 1.9969755411148071,
|
24185 |
+
"learning_rate": 1.587281980334997e-05,
|
24186 |
+
"loss": 5.44,
|
24187 |
+
"step": 13816
|
24188 |
+
},
|
24189 |
+
{
|
24190 |
+
"epoch": 0.68,
|
24191 |
+
"grad_norm": 2.071129560470581,
|
24192 |
+
"learning_rate": 1.586293789218835e-05,
|
24193 |
+
"loss": 5.4202,
|
24194 |
+
"step": 13820
|
24195 |
+
},
|
24196 |
+
{
|
24197 |
+
"epoch": 0.68,
|
24198 |
+
"grad_norm": 1.8004354238510132,
|
24199 |
+
"learning_rate": 1.5853055981026732e-05,
|
24200 |
+
"loss": 5.5286,
|
24201 |
+
"step": 13824
|
24202 |
+
},
|
24203 |
+
{
|
24204 |
+
"epoch": 0.68,
|
24205 |
+
"grad_norm": 2.0194168090820312,
|
24206 |
+
"learning_rate": 1.5843174069865114e-05,
|
24207 |
+
"loss": 5.426,
|
24208 |
+
"step": 13828
|
24209 |
+
},
|
24210 |
+
{
|
24211 |
+
"epoch": 0.68,
|
24212 |
+
"grad_norm": 1.7910535335540771,
|
24213 |
+
"learning_rate": 1.5833292158703496e-05,
|
24214 |
+
"loss": 5.529,
|
24215 |
+
"step": 13832
|
24216 |
+
},
|
24217 |
+
{
|
24218 |
+
"epoch": 0.68,
|
24219 |
+
"grad_norm": 2.0648398399353027,
|
24220 |
+
"learning_rate": 1.5823410247541874e-05,
|
24221 |
+
"loss": 5.5632,
|
24222 |
+
"step": 13836
|
24223 |
+
},
|
24224 |
+
{
|
24225 |
+
"epoch": 0.68,
|
24226 |
+
"grad_norm": 1.881971001625061,
|
24227 |
+
"learning_rate": 1.5813528336380257e-05,
|
24228 |
+
"loss": 5.6164,
|
24229 |
+
"step": 13840
|
24230 |
+
},
|
24231 |
+
{
|
24232 |
+
"epoch": 0.68,
|
24233 |
+
"grad_norm": 2.088383913040161,
|
24234 |
+
"learning_rate": 1.580364642521864e-05,
|
24235 |
+
"loss": 5.5221,
|
24236 |
+
"step": 13844
|
24237 |
+
},
|
24238 |
+
{
|
24239 |
+
"epoch": 0.68,
|
24240 |
+
"grad_norm": 1.9117428064346313,
|
24241 |
+
"learning_rate": 1.5793764514057017e-05,
|
24242 |
+
"loss": 5.6231,
|
24243 |
+
"step": 13848
|
24244 |
+
},
|
24245 |
+
{
|
24246 |
+
"epoch": 0.68,
|
24247 |
+
"grad_norm": 2.0459342002868652,
|
24248 |
+
"learning_rate": 1.57838826028954e-05,
|
24249 |
+
"loss": 5.3971,
|
24250 |
+
"step": 13852
|
24251 |
+
},
|
24252 |
+
{
|
24253 |
+
"epoch": 0.68,
|
24254 |
+
"grad_norm": 1.8177950382232666,
|
24255 |
+
"learning_rate": 1.577400069173378e-05,
|
24256 |
+
"loss": 5.4996,
|
24257 |
+
"step": 13856
|
24258 |
+
},
|
24259 |
+
{
|
24260 |
+
"epoch": 0.68,
|
24261 |
+
"grad_norm": 2.090265989303589,
|
24262 |
+
"learning_rate": 1.5764118780572163e-05,
|
24263 |
+
"loss": 5.5479,
|
24264 |
+
"step": 13860
|
24265 |
+
},
|
24266 |
+
{
|
24267 |
+
"epoch": 0.69,
|
24268 |
+
"grad_norm": 2.1461987495422363,
|
24269 |
+
"learning_rate": 1.5754236869410542e-05,
|
24270 |
+
"loss": 5.4959,
|
24271 |
+
"step": 13864
|
24272 |
+
},
|
24273 |
+
{
|
24274 |
+
"epoch": 0.69,
|
24275 |
+
"grad_norm": 2.008942127227783,
|
24276 |
+
"learning_rate": 1.5744354958248928e-05,
|
24277 |
+
"loss": 5.3582,
|
24278 |
+
"step": 13868
|
24279 |
+
},
|
24280 |
+
{
|
24281 |
+
"epoch": 0.69,
|
24282 |
+
"grad_norm": 1.7721225023269653,
|
24283 |
+
"learning_rate": 1.573447304708731e-05,
|
24284 |
+
"loss": 5.4618,
|
24285 |
+
"step": 13872
|
24286 |
+
},
|
24287 |
+
{
|
24288 |
+
"epoch": 0.69,
|
24289 |
+
"grad_norm": 2.161746025085449,
|
24290 |
+
"learning_rate": 1.572459113592569e-05,
|
24291 |
+
"loss": 5.5163,
|
24292 |
+
"step": 13876
|
24293 |
+
},
|
24294 |
+
{
|
24295 |
+
"epoch": 0.69,
|
24296 |
+
"grad_norm": 2.0746963024139404,
|
24297 |
+
"learning_rate": 1.571470922476407e-05,
|
24298 |
+
"loss": 5.4898,
|
24299 |
+
"step": 13880
|
24300 |
+
},
|
24301 |
+
{
|
24302 |
+
"epoch": 0.69,
|
24303 |
+
"grad_norm": Infinity,
|
24304 |
+
"learning_rate": 1.5707297791392857e-05,
|
24305 |
+
"loss": 5.5354,
|
24306 |
+
"step": 13884
|
24307 |
+
},
|
24308 |
+
{
|
24309 |
+
"epoch": 0.69,
|
24310 |
+
"grad_norm": 2.0238466262817383,
|
24311 |
+
"learning_rate": 1.5697415880231236e-05,
|
24312 |
+
"loss": 5.5099,
|
24313 |
+
"step": 13888
|
24314 |
+
},
|
24315 |
+
{
|
24316 |
+
"epoch": 0.69,
|
24317 |
+
"grad_norm": 1.9300113916397095,
|
24318 |
+
"learning_rate": 1.5687533969069618e-05,
|
24319 |
+
"loss": 5.682,
|
24320 |
+
"step": 13892
|
24321 |
+
},
|
24322 |
+
{
|
24323 |
+
"epoch": 0.69,
|
24324 |
+
"grad_norm": 1.9683018922805786,
|
24325 |
+
"learning_rate": 1.5677652057908e-05,
|
24326 |
+
"loss": 5.5058,
|
24327 |
+
"step": 13896
|
24328 |
+
},
|
24329 |
+
{
|
24330 |
+
"epoch": 0.69,
|
24331 |
+
"grad_norm": 1.975710391998291,
|
24332 |
+
"learning_rate": 1.5667770146746382e-05,
|
24333 |
+
"loss": 5.5139,
|
24334 |
+
"step": 13900
|
24335 |
+
},
|
24336 |
+
{
|
24337 |
+
"epoch": 0.69,
|
24338 |
+
"grad_norm": 1.9731028079986572,
|
24339 |
+
"learning_rate": 1.5657888235584764e-05,
|
24340 |
+
"loss": 5.565,
|
24341 |
+
"step": 13904
|
24342 |
+
},
|
24343 |
+
{
|
24344 |
+
"epoch": 0.69,
|
24345 |
+
"grad_norm": 2.0347113609313965,
|
24346 |
+
"learning_rate": 1.5648006324423146e-05,
|
24347 |
+
"loss": 5.6587,
|
24348 |
+
"step": 13908
|
24349 |
+
},
|
24350 |
+
{
|
24351 |
+
"epoch": 0.69,
|
24352 |
+
"grad_norm": 1.9666558504104614,
|
24353 |
+
"learning_rate": 1.5638124413261525e-05,
|
24354 |
+
"loss": 5.6072,
|
24355 |
+
"step": 13912
|
24356 |
+
},
|
24357 |
+
{
|
24358 |
+
"epoch": 0.69,
|
24359 |
+
"grad_norm": 1.808203935623169,
|
24360 |
+
"learning_rate": 1.5628242502099907e-05,
|
24361 |
+
"loss": 5.4888,
|
24362 |
+
"step": 13916
|
24363 |
+
},
|
24364 |
+
{
|
24365 |
+
"epoch": 0.69,
|
24366 |
+
"grad_norm": 2.0783326625823975,
|
24367 |
+
"learning_rate": 1.561836059093829e-05,
|
24368 |
+
"loss": 5.6407,
|
24369 |
+
"step": 13920
|
24370 |
+
},
|
24371 |
+
{
|
24372 |
+
"epoch": 0.69,
|
24373 |
+
"grad_norm": 2.006601572036743,
|
24374 |
+
"learning_rate": 1.560847867977667e-05,
|
24375 |
+
"loss": 5.5085,
|
24376 |
+
"step": 13924
|
24377 |
+
},
|
24378 |
+
{
|
24379 |
+
"epoch": 0.69,
|
24380 |
+
"grad_norm": 1.8633679151535034,
|
24381 |
+
"learning_rate": 1.559859676861505e-05,
|
24382 |
+
"loss": 5.4967,
|
24383 |
+
"step": 13928
|
24384 |
+
},
|
24385 |
+
{
|
24386 |
+
"epoch": 0.69,
|
24387 |
+
"grad_norm": 1.9505640268325806,
|
24388 |
+
"learning_rate": 1.558871485745343e-05,
|
24389 |
+
"loss": 5.4678,
|
24390 |
+
"step": 13932
|
24391 |
+
},
|
24392 |
+
{
|
24393 |
+
"epoch": 0.69,
|
24394 |
+
"grad_norm": 2.0986247062683105,
|
24395 |
+
"learning_rate": 1.5578832946291814e-05,
|
24396 |
+
"loss": 5.4932,
|
24397 |
+
"step": 13936
|
24398 |
+
},
|
24399 |
+
{
|
24400 |
+
"epoch": 0.69,
|
24401 |
+
"grad_norm": 2.256755828857422,
|
24402 |
+
"learning_rate": 1.5568951035130196e-05,
|
24403 |
+
"loss": 5.5958,
|
24404 |
+
"step": 13940
|
24405 |
+
},
|
24406 |
+
{
|
24407 |
+
"epoch": 0.69,
|
24408 |
+
"grad_norm": 2.2147109508514404,
|
24409 |
+
"learning_rate": 1.5559069123968574e-05,
|
24410 |
+
"loss": 5.5194,
|
24411 |
+
"step": 13944
|
24412 |
+
},
|
24413 |
+
{
|
24414 |
+
"epoch": 0.69,
|
24415 |
+
"grad_norm": 1.88095223903656,
|
24416 |
+
"learning_rate": 1.5549187212806956e-05,
|
24417 |
+
"loss": 5.4609,
|
24418 |
+
"step": 13948
|
24419 |
+
},
|
24420 |
+
{
|
24421 |
+
"epoch": 0.69,
|
24422 |
+
"grad_norm": 1.867007851600647,
|
24423 |
+
"learning_rate": 1.553930530164534e-05,
|
24424 |
+
"loss": 5.4802,
|
24425 |
+
"step": 13952
|
24426 |
+
},
|
24427 |
+
{
|
24428 |
+
"epoch": 0.69,
|
24429 |
+
"grad_norm": 2.029303789138794,
|
24430 |
+
"learning_rate": 1.552942339048372e-05,
|
24431 |
+
"loss": 5.5337,
|
24432 |
+
"step": 13956
|
24433 |
+
},
|
24434 |
+
{
|
24435 |
+
"epoch": 0.69,
|
24436 |
+
"grad_norm": 1.9142879247665405,
|
24437 |
+
"learning_rate": 1.5519541479322103e-05,
|
24438 |
+
"loss": 5.4932,
|
24439 |
+
"step": 13960
|
24440 |
+
},
|
24441 |
+
{
|
24442 |
+
"epoch": 0.69,
|
24443 |
+
"grad_norm": 1.8500617742538452,
|
24444 |
+
"learning_rate": 1.5509659568160485e-05,
|
24445 |
+
"loss": 5.4916,
|
24446 |
+
"step": 13964
|
24447 |
+
},
|
24448 |
+
{
|
24449 |
+
"epoch": 0.69,
|
24450 |
+
"grad_norm": 2.0694692134857178,
|
24451 |
+
"learning_rate": 1.5499777656998867e-05,
|
24452 |
+
"loss": 5.4766,
|
24453 |
+
"step": 13968
|
24454 |
+
},
|
24455 |
+
{
|
24456 |
+
"epoch": 0.69,
|
24457 |
+
"grad_norm": 2.122901201248169,
|
24458 |
+
"learning_rate": 1.5489895745837245e-05,
|
24459 |
+
"loss": 5.6467,
|
24460 |
+
"step": 13972
|
24461 |
+
},
|
24462 |
+
{
|
24463 |
+
"epoch": 0.69,
|
24464 |
+
"grad_norm": 2.108297109603882,
|
24465 |
+
"learning_rate": 1.5480013834675627e-05,
|
24466 |
+
"loss": 5.5902,
|
24467 |
+
"step": 13976
|
24468 |
+
},
|
24469 |
+
{
|
24470 |
+
"epoch": 0.69,
|
24471 |
+
"grad_norm": 1.9898920059204102,
|
24472 |
+
"learning_rate": 1.547013192351401e-05,
|
24473 |
+
"loss": 5.5772,
|
24474 |
+
"step": 13980
|
24475 |
+
},
|
24476 |
+
{
|
24477 |
+
"epoch": 0.69,
|
24478 |
+
"grad_norm": 2.2552645206451416,
|
24479 |
+
"learning_rate": 1.5460250012352388e-05,
|
24480 |
+
"loss": 5.507,
|
24481 |
+
"step": 13984
|
24482 |
+
},
|
24483 |
+
{
|
24484 |
+
"epoch": 0.69,
|
24485 |
+
"grad_norm": 1.9198163747787476,
|
24486 |
+
"learning_rate": 1.545036810119077e-05,
|
24487 |
+
"loss": 5.4749,
|
24488 |
+
"step": 13988
|
24489 |
+
},
|
24490 |
+
{
|
24491 |
+
"epoch": 0.69,
|
24492 |
+
"grad_norm": 2.101717233657837,
|
24493 |
+
"learning_rate": 1.5440486190029152e-05,
|
24494 |
+
"loss": 5.444,
|
24495 |
+
"step": 13992
|
24496 |
+
},
|
24497 |
+
{
|
24498 |
+
"epoch": 0.69,
|
24499 |
+
"grad_norm": 2.1184744834899902,
|
24500 |
+
"learning_rate": 1.5430604278867534e-05,
|
24501 |
+
"loss": 5.6017,
|
24502 |
+
"step": 13996
|
24503 |
+
},
|
24504 |
+
{
|
24505 |
+
"epoch": 0.69,
|
24506 |
+
"grad_norm": 1.7170560359954834,
|
24507 |
+
"learning_rate": 1.5420722367705913e-05,
|
24508 |
+
"loss": 5.4235,
|
24509 |
+
"step": 14000
|
24510 |
+
},
|
24511 |
+
{
|
24512 |
+
"epoch": 0.69,
|
24513 |
+
"grad_norm": 2.1068732738494873,
|
24514 |
+
"learning_rate": 1.5410840456544295e-05,
|
24515 |
+
"loss": 5.504,
|
24516 |
+
"step": 14004
|
24517 |
+
},
|
24518 |
+
{
|
24519 |
+
"epoch": 0.69,
|
24520 |
+
"grad_norm": 2.275245189666748,
|
24521 |
+
"learning_rate": 1.5400958545382677e-05,
|
24522 |
+
"loss": 5.5617,
|
24523 |
+
"step": 14008
|
24524 |
+
},
|
24525 |
+
{
|
24526 |
+
"epoch": 0.69,
|
24527 |
+
"grad_norm": 2.2603347301483154,
|
24528 |
+
"learning_rate": 1.539107663422106e-05,
|
24529 |
+
"loss": 5.4309,
|
24530 |
+
"step": 14012
|
24531 |
+
},
|
24532 |
+
{
|
24533 |
+
"epoch": 0.69,
|
24534 |
+
"grad_norm": 2.082984685897827,
|
24535 |
+
"learning_rate": 1.538119472305944e-05,
|
24536 |
+
"loss": 5.408,
|
24537 |
+
"step": 14016
|
24538 |
+
},
|
24539 |
+
{
|
24540 |
+
"epoch": 0.69,
|
24541 |
+
"grad_norm": 1.8457398414611816,
|
24542 |
+
"learning_rate": 1.5371312811897823e-05,
|
24543 |
+
"loss": 5.4611,
|
24544 |
+
"step": 14020
|
24545 |
+
},
|
24546 |
+
{
|
24547 |
+
"epoch": 0.69,
|
24548 |
+
"grad_norm": 2.0834155082702637,
|
24549 |
+
"learning_rate": 1.5361430900736205e-05,
|
24550 |
+
"loss": 5.4901,
|
24551 |
+
"step": 14024
|
24552 |
+
},
|
24553 |
+
{
|
24554 |
+
"epoch": 0.69,
|
24555 |
+
"grad_norm": 1.9145028591156006,
|
24556 |
+
"learning_rate": 1.5351548989574584e-05,
|
24557 |
+
"loss": 5.5115,
|
24558 |
+
"step": 14028
|
24559 |
+
},
|
24560 |
+
{
|
24561 |
+
"epoch": 0.69,
|
24562 |
+
"grad_norm": 1.7850406169891357,
|
24563 |
+
"learning_rate": 1.5341667078412966e-05,
|
24564 |
+
"loss": 5.4384,
|
24565 |
+
"step": 14032
|
24566 |
+
},
|
24567 |
+
{
|
24568 |
+
"epoch": 0.69,
|
24569 |
+
"grad_norm": 1.964521884918213,
|
24570 |
+
"learning_rate": 1.5331785167251348e-05,
|
24571 |
+
"loss": 5.3728,
|
24572 |
+
"step": 14036
|
24573 |
+
},
|
24574 |
+
{
|
24575 |
+
"epoch": 0.69,
|
24576 |
+
"grad_norm": 2.125605821609497,
|
24577 |
+
"learning_rate": 1.532190325608973e-05,
|
24578 |
+
"loss": 5.4972,
|
24579 |
+
"step": 14040
|
24580 |
+
},
|
24581 |
+
{
|
24582 |
+
"epoch": 0.69,
|
24583 |
+
"grad_norm": 1.7855336666107178,
|
24584 |
+
"learning_rate": 1.531202134492811e-05,
|
24585 |
+
"loss": 5.5624,
|
24586 |
+
"step": 14044
|
24587 |
+
},
|
24588 |
+
{
|
24589 |
+
"epoch": 0.69,
|
24590 |
+
"grad_norm": 1.9751635789871216,
|
24591 |
+
"learning_rate": 1.530213943376649e-05,
|
24592 |
+
"loss": 5.4388,
|
24593 |
+
"step": 14048
|
24594 |
+
},
|
24595 |
+
{
|
24596 |
+
"epoch": 0.69,
|
24597 |
+
"grad_norm": 1.943023681640625,
|
24598 |
+
"learning_rate": 1.5292257522604873e-05,
|
24599 |
+
"loss": 5.4861,
|
24600 |
+
"step": 14052
|
24601 |
+
},
|
24602 |
+
{
|
24603 |
+
"epoch": 0.69,
|
24604 |
+
"grad_norm": 1.7670375108718872,
|
24605 |
+
"learning_rate": 1.528237561144325e-05,
|
24606 |
+
"loss": 5.5438,
|
24607 |
+
"step": 14056
|
24608 |
+
},
|
24609 |
+
{
|
24610 |
+
"epoch": 0.69,
|
24611 |
+
"grad_norm": 2.040696859359741,
|
24612 |
+
"learning_rate": 1.5272493700281634e-05,
|
24613 |
+
"loss": 5.4905,
|
24614 |
+
"step": 14060
|
24615 |
+
},
|
24616 |
+
{
|
24617 |
+
"epoch": 0.69,
|
24618 |
+
"grad_norm": 2.2104179859161377,
|
24619 |
+
"learning_rate": 1.5262611789120016e-05,
|
24620 |
+
"loss": 5.4197,
|
24621 |
+
"step": 14064
|
24622 |
+
},
|
24623 |
+
{
|
24624 |
+
"epoch": 0.7,
|
24625 |
+
"grad_norm": 2.070598840713501,
|
24626 |
+
"learning_rate": 1.5252729877958396e-05,
|
24627 |
+
"loss": 5.3342,
|
24628 |
+
"step": 14068
|
24629 |
+
},
|
24630 |
+
{
|
24631 |
+
"epoch": 0.7,
|
24632 |
+
"grad_norm": 2.1541106700897217,
|
24633 |
+
"learning_rate": 1.524284796679678e-05,
|
24634 |
+
"loss": 5.5594,
|
24635 |
+
"step": 14072
|
24636 |
+
},
|
24637 |
+
{
|
24638 |
+
"epoch": 0.7,
|
24639 |
+
"grad_norm": 1.8858451843261719,
|
24640 |
+
"learning_rate": 1.5232966055635162e-05,
|
24641 |
+
"loss": 5.5494,
|
24642 |
+
"step": 14076
|
24643 |
+
},
|
24644 |
+
{
|
24645 |
+
"epoch": 0.7,
|
24646 |
+
"grad_norm": 2.0127689838409424,
|
24647 |
+
"learning_rate": 1.5223084144473542e-05,
|
24648 |
+
"loss": 5.5005,
|
24649 |
+
"step": 14080
|
24650 |
+
},
|
24651 |
+
{
|
24652 |
+
"epoch": 0.7,
|
24653 |
+
"grad_norm": 1.70602548122406,
|
24654 |
+
"learning_rate": 1.5213202233311924e-05,
|
24655 |
+
"loss": 5.4694,
|
24656 |
+
"step": 14084
|
24657 |
+
},
|
24658 |
+
{
|
24659 |
+
"epoch": 0.7,
|
24660 |
+
"grad_norm": 2.1480917930603027,
|
24661 |
+
"learning_rate": 1.5203320322150305e-05,
|
24662 |
+
"loss": 5.5891,
|
24663 |
+
"step": 14088
|
24664 |
+
},
|
24665 |
+
{
|
24666 |
+
"epoch": 0.7,
|
24667 |
+
"grad_norm": 1.8127115964889526,
|
24668 |
+
"learning_rate": 1.5193438410988687e-05,
|
24669 |
+
"loss": 5.497,
|
24670 |
+
"step": 14092
|
24671 |
+
},
|
24672 |
+
{
|
24673 |
+
"epoch": 0.7,
|
24674 |
+
"grad_norm": 1.768389105796814,
|
24675 |
+
"learning_rate": 1.5183556499827067e-05,
|
24676 |
+
"loss": 5.5075,
|
24677 |
+
"step": 14096
|
24678 |
+
},
|
24679 |
+
{
|
24680 |
+
"epoch": 0.7,
|
24681 |
+
"grad_norm": 2.2110512256622314,
|
24682 |
+
"learning_rate": 1.517367458866545e-05,
|
24683 |
+
"loss": 5.4944,
|
24684 |
+
"step": 14100
|
24685 |
+
},
|
24686 |
+
{
|
24687 |
+
"epoch": 0.7,
|
24688 |
+
"grad_norm": 2.2322652339935303,
|
24689 |
+
"learning_rate": 1.516379267750383e-05,
|
24690 |
+
"loss": 5.5072,
|
24691 |
+
"step": 14104
|
24692 |
+
},
|
24693 |
+
{
|
24694 |
+
"epoch": 0.7,
|
24695 |
+
"grad_norm": 2.103665351867676,
|
24696 |
+
"learning_rate": 1.5153910766342212e-05,
|
24697 |
+
"loss": 5.5093,
|
24698 |
+
"step": 14108
|
24699 |
+
},
|
24700 |
+
{
|
24701 |
+
"epoch": 0.7,
|
24702 |
+
"grad_norm": 2.0604758262634277,
|
24703 |
+
"learning_rate": 1.5144028855180592e-05,
|
24704 |
+
"loss": 5.4539,
|
24705 |
+
"step": 14112
|
24706 |
+
},
|
24707 |
+
{
|
24708 |
+
"epoch": 0.7,
|
24709 |
+
"grad_norm": 1.8728346824645996,
|
24710 |
+
"learning_rate": 1.5134146944018972e-05,
|
24711 |
+
"loss": 5.401,
|
24712 |
+
"step": 14116
|
24713 |
+
},
|
24714 |
+
{
|
24715 |
+
"epoch": 0.7,
|
24716 |
+
"grad_norm": 2.031858444213867,
|
24717 |
+
"learning_rate": 1.5124265032857354e-05,
|
24718 |
+
"loss": 5.499,
|
24719 |
+
"step": 14120
|
24720 |
+
},
|
24721 |
+
{
|
24722 |
+
"epoch": 0.7,
|
24723 |
+
"grad_norm": 1.828974962234497,
|
24724 |
+
"learning_rate": 1.5114383121695735e-05,
|
24725 |
+
"loss": 5.4467,
|
24726 |
+
"step": 14124
|
24727 |
+
},
|
24728 |
+
{
|
24729 |
+
"epoch": 0.7,
|
24730 |
+
"grad_norm": 1.8760355710983276,
|
24731 |
+
"learning_rate": 1.5104501210534119e-05,
|
24732 |
+
"loss": 5.3958,
|
24733 |
+
"step": 14128
|
24734 |
+
},
|
24735 |
+
{
|
24736 |
+
"epoch": 0.7,
|
24737 |
+
"grad_norm": 1.8751137256622314,
|
24738 |
+
"learning_rate": 1.50946192993725e-05,
|
24739 |
+
"loss": 5.4515,
|
24740 |
+
"step": 14132
|
24741 |
+
},
|
24742 |
+
{
|
24743 |
+
"epoch": 0.7,
|
24744 |
+
"grad_norm": 1.9187356233596802,
|
24745 |
+
"learning_rate": 1.5084737388210881e-05,
|
24746 |
+
"loss": 5.5581,
|
24747 |
+
"step": 14136
|
24748 |
+
},
|
24749 |
+
{
|
24750 |
+
"epoch": 0.7,
|
24751 |
+
"grad_norm": 2.1077044010162354,
|
24752 |
+
"learning_rate": 1.5074855477049263e-05,
|
24753 |
+
"loss": 5.5018,
|
24754 |
+
"step": 14140
|
24755 |
+
},
|
24756 |
+
{
|
24757 |
+
"epoch": 0.7,
|
24758 |
+
"grad_norm": 1.9304084777832031,
|
24759 |
+
"learning_rate": 1.5064973565887643e-05,
|
24760 |
+
"loss": 5.6017,
|
24761 |
+
"step": 14144
|
24762 |
+
},
|
24763 |
+
{
|
24764 |
+
"epoch": 0.7,
|
24765 |
+
"grad_norm": 1.7740778923034668,
|
24766 |
+
"learning_rate": 1.5055091654726025e-05,
|
24767 |
+
"loss": 5.5123,
|
24768 |
+
"step": 14148
|
24769 |
+
},
|
24770 |
+
{
|
24771 |
+
"epoch": 0.7,
|
24772 |
+
"grad_norm": 1.9732328653335571,
|
24773 |
+
"learning_rate": 1.5045209743564406e-05,
|
24774 |
+
"loss": 5.4148,
|
24775 |
+
"step": 14152
|
24776 |
+
},
|
24777 |
+
{
|
24778 |
+
"epoch": 0.7,
|
24779 |
+
"grad_norm": 1.98763906955719,
|
24780 |
+
"learning_rate": 1.5035327832402788e-05,
|
24781 |
+
"loss": 5.5671,
|
24782 |
+
"step": 14156
|
24783 |
+
},
|
24784 |
+
{
|
24785 |
+
"epoch": 0.7,
|
24786 |
+
"grad_norm": 2.025677442550659,
|
24787 |
+
"learning_rate": 1.5025445921241168e-05,
|
24788 |
+
"loss": 5.5388,
|
24789 |
+
"step": 14160
|
24790 |
+
},
|
24791 |
+
{
|
24792 |
+
"epoch": 0.7,
|
24793 |
+
"grad_norm": 2.1304235458374023,
|
24794 |
+
"learning_rate": 1.501556401007955e-05,
|
24795 |
+
"loss": 5.549,
|
24796 |
+
"step": 14164
|
24797 |
+
},
|
24798 |
+
{
|
24799 |
+
"epoch": 0.7,
|
24800 |
+
"grad_norm": 1.8666431903839111,
|
24801 |
+
"learning_rate": 1.500568209891793e-05,
|
24802 |
+
"loss": 5.5133,
|
24803 |
+
"step": 14168
|
24804 |
}
|
24805 |
],
|
24806 |
"logging_steps": 4,
|
|
|
24808 |
"num_input_tokens_seen": 0,
|
24809 |
"num_train_epochs": 1,
|
24810 |
"save_steps": 2024,
|
24811 |
+
"total_flos": 5.967912250428621e+16,
|
24812 |
"train_batch_size": 8,
|
24813 |
"trial_name": null,
|
24814 |
"trial_params": null
|