gabrielaltay
committed on
Training in progress, step 16192, checkpoint
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c2f0dbf644a9677e79e68a1d141370f9fc19d80aafd4fa3703ba7daaf4679cdf
 size 500389884
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1f9ef24481a5454737bb36638668624276fc1de872bfb96458291412f8b836da
 size 1000900218
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0ea443f9780df3e4f55b7a8fe78f5f54f5f5faaadff43eb645514989afd8f776
 size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6bde8530504f368a95ffd37dfe12eaf937597762fada8d945ae1e071babd54fa
 size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5539ffd5320a73f47cc2092b2ac4690b8e116f5ffa1613c0c9ef2d49e29add72
 size 1064
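Each binary checkpoint file above is stored through Git LFS, so the commit only touches the three-line pointer file (version, oid sha256:<hex>, size in bytes), never the payload itself. As a minimal sketch, assuming hypothetical local paths, a pointer can be parsed and checked against a downloaded payload like this:

import hashlib
from pathlib import Path

def parse_lfs_pointer(text: str) -> dict:
    # A git-lfs pointer has lines: "version <url>", "oid sha256:<hex>", "size <bytes>"
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    algo, _, digest = fields["oid"].partition(":")
    return {"version": fields["version"], "algo": algo,
            "digest": digest, "size": int(fields["size"])}

def verify(pointer_path: Path, payload_path: Path) -> bool:
    # Check that a local payload matches the size and sha256 digest in its pointer.
    meta = parse_lfs_pointer(pointer_path.read_text())
    if payload_path.stat().st_size != meta["size"]:
        return False
    h = hashlib.sha256()
    with payload_path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == meta["digest"]

# Hypothetical paths; in this commit the model pointer keeps size 500389884
# while only its sha256 digest changes.
# verify(Path("model.safetensors.pointer"), Path("model.safetensors"))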
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 0.
 "eval_steps": 500,
-"global_step":
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -24801,6 +24801,3548 @@
 "learning_rate": 1.500568209891793e-05,
 "loss": 5.5133,
 "step": 14168
 }
 ],
 "logging_steps": 4,
@@ -24808,7 +28350,7 @@
 "num_input_tokens_seen": 0,
 "num_train_epochs": 1,
 "save_steps": 2024,
-"total_flos":
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null
 {
 "best_metric": null,
 "best_model_checkpoint": null,
+"epoch": 0.8000395276446465,
 "eval_steps": 500,
+"global_step": 16192,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
 "learning_rate": 1.500568209891793e-05,
 "loss": 5.5133,
 "step": 14168
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.001925468444824,
+"learning_rate": 1.4995800187756313e-05,
+"loss": 5.4878,
+"step": 14172
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.0786337852478027,
+"learning_rate": 1.4985918276594693e-05,
+"loss": 5.5475,
+"step": 14176
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.0157644748687744,
+"learning_rate": 1.4976036365433073e-05,
+"loss": 5.4686,
+"step": 14180
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.1091387271881104,
+"learning_rate": 1.4966154454271456e-05,
+"loss": 5.5175,
+"step": 14184
+},
+{
+"epoch": 0.7,
+"grad_norm": 1.8892532587051392,
+"learning_rate": 1.495627254310984e-05,
+"loss": 5.57,
+"step": 14188
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.0681941509246826,
+"learning_rate": 1.4946390631948221e-05,
+"loss": 5.4503,
+"step": 14192
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.028324842453003,
+"learning_rate": 1.4936508720786602e-05,
+"loss": 5.5231,
+"step": 14196
+},
+{
+"epoch": 0.7,
+"grad_norm": 1.8590697050094604,
+"learning_rate": 1.4926626809624982e-05,
+"loss": 5.4046,
+"step": 14200
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.1684091091156006,
+"learning_rate": 1.4916744898463364e-05,
+"loss": 5.5428,
+"step": 14204
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.4600932598114014,
+"learning_rate": 1.4906862987301745e-05,
+"loss": 5.4973,
+"step": 14208
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.182546615600586,
+"learning_rate": 1.4896981076140127e-05,
+"loss": 5.5168,
+"step": 14212
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.0207765102386475,
+"learning_rate": 1.4887099164978507e-05,
+"loss": 5.4892,
+"step": 14216
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.07978892326355,
+"learning_rate": 1.4877217253816889e-05,
+"loss": 5.5074,
+"step": 14220
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.0122103691101074,
+"learning_rate": 1.486733534265527e-05,
+"loss": 5.5007,
+"step": 14224
+},
+{
+"epoch": 0.7,
+"grad_norm": 1.951704502105713,
+"learning_rate": 1.4857453431493651e-05,
+"loss": 5.5281,
+"step": 14228
+},
+{
+"epoch": 0.7,
+"grad_norm": 1.903686761856079,
+"learning_rate": 1.4847571520332032e-05,
+"loss": 5.483,
+"step": 14232
+},
+{
+"epoch": 0.7,
+"grad_norm": 1.7512837648391724,
+"learning_rate": 1.4837689609170414e-05,
+"loss": 5.5026,
+"step": 14236
+},
+{
+"epoch": 0.7,
+"grad_norm": 1.9716506004333496,
+"learning_rate": 1.4827807698008794e-05,
+"loss": 5.5547,
+"step": 14240
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.1064813137054443,
+"learning_rate": 1.4817925786847178e-05,
+"loss": 5.6868,
+"step": 14244
+},
+{
+"epoch": 0.7,
+"grad_norm": 1.920245885848999,
+"learning_rate": 1.480804387568556e-05,
+"loss": 5.5195,
+"step": 14248
+},
+{
+"epoch": 0.7,
+"grad_norm": 1.8807624578475952,
+"learning_rate": 1.479816196452394e-05,
+"loss": 5.5605,
+"step": 14252
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.1558022499084473,
+"learning_rate": 1.4788280053362322e-05,
+"loss": 5.54,
+"step": 14256
+},
+{
+"epoch": 0.7,
+"grad_norm": 1.9825403690338135,
+"learning_rate": 1.4778398142200703e-05,
+"loss": 5.5888,
+"step": 14260
+},
+{
+"epoch": 0.7,
+"grad_norm": 1.974697232246399,
+"learning_rate": 1.4768516231039083e-05,
+"loss": 5.5791,
+"step": 14264
+},
+{
+"epoch": 0.7,
+"grad_norm": 2.0667243003845215,
+"learning_rate": 1.4758634319877465e-05,
+"loss": 5.4354,
+"step": 14268
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.8902255296707153,
+"learning_rate": 1.4748752408715846e-05,
+"loss": 5.5167,
+"step": 14272
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.882251501083374,
+"learning_rate": 1.4738870497554228e-05,
+"loss": 5.4551,
+"step": 14276
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.0265276432037354,
+"learning_rate": 1.4728988586392608e-05,
+"loss": 5.3989,
+"step": 14280
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.1499834060668945,
+"learning_rate": 1.471910667523099e-05,
+"loss": 5.3913,
+"step": 14284
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.9374068975448608,
+"learning_rate": 1.470922476406937e-05,
+"loss": 5.4951,
+"step": 14288
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.04496693611145,
+"learning_rate": 1.4699342852907753e-05,
+"loss": 5.5568,
+"step": 14292
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.1718828678131104,
+"learning_rate": 1.4689460941746133e-05,
+"loss": 5.4003,
+"step": 14296
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.170172929763794,
+"learning_rate": 1.4679579030584517e-05,
+"loss": 5.4535,
+"step": 14300
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.9548550844192505,
+"learning_rate": 1.4669697119422899e-05,
+"loss": 5.5077,
+"step": 14304
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.7335338592529297,
+"learning_rate": 1.4659815208261279e-05,
+"loss": 5.4265,
+"step": 14308
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.0178778171539307,
+"learning_rate": 1.4649933297099661e-05,
+"loss": 5.3215,
+"step": 14312
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.198841094970703,
+"learning_rate": 1.4640051385938042e-05,
+"loss": 5.5392,
+"step": 14316
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.093581438064575,
+"learning_rate": 1.4630169474776424e-05,
+"loss": 5.4572,
+"step": 14320
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.7876406908035278,
+"learning_rate": 1.4620287563614804e-05,
+"loss": 5.3976,
+"step": 14324
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.3379602432250977,
+"learning_rate": 1.4610405652453184e-05,
+"loss": 5.5972,
+"step": 14328
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.0745928287506104,
+"learning_rate": 1.4600523741291566e-05,
+"loss": 5.4489,
+"step": 14332
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.9868686199188232,
+"learning_rate": 1.4590641830129947e-05,
+"loss": 5.5921,
+"step": 14336
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.052441358566284,
+"learning_rate": 1.4580759918968329e-05,
+"loss": 5.5621,
+"step": 14340
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.250218629837036,
+"learning_rate": 1.457087800780671e-05,
+"loss": 5.3768,
+"step": 14344
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.0471935272216797,
+"learning_rate": 1.4560996096645091e-05,
+"loss": 5.5863,
+"step": 14348
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.0607481002807617,
+"learning_rate": 1.4551114185483472e-05,
+"loss": 5.5404,
+"step": 14352
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.7724336385726929,
+"learning_rate": 1.4541232274321854e-05,
+"loss": 5.4719,
+"step": 14356
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.1439015865325928,
+"learning_rate": 1.4531350363160237e-05,
+"loss": 5.5045,
+"step": 14360
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.067152500152588,
+"learning_rate": 1.4521468451998618e-05,
+"loss": 5.5518,
+"step": 14364
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.8666131496429443,
+"learning_rate": 1.4511586540837e-05,
+"loss": 5.4791,
+"step": 14368
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.9369677305221558,
+"learning_rate": 1.450170462967538e-05,
+"loss": 5.4379,
+"step": 14372
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.0378055572509766,
+"learning_rate": 1.4491822718513762e-05,
+"loss": 5.4874,
+"step": 14376
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.8923200368881226,
+"learning_rate": 1.4481940807352143e-05,
+"loss": 5.5813,
+"step": 14380
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.9572350978851318,
+"learning_rate": 1.4472058896190525e-05,
+"loss": 5.5161,
+"step": 14384
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.9131194353103638,
+"learning_rate": 1.4462176985028905e-05,
+"loss": 5.6163,
+"step": 14388
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.5276191234588623,
+"learning_rate": 1.4452295073867287e-05,
+"loss": 5.5442,
+"step": 14392
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.0798611640930176,
+"learning_rate": 1.4442413162705667e-05,
+"loss": 5.6086,
+"step": 14396
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.1294023990631104,
+"learning_rate": 1.4432531251544048e-05,
+"loss": 5.3953,
+"step": 14400
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.01753568649292,
+"learning_rate": 1.442264934038243e-05,
+"loss": 5.537,
+"step": 14404
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.0679104328155518,
+"learning_rate": 1.441276742922081e-05,
+"loss": 5.4632,
+"step": 14408
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.921613097190857,
+"learning_rate": 1.4402885518059192e-05,
+"loss": 5.2852,
+"step": 14412
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.9771466255187988,
+"learning_rate": 1.4393003606897576e-05,
+"loss": 5.5808,
+"step": 14416
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.8879215717315674,
+"learning_rate": 1.4383121695735956e-05,
+"loss": 5.4957,
+"step": 14420
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.0186474323272705,
+"learning_rate": 1.4373239784574339e-05,
+"loss": 5.4864,
+"step": 14424
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.147420883178711,
+"learning_rate": 1.4363357873412719e-05,
+"loss": 5.5098,
+"step": 14428
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.9963911771774292,
+"learning_rate": 1.4353475962251101e-05,
+"loss": 5.3886,
+"step": 14432
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.03368878364563,
+"learning_rate": 1.4343594051089481e-05,
+"loss": 5.4533,
+"step": 14436
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.274022340774536,
+"learning_rate": 1.4333712139927863e-05,
+"loss": 5.5222,
+"step": 14440
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.0925748348236084,
+"learning_rate": 1.4323830228766244e-05,
+"loss": 5.3865,
+"step": 14444
+},
+{
+"epoch": 0.71,
+"grad_norm": 2.25508451461792,
+"learning_rate": 1.4313948317604626e-05,
+"loss": 5.388,
+"step": 14448
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.9655667543411255,
+"learning_rate": 1.4304066406443006e-05,
+"loss": 5.5274,
+"step": 14452
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.7187186479568481,
+"learning_rate": 1.4294184495281388e-05,
+"loss": 5.5024,
+"step": 14456
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.9573662281036377,
+"learning_rate": 1.4284302584119769e-05,
+"loss": 5.5709,
+"step": 14460
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.9070782661437988,
+"learning_rate": 1.4274420672958149e-05,
+"loss": 5.5638,
+"step": 14464
+},
+{
+"epoch": 0.71,
+"grad_norm": 1.99477219581604,
+"learning_rate": 1.4264538761796531e-05,
+"loss": 5.554,
+"step": 14468
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.1351230144500732,
+"learning_rate": 1.4254656850634915e-05,
+"loss": 5.4401,
+"step": 14472
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.12768292427063,
+"learning_rate": 1.4244774939473297e-05,
+"loss": 5.5931,
+"step": 14476
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.834502100944519,
+"learning_rate": 1.4234893028311677e-05,
+"loss": 5.3383,
+"step": 14480
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.9401111602783203,
+"learning_rate": 1.4225011117150058e-05,
+"loss": 5.5206,
+"step": 14484
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.1304268836975098,
+"learning_rate": 1.421512920598844e-05,
+"loss": 5.4791,
+"step": 14488
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.8855656385421753,
+"learning_rate": 1.420524729482682e-05,
+"loss": 5.4404,
+"step": 14492
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.1601414680480957,
+"learning_rate": 1.4195365383665202e-05,
+"loss": 5.509,
+"step": 14496
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.054011821746826,
+"learning_rate": 1.4185483472503582e-05,
+"loss": 5.5097,
+"step": 14500
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.2468581199645996,
+"learning_rate": 1.4175601561341964e-05,
+"loss": 5.5658,
+"step": 14504
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.8028780221939087,
+"learning_rate": 1.4165719650180345e-05,
+"loss": 5.4528,
+"step": 14508
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.7557624578475952,
+"learning_rate": 1.4155837739018727e-05,
+"loss": 5.4267,
+"step": 14512
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.2908835411071777,
+"learning_rate": 1.4145955827857107e-05,
+"loss": 5.5676,
+"step": 14516
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.8863370418548584,
+"learning_rate": 1.413607391669549e-05,
+"loss": 5.5873,
+"step": 14520
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.9381186962127686,
+"learning_rate": 1.412619200553387e-05,
+"loss": 5.5573,
+"step": 14524
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.9330207109451294,
+"learning_rate": 1.4116310094372252e-05,
+"loss": 5.3673,
+"step": 14528
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.0471458435058594,
+"learning_rate": 1.4106428183210636e-05,
+"loss": 5.4435,
+"step": 14532
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.922804594039917,
+"learning_rate": 1.4096546272049016e-05,
+"loss": 5.4511,
+"step": 14536
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.946153998374939,
+"learning_rate": 1.4086664360887398e-05,
+"loss": 5.4641,
+"step": 14540
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.0340843200683594,
+"learning_rate": 1.4076782449725778e-05,
+"loss": 5.5205,
+"step": 14544
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.218997001647949,
+"learning_rate": 1.4066900538564159e-05,
+"loss": 5.4572,
+"step": 14548
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.8929450511932373,
+"learning_rate": 1.405701862740254e-05,
+"loss": 5.4428,
+"step": 14552
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.828822374343872,
+"learning_rate": 1.4047136716240921e-05,
+"loss": 5.4737,
+"step": 14556
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.1129636764526367,
+"learning_rate": 1.4037254805079303e-05,
+"loss": 5.4952,
+"step": 14560
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.0485777854919434,
+"learning_rate": 1.4027372893917684e-05,
+"loss": 5.5007,
+"step": 14564
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.2085585594177246,
+"learning_rate": 1.4017490982756066e-05,
+"loss": 5.5036,
+"step": 14568
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.0381059646606445,
+"learning_rate": 1.4007609071594446e-05,
+"loss": 5.5335,
+"step": 14572
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.994460940361023,
+"learning_rate": 1.3997727160432828e-05,
+"loss": 5.5262,
+"step": 14576
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.9527006149291992,
+"learning_rate": 1.3987845249271208e-05,
+"loss": 5.4994,
+"step": 14580
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.07161808013916,
+"learning_rate": 1.397796333810959e-05,
+"loss": 5.5022,
+"step": 14584
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.083160638809204,
+"learning_rate": 1.3968081426947974e-05,
+"loss": 5.46,
+"step": 14588
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.9139795303344727,
+"learning_rate": 1.3958199515786355e-05,
+"loss": 5.3918,
+"step": 14592
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.016361951828003,
+"learning_rate": 1.3948317604624737e-05,
+"loss": 5.3577,
+"step": 14596
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.9085208177566528,
+"learning_rate": 1.3938435693463117e-05,
+"loss": 5.5672,
+"step": 14600
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.156883716583252,
+"learning_rate": 1.3928553782301499e-05,
+"loss": 5.4402,
+"step": 14604
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.1130199432373047,
+"learning_rate": 1.391867187113988e-05,
+"loss": 5.5375,
+"step": 14608
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.0779714584350586,
+"learning_rate": 1.3908789959978261e-05,
+"loss": 5.4568,
+"step": 14612
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.0961239337921143,
+"learning_rate": 1.3898908048816642e-05,
+"loss": 5.5331,
+"step": 14616
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.9321538209915161,
+"learning_rate": 1.3889026137655022e-05,
+"loss": 5.4607,
+"step": 14620
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.171811103820801,
+"learning_rate": 1.3879144226493404e-05,
+"loss": 5.5705,
+"step": 14624
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.0111582279205322,
+"learning_rate": 1.3869262315331785e-05,
+"loss": 5.5365,
+"step": 14628
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.2230725288391113,
+"learning_rate": 1.3859380404170167e-05,
+"loss": 5.4544,
+"step": 14632
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.1781952381134033,
+"learning_rate": 1.3849498493008547e-05,
+"loss": 5.5207,
+"step": 14636
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.1648108959198,
+"learning_rate": 1.3839616581846929e-05,
+"loss": 5.542,
+"step": 14640
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.008898973464966,
+"learning_rate": 1.3829734670685313e-05,
+"loss": 5.4819,
+"step": 14644
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.8579167127609253,
+"learning_rate": 1.3819852759523693e-05,
+"loss": 5.5307,
+"step": 14648
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.951011300086975,
+"learning_rate": 1.3809970848362075e-05,
+"loss": 5.5083,
+"step": 14652
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.622732639312744,
+"learning_rate": 1.3800088937200456e-05,
+"loss": 5.4687,
+"step": 14656
+},
+{
+"epoch": 0.72,
+"grad_norm": 1.8241809606552124,
+"learning_rate": 1.3790207026038838e-05,
+"loss": 5.5702,
+"step": 14660
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.2202184200286865,
+"learning_rate": 1.3780325114877218e-05,
+"loss": 5.5717,
+"step": 14664
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.125215530395508,
+"learning_rate": 1.37704432037156e-05,
+"loss": 5.4724,
+"step": 14668
+},
+{
+"epoch": 0.72,
+"grad_norm": 2.0341951847076416,
+"learning_rate": 1.376056129255398e-05,
+"loss": 5.4859,
+"step": 14672
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.9950518608093262,
+"learning_rate": 1.3750679381392363e-05,
+"loss": 5.4983,
+"step": 14676
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.9778791666030884,
+"learning_rate": 1.3740797470230743e-05,
+"loss": 5.3464,
+"step": 14680
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.052849054336548,
+"learning_rate": 1.3730915559069123e-05,
+"loss": 5.4766,
+"step": 14684
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.0208585262298584,
+"learning_rate": 1.3721033647907505e-05,
+"loss": 5.4662,
+"step": 14688
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.0381686687469482,
+"learning_rate": 1.3711151736745886e-05,
+"loss": 5.4048,
+"step": 14692
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.0504868030548096,
+"learning_rate": 1.3701269825584268e-05,
+"loss": 5.5422,
+"step": 14696
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.9056216478347778,
+"learning_rate": 1.3691387914422648e-05,
+"loss": 5.5829,
+"step": 14700
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.1584672927856445,
+"learning_rate": 1.3681506003261032e-05,
+"loss": 5.4341,
+"step": 14704
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.8544187545776367,
+"learning_rate": 1.3671624092099414e-05,
+"loss": 5.5552,
+"step": 14708
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.9360532760620117,
+"learning_rate": 1.3661742180937794e-05,
+"loss": 5.413,
+"step": 14712
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.7825835943222046,
+"learning_rate": 1.3651860269776176e-05,
+"loss": 5.4916,
+"step": 14716
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.0297420024871826,
+"learning_rate": 1.3641978358614557e-05,
+"loss": 5.4825,
+"step": 14720
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.0158278942108154,
+"learning_rate": 1.3632096447452939e-05,
+"loss": 5.4502,
+"step": 14724
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.265287160873413,
+"learning_rate": 1.362221453629132e-05,
+"loss": 5.5644,
+"step": 14728
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.0918755531311035,
+"learning_rate": 1.3612332625129701e-05,
+"loss": 5.6084,
+"step": 14732
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.141103744506836,
+"learning_rate": 1.3602450713968082e-05,
+"loss": 5.4327,
+"step": 14736
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.070760488510132,
+"learning_rate": 1.3592568802806464e-05,
+"loss": 5.5944,
+"step": 14740
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.9426864385604858,
+"learning_rate": 1.3582686891644844e-05,
+"loss": 5.4326,
+"step": 14744
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.8541244268417358,
+"learning_rate": 1.3572804980483224e-05,
+"loss": 5.5438,
+"step": 14748
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.081083297729492,
+"learning_rate": 1.3562923069321606e-05,
+"loss": 5.5656,
+"step": 14752
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.923309326171875,
+"learning_rate": 1.3553041158159987e-05,
+"loss": 5.5546,
+"step": 14756
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.746881365776062,
+"learning_rate": 1.3543159246998372e-05,
+"loss": 5.4799,
+"step": 14760
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.91403067111969,
+"learning_rate": 1.3533277335836753e-05,
+"loss": 5.4892,
+"step": 14764
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.0929675102233887,
+"learning_rate": 1.3523395424675133e-05,
+"loss": 5.5852,
+"step": 14768
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.9422674179077148,
+"learning_rate": 1.3513513513513515e-05,
+"loss": 5.5581,
+"step": 14772
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.684131383895874,
+"learning_rate": 1.3503631602351895e-05,
+"loss": 5.553,
+"step": 14776
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.8600718975067139,
+"learning_rate": 1.3493749691190278e-05,
+"loss": 5.5774,
+"step": 14780
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.7786033153533936,
+"learning_rate": 1.3483867780028658e-05,
+"loss": 5.4997,
+"step": 14784
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.239494562149048,
+"learning_rate": 1.347398586886704e-05,
+"loss": 5.6186,
+"step": 14788
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.078833818435669,
+"learning_rate": 1.346410395770542e-05,
+"loss": 5.5609,
+"step": 14792
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.867530107498169,
+"learning_rate": 1.3454222046543802e-05,
+"loss": 5.5624,
+"step": 14796
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.8725926876068115,
+"learning_rate": 1.3444340135382183e-05,
+"loss": 5.5675,
+"step": 14800
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.9592565298080444,
+"learning_rate": 1.3434458224220565e-05,
+"loss": 5.4278,
+"step": 14804
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.0653085708618164,
+"learning_rate": 1.3424576313058945e-05,
+"loss": 5.4732,
+"step": 14808
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.0164620876312256,
+"learning_rate": 1.3414694401897327e-05,
+"loss": 5.4636,
+"step": 14812
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.9894605875015259,
+"learning_rate": 1.3404812490735708e-05,
+"loss": 5.5102,
+"step": 14816
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.0743892192840576,
+"learning_rate": 1.3394930579574091e-05,
+"loss": 5.4852,
+"step": 14820
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.000945806503296,
+"learning_rate": 1.3385048668412473e-05,
+"loss": 5.4698,
+"step": 14824
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.219153881072998,
+"learning_rate": 1.3375166757250854e-05,
+"loss": 5.5966,
+"step": 14828
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.0266921520233154,
+"learning_rate": 1.3365284846089234e-05,
+"loss": 5.5036,
+"step": 14832
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.1432504653930664,
+"learning_rate": 1.3355402934927616e-05,
+"loss": 5.5122,
+"step": 14836
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.975110411643982,
+"learning_rate": 1.3345521023765997e-05,
+"loss": 5.4677,
+"step": 14840
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.1612823009490967,
+"learning_rate": 1.3335639112604379e-05,
+"loss": 5.544,
+"step": 14844
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.0813982486724854,
+"learning_rate": 1.3325757201442759e-05,
+"loss": 5.3651,
+"step": 14848
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.8772978782653809,
+"learning_rate": 1.3315875290281141e-05,
+"loss": 5.4319,
+"step": 14852
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.8763823509216309,
+"learning_rate": 1.3305993379119521e-05,
+"loss": 5.6508,
+"step": 14856
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.0054590702056885,
+"learning_rate": 1.3296111467957903e-05,
+"loss": 5.4899,
+"step": 14860
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.0155956745147705,
+"learning_rate": 1.3286229556796284e-05,
+"loss": 5.4597,
+"step": 14864
+},
+{
+"epoch": 0.73,
+"grad_norm": 1.9215456247329712,
+"learning_rate": 1.3276347645634666e-05,
+"loss": 5.5587,
+"step": 14868
+},
+{
+"epoch": 0.73,
+"grad_norm": 2.2192320823669434,
+"learning_rate": 1.3266465734473046e-05,
+"loss": 5.5397,
+"step": 14872
+},
+{
+"epoch": 0.74,
+"grad_norm": 1.8037538528442383,
+"learning_rate": 1.325658382331143e-05,
+"loss": 5.503,
+"step": 14876
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.096147298812866,
+"learning_rate": 1.3246701912149812e-05,
+"loss": 5.4808,
+"step": 14880
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.0961217880249023,
+"learning_rate": 1.3236820000988192e-05,
+"loss": 5.5546,
+"step": 14884
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.1718828678131104,
+"learning_rate": 1.3226938089826574e-05,
+"loss": 5.4201,
+"step": 14888
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.062138795852661,
+"learning_rate": 1.3217056178664955e-05,
+"loss": 5.46,
+"step": 14892
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.0585410594940186,
+"learning_rate": 1.3207174267503337e-05,
+"loss": 5.5545,
+"step": 14896
+},
+{
+"epoch": 0.74,
+"grad_norm": 1.8028877973556519,
+"learning_rate": 1.3197292356341717e-05,
+"loss": 5.4255,
+"step": 14900
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.002005100250244,
+"learning_rate": 1.3187410445180098e-05,
+"loss": 5.5847,
+"step": 14904
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.1855287551879883,
+"learning_rate": 1.317752853401848e-05,
+"loss": 5.536,
+"step": 14908
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.193171977996826,
+"learning_rate": 1.316764662285686e-05,
+"loss": 5.4059,
+"step": 14912
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.0406908988952637,
+"learning_rate": 1.3157764711695242e-05,
+"loss": 5.5851,
+"step": 14916
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.342682123184204,
+"learning_rate": 1.3147882800533623e-05,
+"loss": 5.5068,
+"step": 14920
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.0584092140197754,
+"learning_rate": 1.3138000889372005e-05,
+"loss": 5.3973,
+"step": 14924
+},
+{
+"epoch": 0.74,
+"grad_norm": 1.7771698236465454,
+"learning_rate": 1.3128118978210385e-05,
+"loss": 5.4812,
+"step": 14928
+},
+{
+"epoch": 0.74,
+"grad_norm": 1.923229694366455,
+"learning_rate": 1.3118237067048769e-05,
+"loss": 5.4176,
+"step": 14932
+},
+{
+"epoch": 0.74,
+"grad_norm": 1.9571110010147095,
+"learning_rate": 1.310835515588715e-05,
+"loss": 5.5085,
+"step": 14936
+},
+{
+"epoch": 0.74,
+"grad_norm": 1.9827457666397095,
+"learning_rate": 1.3098473244725531e-05,
+"loss": 5.4454,
+"step": 14940
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.2986981868743896,
+"learning_rate": 1.3088591333563913e-05,
+"loss": 5.562,
+"step": 14944
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.087019920349121,
+"learning_rate": 1.3078709422402294e-05,
+"loss": 5.5164,
+"step": 14948
+},
+{
+"epoch": 0.74,
+"grad_norm": 1.9258410930633545,
+"learning_rate": 1.3068827511240676e-05,
+"loss": 5.5252,
+"step": 14952
+},
+{
+"epoch": 0.74,
+"grad_norm": 1.833736538887024,
+"learning_rate": 1.3058945600079056e-05,
+"loss": 5.4423,
+"step": 14956
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.1392202377319336,
+"learning_rate": 1.3049063688917438e-05,
+"loss": 5.5637,
+"step": 14960
+},
+{
+"epoch": 0.74,
+"grad_norm": 1.9656065702438354,
+"learning_rate": 1.3039181777755818e-05,
+"loss": 5.474,
+"step": 14964
+},
+{
+"epoch": 0.74,
+"grad_norm": 1.8016414642333984,
+"learning_rate": 1.3029299866594199e-05,
+"loss": 5.4101,
+"step": 14968
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.141220808029175,
+"learning_rate": 1.301941795543258e-05,
+"loss": 5.4747,
+"step": 14972
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.1080868244171143,
+"learning_rate": 1.3009536044270961e-05,
+"loss": 5.3842,
+"step": 14976
+},
+{
+"epoch": 0.74,
+"grad_norm": 1.9357255697250366,
+"learning_rate": 1.2999654133109343e-05,
+"loss": 5.5716,
+"step": 14980
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.3347742557525635,
+"learning_rate": 1.2989772221947724e-05,
+"loss": 5.5935,
+"step": 14984
+},
+{
+"epoch": 0.74,
+"grad_norm": 1.999286413192749,
+"learning_rate": 1.2979890310786106e-05,
+"loss": 5.4976,
+"step": 14988
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.040515184402466,
+"learning_rate": 1.297000839962449e-05,
+"loss": 5.6029,
+"step": 14992
+},
+{
+"epoch": 0.74,
+"grad_norm": 1.9321497678756714,
+"learning_rate": 1.296012648846287e-05,
+"loss": 5.503,
+"step": 14996
+},
+{
+"epoch": 0.74,
+"grad_norm": 1.834435224533081,
+"learning_rate": 1.2950244577301252e-05,
+"loss": 5.5076,
+"step": 15000
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.410569906234741,
+"learning_rate": 1.2940362666139632e-05,
+"loss": 5.6112,
+"step": 15004
+},
+{
+"epoch": 0.74,
+"grad_norm": 2.110943555831909,
+"learning_rate": 1.2930480754978014e-05,
+"loss": 5.4589,
+"step": 15008
+},
+{
+
"epoch": 0.74,
|
26277 |
+
"grad_norm": 2.2104272842407227,
|
26278 |
+
"learning_rate": 1.2920598843816395e-05,
|
26279 |
+
"loss": 5.4494,
|
26280 |
+
"step": 15012
|
26281 |
+
},
|
26282 |
+
{
|
26283 |
+
"epoch": 0.74,
|
26284 |
+
"grad_norm": 2.0038228034973145,
|
26285 |
+
"learning_rate": 1.2910716932654777e-05,
|
26286 |
+
"loss": 5.4229,
|
26287 |
+
"step": 15016
|
26288 |
+
},
|
26289 |
+
{
|
26290 |
+
"epoch": 0.74,
|
26291 |
+
"grad_norm": 2.0080738067626953,
|
26292 |
+
"learning_rate": 1.2900835021493157e-05,
|
26293 |
+
"loss": 5.5645,
|
26294 |
+
"step": 15020
|
26295 |
+
},
|
26296 |
+
{
|
26297 |
+
"epoch": 0.74,
|
26298 |
+
"grad_norm": 2.075934886932373,
|
26299 |
+
"learning_rate": 1.2890953110331539e-05,
|
26300 |
+
"loss": 5.6104,
|
26301 |
+
"step": 15024
|
26302 |
+
},
|
26303 |
+
{
|
26304 |
+
"epoch": 0.74,
|
26305 |
+
"grad_norm": 1.9630203247070312,
|
26306 |
+
"learning_rate": 1.288107119916992e-05,
|
26307 |
+
"loss": 5.4888,
|
26308 |
+
"step": 15028
|
26309 |
+
},
|
26310 |
+
{
|
26311 |
+
"epoch": 0.74,
|
26312 |
+
"grad_norm": 2.1975886821746826,
|
26313 |
+
"learning_rate": 1.28711892880083e-05,
|
26314 |
+
"loss": 5.5283,
|
26315 |
+
"step": 15032
|
26316 |
+
},
|
26317 |
+
{
|
26318 |
+
"epoch": 0.74,
|
26319 |
+
"grad_norm": 1.9507403373718262,
|
26320 |
+
"learning_rate": 1.2861307376846682e-05,
|
26321 |
+
"loss": 5.6203,
|
26322 |
+
"step": 15036
|
26323 |
+
},
|
26324 |
+
{
|
26325 |
+
"epoch": 0.74,
|
26326 |
+
"grad_norm": 2.0065085887908936,
|
26327 |
+
"learning_rate": 1.2851425465685062e-05,
|
26328 |
+
"loss": 5.4493,
|
26329 |
+
"step": 15040
|
26330 |
+
},
|
26331 |
+
{
|
26332 |
+
"epoch": 0.74,
|
26333 |
+
"grad_norm": 2.170250177383423,
|
26334 |
+
"learning_rate": 1.2841543554523444e-05,
|
26335 |
+
"loss": 5.5062,
|
26336 |
+
"step": 15044
|
26337 |
+
},
|
26338 |
+
{
|
26339 |
+
"epoch": 0.74,
|
26340 |
+
"grad_norm": 1.9755939245224,
|
26341 |
+
"learning_rate": 1.2831661643361828e-05,
|
26342 |
+
"loss": 5.4901,
|
26343 |
+
"step": 15048
|
26344 |
+
},
|
26345 |
+
{
|
26346 |
+
"epoch": 0.74,
|
26347 |
+
"grad_norm": 2.0391461849212646,
|
26348 |
+
"learning_rate": 1.2821779732200208e-05,
|
26349 |
+
"loss": 5.4896,
|
26350 |
+
"step": 15052
|
26351 |
+
},
|
26352 |
+
{
|
26353 |
+
"epoch": 0.74,
|
26354 |
+
"grad_norm": 1.8779929876327515,
|
26355 |
+
"learning_rate": 1.281189782103859e-05,
|
26356 |
+
"loss": 5.4311,
|
26357 |
+
"step": 15056
|
26358 |
+
},
|
26359 |
+
{
|
26360 |
+
"epoch": 0.74,
|
26361 |
+
"grad_norm": 2.123504400253296,
|
26362 |
+
"learning_rate": 1.2802015909876971e-05,
|
26363 |
+
"loss": 5.5117,
|
26364 |
+
"step": 15060
|
26365 |
+
},
|
26366 |
+
{
|
26367 |
+
"epoch": 0.74,
|
26368 |
+
"grad_norm": 1.885321021080017,
|
26369 |
+
"learning_rate": 1.2792133998715353e-05,
|
26370 |
+
"loss": 5.515,
|
26371 |
+
"step": 15064
|
26372 |
+
},
|
26373 |
+
{
|
26374 |
+
"epoch": 0.74,
|
26375 |
+
"grad_norm": 2.281585454940796,
|
26376 |
+
"learning_rate": 1.2782252087553733e-05,
|
26377 |
+
"loss": 5.4979,
|
26378 |
+
"step": 15068
|
26379 |
+
},
|
26380 |
+
{
|
26381 |
+
"epoch": 0.74,
|
26382 |
+
"grad_norm": 2.1277706623077393,
|
26383 |
+
"learning_rate": 1.2772370176392115e-05,
|
26384 |
+
"loss": 5.4285,
|
26385 |
+
"step": 15072
|
26386 |
+
},
|
26387 |
+
{
|
26388 |
+
"epoch": 0.74,
|
26389 |
+
"grad_norm": 2.0085339546203613,
|
26390 |
+
"learning_rate": 1.2762488265230496e-05,
|
26391 |
+
"loss": 5.4741,
|
26392 |
+
"step": 15076
|
26393 |
+
},
|
26394 |
+
{
|
26395 |
+
"epoch": 0.75,
|
26396 |
+
"grad_norm": 2.0528817176818848,
|
26397 |
+
"learning_rate": 1.2752606354068878e-05,
|
26398 |
+
"loss": 5.5861,
|
26399 |
+
"step": 15080
|
26400 |
+
},
|
26401 |
+
{
|
26402 |
+
"epoch": 0.75,
|
26403 |
+
"grad_norm": 2.0713131427764893,
|
26404 |
+
"learning_rate": 1.2742724442907258e-05,
|
26405 |
+
"loss": 5.5236,
|
26406 |
+
"step": 15084
|
26407 |
+
},
|
26408 |
+
{
|
26409 |
+
"epoch": 0.75,
|
26410 |
+
"grad_norm": 1.9570958614349365,
|
26411 |
+
"learning_rate": 1.273284253174564e-05,
|
26412 |
+
"loss": 5.4983,
|
26413 |
+
"step": 15088
|
26414 |
+
},
|
26415 |
+
{
|
26416 |
+
"epoch": 0.75,
|
26417 |
+
"grad_norm": 1.8373630046844482,
|
26418 |
+
"learning_rate": 1.272296062058402e-05,
|
26419 |
+
"loss": 5.4913,
|
26420 |
+
"step": 15092
|
26421 |
+
},
|
26422 |
+
{
|
26423 |
+
"epoch": 0.75,
|
26424 |
+
"grad_norm": 2.0442957878112793,
|
26425 |
+
"learning_rate": 1.2713078709422403e-05,
|
26426 |
+
"loss": 5.4757,
|
26427 |
+
"step": 15096
|
26428 |
+
},
|
26429 |
+
{
|
26430 |
+
"epoch": 0.75,
|
26431 |
+
"grad_norm": 2.090407609939575,
|
26432 |
+
"learning_rate": 1.2703196798260783e-05,
|
26433 |
+
"loss": 5.4915,
|
26434 |
+
"step": 15100
|
26435 |
+
},
|
26436 |
+
{
|
26437 |
+
"epoch": 0.75,
|
26438 |
+
"grad_norm": 2.1005630493164062,
|
26439 |
+
"learning_rate": 1.2693314887099167e-05,
|
26440 |
+
"loss": 5.5734,
|
26441 |
+
"step": 15104
|
26442 |
+
},
|
26443 |
+
{
|
26444 |
+
"epoch": 0.75,
|
26445 |
+
"grad_norm": 2.062366008758545,
|
26446 |
+
"learning_rate": 1.2683432975937549e-05,
|
26447 |
+
"loss": 5.5959,
|
26448 |
+
"step": 15108
|
26449 |
+
},
|
26450 |
+
{
|
26451 |
+
"epoch": 0.75,
|
26452 |
+
"grad_norm": 1.9203088283538818,
|
26453 |
+
"learning_rate": 1.267355106477593e-05,
|
26454 |
+
"loss": 5.4388,
|
26455 |
+
"step": 15112
|
26456 |
+
},
|
26457 |
+
{
|
26458 |
+
"epoch": 0.75,
|
26459 |
+
"grad_norm": 2.0604312419891357,
|
26460 |
+
"learning_rate": 1.266366915361431e-05,
|
26461 |
+
"loss": 5.5115,
|
26462 |
+
"step": 15116
|
26463 |
+
},
|
26464 |
+
{
|
26465 |
+
"epoch": 0.75,
|
26466 |
+
"grad_norm": 2.230130672454834,
|
26467 |
+
"learning_rate": 1.2653787242452692e-05,
|
26468 |
+
"loss": 5.596,
|
26469 |
+
"step": 15120
|
26470 |
+
},
|
26471 |
+
{
|
26472 |
+
"epoch": 0.75,
|
26473 |
+
"grad_norm": 2.0392181873321533,
|
26474 |
+
"learning_rate": 1.2643905331291072e-05,
|
26475 |
+
"loss": 5.57,
|
26476 |
+
"step": 15124
|
26477 |
+
},
|
26478 |
+
{
|
26479 |
+
"epoch": 0.75,
|
26480 |
+
"grad_norm": 2.253793954849243,
|
26481 |
+
"learning_rate": 1.2634023420129454e-05,
|
26482 |
+
"loss": 5.5402,
|
26483 |
+
"step": 15128
|
26484 |
+
},
|
26485 |
+
{
|
26486 |
+
"epoch": 0.75,
|
26487 |
+
"grad_norm": 1.9613460302352905,
|
26488 |
+
"learning_rate": 1.2624141508967834e-05,
|
26489 |
+
"loss": 5.3467,
|
26490 |
+
"step": 15132
|
26491 |
+
},
|
26492 |
+
{
|
26493 |
+
"epoch": 0.75,
|
26494 |
+
"grad_norm": 1.8354169130325317,
|
26495 |
+
"learning_rate": 1.2614259597806216e-05,
|
26496 |
+
"loss": 5.5377,
|
26497 |
+
"step": 15136
|
26498 |
+
},
|
26499 |
+
{
|
26500 |
+
"epoch": 0.75,
|
26501 |
+
"grad_norm": 1.8670376539230347,
|
26502 |
+
"learning_rate": 1.2604377686644597e-05,
|
26503 |
+
"loss": 5.4526,
|
26504 |
+
"step": 15140
|
26505 |
+
},
|
26506 |
+
{
|
26507 |
+
"epoch": 0.75,
|
26508 |
+
"grad_norm": 2.1439993381500244,
|
26509 |
+
"learning_rate": 1.2594495775482979e-05,
|
26510 |
+
"loss": 5.4915,
|
26511 |
+
"step": 15144
|
26512 |
+
},
|
26513 |
+
{
|
26514 |
+
"epoch": 0.75,
|
26515 |
+
"grad_norm": 1.9621843099594116,
|
26516 |
+
"learning_rate": 1.258461386432136e-05,
|
26517 |
+
"loss": 5.415,
|
26518 |
+
"step": 15148
|
26519 |
+
},
|
26520 |
+
{
|
26521 |
+
"epoch": 0.75,
|
26522 |
+
"grad_norm": 2.0130698680877686,
|
26523 |
+
"learning_rate": 1.2574731953159741e-05,
|
26524 |
+
"loss": 5.5646,
|
26525 |
+
"step": 15152
|
26526 |
+
},
|
26527 |
+
{
|
26528 |
+
"epoch": 0.75,
|
26529 |
+
"grad_norm": 2.1963608264923096,
|
26530 |
+
"learning_rate": 1.2564850041998122e-05,
|
26531 |
+
"loss": 5.5678,
|
26532 |
+
"step": 15156
|
26533 |
+
},
|
26534 |
+
{
|
26535 |
+
"epoch": 0.75,
|
26536 |
+
"grad_norm": 2.007394790649414,
|
26537 |
+
"learning_rate": 1.2554968130836504e-05,
|
26538 |
+
"loss": 5.4874,
|
26539 |
+
"step": 15160
|
26540 |
+
},
|
26541 |
+
{
|
26542 |
+
"epoch": 0.75,
|
26543 |
+
"grad_norm": 2.03348708152771,
|
26544 |
+
"learning_rate": 1.2545086219674888e-05,
|
26545 |
+
"loss": 5.3595,
|
26546 |
+
"step": 15164
|
26547 |
+
},
|
26548 |
+
{
|
26549 |
+
"epoch": 0.75,
|
26550 |
+
"grad_norm": 2.1427929401397705,
|
26551 |
+
"learning_rate": 1.2535204308513268e-05,
|
26552 |
+
"loss": 5.5416,
|
26553 |
+
"step": 15168
|
26554 |
+
},
|
26555 |
+
{
|
26556 |
+
"epoch": 0.75,
|
26557 |
+
"grad_norm": 2.0188114643096924,
|
26558 |
+
"learning_rate": 1.252532239735165e-05,
|
26559 |
+
"loss": 5.4923,
|
26560 |
+
"step": 15172
|
26561 |
+
},
|
26562 |
+
{
|
26563 |
+
"epoch": 0.75,
|
26564 |
+
"grad_norm": 1.9621504545211792,
|
26565 |
+
"learning_rate": 1.251544048619003e-05,
|
26566 |
+
"loss": 5.5167,
|
26567 |
+
"step": 15176
|
26568 |
+
},
|
26569 |
+
{
|
26570 |
+
"epoch": 0.75,
|
26571 |
+
"grad_norm": 2.1401383876800537,
|
26572 |
+
"learning_rate": 1.2505558575028412e-05,
|
26573 |
+
"loss": 5.3883,
|
26574 |
+
"step": 15180
|
26575 |
+
},
|
26576 |
+
{
|
26577 |
+
"epoch": 0.75,
|
26578 |
+
"grad_norm": 2.0091779232025146,
|
26579 |
+
"learning_rate": 1.2495676663866793e-05,
|
26580 |
+
"loss": 5.5855,
|
26581 |
+
"step": 15184
|
26582 |
+
},
|
26583 |
+
{
|
26584 |
+
"epoch": 0.75,
|
26585 |
+
"grad_norm": 2.0588550567626953,
|
26586 |
+
"learning_rate": 1.2485794752705173e-05,
|
26587 |
+
"loss": 5.4961,
|
26588 |
+
"step": 15188
|
26589 |
+
},
|
26590 |
+
{
|
26591 |
+
"epoch": 0.75,
|
26592 |
+
"grad_norm": 2.0023372173309326,
|
26593 |
+
"learning_rate": 1.2475912841543555e-05,
|
26594 |
+
"loss": 5.3909,
|
26595 |
+
"step": 15192
|
26596 |
+
},
|
26597 |
+
{
|
26598 |
+
"epoch": 0.75,
|
26599 |
+
"grad_norm": 2.395747184753418,
|
26600 |
+
"learning_rate": 1.2466030930381936e-05,
|
26601 |
+
"loss": 5.4807,
|
26602 |
+
"step": 15196
|
26603 |
+
},
|
26604 |
+
{
|
26605 |
+
"epoch": 0.75,
|
26606 |
+
"grad_norm": 1.8098511695861816,
|
26607 |
+
"learning_rate": 1.2456149019220318e-05,
|
26608 |
+
"loss": 5.4387,
|
26609 |
+
"step": 15200
|
26610 |
+
},
|
26611 |
+
{
|
26612 |
+
"epoch": 0.75,
|
26613 |
+
"grad_norm": 1.9918867349624634,
|
26614 |
+
"learning_rate": 1.24462671080587e-05,
|
26615 |
+
"loss": 5.396,
|
26616 |
+
"step": 15204
|
26617 |
+
},
|
26618 |
+
{
|
26619 |
+
"epoch": 0.75,
|
26620 |
+
"grad_norm": 2.0140202045440674,
|
26621 |
+
"learning_rate": 1.2436385196897082e-05,
|
26622 |
+
"loss": 5.4098,
|
26623 |
+
"step": 15208
|
26624 |
+
},
|
26625 |
+
{
|
26626 |
+
"epoch": 0.75,
|
26627 |
+
"grad_norm": 1.9749282598495483,
|
26628 |
+
"learning_rate": 1.2426503285735462e-05,
|
26629 |
+
"loss": 5.4362,
|
26630 |
+
"step": 15212
|
26631 |
+
},
|
26632 |
+
{
|
26633 |
+
"epoch": 0.75,
|
26634 |
+
"grad_norm": 2.1992135047912598,
|
26635 |
+
"learning_rate": 1.2416621374573842e-05,
|
26636 |
+
"loss": 5.5986,
|
26637 |
+
"step": 15216
|
26638 |
+
},
|
26639 |
+
{
|
26640 |
+
"epoch": 0.75,
|
26641 |
+
"grad_norm": 2.3077468872070312,
|
26642 |
+
"learning_rate": 1.2406739463412225e-05,
|
26643 |
+
"loss": 5.5058,
|
26644 |
+
"step": 15220
|
26645 |
+
},
|
26646 |
+
{
|
26647 |
+
"epoch": 0.75,
|
26648 |
+
"grad_norm": 1.964931845664978,
|
26649 |
+
"learning_rate": 1.2396857552250605e-05,
|
26650 |
+
"loss": 5.3997,
|
26651 |
+
"step": 15224
|
26652 |
+
},
|
26653 |
+
{
|
26654 |
+
"epoch": 0.75,
|
26655 |
+
"grad_norm": 1.9411903619766235,
|
26656 |
+
"learning_rate": 1.2386975641088987e-05,
|
26657 |
+
"loss": 5.4008,
|
26658 |
+
"step": 15228
|
26659 |
+
},
|
26660 |
+
{
|
26661 |
+
"epoch": 0.75,
|
26662 |
+
"grad_norm": 2.01727557182312,
|
26663 |
+
"learning_rate": 1.2377093729927367e-05,
|
26664 |
+
"loss": 5.6187,
|
26665 |
+
"step": 15232
|
26666 |
+
},
|
26667 |
+
{
|
26668 |
+
"epoch": 0.75,
|
26669 |
+
"grad_norm": 1.9451463222503662,
|
26670 |
+
"learning_rate": 1.2367211818765751e-05,
|
26671 |
+
"loss": 5.5272,
|
26672 |
+
"step": 15236
|
26673 |
+
},
|
26674 |
+
{
|
26675 |
+
"epoch": 0.75,
|
26676 |
+
"grad_norm": 2.1012978553771973,
|
26677 |
+
"learning_rate": 1.2357329907604131e-05,
|
26678 |
+
"loss": 5.3657,
|
26679 |
+
"step": 15240
|
26680 |
+
},
|
26681 |
+
{
|
26682 |
+
"epoch": 0.75,
|
26683 |
+
"grad_norm": 2.219510078430176,
|
26684 |
+
"learning_rate": 1.2347447996442513e-05,
|
26685 |
+
"loss": 5.526,
|
26686 |
+
"step": 15244
|
26687 |
+
},
|
26688 |
+
{
|
26689 |
+
"epoch": 0.75,
|
26690 |
+
"grad_norm": 2.013444185256958,
|
26691 |
+
"learning_rate": 1.2337566085280894e-05,
|
26692 |
+
"loss": 5.4749,
|
26693 |
+
"step": 15248
|
26694 |
+
},
|
26695 |
+
{
|
26696 |
+
"epoch": 0.75,
|
26697 |
+
"grad_norm": 1.7549456357955933,
|
26698 |
+
"learning_rate": 1.2327684174119274e-05,
|
26699 |
+
"loss": 5.3478,
|
26700 |
+
"step": 15252
|
26701 |
+
},
|
26702 |
+
{
|
26703 |
+
"epoch": 0.75,
|
26704 |
+
"grad_norm": 1.8471717834472656,
|
26705 |
+
"learning_rate": 1.2317802262957656e-05,
|
26706 |
+
"loss": 5.5201,
|
26707 |
+
"step": 15256
|
26708 |
+
},
|
26709 |
+
{
|
26710 |
+
"epoch": 0.75,
|
26711 |
+
"grad_norm": 2.324028491973877,
|
26712 |
+
"learning_rate": 1.2307920351796037e-05,
|
26713 |
+
"loss": 5.5589,
|
26714 |
+
"step": 15260
|
26715 |
+
},
|
26716 |
+
{
|
26717 |
+
"epoch": 0.75,
|
26718 |
+
"grad_norm": 2.0255486965179443,
|
26719 |
+
"learning_rate": 1.229803844063442e-05,
|
26720 |
+
"loss": 5.5575,
|
26721 |
+
"step": 15264
|
26722 |
+
},
|
26723 |
+
{
|
26724 |
+
"epoch": 0.75,
|
26725 |
+
"grad_norm": 2.1873011589050293,
|
26726 |
+
"learning_rate": 1.22881565294728e-05,
|
26727 |
+
"loss": 5.5165,
|
26728 |
+
"step": 15268
|
26729 |
+
},
|
26730 |
+
{
|
26731 |
+
"epoch": 0.75,
|
26732 |
+
"grad_norm": 2.1346065998077393,
|
26733 |
+
"learning_rate": 1.2278274618311183e-05,
|
26734 |
+
"loss": 5.5053,
|
26735 |
+
"step": 15272
|
26736 |
+
},
|
26737 |
+
{
|
26738 |
+
"epoch": 0.75,
|
26739 |
+
"grad_norm": 2.1076815128326416,
|
26740 |
+
"learning_rate": 1.2268392707149563e-05,
|
26741 |
+
"loss": 5.4105,
|
26742 |
+
"step": 15276
|
26743 |
+
},
|
26744 |
+
{
|
26745 |
+
"epoch": 0.75,
|
26746 |
+
"grad_norm": 1.9122978448867798,
|
26747 |
+
"learning_rate": 1.2258510795987945e-05,
|
26748 |
+
"loss": 5.4235,
|
26749 |
+
"step": 15280
|
26750 |
+
},
|
26751 |
+
{
|
26752 |
+
"epoch": 0.76,
|
26753 |
+
"grad_norm": 2.054979085922241,
|
26754 |
+
"learning_rate": 1.2248628884826326e-05,
|
26755 |
+
"loss": 5.5838,
|
26756 |
+
"step": 15284
|
26757 |
+
},
|
26758 |
+
{
|
26759 |
+
"epoch": 0.76,
|
26760 |
+
"grad_norm": 1.9995005130767822,
|
26761 |
+
"learning_rate": 1.2238746973664706e-05,
|
26762 |
+
"loss": 5.4584,
|
26763 |
+
"step": 15288
|
26764 |
+
},
|
26765 |
+
{
|
26766 |
+
"epoch": 0.76,
|
26767 |
+
"grad_norm": 2.0757248401641846,
|
26768 |
+
"learning_rate": 1.222886506250309e-05,
|
26769 |
+
"loss": 5.5064,
|
26770 |
+
"step": 15292
|
26771 |
+
},
|
26772 |
+
{
|
26773 |
+
"epoch": 0.76,
|
26774 |
+
"grad_norm": 1.831465721130371,
|
26775 |
+
"learning_rate": 1.221898315134147e-05,
|
26776 |
+
"loss": 5.5416,
|
26777 |
+
"step": 15296
|
26778 |
+
},
|
26779 |
+
{
|
26780 |
+
"epoch": 0.76,
|
26781 |
+
"grad_norm": 2.3364105224609375,
|
26782 |
+
"learning_rate": 1.2209101240179852e-05,
|
26783 |
+
"loss": 5.5332,
|
26784 |
+
"step": 15300
|
26785 |
+
},
|
26786 |
+
{
|
26787 |
+
"epoch": 0.76,
|
26788 |
+
"grad_norm": 1.9546422958374023,
|
26789 |
+
"learning_rate": 1.2199219329018233e-05,
|
26790 |
+
"loss": 5.4726,
|
26791 |
+
"step": 15304
|
26792 |
+
},
|
26793 |
+
{
|
26794 |
+
"epoch": 0.76,
|
26795 |
+
"grad_norm": 1.8395370244979858,
|
26796 |
+
"learning_rate": 1.2189337417856615e-05,
|
26797 |
+
"loss": 5.4443,
|
26798 |
+
"step": 15308
|
26799 |
+
},
|
26800 |
+
{
|
26801 |
+
"epoch": 0.76,
|
26802 |
+
"grad_norm": 2.080458402633667,
|
26803 |
+
"learning_rate": 1.2179455506694995e-05,
|
26804 |
+
"loss": 5.5154,
|
26805 |
+
"step": 15312
|
26806 |
+
},
|
26807 |
+
{
|
26808 |
+
"epoch": 0.76,
|
26809 |
+
"grad_norm": 2.017116069793701,
|
26810 |
+
"learning_rate": 1.2169573595533375e-05,
|
26811 |
+
"loss": 5.5048,
|
26812 |
+
"step": 15316
|
26813 |
+
},
|
26814 |
+
{
|
26815 |
+
"epoch": 0.76,
|
26816 |
+
"grad_norm": 2.0325398445129395,
|
26817 |
+
"learning_rate": 1.2159691684371759e-05,
|
26818 |
+
"loss": 5.3668,
|
26819 |
+
"step": 15320
|
26820 |
+
},
|
26821 |
+
{
|
26822 |
+
"epoch": 0.76,
|
26823 |
+
"grad_norm": 2.246100664138794,
|
26824 |
+
"learning_rate": 1.214980977321014e-05,
|
26825 |
+
"loss": 5.5176,
|
26826 |
+
"step": 15324
|
26827 |
+
},
|
26828 |
+
{
|
26829 |
+
"epoch": 0.76,
|
26830 |
+
"grad_norm": 2.0287086963653564,
|
26831 |
+
"learning_rate": 1.2139927862048522e-05,
|
26832 |
+
"loss": 5.5352,
|
26833 |
+
"step": 15328
|
26834 |
+
},
|
26835 |
+
{
|
26836 |
+
"epoch": 0.76,
|
26837 |
+
"grad_norm": 2.278211832046509,
|
26838 |
+
"learning_rate": 1.2130045950886902e-05,
|
26839 |
+
"loss": 5.441,
|
26840 |
+
"step": 15332
|
26841 |
+
},
|
26842 |
+
{
|
26843 |
+
"epoch": 0.76,
|
26844 |
+
"grad_norm": 2.143902063369751,
|
26845 |
+
"learning_rate": 1.2120164039725284e-05,
|
26846 |
+
"loss": 5.5263,
|
26847 |
+
"step": 15336
|
26848 |
+
},
|
26849 |
+
{
|
26850 |
+
"epoch": 0.76,
|
26851 |
+
"grad_norm": 2.0122079849243164,
|
26852 |
+
"learning_rate": 1.2110282128563664e-05,
|
26853 |
+
"loss": 5.3925,
|
26854 |
+
"step": 15340
|
26855 |
+
},
|
26856 |
+
{
|
26857 |
+
"epoch": 0.76,
|
26858 |
+
"grad_norm": 2.094726324081421,
|
26859 |
+
"learning_rate": 1.2100400217402046e-05,
|
26860 |
+
"loss": 5.4837,
|
26861 |
+
"step": 15344
|
26862 |
+
},
|
26863 |
+
{
|
26864 |
+
"epoch": 0.76,
|
26865 |
+
"grad_norm": 2.0904650688171387,
|
26866 |
+
"learning_rate": 1.2090518306240428e-05,
|
26867 |
+
"loss": 5.4732,
|
26868 |
+
"step": 15348
|
26869 |
+
},
|
26870 |
+
{
|
26871 |
+
"epoch": 0.76,
|
26872 |
+
"grad_norm": 1.869597315788269,
|
26873 |
+
"learning_rate": 1.2080636395078809e-05,
|
26874 |
+
"loss": 5.4292,
|
26875 |
+
"step": 15352
|
26876 |
+
},
|
26877 |
+
{
|
26878 |
+
"epoch": 0.76,
|
26879 |
+
"grad_norm": 2.0959110260009766,
|
26880 |
+
"learning_rate": 1.2070754483917191e-05,
|
26881 |
+
"loss": 5.5033,
|
26882 |
+
"step": 15356
|
26883 |
+
},
|
26884 |
+
{
|
26885 |
+
"epoch": 0.76,
|
26886 |
+
"grad_norm": 2.153909921646118,
|
26887 |
+
"learning_rate": 1.2060872572755571e-05,
|
26888 |
+
"loss": 5.4692,
|
26889 |
+
"step": 15360
|
26890 |
+
},
|
26891 |
+
{
|
26892 |
+
"epoch": 0.76,
|
26893 |
+
"grad_norm": 2.0445823669433594,
|
26894 |
+
"learning_rate": 1.2050990661593953e-05,
|
26895 |
+
"loss": 5.3999,
|
26896 |
+
"step": 15364
|
26897 |
+
},
|
26898 |
+
{
|
26899 |
+
"epoch": 0.76,
|
26900 |
+
"grad_norm": 2.0181336402893066,
|
26901 |
+
"learning_rate": 1.2041108750432334e-05,
|
26902 |
+
"loss": 5.4045,
|
26903 |
+
"step": 15368
|
26904 |
+
},
|
26905 |
+
{
|
26906 |
+
"epoch": 0.76,
|
26907 |
+
"grad_norm": 2.008654832839966,
|
26908 |
+
"learning_rate": 1.2031226839270716e-05,
|
26909 |
+
"loss": 5.6017,
|
26910 |
+
"step": 15372
|
26911 |
+
},
|
26912 |
+
{
|
26913 |
+
"epoch": 0.76,
|
26914 |
+
"grad_norm": 2.126469850540161,
|
26915 |
+
"learning_rate": 1.2021344928109096e-05,
|
26916 |
+
"loss": 5.4578,
|
26917 |
+
"step": 15376
|
26918 |
+
},
|
26919 |
+
{
|
26920 |
+
"epoch": 0.76,
|
26921 |
+
"grad_norm": 2.031398057937622,
|
26922 |
+
"learning_rate": 1.2011463016947478e-05,
|
26923 |
+
"loss": 5.5091,
|
26924 |
+
"step": 15380
|
26925 |
+
},
|
26926 |
+
{
|
26927 |
+
"epoch": 0.76,
|
26928 |
+
"grad_norm": 2.102151870727539,
|
26929 |
+
"learning_rate": 1.200158110578586e-05,
|
26930 |
+
"loss": 5.4637,
|
26931 |
+
"step": 15384
|
26932 |
+
},
|
26933 |
+
{
|
26934 |
+
"epoch": 0.76,
|
26935 |
+
"grad_norm": 1.9239962100982666,
|
26936 |
+
"learning_rate": 1.199169919462424e-05,
|
26937 |
+
"loss": 5.5014,
|
26938 |
+
"step": 15388
|
26939 |
+
},
|
26940 |
+
{
|
26941 |
+
"epoch": 0.76,
|
26942 |
+
"grad_norm": 1.9613525867462158,
|
26943 |
+
"learning_rate": 1.1981817283462623e-05,
|
26944 |
+
"loss": 5.5283,
|
26945 |
+
"step": 15392
|
26946 |
+
},
|
26947 |
+
{
|
26948 |
+
"epoch": 0.76,
|
26949 |
+
"grad_norm": 1.973003625869751,
|
26950 |
+
"learning_rate": 1.1971935372301003e-05,
|
26951 |
+
"loss": 5.4865,
|
26952 |
+
"step": 15396
|
26953 |
+
},
|
26954 |
+
{
|
26955 |
+
"epoch": 0.76,
|
26956 |
+
"grad_norm": 2.3486452102661133,
|
26957 |
+
"learning_rate": 1.1962053461139385e-05,
|
26958 |
+
"loss": 5.5125,
|
26959 |
+
"step": 15400
|
26960 |
+
},
|
26961 |
+
{
|
26962 |
+
"epoch": 0.76,
|
26963 |
+
"grad_norm": 2.088740825653076,
|
26964 |
+
"learning_rate": 1.1952171549977765e-05,
|
26965 |
+
"loss": 5.4416,
|
26966 |
+
"step": 15404
|
26967 |
+
},
|
26968 |
+
{
|
26969 |
+
"epoch": 0.76,
|
26970 |
+
"grad_norm": 1.9701464176177979,
|
26971 |
+
"learning_rate": 1.1942289638816147e-05,
|
26972 |
+
"loss": 5.2871,
|
26973 |
+
"step": 15408
|
26974 |
+
},
|
26975 |
+
{
|
26976 |
+
"epoch": 0.76,
|
26977 |
+
"grad_norm": 2.2388153076171875,
|
26978 |
+
"learning_rate": 1.193240772765453e-05,
|
26979 |
+
"loss": 5.551,
|
26980 |
+
"step": 15412
|
26981 |
+
},
|
26982 |
+
{
|
26983 |
+
"epoch": 0.76,
|
26984 |
+
"grad_norm": 2.2453413009643555,
|
26985 |
+
"learning_rate": 1.192252581649291e-05,
|
26986 |
+
"loss": 5.4372,
|
26987 |
+
"step": 15416
|
26988 |
+
},
|
26989 |
+
{
|
26990 |
+
"epoch": 0.76,
|
26991 |
+
"grad_norm": 2.1105456352233887,
|
26992 |
+
"learning_rate": 1.1912643905331292e-05,
|
26993 |
+
"loss": 5.5284,
|
26994 |
+
"step": 15420
|
26995 |
+
},
|
26996 |
+
{
|
26997 |
+
"epoch": 0.76,
|
26998 |
+
"grad_norm": 2.197547197341919,
|
26999 |
+
"learning_rate": 1.1902761994169672e-05,
|
27000 |
+
"loss": 5.5067,
|
27001 |
+
"step": 15424
|
27002 |
+
},
|
27003 |
+
{
|
27004 |
+
"epoch": 0.76,
|
27005 |
+
"grad_norm": 1.9585908651351929,
|
27006 |
+
"learning_rate": 1.1892880083008054e-05,
|
27007 |
+
"loss": 5.4292,
|
27008 |
+
"step": 15428
|
27009 |
+
},
|
27010 |
+
{
|
27011 |
+
"epoch": 0.76,
|
27012 |
+
"grad_norm": 2.037917137145996,
|
27013 |
+
"learning_rate": 1.1882998171846435e-05,
|
27014 |
+
"loss": 5.455,
|
27015 |
+
"step": 15432
|
27016 |
+
},
|
27017 |
+
{
|
27018 |
+
"epoch": 0.76,
|
27019 |
+
"grad_norm": 1.797452449798584,
|
27020 |
+
"learning_rate": 1.1873116260684817e-05,
|
27021 |
+
"loss": 5.4833,
|
27022 |
+
"step": 15436
|
27023 |
+
},
|
27024 |
+
{
|
27025 |
+
"epoch": 0.76,
|
27026 |
+
"grad_norm": 1.8614048957824707,
|
27027 |
+
"learning_rate": 1.1863234349523199e-05,
|
27028 |
+
"loss": 5.5127,
|
27029 |
+
"step": 15440
|
27030 |
+
},
|
27031 |
+
{
|
27032 |
+
"epoch": 0.76,
|
27033 |
+
"grad_norm": 1.9821441173553467,
|
27034 |
+
"learning_rate": 1.185335243836158e-05,
|
27035 |
+
"loss": 5.5038,
|
27036 |
+
"step": 15444
|
27037 |
+
},
|
27038 |
+
{
|
27039 |
+
"epoch": 0.76,
|
27040 |
+
"grad_norm": 2.3709757328033447,
|
27041 |
+
"learning_rate": 1.1843470527199961e-05,
|
27042 |
+
"loss": 5.4745,
|
27043 |
+
"step": 15448
|
27044 |
+
},
|
27045 |
+
{
|
27046 |
+
"epoch": 0.76,
|
27047 |
+
"grad_norm": 1.8125073909759521,
|
27048 |
+
"learning_rate": 1.1833588616038342e-05,
|
27049 |
+
"loss": 5.4642,
|
27050 |
+
"step": 15452
|
27051 |
+
},
|
27052 |
+
{
|
27053 |
+
"epoch": 0.76,
|
27054 |
+
"grad_norm": 2.0087850093841553,
|
27055 |
+
"learning_rate": 1.1823706704876724e-05,
|
27056 |
+
"loss": 5.4628,
|
27057 |
+
"step": 15456
|
27058 |
+
},
|
27059 |
+
{
|
27060 |
+
"epoch": 0.76,
|
27061 |
+
"grad_norm": 1.9169509410858154,
|
27062 |
+
"learning_rate": 1.1813824793715104e-05,
|
27063 |
+
"loss": 5.4821,
|
27064 |
+
"step": 15460
|
27065 |
+
},
|
27066 |
+
{
|
27067 |
+
"epoch": 0.76,
|
27068 |
+
"grad_norm": 2.01556396484375,
|
27069 |
+
"learning_rate": 1.1803942882553488e-05,
|
27070 |
+
"loss": 5.4142,
|
27071 |
+
"step": 15464
|
27072 |
+
},
|
27073 |
+
{
|
27074 |
+
"epoch": 0.76,
|
27075 |
+
"grad_norm": 1.9918155670166016,
|
27076 |
+
"learning_rate": 1.1794060971391868e-05,
|
27077 |
+
"loss": 5.6412,
|
27078 |
+
"step": 15468
|
27079 |
+
},
|
27080 |
+
{
|
27081 |
+
"epoch": 0.76,
|
27082 |
+
"grad_norm": 2.052454710006714,
|
27083 |
+
"learning_rate": 1.1784179060230249e-05,
|
27084 |
+
"loss": 5.4347,
|
27085 |
+
"step": 15472
|
27086 |
+
},
|
27087 |
+
{
|
27088 |
+
"epoch": 0.76,
|
27089 |
+
"grad_norm": 1.8537468910217285,
|
27090 |
+
"learning_rate": 1.177429714906863e-05,
|
27091 |
+
"loss": 5.5036,
|
27092 |
+
"step": 15476
|
27093 |
+
},
|
27094 |
+
{
|
27095 |
+
"epoch": 0.76,
|
27096 |
+
"grad_norm": 1.9864877462387085,
|
27097 |
+
"learning_rate": 1.1764415237907011e-05,
|
27098 |
+
"loss": 5.4979,
|
27099 |
+
"step": 15480
|
27100 |
+
},
|
27101 |
+
{
|
27102 |
+
"epoch": 0.77,
|
27103 |
+
"grad_norm": 2.200070858001709,
|
27104 |
+
"learning_rate": 1.1754533326745393e-05,
|
27105 |
+
"loss": 5.5684,
|
27106 |
+
"step": 15484
|
27107 |
+
},
|
27108 |
+
{
|
27109 |
+
"epoch": 0.77,
|
27110 |
+
"grad_norm": 1.7679542303085327,
|
27111 |
+
"learning_rate": 1.1744651415583773e-05,
|
27112 |
+
"loss": 5.4283,
|
27113 |
+
"step": 15488
|
27114 |
+
},
|
27115 |
+
{
|
27116 |
+
"epoch": 0.77,
|
27117 |
+
"grad_norm": 1.8458738327026367,
|
27118 |
+
"learning_rate": 1.1734769504422157e-05,
|
27119 |
+
"loss": 5.5566,
|
27120 |
+
"step": 15492
|
27121 |
+
},
|
27122 |
+
{
|
27123 |
+
"epoch": 0.77,
|
27124 |
+
"grad_norm": 2.3362741470336914,
|
27125 |
+
"learning_rate": 1.1724887593260538e-05,
|
27126 |
+
"loss": 5.4377,
|
27127 |
+
"step": 15496
|
27128 |
+
},
|
27129 |
+
{
|
27130 |
+
"epoch": 0.77,
|
27131 |
+
"grad_norm": 1.7595479488372803,
|
27132 |
+
"learning_rate": 1.1715005682098918e-05,
|
27133 |
+
"loss": 5.4785,
|
27134 |
+
"step": 15500
|
27135 |
+
},
|
27136 |
+
{
|
27137 |
+
"epoch": 0.77,
|
27138 |
+
"grad_norm": 2.1757829189300537,
|
27139 |
+
"learning_rate": 1.17051237709373e-05,
|
27140 |
+
"loss": 5.5964,
|
27141 |
+
"step": 15504
|
27142 |
+
},
|
27143 |
+
{
|
27144 |
+
"epoch": 0.77,
|
27145 |
+
"grad_norm": 2.0845088958740234,
|
27146 |
+
"learning_rate": 1.169524185977568e-05,
|
27147 |
+
"loss": 5.5294,
|
27148 |
+
"step": 15508
|
27149 |
+
},
|
27150 |
+
{
|
27151 |
+
"epoch": 0.77,
|
27152 |
+
"grad_norm": 1.937070608139038,
|
27153 |
+
"learning_rate": 1.1685359948614062e-05,
|
27154 |
+
"loss": 5.5338,
|
27155 |
+
"step": 15512
|
27156 |
+
},
|
27157 |
+
{
|
27158 |
+
"epoch": 0.77,
|
27159 |
+
"grad_norm": 2.137470006942749,
|
27160 |
+
"learning_rate": 1.1675478037452443e-05,
|
27161 |
+
"loss": 5.4788,
|
27162 |
+
"step": 15516
|
27163 |
+
},
|
27164 |
+
{
|
27165 |
+
"epoch": 0.77,
|
27166 |
+
"grad_norm": 2.1320180892944336,
|
27167 |
+
"learning_rate": 1.1665596126290827e-05,
|
27168 |
+
"loss": 5.6173,
|
27169 |
+
"step": 15520
|
27170 |
+
},
|
27171 |
+
{
|
27172 |
+
"epoch": 0.77,
|
27173 |
+
"grad_norm": 2.0288352966308594,
|
27174 |
+
"learning_rate": 1.1655714215129207e-05,
|
27175 |
+
"loss": 5.3012,
|
27176 |
+
"step": 15524
|
27177 |
+
},
|
27178 |
+
{
|
27179 |
+
"epoch": 0.77,
|
27180 |
+
"grad_norm": 1.784881830215454,
|
27181 |
+
"learning_rate": 1.1645832303967589e-05,
|
27182 |
+
"loss": 5.5805,
|
27183 |
+
"step": 15528
|
27184 |
+
},
|
27185 |
+
{
|
27186 |
+
"epoch": 0.77,
|
27187 |
+
"grad_norm": 2.08077073097229,
|
27188 |
+
"learning_rate": 1.163595039280597e-05,
|
27189 |
+
"loss": 5.4992,
|
27190 |
+
"step": 15532
|
27191 |
+
},
|
27192 |
+
{
|
27193 |
+
"epoch": 0.77,
|
27194 |
+
"grad_norm": 1.7977101802825928,
|
27195 |
+
"learning_rate": 1.162606848164435e-05,
|
27196 |
+
"loss": 5.4872,
|
27197 |
+
"step": 15536
|
27198 |
+
},
|
27199 |
+
{
|
27200 |
+
"epoch": 0.77,
|
27201 |
+
"grad_norm": 2.186459541320801,
|
27202 |
+
"learning_rate": 1.1616186570482732e-05,
|
27203 |
+
"loss": 5.578,
|
27204 |
+
"step": 15540
|
27205 |
+
},
|
27206 |
+
{
|
27207 |
+
"epoch": 0.77,
|
27208 |
+
"grad_norm": 2.183048725128174,
|
27209 |
+
"learning_rate": 1.1606304659321112e-05,
|
27210 |
+
"loss": 5.4738,
|
27211 |
+
"step": 15544
|
27212 |
+
},
|
27213 |
+
{
|
27214 |
+
"epoch": 0.77,
|
27215 |
+
"grad_norm": 1.8500030040740967,
|
27216 |
+
"learning_rate": 1.1596422748159494e-05,
|
27217 |
+
"loss": 5.5345,
|
27218 |
+
"step": 15548
|
27219 |
+
},
|
27220 |
+
{
|
27221 |
+
"epoch": 0.77,
|
27222 |
+
"grad_norm": 1.8691611289978027,
|
27223 |
+
"learning_rate": 1.1586540836997876e-05,
|
27224 |
+
"loss": 5.5283,
|
27225 |
+
"step": 15552
|
27226 |
+
},
|
27227 |
+
{
|
27228 |
+
"epoch": 0.77,
|
27229 |
+
"grad_norm": 2.1216142177581787,
|
27230 |
+
"learning_rate": 1.1576658925836258e-05,
|
27231 |
+
"loss": 5.581,
|
27232 |
+
"step": 15556
|
27233 |
+
},
|
27234 |
+
{
|
27235 |
+
"epoch": 0.77,
|
27236 |
+
"grad_norm": 2.0468342304229736,
|
27237 |
+
"learning_rate": 1.1566777014674639e-05,
|
27238 |
+
"loss": 5.5649,
|
27239 |
+
"step": 15560
|
27240 |
+
},
|
27241 |
+
{
|
27242 |
+
"epoch": 0.77,
|
27243 |
+
"grad_norm": 2.060667037963867,
|
27244 |
+
"learning_rate": 1.155689510351302e-05,
|
27245 |
+
"loss": 5.4901,
|
27246 |
+
"step": 15564
|
27247 |
+
},
|
27248 |
+
{
|
27249 |
+
"epoch": 0.77,
|
27250 |
+
"grad_norm": 1.803126573562622,
|
27251 |
+
"learning_rate": 1.1547013192351401e-05,
|
27252 |
+
"loss": 5.4285,
|
27253 |
+
"step": 15568
|
27254 |
+
},
|
27255 |
+
{
|
27256 |
+
"epoch": 0.77,
|
27257 |
+
"grad_norm": 2.05906081199646,
|
27258 |
+
"learning_rate": 1.1537131281189781e-05,
|
27259 |
+
"loss": 5.4268,
|
27260 |
+
"step": 15572
|
27261 |
+
},
|
27262 |
+
{
|
27263 |
+
"epoch": 0.77,
|
27264 |
+
"grad_norm": 2.0089757442474365,
|
27265 |
+
"learning_rate": 1.1527249370028164e-05,
|
27266 |
+
"loss": 5.5307,
|
27267 |
+
"step": 15576
|
27268 |
+
},
|
27269 |
+
{
|
27270 |
+
"epoch": 0.77,
|
27271 |
+
"grad_norm": 1.9152356386184692,
|
27272 |
+
"learning_rate": 1.1517367458866546e-05,
|
27273 |
+
"loss": 5.4297,
|
27274 |
+
"step": 15580
|
27275 |
+
},
|
27276 |
+
{
|
27277 |
+
"epoch": 0.77,
|
27278 |
+
"grad_norm": 1.9012370109558105,
|
27279 |
+
"learning_rate": 1.1507485547704928e-05,
|
27280 |
+
"loss": 5.5271,
|
27281 |
+
"step": 15584
|
27282 |
+
},
|
27283 |
+
{
|
27284 |
+
"epoch": 0.77,
|
27285 |
+
"grad_norm": 2.0786564350128174,
|
27286 |
+
"learning_rate": 1.1497603636543308e-05,
|
27287 |
+
"loss": 5.5957,
|
27288 |
+
"step": 15588
|
27289 |
+
},
|
27290 |
+
{
|
27291 |
+
"epoch": 0.77,
|
27292 |
+
"grad_norm": 2.03715443611145,
|
27293 |
+
"learning_rate": 1.148772172538169e-05,
|
27294 |
+
"loss": 5.5501,
|
27295 |
+
"step": 15592
|
27296 |
+
},
|
27297 |
+
{
|
27298 |
+
"epoch": 0.77,
|
27299 |
+
"grad_norm": 2.1424834728240967,
|
27300 |
+
"learning_rate": 1.147783981422007e-05,
|
27301 |
+
"loss": 5.5153,
|
27302 |
+
"step": 15596
|
27303 |
+
},
|
27304 |
+
{
|
27305 |
+
"epoch": 0.77,
|
27306 |
+
"grad_norm": 1.9324986934661865,
|
27307 |
+
"learning_rate": 1.146795790305845e-05,
|
27308 |
+
"loss": 5.5928,
|
27309 |
+
"step": 15600
|
27310 |
+
},
|
27311 |
+
{
|
27312 |
+
"epoch": 0.77,
|
27313 |
+
"grad_norm": 1.986244559288025,
|
27314 |
+
"learning_rate": 1.1458075991896833e-05,
|
27315 |
+
"loss": 5.4466,
|
27316 |
+
"step": 15604
|
27317 |
+
},
|
27318 |
+
{
|
27319 |
+
"epoch": 0.77,
|
27320 |
+
"grad_norm": 2.3215315341949463,
|
27321 |
+
"learning_rate": 1.1448194080735215e-05,
|
27322 |
+
"loss": 5.4948,
|
27323 |
+
"step": 15608
|
27324 |
+
},
|
27325 |
+
{
|
27326 |
+
"epoch": 0.77,
|
27327 |
+
"grad_norm": 2.2035694122314453,
|
27328 |
+
"learning_rate": 1.1438312169573597e-05,
|
27329 |
+
"loss": 5.6002,
|
27330 |
+
"step": 15612
|
27331 |
+
},
|
27332 |
+
{
|
27333 |
+
"epoch": 0.77,
|
27334 |
+
"grad_norm": 2.0092086791992188,
|
27335 |
+
"learning_rate": 1.1428430258411977e-05,
|
27336 |
+
"loss": 5.4083,
|
27337 |
+
"step": 15616
|
27338 |
+
},
|
27339 |
+
{
|
27340 |
+
"epoch": 0.77,
|
27341 |
+
"grad_norm": 2.008274555206299,
|
27342 |
+
"learning_rate": 1.141854834725036e-05,
|
27343 |
+
"loss": 5.3977,
|
27344 |
+
"step": 15620
|
27345 |
+
},
|
27346 |
+
{
|
27347 |
+
"epoch": 0.77,
|
27348 |
+
"grad_norm": 2.084843873977661,
|
27349 |
+
"learning_rate": 1.140866643608874e-05,
|
27350 |
+
"loss": 5.4896,
|
27351 |
+
"step": 15624
|
27352 |
+
},
|
27353 |
+
{
|
27354 |
+
"epoch": 0.77,
|
27355 |
+
"grad_norm": 2.138129949569702,
|
27356 |
+
"learning_rate": 1.1398784524927122e-05,
|
27357 |
+
"loss": 5.556,
|
27358 |
+
"step": 15628
|
27359 |
+
},
|
27360 |
+
{
|
27361 |
+
"epoch": 0.77,
|
27362 |
+
"grad_norm": 2.161590814590454,
|
27363 |
+
"learning_rate": 1.1388902613765502e-05,
|
27364 |
+
"loss": 5.4909,
|
27365 |
+
"step": 15632
|
27366 |
+
},
|
27367 |
+
{
|
27368 |
+
"epoch": 0.77,
|
27369 |
+
"grad_norm": 2.1914632320404053,
|
27370 |
+
"learning_rate": 1.1379020702603884e-05,
|
27371 |
+
"loss": 5.5578,
|
27372 |
+
"step": 15636
|
27373 |
+
},
|
27374 |
+
{
|
27375 |
+
"epoch": 0.77,
|
27376 |
+
"grad_norm": 2.254403829574585,
|
27377 |
+
"learning_rate": 1.1369138791442266e-05,
|
27378 |
+
"loss": 5.6521,
|
27379 |
+
"step": 15640
|
27380 |
+
},
|
27381 |
+
{
|
27382 |
+
"epoch": 0.77,
|
27383 |
+
"grad_norm": 1.9875483512878418,
|
27384 |
+
"learning_rate": 1.1359256880280647e-05,
|
27385 |
+
"loss": 5.4972,
|
27386 |
+
"step": 15644
|
27387 |
+
},
|
27388 |
+
{
|
27389 |
+
"epoch": 0.77,
|
27390 |
+
"grad_norm": 2.2201669216156006,
|
27391 |
+
"learning_rate": 1.1349374969119029e-05,
|
27392 |
+
"loss": 5.6261,
|
27393 |
+
"step": 15648
|
27394 |
+
},
|
27395 |
+
{
|
27396 |
+
"epoch": 0.77,
|
27397 |
+
"grad_norm": 2.054435968399048,
|
27398 |
+
"learning_rate": 1.1339493057957409e-05,
|
27399 |
+
"loss": 5.5603,
|
27400 |
+
"step": 15652
|
27401 |
+
},
|
27402 |
+
{
|
27403 |
+
"epoch": 0.77,
|
27404 |
+
"grad_norm": 1.902565598487854,
|
27405 |
+
"learning_rate": 1.1329611146795791e-05,
|
27406 |
+
"loss": 5.6618,
|
27407 |
+
"step": 15656
|
27408 |
+
},
|
27409 |
+
{
|
27410 |
+
"epoch": 0.77,
|
27411 |
+
"grad_norm": 2.2018725872039795,
|
27412 |
+
"learning_rate": 1.1319729235634172e-05,
|
27413 |
+
"loss": 5.429,
|
27414 |
+
"step": 15660
|
27415 |
+
},
|
27416 |
+
{
|
27417 |
+
"epoch": 0.77,
|
27418 |
+
"grad_norm": 1.9127063751220703,
|
27419 |
+
"learning_rate": 1.1309847324472554e-05,
|
27420 |
+
"loss": 5.5171,
|
27421 |
+
"step": 15664
|
27422 |
+
},
|
27423 |
+
{
|
27424 |
+
"epoch": 0.77,
|
27425 |
+
"grad_norm": 1.8942164182662964,
|
27426 |
+
"learning_rate": 1.1299965413310936e-05,
|
27427 |
+
"loss": 5.5357,
|
27428 |
+
"step": 15668
|
27429 |
+
},
|
27430 |
+
{
|
27431 |
+
"epoch": 0.77,
|
27432 |
+
"grad_norm": 2.2651684284210205,
|
27433 |
+
"learning_rate": 1.1290083502149316e-05,
|
27434 |
+
"loss": 5.4591,
|
27435 |
+
"step": 15672
|
27436 |
+
},
|
27437 |
+
{
|
27438 |
+
"epoch": 0.77,
|
27439 |
+
"grad_norm": 2.189774990081787,
|
27440 |
+
"learning_rate": 1.1280201590987698e-05,
|
27441 |
+
"loss": 5.4338,
|
27442 |
+
"step": 15676
|
27443 |
+
},
|
27444 |
+
{
|
27445 |
+
"epoch": 0.77,
|
27446 |
+
"grad_norm": 2.1134681701660156,
|
27447 |
+
"learning_rate": 1.1270319679826078e-05,
|
27448 |
+
"loss": 5.5294,
|
27449 |
+
"step": 15680
|
27450 |
+
},
|
27451 |
+
{
|
27452 |
+
"epoch": 0.77,
|
27453 |
+
"grad_norm": 2.2125091552734375,
|
27454 |
+
"learning_rate": 1.126043776866446e-05,
|
27455 |
+
"loss": 5.5269,
|
27456 |
+
"step": 15684
|
27457 |
+
},
|
27458 |
+
{
|
27459 |
+
"epoch": 0.78,
|
27460 |
+
"grad_norm": 1.9919397830963135,
|
27461 |
+
"learning_rate": 1.1250555857502841e-05,
|
27462 |
+
"loss": 5.4072,
|
27463 |
+
"step": 15688
|
27464 |
+
},
|
27465 |
+
{
|
27466 |
+
"epoch": 0.78,
|
27467 |
+
"grad_norm": 1.958975911140442,
|
27468 |
+
"learning_rate": 1.1240673946341223e-05,
|
27469 |
+
"loss": 5.4421,
|
27470 |
+
"step": 15692
|
27471 |
+
},
|
27472 |
+
{
|
27473 |
+
"epoch": 0.78,
|
27474 |
+
"grad_norm": 2.033118724822998,
|
27475 |
+
"learning_rate": 1.1230792035179605e-05,
|
27476 |
+
"loss": 5.5095,
|
27477 |
+
"step": 15696
|
27478 |
+
},
|
27479 |
+
{
|
27480 |
+
"epoch": 0.78,
|
27481 |
+
"grad_norm": 2.1201207637786865,
|
27482 |
+
"learning_rate": 1.1220910124017985e-05,
|
27483 |
+
"loss": 5.4767,
|
27484 |
+
"step": 15700
|
27485 |
+
},
|
27486 |
+
{
|
27487 |
+
"epoch": 0.78,
|
27488 |
+
"grad_norm": 1.9773985147476196,
|
27489 |
+
"learning_rate": 1.1211028212856367e-05,
|
27490 |
+
"loss": 5.424,
|
27491 |
+
"step": 15704
|
27492 |
+
},
|
27493 |
+
{
|
27494 |
+
"epoch": 0.78,
|
27495 |
+
"grad_norm": 1.841871976852417,
|
27496 |
+
"learning_rate": 1.1201146301694748e-05,
|
27497 |
+
"loss": 5.521,
|
27498 |
+
"step": 15708
|
27499 |
+
},
|
27500 |
+
{
|
27501 |
+
"epoch": 0.78,
|
27502 |
+
"grad_norm": 1.9351176023483276,
|
27503 |
+
"learning_rate": 1.119126439053313e-05,
|
27504 |
+
"loss": 5.5646,
|
27505 |
+
"step": 15712
|
27506 |
+
},
|
27507 |
+
{
|
27508 |
+
"epoch": 0.78,
|
27509 |
+
"grad_norm": 2.0061535835266113,
|
27510 |
+
"learning_rate": 1.118138247937151e-05,
|
27511 |
+
"loss": 5.36,
|
27512 |
+
"step": 15716
|
27513 |
+
},
|
27514 |
+
{
|
27515 |
+
"epoch": 0.78,
|
27516 |
+
"grad_norm": 2.4020817279815674,
|
27517 |
+
"learning_rate": 1.1171500568209892e-05,
|
27518 |
+
"loss": 5.4701,
|
27519 |
+
"step": 15720
|
27520 |
+
},
|
27521 |
+
{
|
27522 |
+
"epoch": 0.78,
|
27523 |
+
"grad_norm": 1.9213863611221313,
|
27524 |
+
"learning_rate": 1.1161618657048274e-05,
|
27525 |
+
"loss": 5.4668,
|
27526 |
+
"step": 15724
|
27527 |
+
},
|
27528 |
+
{
|
27529 |
+
"epoch": 0.78,
|
27530 |
+
"grad_norm": 2.3538432121276855,
|
27531 |
+
"learning_rate": 1.1151736745886655e-05,
|
27532 |
+
"loss": 5.5128,
|
27533 |
+
"step": 15728
|
27534 |
+
},
|
27535 |
+
{
|
27536 |
+
"epoch": 0.78,
|
27537 |
+
"grad_norm": 2.147163152694702,
|
27538 |
+
"learning_rate": 1.1141854834725037e-05,
|
27539 |
+
"loss": 5.4579,
|
27540 |
+
"step": 15732
|
27541 |
+
},
|
27542 |
+
{
|
27543 |
+
"epoch": 0.78,
|
27544 |
+
"grad_norm": 2.1335911750793457,
|
27545 |
+
"learning_rate": 1.1131972923563417e-05,
|
27546 |
+
"loss": 5.4277,
|
27547 |
+
"step": 15736
|
27548 |
+
},
|
27549 |
+
{
|
27550 |
+
"epoch": 0.78,
|
27551 |
+
"grad_norm": 2.1882131099700928,
|
27552 |
+
"learning_rate": 1.11220910124018e-05,
|
27553 |
+
"loss": 5.6003,
|
27554 |
+
"step": 15740
|
27555 |
+
},
|
27556 |
+
{
|
27557 |
+
"epoch": 0.78,
|
27558 |
+
"grad_norm": 1.9644808769226074,
|
27559 |
+
"learning_rate": 1.111220910124018e-05,
|
27560 |
+
"loss": 5.5355,
|
27561 |
+
"step": 15744
|
27562 |
+
},
|
27563 |
+
{
|
27564 |
+
"epoch": 0.78,
|
27565 |
+
"grad_norm": 1.7521815299987793,
|
27566 |
+
"learning_rate": 1.1102327190078562e-05,
|
27567 |
+
"loss": 5.4743,
|
27568 |
+
"step": 15748
|
27569 |
+
},
|
27570 |
+
{
|
27571 |
+
"epoch": 0.78,
|
27572 |
+
"grad_norm": 2.1638858318328857,
|
27573 |
+
"learning_rate": 1.1092445278916944e-05,
|
27574 |
+
"loss": 5.3701,
|
27575 |
+
"step": 15752
|
27576 |
+
},
|
27577 |
+
{
|
27578 |
+
"epoch": 0.78,
|
27579 |
+
"grad_norm": 1.8966166973114014,
|
27580 |
+
"learning_rate": 1.1082563367755324e-05,
|
27581 |
+
"loss": 5.5355,
|
27582 |
+
"step": 15756
|
27583 |
+
},
|
27584 |
+
{
|
27585 |
+
"epoch": 0.78,
|
27586 |
+
"grad_norm": 1.981217861175537,
|
27587 |
+
"learning_rate": 1.1072681456593706e-05,
|
27588 |
+
"loss": 5.4366,
|
27589 |
+
"step": 15760
|
27590 |
+
},
|
27591 |
+
{
|
27592 |
+
"epoch": 0.78,
|
27593 |
+
"grad_norm": 2.0059125423431396,
|
27594 |
+
"learning_rate": 1.1062799545432086e-05,
|
27595 |
+
"loss": 5.4749,
|
27596 |
+
"step": 15764
|
27597 |
+
},
|
27598 |
+
{
|
27599 |
+
"epoch": 0.78,
|
27600 |
+
"grad_norm": 2.042475461959839,
|
27601 |
+
"learning_rate": 1.1052917634270469e-05,
|
27602 |
+
"loss": 5.4066,
|
27603 |
+
"step": 15768
|
27604 |
+
},
|
27605 |
+
{
|
27606 |
+
"epoch": 0.78,
|
27607 |
+
"grad_norm": 2.1431596279144287,
|
27608 |
+
"learning_rate": 1.1043035723108849e-05,
|
27609 |
+
"loss": 5.4831,
|
27610 |
+
"step": 15772
|
27611 |
+
},
|
27612 |
+
{
|
27613 |
+
"epoch": 0.78,
|
27614 |
+
"grad_norm": 1.903968095779419,
|
27615 |
+
"learning_rate": 1.1033153811947231e-05,
|
27616 |
+
"loss": 5.4749,
|
27617 |
+
"step": 15776
|
27618 |
+
},
|
27619 |
+
{
|
27620 |
+
"epoch": 0.78,
|
27621 |
+
"grad_norm": 2.2087221145629883,
|
27622 |
+
"learning_rate": 1.1023271900785613e-05,
|
27623 |
+
"loss": 5.5542,
|
27624 |
+
"step": 15780
|
27625 |
+
},
|
27626 |
+
{
|
27627 |
+
"epoch": 0.78,
|
27628 |
+
"grad_norm": 1.990248680114746,
|
27629 |
+
"learning_rate": 1.1013389989623993e-05,
|
27630 |
+
"loss": 5.4592,
|
27631 |
+
"step": 15784
|
27632 |
+
},
|
27633 |
+
{
|
27634 |
+
"epoch": 0.78,
|
27635 |
+
"grad_norm": 1.9822028875350952,
|
27636 |
+
"learning_rate": 1.1003508078462375e-05,
|
27637 |
+
"loss": 5.5245,
|
27638 |
+
"step": 15788
|
27639 |
+
},
|
27640 |
+
{
|
27641 |
+
"epoch": 0.78,
|
27642 |
+
"grad_norm": 2.0607571601867676,
|
27643 |
+
"learning_rate": 1.0993626167300756e-05,
|
27644 |
+
"loss": 5.5105,
|
27645 |
+
"step": 15792
|
27646 |
+
},
|
27647 |
+
{
|
27648 |
+
"epoch": 0.78,
|
27649 |
+
"grad_norm": 2.2510719299316406,
|
27650 |
+
"learning_rate": 1.0983744256139138e-05,
|
27651 |
+
"loss": 5.4485,
|
27652 |
+
"step": 15796
|
27653 |
+
},
|
27654 |
+
{
|
27655 |
+
"epoch": 0.78,
|
27656 |
+
"grad_norm": 1.9407929182052612,
|
27657 |
+
"learning_rate": 1.0973862344977518e-05,
|
27658 |
+
"loss": 5.4835,
|
27659 |
+
"step": 15800
|
27660 |
+
},
|
27661 |
+
{
|
27662 |
+
"epoch": 0.78,
|
27663 |
+
"grad_norm": 2.0228731632232666,
|
27664 |
+
"learning_rate": 1.09639804338159e-05,
|
27665 |
+
"loss": 5.4121,
|
27666 |
+
"step": 15804
|
27667 |
+
},
|
27668 |
+
{
|
27669 |
+
"epoch": 0.78,
|
27670 |
+
"grad_norm": 2.0110514163970947,
|
27671 |
+
"learning_rate": 1.0954098522654282e-05,
|
27672 |
+
"loss": 5.4097,
|
27673 |
+
"step": 15808
|
27674 |
+
},
|
27675 |
+
{
|
27676 |
+
"epoch": 0.78,
|
27677 |
+
"grad_norm": 2.005176305770874,
|
27678 |
+
"learning_rate": 1.0944216611492664e-05,
|
27679 |
+
"loss": 5.5651,
|
27680 |
+
"step": 15812
|
27681 |
+
},
|
27682 |
+
{
|
27683 |
+
"epoch": 0.78,
|
27684 |
+
"grad_norm": 2.08233380317688,
|
27685 |
+
"learning_rate": 1.0934334700331045e-05,
|
27686 |
+
"loss": 5.5942,
|
27687 |
+
"step": 15816
|
27688 |
+
},
|
27689 |
+
{
|
27690 |
+
"epoch": 0.78,
|
27691 |
+
"grad_norm": 1.76272714138031,
|
27692 |
+
"learning_rate": 1.0924452789169425e-05,
|
27693 |
+
"loss": 5.3883,
|
27694 |
+
"step": 15820
|
27695 |
+
},
|
27696 |
+
{
|
27697 |
+
"epoch": 0.78,
|
27698 |
+
"grad_norm": 1.8395804166793823,
|
27699 |
+
"learning_rate": 1.0914570878007807e-05,
|
27700 |
+
"loss": 5.372,
|
27701 |
+
"step": 15824
|
27702 |
+
},
|
27703 |
+
{
|
27704 |
+
"epoch": 0.78,
|
27705 |
+
"grad_norm": 2.197016954421997,
|
27706 |
+
"learning_rate": 1.0904688966846188e-05,
|
27707 |
+
"loss": 5.4239,
|
27708 |
+
"step": 15828
|
27709 |
+
},
|
27710 |
+
{
|
27711 |
+
"epoch": 0.78,
|
27712 |
+
"grad_norm": 2.1127779483795166,
|
27713 |
+
"learning_rate": 1.089480705568457e-05,
|
27714 |
+
"loss": 5.358,
|
27715 |
+
"step": 15832
|
27716 |
+
},
|
27717 |
+
{
|
27718 |
+
"epoch": 0.78,
|
27719 |
+
"grad_norm": 2.1906561851501465,
|
27720 |
+
"learning_rate": 1.0884925144522952e-05,
|
27721 |
+
"loss": 5.528,
|
27722 |
+
"step": 15836
|
27723 |
+
},
|
27724 |
+
{
|
27725 |
+
"epoch": 0.78,
|
27726 |
+
"grad_norm": 2.2963180541992188,
|
27727 |
+
"learning_rate": 1.0875043233361334e-05,
|
27728 |
+
"loss": 5.6399,
|
27729 |
+
"step": 15840
|
27730 |
+
},
|
27731 |
+
{
|
27732 |
+
"epoch": 0.78,
|
27733 |
+
"grad_norm": 2.1306509971618652,
|
27734 |
+
"learning_rate": 1.0865161322199714e-05,
|
27735 |
+
"loss": 5.4986,
|
27736 |
+
"step": 15844
|
27737 |
+
},
|
27738 |
+
{
|
27739 |
+
"epoch": 0.78,
|
27740 |
+
"grad_norm": 1.8235065937042236,
|
27741 |
+
"learning_rate": 1.0855279411038096e-05,
|
27742 |
+
"loss": 5.5502,
|
27743 |
+
"step": 15848
|
27744 |
+
},
|
27745 |
+
{
|
27746 |
+
"epoch": 0.78,
|
27747 |
+
"grad_norm": 2.058922529220581,
|
27748 |
+
"learning_rate": 1.0845397499876477e-05,
|
27749 |
+
"loss": 5.5531,
|
27750 |
+
"step": 15852
|
27751 |
+
},
|
27752 |
+
{
|
27753 |
+
"epoch": 0.78,
|
27754 |
+
"grad_norm": 1.818724274635315,
|
27755 |
+
"learning_rate": 1.0835515588714857e-05,
|
27756 |
+
"loss": 5.4607,
|
27757 |
+
"step": 15856
|
27758 |
+
},
|
27759 |
+
{
|
27760 |
+
"epoch": 0.78,
|
27761 |
+
"grad_norm": 2.0503363609313965,
|
27762 |
+
"learning_rate": 1.0825633677553239e-05,
|
27763 |
+
"loss": 5.4722,
|
27764 |
+
"step": 15860
|
27765 |
+
},
|
27766 |
+
{
|
27767 |
+
"epoch": 0.78,
|
27768 |
+
"grad_norm": 2.076927423477173,
|
27769 |
+
"learning_rate": 1.081575176639162e-05,
|
27770 |
+
"loss": 5.4039,
|
27771 |
+
"step": 15864
|
27772 |
+
},
|
27773 |
+
{
|
27774 |
+
"epoch": 0.78,
|
27775 |
+
"grad_norm": 1.991584300994873,
|
27776 |
+
"learning_rate": 1.0805869855230003e-05,
|
27777 |
+
"loss": 5.4651,
|
27778 |
+
"step": 15868
|
27779 |
+
},
|
27780 |
+
{
|
27781 |
+
"epoch": 0.78,
|
27782 |
+
"grad_norm": 2.276181936264038,
|
27783 |
+
"learning_rate": 1.0795987944068383e-05,
|
27784 |
+
"loss": 5.5605,
|
27785 |
+
"step": 15872
|
27786 |
+
},
|
27787 |
+
{
|
27788 |
+
"epoch": 0.78,
|
27789 |
+
"grad_norm": 2.0964319705963135,
|
27790 |
+
"learning_rate": 1.0786106032906766e-05,
|
27791 |
+
"loss": 5.4315,
|
27792 |
+
"step": 15876
|
27793 |
+
},
|
27794 |
+
{
|
27795 |
+
"epoch": 0.78,
|
27796 |
+
"grad_norm": 2.0152671337127686,
|
27797 |
+
"learning_rate": 1.0776224121745146e-05,
|
27798 |
+
"loss": 5.5034,
|
27799 |
+
"step": 15880
|
27800 |
+
},
|
27801 |
+
{
|
27802 |
+
"epoch": 0.78,
|
27803 |
+
"grad_norm": 2.065906524658203,
|
27804 |
+
"learning_rate": 1.0766342210583528e-05,
|
27805 |
+
"loss": 5.5383,
|
27806 |
+
"step": 15884
|
27807 |
+
},
|
27808 |
+
{
|
27809 |
+
"epoch": 0.79,
|
27810 |
+
"grad_norm": 1.9010733366012573,
|
27811 |
+
"learning_rate": 1.0756460299421908e-05,
|
27812 |
+
"loss": 5.5424,
|
27813 |
+
"step": 15888
|
27814 |
+
},
|
27815 |
+
{
|
27816 |
+
"epoch": 0.79,
|
27817 |
+
"grad_norm": 1.9711081981658936,
|
27818 |
+
"learning_rate": 1.0746578388260289e-05,
|
27819 |
+
"loss": 5.4899,
|
27820 |
+
"step": 15892
|
27821 |
+
},
|
27822 |
+
{
|
27823 |
+
"epoch": 0.79,
|
27824 |
+
"grad_norm": 1.9570908546447754,
|
27825 |
+
"learning_rate": 1.0736696477098672e-05,
|
27826 |
+
"loss": 5.5072,
|
27827 |
+
"step": 15896
|
27828 |
+
},
|
27829 |
+
{
|
27830 |
+
"epoch": 0.79,
|
27831 |
+
"grad_norm": 2.1524243354797363,
|
27832 |
+
"learning_rate": 1.0726814565937053e-05,
|
27833 |
+
"loss": 5.4421,
|
27834 |
+
"step": 15900
|
27835 |
+
},
|
27836 |
+
{
|
27837 |
+
"epoch": 0.79,
|
27838 |
+
"grad_norm": 1.8811511993408203,
|
27839 |
+
"learning_rate": 1.0716932654775435e-05,
|
27840 |
+
"loss": 5.3984,
|
27841 |
+
"step": 15904
|
27842 |
+
},
|
27843 |
+
{
|
27844 |
+
"epoch": 0.79,
|
27845 |
+
"grad_norm": 1.914753794670105,
|
27846 |
+
"learning_rate": 1.0707050743613815e-05,
|
27847 |
+
"loss": 5.5874,
|
27848 |
+
"step": 15908
|
27849 |
+
},
|
27850 |
+
{
|
27851 |
+
"epoch": 0.79,
|
27852 |
+
"grad_norm": 1.9757344722747803,
|
27853 |
+
"learning_rate": 1.0697168832452197e-05,
|
27854 |
+
"loss": 5.5402,
|
27855 |
+
"step": 15912
|
27856 |
+
},
|
27857 |
+
{
|
27858 |
+
"epoch": 0.79,
|
27859 |
+
"grad_norm": 2.1234004497528076,
|
27860 |
+
"learning_rate": 1.0689757399080982e-05,
|
27861 |
+
"loss": 5.4948,
|
27862 |
+
"step": 15916
|
27863 |
+
},
|
27864 |
+
{
|
27865 |
+
"epoch": 0.79,
|
27866 |
+
"grad_norm": 1.9321492910385132,
|
27867 |
+
"learning_rate": 1.0679875487919364e-05,
|
27868 |
+
"loss": 5.3983,
|
27869 |
+
"step": 15920
|
27870 |
+
},
|
27871 |
+
{
|
27872 |
+
"epoch": 0.79,
|
27873 |
+
"grad_norm": 2.0924885272979736,
|
27874 |
+
"learning_rate": 1.0669993576757746e-05,
|
27875 |
+
"loss": 5.3964,
|
27876 |
+
"step": 15924
|
27877 |
+
},
|
27878 |
+
{
|
27879 |
+
"epoch": 0.79,
|
27880 |
+
"grad_norm": 2.1206881999969482,
|
27881 |
+
"learning_rate": 1.0660111665596127e-05,
|
27882 |
+
"loss": 5.4496,
|
27883 |
+
"step": 15928
|
27884 |
+
},
|
27885 |
+
{
|
27886 |
+
"epoch": 0.79,
|
27887 |
+
"grad_norm": 2.230121374130249,
|
27888 |
+
"learning_rate": 1.0650229754434509e-05,
|
27889 |
+
"loss": 5.3782,
|
27890 |
+
"step": 15932
|
27891 |
+
},
|
27892 |
+
{
|
27893 |
+
"epoch": 0.79,
|
27894 |
+
"grad_norm": 1.9307682514190674,
|
27895 |
+
"learning_rate": 1.0640347843272889e-05,
|
27896 |
+
"loss": 5.4723,
|
27897 |
+
"step": 15936
|
27898 |
+
},
|
27899 |
+
{
|
27900 |
+
"epoch": 0.79,
|
27901 |
+
"grad_norm": 2.326744794845581,
|
27902 |
+
"learning_rate": 1.0630465932111271e-05,
|
27903 |
+
"loss": 5.4944,
|
27904 |
+
"step": 15940
|
27905 |
+
},
|
27906 |
+
{
|
27907 |
+
"epoch": 0.79,
|
27908 |
+
"grad_norm": 2.0075511932373047,
|
27909 |
+
"learning_rate": 1.0620584020949651e-05,
|
27910 |
+
"loss": 5.4213,
|
27911 |
+
"step": 15944
|
27912 |
+
},
|
27913 |
+
{
|
27914 |
+
"epoch": 0.79,
|
27915 |
+
"grad_norm": 2.0154924392700195,
|
27916 |
+
"learning_rate": 1.0610702109788034e-05,
|
27917 |
+
"loss": 5.5201,
|
27918 |
+
"step": 15948
|
27919 |
+
},
|
27920 |
+
{
|
27921 |
+
"epoch": 0.79,
|
27922 |
+
"grad_norm": 2.3839402198791504,
|
27923 |
+
"learning_rate": 1.0600820198626416e-05,
|
27924 |
+
"loss": 5.5373,
|
27925 |
+
"step": 15952
|
27926 |
+
},
|
27927 |
+
{
|
27928 |
+
"epoch": 0.79,
|
27929 |
+
"grad_norm": 1.8833853006362915,
|
27930 |
+
"learning_rate": 1.0590938287464796e-05,
|
27931 |
+
"loss": 5.5104,
|
27932 |
+
"step": 15956
|
27933 |
+
},
|
27934 |
+
{
|
27935 |
+
"epoch": 0.79,
|
27936 |
+
"grad_norm": 2.0425021648406982,
|
27937 |
+
"learning_rate": 1.0581056376303178e-05,
|
27938 |
+
"loss": 5.5934,
|
27939 |
+
"step": 15960
|
27940 |
+
},
|
27941 |
+
{
|
27942 |
+
"epoch": 0.79,
|
27943 |
+
"grad_norm": 1.8929284811019897,
|
27944 |
+
"learning_rate": 1.0571174465141558e-05,
|
27945 |
+
"loss": 5.5095,
|
27946 |
+
"step": 15964
|
27947 |
+
},
|
27948 |
+
{
|
27949 |
+
"epoch": 0.79,
|
27950 |
+
"grad_norm": 1.8911198377609253,
|
27951 |
+
"learning_rate": 1.056129255397994e-05,
|
27952 |
+
"loss": 5.4591,
|
27953 |
+
"step": 15968
|
27954 |
+
},
|
27955 |
+
{
|
27956 |
+
"epoch": 0.79,
|
27957 |
+
"grad_norm": 1.856986403465271,
|
27958 |
+
"learning_rate": 1.055141064281832e-05,
|
27959 |
+
"loss": 5.35,
|
27960 |
+
"step": 15972
|
27961 |
+
},
|
27962 |
+
{
|
27963 |
+
"epoch": 0.79,
|
27964 |
+
"grad_norm": 1.8045761585235596,
|
27965 |
+
"learning_rate": 1.0541528731656703e-05,
|
27966 |
+
"loss": 5.5255,
|
27967 |
+
"step": 15976
|
27968 |
+
},
|
27969 |
+
{
|
27970 |
+
"epoch": 0.79,
|
27971 |
+
"grad_norm": 1.9339076280593872,
|
27972 |
+
"learning_rate": 1.0531646820495085e-05,
|
27973 |
+
"loss": 5.4632,
|
27974 |
+
"step": 15980
|
27975 |
+
},
|
27976 |
+
{
|
27977 |
+
"epoch": 0.79,
|
27978 |
+
"grad_norm": 2.076307535171509,
|
27979 |
+
"learning_rate": 1.0521764909333465e-05,
|
27980 |
+
"loss": 5.453,
|
27981 |
+
"step": 15984
|
27982 |
+
},
|
27983 |
+
{
|
27984 |
+
"epoch": 0.79,
|
27985 |
+
"grad_norm": 2.0790350437164307,
|
27986 |
+
"learning_rate": 1.0511882998171847e-05,
|
27987 |
+
"loss": 5.4504,
|
27988 |
+
"step": 15988
|
27989 |
+
},
|
27990 |
+
{
|
27991 |
+
"epoch": 0.79,
|
27992 |
+
"grad_norm": 2.015014171600342,
|
27993 |
+
"learning_rate": 1.0502001087010228e-05,
|
27994 |
+
"loss": 5.4082,
|
27995 |
+
"step": 15992
|
27996 |
+
},
|
27997 |
+
{
|
27998 |
+
"epoch": 0.79,
|
27999 |
+
"grad_norm": 1.9248104095458984,
|
28000 |
+
"learning_rate": 1.049211917584861e-05,
|
28001 |
+
"loss": 5.5261,
|
28002 |
+
"step": 15996
|
28003 |
+
},
|
28004 |
+
{
|
28005 |
+
"epoch": 0.79,
|
28006 |
+
"grad_norm": 1.9845161437988281,
|
28007 |
+
"learning_rate": 1.048223726468699e-05,
|
28008 |
+
"loss": 5.495,
|
28009 |
+
"step": 16000
|
28010 |
+
},
|
28011 |
+
{
|
28012 |
+
"epoch": 0.79,
|
28013 |
+
"grad_norm": 2.0011472702026367,
|
28014 |
+
"learning_rate": 1.0472355353525372e-05,
|
28015 |
+
"loss": 5.3597,
|
28016 |
+
"step": 16004
|
28017 |
+
},
|
28018 |
+
{
|
28019 |
+
"epoch": 0.79,
|
28020 |
+
"grad_norm": 2.0203723907470703,
|
28021 |
+
"learning_rate": 1.0462473442363753e-05,
|
28022 |
+
"loss": 5.5997,
|
28023 |
+
"step": 16008
|
28024 |
+
},
|
28025 |
+
{
|
28026 |
+
"epoch": 0.79,
|
28027 |
+
"grad_norm": 1.9304739236831665,
|
28028 |
+
"learning_rate": 1.0452591531202136e-05,
|
28029 |
+
"loss": 5.4136,
|
28030 |
+
"step": 16012
|
28031 |
+
},
|
28032 |
+
{
|
28033 |
+
"epoch": 0.79,
|
28034 |
+
"grad_norm": 2.1026952266693115,
|
28035 |
+
"learning_rate": 1.0442709620040517e-05,
|
28036 |
+
"loss": 5.5538,
|
28037 |
+
"step": 16016
|
28038 |
+
},
|
28039 |
+
{
|
28040 |
+
"epoch": 0.79,
|
28041 |
+
"grad_norm": 1.9498411417007446,
|
28042 |
+
"learning_rate": 1.0432827708878897e-05,
|
28043 |
+
"loss": 5.5066,
|
28044 |
+
"step": 16020
|
28045 |
+
},
|
28046 |
+
{
|
28047 |
+
"epoch": 0.79,
|
28048 |
+
"grad_norm": 2.043534517288208,
|
28049 |
+
"learning_rate": 1.0422945797717279e-05,
|
28050 |
+
"loss": 5.4616,
|
28051 |
+
"step": 16024
|
28052 |
+
},
|
28053 |
+
{
|
28054 |
+
"epoch": 0.79,
|
28055 |
+
"grad_norm": 1.9264411926269531,
|
28056 |
+
"learning_rate": 1.041306388655566e-05,
|
28057 |
+
"loss": 5.5081,
|
28058 |
+
"step": 16028
|
28059 |
+
},
|
28060 |
+
{
|
28061 |
+
"epoch": 0.79,
|
28062 |
+
"grad_norm": 1.9634205102920532,
|
28063 |
+
"learning_rate": 1.0403181975394042e-05,
|
28064 |
+
"loss": 5.436,
|
28065 |
+
"step": 16032
|
28066 |
+
},
|
28067 |
+
{
|
28068 |
+
"epoch": 0.79,
|
28069 |
+
"grad_norm": 1.9793167114257812,
|
28070 |
+
"learning_rate": 1.0393300064232422e-05,
|
28071 |
+
"loss": 5.4457,
|
28072 |
+
"step": 16036
|
28073 |
+
},
|
28074 |
+
{
|
28075 |
+
"epoch": 0.79,
|
28076 |
+
"grad_norm": 2.0029139518737793,
|
28077 |
+
"learning_rate": 1.0383418153070806e-05,
|
28078 |
+
"loss": 5.5097,
|
28079 |
+
"step": 16040
|
28080 |
+
},
|
28081 |
+
{
|
28082 |
+
"epoch": 0.79,
|
28083 |
+
"grad_norm": 1.93049156665802,
|
28084 |
+
"learning_rate": 1.0373536241909186e-05,
|
28085 |
+
"loss": 5.5088,
|
28086 |
+
"step": 16044
|
28087 |
+
},
|
28088 |
+
{
|
28089 |
+
"epoch": 0.79,
|
28090 |
+
"grad_norm": 1.9566268920898438,
|
28091 |
+
"learning_rate": 1.0363654330747566e-05,
|
28092 |
+
"loss": 5.509,
|
28093 |
+
"step": 16048
|
28094 |
+
},
|
28095 |
+
{
|
28096 |
+
"epoch": 0.79,
|
28097 |
+
"grad_norm": 1.9150116443634033,
|
28098 |
+
"learning_rate": 1.0353772419585948e-05,
|
28099 |
+
"loss": 5.432,
|
28100 |
+
"step": 16052
|
28101 |
+
},
|
28102 |
+
{
|
28103 |
+
"epoch": 0.79,
|
28104 |
+
"grad_norm": 1.9457064867019653,
|
28105 |
+
"learning_rate": 1.0343890508424329e-05,
|
28106 |
+
"loss": 5.5555,
|
28107 |
+
"step": 16056
|
28108 |
+
},
|
28109 |
+
{
|
28110 |
+
"epoch": 0.79,
|
28111 |
+
"grad_norm": 2.037177562713623,
|
28112 |
+
"learning_rate": 1.0334008597262711e-05,
|
28113 |
+
"loss": 5.4792,
|
28114 |
+
"step": 16060
|
28115 |
+
},
|
28116 |
+
{
|
28117 |
+
"epoch": 0.79,
|
28118 |
+
"grad_norm": 2.0516912937164307,
|
28119 |
+
"learning_rate": 1.0324126686101091e-05,
|
28120 |
+
"loss": 5.5053,
|
28121 |
+
"step": 16064
|
28122 |
+
},
|
28123 |
+
{
|
28124 |
+
"epoch": 0.79,
|
28125 |
+
"grad_norm": 1.7669730186462402,
|
28126 |
+
"learning_rate": 1.0314244774939475e-05,
|
28127 |
+
"loss": 5.3994,
|
28128 |
+
"step": 16068
|
28129 |
+
},
|
28130 |
+
{
|
28131 |
+
"epoch": 0.79,
|
28132 |
+
"grad_norm": 2.1283509731292725,
|
28133 |
+
"learning_rate": 1.0304362863777855e-05,
|
28134 |
+
"loss": 5.466,
|
28135 |
+
"step": 16072
|
28136 |
+
},
|
28137 |
+
{
|
28138 |
+
"epoch": 0.79,
|
28139 |
+
"grad_norm": 1.7946540117263794,
|
28140 |
+
"learning_rate": 1.0294480952616237e-05,
|
28141 |
+
"loss": 5.4215,
|
28142 |
+
"step": 16076
|
28143 |
+
},
|
28144 |
+
{
|
28145 |
+
"epoch": 0.79,
|
28146 |
+
"grad_norm": 2.273894786834717,
|
28147 |
+
"learning_rate": 1.0284599041454618e-05,
|
28148 |
+
"loss": 5.408,
|
28149 |
+
"step": 16080
|
28150 |
+
},
|
28151 |
+
{
|
28152 |
+
"epoch": 0.79,
|
28153 |
+
"grad_norm": 2.055126667022705,
|
28154 |
+
"learning_rate": 1.0274717130292998e-05,
|
28155 |
+
"loss": 5.5304,
|
28156 |
+
"step": 16084
|
28157 |
+
},
|
28158 |
+
{
|
28159 |
+
"epoch": 0.79,
|
28160 |
+
"grad_norm": 2.0089943408966064,
|
28161 |
+
"learning_rate": 1.026483521913138e-05,
|
28162 |
+
"loss": 5.4147,
|
28163 |
+
"step": 16088
|
28164 |
+
},
|
28165 |
+
{
|
28166 |
+
"epoch": 0.8,
|
28167 |
+
"grad_norm": 2.053406000137329,
|
28168 |
+
"learning_rate": 1.025495330796976e-05,
|
28169 |
+
"loss": 5.5624,
|
28170 |
+
"step": 16092
|
28171 |
+
},
|
28172 |
+
{
|
28173 |
+
"epoch": 0.8,
|
28174 |
+
"grad_norm": 1.9701050519943237,
|
28175 |
+
"learning_rate": 1.0245071396808144e-05,
|
28176 |
+
"loss": 5.3615,
|
28177 |
+
"step": 16096
|
28178 |
+
},
|
28179 |
+
{
|
28180 |
+
"epoch": 0.8,
|
28181 |
+
"grad_norm": 1.9590353965759277,
|
28182 |
+
"learning_rate": 1.0235189485646525e-05,
|
28183 |
+
"loss": 5.4707,
|
28184 |
+
"step": 16100
|
28185 |
+
},
|
28186 |
+
{
|
28187 |
+
"epoch": 0.8,
|
28188 |
+
"grad_norm": 2.097073793411255,
|
28189 |
+
"learning_rate": 1.0225307574484907e-05,
|
28190 |
+
"loss": 5.4783,
|
28191 |
+
"step": 16104
|
28192 |
+
},
|
28193 |
+
{
|
28194 |
+
"epoch": 0.8,
|
28195 |
+
"grad_norm": 1.9956692457199097,
|
28196 |
+
"learning_rate": 1.0215425663323287e-05,
|
28197 |
+
"loss": 5.4288,
|
28198 |
+
"step": 16108
|
28199 |
+
},
|
28200 |
+
{
|
28201 |
+
"epoch": 0.8,
|
28202 |
+
"grad_norm": 1.8568942546844482,
|
28203 |
+
"learning_rate": 1.020554375216167e-05,
|
28204 |
+
"loss": 5.5691,
|
28205 |
+
"step": 16112
|
28206 |
+
},
|
28207 |
+
{
|
28208 |
+
"epoch": 0.8,
|
28209 |
+
"grad_norm": 1.9717293977737427,
|
28210 |
+
"learning_rate": 1.019566184100005e-05,
|
28211 |
+
"loss": 5.4763,
|
28212 |
+
"step": 16116
|
28213 |
+
},
|
28214 |
+
{
|
28215 |
+
"epoch": 0.8,
|
28216 |
+
"grad_norm": 2.085775375366211,
|
28217 |
+
"learning_rate": 1.018577992983843e-05,
|
28218 |
+
"loss": 5.5406,
|
28219 |
+
"step": 16120
|
28220 |
+
},
|
28221 |
+
{
|
28222 |
+
"epoch": 0.8,
|
28223 |
+
"grad_norm": 1.9938271045684814,
|
28224 |
+
"learning_rate": 1.0175898018676814e-05,
|
28225 |
+
"loss": 5.52,
|
28226 |
+
"step": 16124
|
28227 |
+
},
|
28228 |
+
{
|
28229 |
+
"epoch": 0.8,
|
28230 |
+
"grad_norm": 2.1408092975616455,
|
28231 |
+
"learning_rate": 1.0166016107515194e-05,
|
28232 |
+
"loss": 5.4718,
|
28233 |
+
"step": 16128
|
28234 |
+
},
|
28235 |
+
{
|
28236 |
+
"epoch": 0.8,
|
28237 |
+
"grad_norm": 2.084689140319824,
|
28238 |
+
"learning_rate": 1.0156134196353576e-05,
|
28239 |
+
"loss": 5.5263,
|
28240 |
+
"step": 16132
|
28241 |
+
},
|
28242 |
+
{
|
28243 |
+
"epoch": 0.8,
|
28244 |
+
"grad_norm": 1.8502254486083984,
|
28245 |
+
"learning_rate": 1.0146252285191956e-05,
|
28246 |
+
"loss": 5.4056,
|
28247 |
+
"step": 16136
|
28248 |
+
},
|
28249 |
+
{
|
28250 |
+
"epoch": 0.8,
|
28251 |
+
"grad_norm": 1.832261323928833,
|
28252 |
+
"learning_rate": 1.0136370374030339e-05,
|
28253 |
+
"loss": 5.4597,
|
28254 |
+
"step": 16140
|
28255 |
+
},
|
28256 |
+
{
|
28257 |
+
"epoch": 0.8,
|
28258 |
+
"grad_norm": 1.9201068878173828,
|
28259 |
+
"learning_rate": 1.0126488462868719e-05,
|
28260 |
+
"loss": 5.5378,
|
28261 |
+
"step": 16144
|
28262 |
+
},
|
28263 |
+
{
|
28264 |
+
"epoch": 0.8,
|
28265 |
+
"grad_norm": 1.8467752933502197,
|
28266 |
+
"learning_rate": 1.0116606551707101e-05,
|
28267 |
+
"loss": 5.4567,
|
28268 |
+
"step": 16148
|
28269 |
+
},
|
28270 |
+
{
|
28271 |
+
"epoch": 0.8,
|
28272 |
+
"grad_norm": 2.1131222248077393,
|
28273 |
+
"learning_rate": 1.0106724640545481e-05,
|
28274 |
+
"loss": 5.35,
|
28275 |
+
"step": 16152
|
28276 |
+
},
|
28277 |
+
{
|
28278 |
+
"epoch": 0.8,
|
28279 |
+
"grad_norm": 2.0209567546844482,
|
28280 |
+
"learning_rate": 1.0096842729383863e-05,
|
28281 |
+
"loss": 5.5339,
|
28282 |
+
"step": 16156
|
28283 |
+
},
|
28284 |
+
{
|
28285 |
+
"epoch": 0.8,
|
28286 |
+
"grad_norm": 2.1684625148773193,
|
28287 |
+
"learning_rate": 1.0086960818222245e-05,
|
28288 |
+
"loss": 5.4267,
|
28289 |
+
"step": 16160
|
28290 |
+
},
|
28291 |
+
{
|
28292 |
+
"epoch": 0.8,
|
28293 |
+
"grad_norm": 2.1010987758636475,
|
28294 |
+
"learning_rate": 1.0077078907060626e-05,
|
28295 |
+
"loss": 5.4042,
|
28296 |
+
"step": 16164
|
28297 |
+
},
|
28298 |
+
{
|
28299 |
+
"epoch": 0.8,
|
28300 |
+
"grad_norm": 1.9701296091079712,
|
28301 |
+
"learning_rate": 1.0067196995899008e-05,
|
28302 |
+
"loss": 5.6227,
|
28303 |
+
"step": 16168
|
28304 |
+
},
|
28305 |
+
{
|
28306 |
+
"epoch": 0.8,
|
28307 |
+
"grad_norm": 1.8271695375442505,
|
28308 |
+
"learning_rate": 1.0057315084737388e-05,
|
28309 |
+
"loss": 5.4151,
|
28310 |
+
"step": 16172
|
28311 |
+
},
|
28312 |
+
{
|
28313 |
+
"epoch": 0.8,
|
28314 |
+
"grad_norm": 2.2199959754943848,
|
28315 |
+
"learning_rate": 1.004743317357577e-05,
|
28316 |
+
"loss": 5.5352,
|
28317 |
+
"step": 16176
|
28318 |
+
},
|
28319 |
+
{
|
28320 |
+
"epoch": 0.8,
|
28321 |
+
"grad_norm": 2.2069809436798096,
|
28322 |
+
"learning_rate": 1.003755126241415e-05,
|
28323 |
+
"loss": 5.4987,
|
28324 |
+
"step": 16180
|
28325 |
+
},
|
28326 |
+
{
|
28327 |
+
"epoch": 0.8,
|
28328 |
+
"grad_norm": 2.027318239212036,
|
28329 |
+
"learning_rate": 1.0027669351252533e-05,
|
28330 |
+
"loss": 5.5284,
|
28331 |
+
"step": 16184
|
28332 |
+
},
|
28333 |
+
{
|
28334 |
+
"epoch": 0.8,
|
28335 |
+
"grad_norm": 1.9697614908218384,
|
28336 |
+
"learning_rate": 1.0017787440090915e-05,
|
28337 |
+
"loss": 5.5906,
|
28338 |
+
"step": 16188
|
28339 |
+
},
|
28340 |
+
{
|
28341 |
+
"epoch": 0.8,
|
28342 |
+
"grad_norm": 2.252358913421631,
|
28343 |
+
"learning_rate": 1.0007905528929295e-05,
|
28344 |
+
"loss": 5.4911,
|
28345 |
+
"step": 16192
|
28346 |
}
|
28347 |
],
|
28348 |
"logging_steps": 4,
|
|
|
28350 |
"num_input_tokens_seen": 0,
|
28351 |
"num_train_epochs": 1,
|
28352 |
"save_steps": 2024,
|
28353 |
+
"total_flos": 6.820471143346995e+16,
|
28354 |
"train_batch_size": 8,
|
28355 |
"trial_name": null,
|
28356 |
"trial_params": null
|
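As a quick sanity check on this checkpoint, here is a minimal sketch (assuming the last-checkpoint/ layout shown in this commit and the trainer_state.json fields logged above) of how one might load the state and confirm the linear learning-rate decay visible in the records:

import json

# Load the trainer state saved alongside this checkpoint
# (path assumed from the file layout in this commit).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]
last = history[-1]
print(f"step {last['step']}: loss={last['loss']}, lr={last['learning_rate']}")

# The logged learning rate falls by a constant amount every 4 steps
# ("logging_steps": 4), consistent with a linear decay schedule.
deltas = [
    earlier["learning_rate"] - later["learning_rate"]
    for earlier, later in zip(history[-10:-1], history[-9:])
]
print("lr decrease per logging interval:", deltas)

Note that global_step 16192 is exactly 8 x "save_steps" (2024), which is why this checkpoint lands on a save boundary.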