Training in progress, step 488, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +620 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2900591d81d085d6f1a16d411604f9a3883bb62f7ce4c7d8c00ac8e3b37106be
 size 140815952

 version https://git-lfs.github.com/spec/v1
+oid sha256:da6cb8e3ceb59865787e3be19146ecfe8f090e00a5e13cdf927c08d575198fae
 size 140815952

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e0f855f277f1cfe633417c3619eeb88bc0fcf9e01649d46a606ce490c582477
 size 71878996

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4957b732848f725fe8e97d4887a86c7374d04e7a61a5f57433336b19c5f05d6
 size 71878996

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c86ead3108943626e936579b25ad4d0313259ed7a62597799a2f43e75bbd8bf5
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:41b8611fadb80f1bdbbbba5e4fc8c3638269de0aed2e0197a291cadd4ef1f1ad
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4890e18ed32a9392f71679e2e5ba429d90989986c0245d64b82048dee600522d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e7fb2e4e3f7c67f2979e17d634f1bb48e2792b76f31dccbd7feec15021f43a0
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.813357412815094,
   "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.8205128205128205,
   "eval_steps": 100,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2847,6 +2847,622 @@
       "eval_samples_per_second": 58.256,
       "eval_steps_per_second": 14.599,
       "step": 400
     }
   ],
   "logging_steps": 1,
@@ -2870,12 +3486,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.09139297271808e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.813357412815094,
   "best_model_checkpoint": "miner_id_24/checkpoint-400",
+  "epoch": 1.001025641025641,
   "eval_steps": 100,
+  "global_step": 488,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 58.256,
       "eval_steps_per_second": 14.599,
       "step": 400
+    },
+    {
+      "epoch": 0.8225641025641026,
+      "grad_norm": 2.1067893505096436,
+      "learning_rate": 8.287193065856935e-06,
+      "loss": 0.857,
+      "step": 401
+    },
+    {
+      "epoch": 0.8246153846153846,
+      "grad_norm": 2.872138261795044,
+      "learning_rate": 8.103069939070945e-06,
+      "loss": 0.9339,
+      "step": 402
+    },
+    {
+      "epoch": 0.8266666666666667,
+      "grad_norm": 2.640960216522217,
+      "learning_rate": 7.920834754120304e-06,
+      "loss": 0.7526,
+      "step": 403
+    },
+    {
+      "epoch": 0.8287179487179487,
+      "grad_norm": 2.455273389816284,
+      "learning_rate": 7.740495722810271e-06,
+      "loss": 0.8467,
+      "step": 404
+    },
+    {
+      "epoch": 0.8307692307692308,
+      "grad_norm": 3.302844285964966,
+      "learning_rate": 7.562060971502383e-06,
+      "loss": 0.7286,
+      "step": 405
+    },
+    {
+      "epoch": 0.8328205128205128,
+      "grad_norm": 2.6610546112060547,
+      "learning_rate": 7.385538540748327e-06,
+      "loss": 0.8152,
+      "step": 406
+    },
+    {
+      "epoch": 0.8348717948717949,
+      "grad_norm": 2.261064291000366,
+      "learning_rate": 7.21093638492763e-06,
+      "loss": 0.6278,
+      "step": 407
+    },
+    {
+      "epoch": 0.8369230769230769,
+      "grad_norm": 3.7655062675476074,
+      "learning_rate": 7.038262371889159e-06,
+      "loss": 0.8202,
+      "step": 408
+    },
+    {
+      "epoch": 0.838974358974359,
+      "grad_norm": 2.8197319507598877,
+      "learning_rate": 6.867524282596655e-06,
+      "loss": 0.8749,
+      "step": 409
+    },
+    {
+      "epoch": 0.841025641025641,
+      "grad_norm": 2.7082679271698,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.7821,
+      "step": 410
+    },
+    {
+      "epoch": 0.8430769230769231,
+      "grad_norm": 2.2520222663879395,
+      "learning_rate": 6.531886562578859e-06,
+      "loss": 0.5002,
+      "step": 411
+    },
+    {
+      "epoch": 0.8451282051282051,
+      "grad_norm": 2.5191712379455566,
+      "learning_rate": 6.367002056219284e-06,
+      "loss": 0.7014,
+      "step": 412
+    },
+    {
+      "epoch": 0.8471794871794872,
+      "grad_norm": 5.02801513671875,
+      "learning_rate": 6.204083721655607e-06,
+      "loss": 0.8951,
+      "step": 413
+    },
+    {
+      "epoch": 0.8492307692307692,
+      "grad_norm": 2.9786057472229004,
+      "learning_rate": 6.043138900245277e-06,
+      "loss": 0.9056,
+      "step": 414
+    },
+    {
+      "epoch": 0.8512820512820513,
+      "grad_norm": 2.791360378265381,
+      "learning_rate": 5.884174844416102e-06,
+      "loss": 0.7512,
+      "step": 415
+    },
+    {
+      "epoch": 0.8533333333333334,
+      "grad_norm": 2.5517685413360596,
+      "learning_rate": 5.727198717339511e-06,
+      "loss": 0.7028,
+      "step": 416
+    },
+    {
+      "epoch": 0.8553846153846154,
+      "grad_norm": 2.8642799854278564,
+      "learning_rate": 5.572217592607687e-06,
+      "loss": 0.8472,
+      "step": 417
+    },
+    {
+      "epoch": 0.8574358974358974,
+      "grad_norm": 2.9429116249084473,
+      "learning_rate": 5.41923845391486e-06,
+      "loss": 0.5711,
+      "step": 418
+    },
+    {
+      "epoch": 0.8594871794871795,
+      "grad_norm": 2.486117124557495,
+      "learning_rate": 5.2682681947426375e-06,
+      "loss": 0.5636,
+      "step": 419
+    },
+    {
+      "epoch": 0.8615384615384616,
+      "grad_norm": 3.1933610439300537,
+      "learning_rate": 5.1193136180493095e-06,
+      "loss": 0.6232,
+      "step": 420
+    },
+    {
+      "epoch": 0.8635897435897436,
+      "grad_norm": 3.0535292625427246,
+      "learning_rate": 4.97238143596333e-06,
+      "loss": 0.7645,
+      "step": 421
+    },
+    {
+      "epoch": 0.8656410256410256,
+      "grad_norm": 2.3928494453430176,
+      "learning_rate": 4.827478269480895e-06,
+      "loss": 0.4642,
+      "step": 422
+    },
+    {
+      "epoch": 0.8676923076923077,
+      "grad_norm": 3.066641330718994,
+      "learning_rate": 4.684610648167503e-06,
+      "loss": 0.8899,
+      "step": 423
+    },
+    {
+      "epoch": 0.8697435897435898,
+      "grad_norm": 2.6845340728759766,
+      "learning_rate": 4.54378500986381e-06,
+      "loss": 0.6358,
+      "step": 424
+    },
+    {
+      "epoch": 0.8717948717948718,
+      "grad_norm": 3.794255495071411,
+      "learning_rate": 4.405007700395497e-06,
+      "loss": 0.7231,
+      "step": 425
+    },
+    {
+      "epoch": 0.8738461538461538,
+      "grad_norm": 3.2792623043060303,
+      "learning_rate": 4.268284973287273e-06,
+      "loss": 0.6754,
+      "step": 426
+    },
+    {
+      "epoch": 0.8758974358974358,
+      "grad_norm": 4.019685745239258,
+      "learning_rate": 4.133622989481145e-06,
+      "loss": 0.8601,
+      "step": 427
+    },
+    {
+      "epoch": 0.877948717948718,
+      "grad_norm": 5.093227386474609,
+      "learning_rate": 4.001027817058789e-06,
+      "loss": 0.9993,
+      "step": 428
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 3.0464017391204834,
+      "learning_rate": 3.870505430968069e-06,
+      "loss": 0.6586,
+      "step": 429
+    },
+    {
+      "epoch": 0.882051282051282,
+      "grad_norm": 3.241689920425415,
+      "learning_rate": 3.7420617127538248e-06,
+      "loss": 0.7764,
+      "step": 430
+    },
+    {
+      "epoch": 0.884102564102564,
+      "grad_norm": 3.573296546936035,
+      "learning_rate": 3.615702450292857e-06,
+      "loss": 0.7413,
+      "step": 431
+    },
+    {
+      "epoch": 0.8861538461538462,
+      "grad_norm": 3.759922742843628,
+      "learning_rate": 3.4914333375330898e-06,
+      "loss": 0.7581,
+      "step": 432
+    },
+    {
+      "epoch": 0.8882051282051282,
+      "grad_norm": 3.483435869216919,
+      "learning_rate": 3.369259974236988e-06,
+      "loss": 0.5884,
+      "step": 433
+    },
+    {
+      "epoch": 0.8902564102564102,
+      "grad_norm": 3.6014602184295654,
+      "learning_rate": 3.249187865729264e-06,
+      "loss": 0.7147,
+      "step": 434
+    },
+    {
+      "epoch": 0.8923076923076924,
+      "grad_norm": 3.7031497955322266,
+      "learning_rate": 3.1312224226487442e-06,
+      "loss": 0.7788,
+      "step": 435
+    },
+    {
+      "epoch": 0.8943589743589744,
+      "grad_norm": 3.692850351333618,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 0.7269,
+      "step": 436
+    },
+    {
+      "epoch": 0.8964102564102564,
+      "grad_norm": 3.5270211696624756,
+      "learning_rate": 2.901632700436757e-06,
+      "loss": 0.7998,
+      "step": 437
+    },
+    {
+      "epoch": 0.8984615384615384,
+      "grad_norm": 3.858431577682495,
+      "learning_rate": 2.790018766980773e-06,
+      "loss": 0.8085,
+      "step": 438
+    },
+    {
+      "epoch": 0.9005128205128206,
+      "grad_norm": 3.0940706729888916,
+      "learning_rate": 2.680532189836732e-06,
+      "loss": 0.6352,
+      "step": 439
+    },
+    {
+      "epoch": 0.9025641025641026,
+      "grad_norm": 4.406203269958496,
+      "learning_rate": 2.573177902642726e-06,
+      "loss": 0.8474,
+      "step": 440
+    },
+    {
+      "epoch": 0.9046153846153846,
+      "grad_norm": 3.506132125854492,
+      "learning_rate": 2.467960742952463e-06,
+      "loss": 0.5986,
+      "step": 441
+    },
+    {
+      "epoch": 0.9066666666666666,
+      "grad_norm": 4.049818992614746,
+      "learning_rate": 2.3648854520173237e-06,
+      "loss": 0.821,
+      "step": 442
+    },
+    {
+      "epoch": 0.9087179487179488,
+      "grad_norm": 3.5991644859313965,
+      "learning_rate": 2.2639566745727205e-06,
+      "loss": 0.8375,
+      "step": 443
+    },
+    {
+      "epoch": 0.9107692307692308,
+      "grad_norm": 4.406035900115967,
+      "learning_rate": 2.1651789586287442e-06,
+      "loss": 0.797,
+      "step": 444
+    },
+    {
+      "epoch": 0.9128205128205128,
+      "grad_norm": 5.088395595550537,
+      "learning_rate": 2.068556755265272e-06,
+      "loss": 0.8375,
+      "step": 445
+    },
+    {
+      "epoch": 0.9148717948717948,
+      "grad_norm": 7.348681926727295,
+      "learning_rate": 1.974094418431388e-06,
+      "loss": 0.7428,
+      "step": 446
+    },
+    {
+      "epoch": 0.916923076923077,
+      "grad_norm": 5.225501537322998,
+      "learning_rate": 1.8817962047491699e-06,
+      "loss": 0.8446,
+      "step": 447
+    },
+    {
+      "epoch": 0.918974358974359,
+      "grad_norm": 5.816100120544434,
+      "learning_rate": 1.7916662733218847e-06,
+      "loss": 1.1185,
+      "step": 448
+    },
+    {
+      "epoch": 0.921025641025641,
+      "grad_norm": 6.379942417144775,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 1.1595,
+      "step": 449
+    },
+    {
+      "epoch": 0.9230769230769231,
+      "grad_norm": 11.633496284484863,
+      "learning_rate": 1.6179274049310966e-06,
+      "loss": 1.8986,
+      "step": 450
+    },
+    {
+      "epoch": 0.9251282051282051,
+      "grad_norm": 2.403395175933838,
+      "learning_rate": 1.5343262969153783e-06,
+      "loss": 0.8987,
+      "step": 451
+    },
+    {
+      "epoch": 0.9271794871794872,
+      "grad_norm": 2.5767834186553955,
+      "learning_rate": 1.4529091286973995e-06,
+      "loss": 0.7343,
+      "step": 452
+    },
+    {
+      "epoch": 0.9292307692307692,
+      "grad_norm": 2.610196352005005,
+      "learning_rate": 1.3736795690633354e-06,
+      "loss": 0.808,
+      "step": 453
+    },
+    {
+      "epoch": 0.9312820512820513,
+      "grad_norm": 3.0826358795166016,
+      "learning_rate": 1.2966411882222696e-06,
+      "loss": 0.8493,
+      "step": 454
+    },
+    {
+      "epoch": 0.9333333333333333,
+      "grad_norm": 2.933225154876709,
+      "learning_rate": 1.2217974576453073e-06,
+      "loss": 0.7868,
+      "step": 455
+    },
+    {
+      "epoch": 0.9353846153846154,
+      "grad_norm": 2.5828311443328857,
+      "learning_rate": 1.1491517499091498e-06,
+      "loss": 0.6543,
+      "step": 456
+    },
+    {
+      "epoch": 0.9374358974358974,
+      "grad_norm": 2.710909605026245,
+      "learning_rate": 1.0787073385441048e-06,
+      "loss": 0.6686,
+      "step": 457
+    },
+    {
+      "epoch": 0.9394871794871795,
+      "grad_norm": 2.428332567214966,
+      "learning_rate": 1.0104673978866164e-06,
+      "loss": 0.6812,
+      "step": 458
+    },
+    {
+      "epoch": 0.9415384615384615,
+      "grad_norm": 3.3245909214019775,
+      "learning_rate": 9.44435002936167e-07,
+      "loss": 0.8798,
+      "step": 459
+    },
+    {
+      "epoch": 0.9435897435897436,
+      "grad_norm": 3.67254376411438,
+      "learning_rate": 8.806131292167618e-07,
+      "loss": 0.8829,
+      "step": 460
+    },
+    {
+      "epoch": 0.9456410256410256,
+      "grad_norm": 2.8118197917938232,
+      "learning_rate": 8.190046526428242e-07,
+      "loss": 0.6889,
+      "step": 461
+    },
+    {
+      "epoch": 0.9476923076923077,
+      "grad_norm": 2.5844786167144775,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 0.8317,
+      "step": 462
+    },
+    {
+      "epoch": 0.9497435897435897,
+      "grad_norm": 3.378085136413574,
+      "learning_rate": 7.024388957680705e-07,
+      "loss": 0.7873,
+      "step": 463
+    },
+    {
+      "epoch": 0.9517948717948718,
+      "grad_norm": 3.895148992538452,
+      "learning_rate": 6.474868681043578e-07,
+      "loss": 0.9465,
+      "step": 464
+    },
+    {
+      "epoch": 0.9538461538461539,
+      "grad_norm": 2.4344851970672607,
+      "learning_rate": 5.947587426236078e-07,
+      "loss": 0.5724,
+      "step": 465
+    },
+    {
+      "epoch": 0.9558974358974359,
+      "grad_norm": 3.014648199081421,
+      "learning_rate": 5.442568953384186e-07,
+      "loss": 0.72,
+      "step": 466
+    },
+    {
+      "epoch": 0.9579487179487179,
+      "grad_norm": 3.841538429260254,
+      "learning_rate": 4.959836019417963e-07,
+      "loss": 0.7759,
+      "step": 467
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 2.5754928588867188,
+      "learning_rate": 4.4994103770457653e-07,
+      "loss": 0.579,
+      "step": 468
+    },
+    {
+      "epoch": 0.9620512820512821,
+      "grad_norm": 3.4755969047546387,
+      "learning_rate": 4.06131277377414e-07,
+      "loss": 0.8353,
+      "step": 469
+    },
+    {
+      "epoch": 0.9641025641025641,
+      "grad_norm": 3.1296160221099854,
+      "learning_rate": 3.6455629509730136e-07,
+      "loss": 0.8424,
+      "step": 470
+    },
+    {
+      "epoch": 0.9661538461538461,
+      "grad_norm": 2.8293471336364746,
+      "learning_rate": 3.2521796429859084e-07,
+      "loss": 0.6852,
+      "step": 471
+    },
+    {
+      "epoch": 0.9682051282051282,
+      "grad_norm": 3.9338464736938477,
+      "learning_rate": 2.8811805762860576e-07,
+      "loss": 0.7015,
+      "step": 472
+    },
+    {
+      "epoch": 0.9702564102564103,
+      "grad_norm": 3.2302651405334473,
+      "learning_rate": 2.532582468677214e-07,
+      "loss": 0.7174,
+      "step": 473
+    },
+    {
+      "epoch": 0.9723076923076923,
+      "grad_norm": 3.1608636379241943,
+      "learning_rate": 2.206401028540639e-07,
+      "loss": 0.6316,
+      "step": 474
+    },
+    {
+      "epoch": 0.9743589743589743,
+      "grad_norm": 3.069204807281494,
+      "learning_rate": 1.9026509541272275e-07,
+      "loss": 0.6748,
+      "step": 475
+    },
+    {
+      "epoch": 0.9764102564102564,
+      "grad_norm": 3.582129955291748,
+      "learning_rate": 1.6213459328950352e-07,
+      "loss": 0.7998,
+      "step": 476
+    },
+    {
+      "epoch": 0.9784615384615385,
+      "grad_norm": 3.0587315559387207,
+      "learning_rate": 1.3624986408924956e-07,
+      "loss": 0.5679,
+      "step": 477
+    },
+    {
+      "epoch": 0.9805128205128205,
+      "grad_norm": 4.51352071762085,
+      "learning_rate": 1.1261207421874309e-07,
+      "loss": 0.7706,
+      "step": 478
+    },
+    {
+      "epoch": 0.9825641025641025,
+      "grad_norm": 3.1249945163726807,
+      "learning_rate": 9.12222888341252e-08,
+      "loss": 0.64,
+      "step": 479
+    },
+    {
+      "epoch": 0.9846153846153847,
+      "grad_norm": 3.75665020942688,
+      "learning_rate": 7.208147179291192e-08,
+      "loss": 0.9884,
+      "step": 480
+    },
+    {
+      "epoch": 0.9866666666666667,
+      "grad_norm": 3.27207088470459,
+      "learning_rate": 5.5190485610534525e-08,
+      "loss": 0.4883,
+      "step": 481
+    },
+    {
+      "epoch": 0.9887179487179487,
+      "grad_norm": 4.946873664855957,
+      "learning_rate": 4.055009142152067e-08,
+      "loss": 0.8809,
+      "step": 482
+    },
+    {
+      "epoch": 0.9907692307692307,
+      "grad_norm": 3.621314287185669,
+      "learning_rate": 2.8160948945138434e-08,
+      "loss": 0.8451,
+      "step": 483
+    },
+    {
+      "epoch": 0.9928205128205129,
+      "grad_norm": 3.5091235637664795,
+      "learning_rate": 1.802361645573125e-08,
+      "loss": 0.7231,
+      "step": 484
+    },
+    {
+      "epoch": 0.9948717948717949,
+      "grad_norm": 3.9179797172546387,
+      "learning_rate": 1.0138550757493592e-08,
+      "loss": 0.6897,
+      "step": 485
+    },
+    {
+      "epoch": 0.9969230769230769,
+      "grad_norm": 5.395089149475098,
+      "learning_rate": 4.506107163948503e-09,
+      "loss": 1.0574,
+      "step": 486
+    },
+    {
+      "epoch": 0.9989743589743589,
+      "grad_norm": 6.939630508422852,
+      "learning_rate": 1.1265394818993358e-09,
+      "loss": 1.0886,
+      "step": 487
+    },
+    {
+      "epoch": 1.001025641025641,
+      "grad_norm": 24.829050064086914,
+      "learning_rate": 0.0,
+      "loss": 3.5024,
+      "step": 488
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.771257911640064e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null