Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1ef7d6881806f0b8b27333ed3b6fa94f07c99aef287cde0bf2eb26190a4ac9f
 size 50624

 version https://git-lfs.github.com/spec/v1
+oid sha256:816d7a3288d3ca3c798c5de0f911ff182c74e7e74d6fe14fec34118e28d539bf
 size 50624

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d137b203e3a2d292473cc4eeb0c30946cc28785f2abea88ce24d66c41c39ed4
 size 118090

 version https://git-lfs.github.com/spec/v1
+oid sha256:796cb294f93321556dc0d95f894bbb4b2cb45162af5d7202ff077b3c0fd7fac1
 size 118090

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ead5793da7e49884c59c189a0521589493532c89c20ab524c83962ae91b99ff3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b3d33baca34951821fbb146f445dae7e7fd1c0109242ab842bc5af7397cf536
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1900bd8021f13c38b942ed30aea6e2cea1b47664e4ce28d0276b142334732307
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2c4a11c3ec7ace2e963dc6e2b0b5b6372cc0250cefb36d5f7289475908638cb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.382917404174805,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.17738359201773837,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 378.314,
       "eval_steps_per_second": 94.778,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6001705746432.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.382617950439453,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.35476718403547675,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 378.314,
       "eval_steps_per_second": 94.778,
       "step": 50
+    },
+    {
+      "epoch": 0.18093126385809313,
+      "grad_norm": 0.1157335638999939,
+      "learning_rate": 2.847932752400164e-06,
+      "loss": 10.3827,
+      "step": 51
+    },
+    {
+      "epoch": 0.1844789356984479,
+      "grad_norm": 0.11048056185245514,
+      "learning_rate": 2.761321158169134e-06,
+      "loss": 10.3812,
+      "step": 52
+    },
+    {
+      "epoch": 0.18802660753880265,
+      "grad_norm": 0.11069793999195099,
+      "learning_rate": 2.6743911843603134e-06,
+      "loss": 10.3823,
+      "step": 53
+    },
+    {
+      "epoch": 0.19157427937915744,
+      "grad_norm": 0.11067813634872437,
+      "learning_rate": 2.587248741756253e-06,
+      "loss": 10.3808,
+      "step": 54
+    },
+    {
+      "epoch": 0.1951219512195122,
+      "grad_norm": 0.07049928605556488,
+      "learning_rate": 2.5e-06,
+      "loss": 10.3763,
+      "step": 55
+    },
+    {
+      "epoch": 0.19866962305986696,
+      "grad_norm": 0.06557988375425339,
+      "learning_rate": 2.4127512582437486e-06,
+      "loss": 10.3784,
+      "step": 56
+    },
+    {
+      "epoch": 0.20221729490022172,
+      "grad_norm": 0.0957433357834816,
+      "learning_rate": 2.325608815639687e-06,
+      "loss": 10.383,
+      "step": 57
+    },
+    {
+      "epoch": 0.2057649667405765,
+      "grad_norm": 0.0918344035744667,
+      "learning_rate": 2.238678841830867e-06,
+      "loss": 10.3896,
+      "step": 58
+    },
+    {
+      "epoch": 0.20931263858093127,
+      "grad_norm": 0.09141485393047333,
+      "learning_rate": 2.1520672475998374e-06,
+      "loss": 10.3836,
+      "step": 59
+    },
+    {
+      "epoch": 0.21286031042128603,
+      "grad_norm": 0.10251690447330475,
+      "learning_rate": 2.0658795558326745e-06,
+      "loss": 10.3834,
+      "step": 60
+    },
+    {
+      "epoch": 0.2164079822616408,
+      "grad_norm": 0.0801919624209404,
+      "learning_rate": 1.9802207729556023e-06,
+      "loss": 10.3855,
+      "step": 61
+    },
+    {
+      "epoch": 0.21995565410199558,
+      "grad_norm": 0.09267133474349976,
+      "learning_rate": 1.895195261000831e-06,
+      "loss": 10.381,
+      "step": 62
+    },
+    {
+      "epoch": 0.22350332594235034,
+      "grad_norm": 0.10453727096319199,
+      "learning_rate": 1.8109066104575023e-06,
+      "loss": 10.3845,
+      "step": 63
+    },
+    {
+      "epoch": 0.2270509977827051,
+      "grad_norm": 0.11158421635627747,
+      "learning_rate": 1.7274575140626318e-06,
+      "loss": 10.3886,
+      "step": 64
+    },
+    {
+      "epoch": 0.23059866962305986,
+      "grad_norm": 0.1125878095626831,
+      "learning_rate": 1.6449496416858285e-06,
+      "loss": 10.3909,
+      "step": 65
+    },
+    {
+      "epoch": 0.23414634146341465,
+      "grad_norm": 0.10446196794509888,
+      "learning_rate": 1.56348351646022e-06,
+      "loss": 10.3873,
+      "step": 66
+    },
+    {
+      "epoch": 0.2376940133037694,
+      "grad_norm": 0.10698756575584412,
+      "learning_rate": 1.4831583923105e-06,
+      "loss": 10.3856,
+      "step": 67
+    },
+    {
+      "epoch": 0.24124168514412417,
+      "grad_norm": 0.09546708315610886,
+      "learning_rate": 1.4040721330273063e-06,
+      "loss": 10.3852,
+      "step": 68
+    },
+    {
+      "epoch": 0.24478935698447893,
+      "grad_norm": 0.08554647862911224,
+      "learning_rate": 1.3263210930352737e-06,
+      "loss": 10.3823,
+      "step": 69
+    },
+    {
+      "epoch": 0.24833702882483372,
+      "grad_norm": 0.08267805725336075,
+      "learning_rate": 1.2500000000000007e-06,
+      "loss": 10.3863,
+      "step": 70
+    },
+    {
+      "epoch": 0.2518847006651885,
+      "grad_norm": 0.09411504864692688,
+      "learning_rate": 1.1752018394169882e-06,
+      "loss": 10.3777,
+      "step": 71
+    },
+    {
+      "epoch": 0.25543237250554324,
+      "grad_norm": 0.08879115432500839,
+      "learning_rate": 1.1020177413231334e-06,
+      "loss": 10.3706,
+      "step": 72
+    },
+    {
+      "epoch": 0.258980044345898,
+      "grad_norm": 0.11268702149391174,
+      "learning_rate": 1.0305368692688175e-06,
+      "loss": 10.371,
+      "step": 73
+    },
+    {
+      "epoch": 0.26252771618625276,
+      "grad_norm": 0.08132211118936539,
+      "learning_rate": 9.608463116858544e-07,
+      "loss": 10.3782,
+      "step": 74
+    },
+    {
+      "epoch": 0.2660753880266075,
+      "grad_norm": 0.0807732418179512,
+      "learning_rate": 8.930309757836517e-07,
+      "loss": 10.3848,
+      "step": 75
+    },
+    {
+      "epoch": 0.26962305986696233,
+      "grad_norm": 0.09335960447788239,
+      "learning_rate": 8.271734841028553e-07,
+      "loss": 10.3878,
+      "step": 76
+    },
+    {
+      "epoch": 0.2731707317073171,
+      "grad_norm": 0.0670682042837143,
+      "learning_rate": 7.633540738525066e-07,
+      "loss": 10.3828,
+      "step": 77
+    },
+    {
+      "epoch": 0.27671840354767185,
+      "grad_norm": 0.07090764492750168,
+      "learning_rate": 7.016504991533727e-07,
+      "loss": 10.3856,
+      "step": 78
+    },
+    {
+      "epoch": 0.2802660753880266,
+      "grad_norm": 0.09205988049507141,
+      "learning_rate": 6.421379363065142e-07,
+      "loss": 10.3809,
+      "step": 79
+    },
+    {
+      "epoch": 0.2838137472283814,
+      "grad_norm": 0.0862230658531189,
+      "learning_rate": 5.848888922025553e-07,
+      "loss": 10.3826,
+      "step": 80
+    },
+    {
+      "epoch": 0.28736141906873613,
+      "grad_norm": 0.07999718189239502,
+      "learning_rate": 5.299731159831953e-07,
+      "loss": 10.3876,
+      "step": 81
+    },
+    {
+      "epoch": 0.2909090909090909,
+      "grad_norm": 0.08017993718385696,
+      "learning_rate": 4.774575140626317e-07,
+      "loss": 10.3882,
+      "step": 82
+    },
+    {
+      "epoch": 0.29445676274944566,
+      "grad_norm": 0.07943841069936752,
+      "learning_rate": 4.27406068612396e-07,
+      "loss": 10.3807,
+      "step": 83
+    },
+    {
+      "epoch": 0.29800443458980047,
+      "grad_norm": 0.07772082835435867,
+      "learning_rate": 3.798797596089351e-07,
+      "loss": 10.3832,
+      "step": 84
+    },
+    {
+      "epoch": 0.30155210643015523,
+      "grad_norm": 0.08380313217639923,
+      "learning_rate": 3.3493649053890325e-07,
+      "loss": 10.3864,
+      "step": 85
+    },
+    {
+      "epoch": 0.30509977827051,
+      "grad_norm": 0.08102784305810928,
+      "learning_rate": 2.9263101785268253e-07,
+      "loss": 10.3864,
+      "step": 86
+    },
+    {
+      "epoch": 0.30864745011086475,
+      "grad_norm": 0.07537892460823059,
+      "learning_rate": 2.53014884252083e-07,
+      "loss": 10.3829,
+      "step": 87
+    },
+    {
+      "epoch": 0.3121951219512195,
+      "grad_norm": 0.09276334941387177,
+      "learning_rate": 2.1613635589349756e-07,
+      "loss": 10.3844,
+      "step": 88
+    },
+    {
+      "epoch": 0.31574279379157427,
+      "grad_norm": 0.08197739720344543,
+      "learning_rate": 1.8204036358303173e-07,
+      "loss": 10.3815,
+      "step": 89
+    },
+    {
+      "epoch": 0.31929046563192903,
+      "grad_norm": 0.07491984963417053,
+      "learning_rate": 1.507684480352292e-07,
+      "loss": 10.3815,
+      "step": 90
+    },
+    {
+      "epoch": 0.3228381374722838,
+      "grad_norm": 0.07267613708972931,
+      "learning_rate": 1.223587092621162e-07,
+      "loss": 10.3878,
+      "step": 91
+    },
+    {
+      "epoch": 0.3263858093126386,
+      "grad_norm": 0.07037529349327087,
+      "learning_rate": 9.684576015420277e-08,
+      "loss": 10.3775,
+      "step": 92
+    },
+    {
+      "epoch": 0.32993348115299337,
+      "grad_norm": 0.07753679901361465,
+      "learning_rate": 7.426068431000883e-08,
+      "loss": 10.382,
+      "step": 93
+    },
+    {
+      "epoch": 0.33348115299334813,
+      "grad_norm": 0.07639942318201065,
+      "learning_rate": 5.463099816548578e-08,
+      "loss": 10.3824,
+      "step": 94
+    },
+    {
+      "epoch": 0.3370288248337029,
+      "grad_norm": 0.07099409401416779,
+      "learning_rate": 3.798061746947995e-08,
+      "loss": 10.3846,
+      "step": 95
+    },
+    {
+      "epoch": 0.34057649667405765,
+      "grad_norm": 0.083161860704422,
+      "learning_rate": 2.4329828146074096e-08,
+      "loss": 10.3851,
+      "step": 96
+    },
+    {
+      "epoch": 0.3441241685144124,
+      "grad_norm": 0.08214224874973297,
+      "learning_rate": 1.3695261579316776e-08,
+      "loss": 10.385,
+      "step": 97
+    },
+    {
+      "epoch": 0.34767184035476717,
+      "grad_norm": 0.0753604993224144,
+      "learning_rate": 6.089874350439507e-09,
+      "loss": 10.3789,
+      "step": 98
+    },
+    {
+      "epoch": 0.35121951219512193,
+      "grad_norm": 0.08185871690511703,
+      "learning_rate": 1.5229324522605949e-09,
+      "loss": 10.3755,
+      "step": 99
+    },
+    {
+      "epoch": 0.35476718403547675,
+      "grad_norm": 0.10917291790246964,
+      "learning_rate": 0.0,
+      "loss": 10.3827,
+      "step": 100
+    },
+    {
+      "epoch": 0.35476718403547675,
+      "eval_loss": 10.382617950439453,
+      "eval_runtime": 1.2497,
+      "eval_samples_per_second": 380.094,
+      "eval_steps_per_second": 95.224,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 11938033434624.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null