Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28054698b32824de1f758a0d0e04768ba46c478f17586423d8ef37ab9b034ad9
 size 50358592

 version https://git-lfs.github.com/spec/v1
+oid sha256:39046f838b61e097138aa0f9868714713f8679d01331dc399251ae1040b04521
 size 50358592

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b22f01cc9ddace2407f5f620ea6cb06d553468fc78f0b9296b30b36149f63ae
 size 100824826

 version https://git-lfs.github.com/spec/v1
+oid sha256:c8dac24fefdc9abcdd049f6168385ad481dbeb32f3d41ef5afd178b4732b75ed
 size 100824826

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fb813eb13c16e858e01065f19173c74f78cecf3d210be28819791ee718996f35
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c86418cb1f28236426f396b5c38395696104a8c5541d49e2f8efe3762ca62a0a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 3.1268985271453857,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.05327295731504295,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 18.607,
       "eval_steps_per_second": 2.605,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.831294745431245e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 3.0014615058898926,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.07103060975339061,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 18.607,
       "eval_steps_per_second": 2.605,
       "step": 150
+    },
+    {
+      "epoch": 0.053628110363809905,
+      "grad_norm": 91.58993530273438,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 99.1962,
+      "step": 151
+    },
+    {
+      "epoch": 0.05398326341257686,
+      "grad_norm": 91.98210144042969,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 100.0753,
+      "step": 152
+    },
+    {
+      "epoch": 0.05433841646134381,
+      "grad_norm": 81.17693328857422,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 91.3806,
+      "step": 153
+    },
+    {
+      "epoch": 0.054693569510110764,
+      "grad_norm": 90.48416900634766,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 101.1154,
+      "step": 154
+    },
+    {
+      "epoch": 0.055048722558877716,
+      "grad_norm": 86.93634796142578,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 103.3673,
+      "step": 155
+    },
+    {
+      "epoch": 0.05540387560764467,
+      "grad_norm": 78.2991943359375,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 98.0419,
+      "step": 156
+    },
+    {
+      "epoch": 0.05575902865641162,
+      "grad_norm": 69.35735321044922,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 95.0316,
+      "step": 157
+    },
+    {
+      "epoch": 0.056114181705178574,
+      "grad_norm": 64.88497161865234,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 100.8172,
+      "step": 158
+    },
+    {
+      "epoch": 0.05646933475394553,
+      "grad_norm": 71.2990951538086,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 97.2378,
+      "step": 159
+    },
+    {
+      "epoch": 0.05682448780271248,
+      "grad_norm": 63.69855499267578,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 68.8525,
+      "step": 160
+    },
+    {
+      "epoch": 0.05717964085147943,
+      "grad_norm": 67.04605102539062,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 105.3696,
+      "step": 161
+    },
+    {
+      "epoch": 0.057534793900246385,
+      "grad_norm": 58.97417449951172,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 92.3282,
+      "step": 162
+    },
+    {
+      "epoch": 0.05788994694901334,
+      "grad_norm": 67.11451721191406,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 114.9778,
+      "step": 163
+    },
+    {
+      "epoch": 0.05824509999778029,
+      "grad_norm": 60.5018196105957,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 105.1631,
+      "step": 164
+    },
+    {
+      "epoch": 0.05860025304654724,
+      "grad_norm": 62.042137145996094,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 106.7786,
+      "step": 165
+    },
+    {
+      "epoch": 0.0589554060953142,
+      "grad_norm": 62.32429504394531,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 102.9553,
+      "step": 166
+    },
+    {
+      "epoch": 0.059310559144081156,
+      "grad_norm": 64.58570098876953,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 104.744,
+      "step": 167
+    },
+    {
+      "epoch": 0.05966571219284811,
+      "grad_norm": 60.26283645629883,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 105.8727,
+      "step": 168
+    },
+    {
+      "epoch": 0.06002086524161506,
+      "grad_norm": 61.51892852783203,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 94.4389,
+      "step": 169
+    },
+    {
+      "epoch": 0.060376018290382014,
+      "grad_norm": 61.51341247558594,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 103.2939,
+      "step": 170
+    },
+    {
+      "epoch": 0.060731171339148966,
+      "grad_norm": 70.73543548583984,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 99.6923,
+      "step": 171
+    },
+    {
+      "epoch": 0.06108632438791592,
+      "grad_norm": 59.376461029052734,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 82.9301,
+      "step": 172
+    },
+    {
+      "epoch": 0.06144147743668287,
+      "grad_norm": 64.98381805419922,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 104.8384,
+      "step": 173
+    },
+    {
+      "epoch": 0.061796630485449824,
+      "grad_norm": 62.95907974243164,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 112.1615,
+      "step": 174
+    },
+    {
+      "epoch": 0.06215178353421678,
+      "grad_norm": 68.76789093017578,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 96.1017,
+      "step": 175
+    },
+    {
+      "epoch": 0.06215178353421678,
+      "eval_loss": 3.028630018234253,
+      "eval_runtime": 2.6853,
+      "eval_samples_per_second": 18.62,
+      "eval_steps_per_second": 2.607,
+      "step": 175
+    },
+    {
+      "epoch": 0.06250693658298373,
+      "grad_norm": 68.70271301269531,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 109.3617,
+      "step": 176
+    },
+    {
+      "epoch": 0.06286208963175069,
+      "grad_norm": 70.55747985839844,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 108.6827,
+      "step": 177
+    },
+    {
+      "epoch": 0.06321724268051764,
+      "grad_norm": 81.36849212646484,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 105.8935,
+      "step": 178
+    },
+    {
+      "epoch": 0.0635723957292846,
+      "grad_norm": 63.354278564453125,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 103.6249,
+      "step": 179
+    },
+    {
+      "epoch": 0.06392754877805154,
+      "grad_norm": 64.01313018798828,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 103.1221,
+      "step": 180
+    },
+    {
+      "epoch": 0.0642827018268185,
+      "grad_norm": 66.24797821044922,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 102.4062,
+      "step": 181
+    },
+    {
+      "epoch": 0.06463785487558545,
+      "grad_norm": 84.59285736083984,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 110.9548,
+      "step": 182
+    },
+    {
+      "epoch": 0.0649930079243524,
+      "grad_norm": 69.86466217041016,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 96.9359,
+      "step": 183
+    },
+    {
+      "epoch": 0.06534816097311935,
+      "grad_norm": 76.62171173095703,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 110.0732,
+      "step": 184
+    },
+    {
+      "epoch": 0.06570331402188631,
+      "grad_norm": 60.51416015625,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 94.4039,
+      "step": 185
+    },
+    {
+      "epoch": 0.06605846707065326,
+      "grad_norm": 73.63145446777344,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 97.3994,
+      "step": 186
+    },
+    {
+      "epoch": 0.06641362011942022,
+      "grad_norm": 65.53382110595703,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 99.5428,
+      "step": 187
+    },
+    {
+      "epoch": 0.06676877316818716,
+      "grad_norm": 77.5784912109375,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 114.5964,
+      "step": 188
+    },
+    {
+      "epoch": 0.06712392621695412,
+      "grad_norm": 75.30887603759766,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 92.6546,
+      "step": 189
+    },
+    {
+      "epoch": 0.06747907926572107,
+      "grad_norm": 74.56192779541016,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 97.1177,
+      "step": 190
+    },
+    {
+      "epoch": 0.06783423231448803,
+      "grad_norm": 82.70829772949219,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 105.1003,
+      "step": 191
+    },
+    {
+      "epoch": 0.06818938536325497,
+      "grad_norm": 76.390869140625,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 99.6529,
+      "step": 192
+    },
+    {
+      "epoch": 0.06854453841202193,
+      "grad_norm": 72.76335906982422,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 103.1085,
+      "step": 193
+    },
+    {
+      "epoch": 0.06889969146078888,
+      "grad_norm": 80.61557006835938,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 108.7105,
+      "step": 194
+    },
+    {
+      "epoch": 0.06925484450955584,
+      "grad_norm": 71.13042449951172,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 98.3694,
+      "step": 195
+    },
+    {
+      "epoch": 0.06960999755832278,
+      "grad_norm": 73.73394775390625,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 101.3414,
+      "step": 196
+    },
+    {
+      "epoch": 0.06996515060708974,
+      "grad_norm": 83.72611236572266,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 114.0367,
+      "step": 197
+    },
+    {
+      "epoch": 0.07032030365585669,
+      "grad_norm": 90.48454284667969,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 103.9391,
+      "step": 198
+    },
+    {
+      "epoch": 0.07067545670462365,
+      "grad_norm": 92.77326202392578,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 105.4838,
+      "step": 199
+    },
+    {
+      "epoch": 0.07103060975339061,
+      "grad_norm": 111.87572479248047,
+      "learning_rate": 0.0,
+      "loss": 82.6931,
+      "step": 200
+    },
+    {
+      "epoch": 0.07103060975339061,
+      "eval_loss": 3.0014615058898926,
+      "eval_runtime": 2.6792,
+      "eval_samples_per_second": 18.662,
+      "eval_steps_per_second": 2.613,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.108392993908326e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null