Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cee8dc9dbcd004cc9b683fe8715106c01b9001cc162557f96cbb970281bd41c4
 size 247944

 version https://git-lfs.github.com/spec/v1
+oid sha256:01d59ce2a7341922b7a9bf5e3e23574b2900e413612f1c4d474469579bb705c6
 size 247944

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32c3fa48749ffddc3ff5d25f38cf9ded6d3ffe0eb41fe653477ccfbc0b095690
 size 505518

 version https://git-lfs.github.com/spec/v1
+oid sha256:bd6ae76c436a71395bf794c988db3b4d58532b087016948fa2f243b3dc0d5c4f
 size 505518

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1a548d98c4d01d4cda909636049cfa125aae686a9d422d7b0eff894ec6174f2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0105e19c5d536965e33c0c61571ab948ed5b34b45e0897cd3142cc89461cc76f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1900bd8021f13c38b942ed30aea6e2cea1b47664e4ce28d0276b142334732307
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2c4a11c3ec7ace2e963dc6e2b0b5b6372cc0250cefb36d5f7289475908638cb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.10726547241211,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.7797270955165692,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 363.086,
       "eval_steps_per_second": 90.772,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -387,7 +745,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4315907358720.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.10726547241211,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 1.5594541910331383,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 363.086,
       "eval_steps_per_second": 90.772,
       "step": 50
+    },
+    {
+      "epoch": 0.7953216374269005,
+      "grad_norm": 1.4868738651275635,
+      "learning_rate": 2.847932752400164e-06,
+      "loss": 88.9165,
+      "step": 51
+    },
+    {
+      "epoch": 0.8109161793372319,
+      "grad_norm": 1.5754703283309937,
+      "learning_rate": 2.761321158169134e-06,
+      "loss": 88.8994,
+      "step": 52
+    },
+    {
+      "epoch": 0.8265107212475633,
+      "grad_norm": 1.521441102027893,
+      "learning_rate": 2.6743911843603134e-06,
+      "loss": 88.8922,
+      "step": 53
+    },
+    {
+      "epoch": 0.8421052631578947,
+      "grad_norm": 1.5741850137710571,
+      "learning_rate": 2.587248741756253e-06,
+      "loss": 88.9125,
+      "step": 54
+    },
+    {
+      "epoch": 0.8576998050682261,
+      "grad_norm": 1.6995844841003418,
+      "learning_rate": 2.5e-06,
+      "loss": 88.8839,
+      "step": 55
+    },
+    {
+      "epoch": 0.8732943469785575,
+      "grad_norm": 1.7483824491500854,
+      "learning_rate": 2.4127512582437486e-06,
+      "loss": 88.8931,
+      "step": 56
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 1.6258654594421387,
+      "learning_rate": 2.325608815639687e-06,
+      "loss": 88.9227,
+      "step": 57
+    },
+    {
+      "epoch": 0.9044834307992202,
+      "grad_norm": 1.6917520761489868,
+      "learning_rate": 2.238678841830867e-06,
+      "loss": 88.8958,
+      "step": 58
+    },
+    {
+      "epoch": 0.9200779727095516,
+      "grad_norm": 1.9397125244140625,
+      "learning_rate": 2.1520672475998374e-06,
+      "loss": 88.8976,
+      "step": 59
+    },
+    {
+      "epoch": 0.935672514619883,
+      "grad_norm": 2.0061938762664795,
+      "learning_rate": 2.0658795558326745e-06,
+      "loss": 88.8821,
+      "step": 60
+    },
+    {
+      "epoch": 0.9512670565302144,
+      "grad_norm": 2.2613093852996826,
+      "learning_rate": 1.9802207729556023e-06,
+      "loss": 88.9129,
+      "step": 61
+    },
+    {
+      "epoch": 0.9668615984405458,
+      "grad_norm": 2.683389186859131,
+      "learning_rate": 1.895195261000831e-06,
+      "loss": 88.7381,
+      "step": 62
+    },
+    {
+      "epoch": 0.9824561403508771,
+      "grad_norm": 3.433419704437256,
+      "learning_rate": 1.8109066104575023e-06,
+      "loss": 88.8607,
+      "step": 63
+    },
+    {
+      "epoch": 0.9980506822612085,
+      "grad_norm": 6.409455299377441,
+      "learning_rate": 1.7274575140626318e-06,
+      "loss": 88.5442,
+      "step": 64
+    },
+    {
+      "epoch": 1.01364522417154,
+      "grad_norm": 1.4463061094284058,
+      "learning_rate": 1.6449496416858285e-06,
+      "loss": 88.9063,
+      "step": 65
+    },
+    {
+      "epoch": 1.0292397660818713,
+      "grad_norm": 1.4709982872009277,
+      "learning_rate": 1.56348351646022e-06,
+      "loss": 88.9036,
+      "step": 66
+    },
+    {
+      "epoch": 1.0448343079922027,
+      "grad_norm": 1.5933891534805298,
+      "learning_rate": 1.4831583923105e-06,
+      "loss": 88.9131,
+      "step": 67
+    },
+    {
+      "epoch": 1.060428849902534,
+      "grad_norm": 1.418686032295227,
+      "learning_rate": 1.4040721330273063e-06,
+      "loss": 88.8879,
+      "step": 68
+    },
+    {
+      "epoch": 1.0760233918128654,
+      "grad_norm": 1.5968884229660034,
+      "learning_rate": 1.3263210930352737e-06,
+      "loss": 88.8766,
+      "step": 69
+    },
+    {
+      "epoch": 1.0916179337231968,
+      "grad_norm": 1.7474418878555298,
+      "learning_rate": 1.2500000000000007e-06,
+      "loss": 88.9008,
+      "step": 70
+    },
+    {
+      "epoch": 1.1072124756335282,
+      "grad_norm": 1.7670066356658936,
+      "learning_rate": 1.1752018394169882e-06,
+      "loss": 88.9295,
+      "step": 71
+    },
+    {
+      "epoch": 1.1228070175438596,
+      "grad_norm": 1.739268183708191,
+      "learning_rate": 1.1020177413231334e-06,
+      "loss": 88.8783,
+      "step": 72
+    },
+    {
+      "epoch": 1.138401559454191,
+      "grad_norm": 1.924734354019165,
+      "learning_rate": 1.0305368692688175e-06,
+      "loss": 88.9094,
+      "step": 73
+    },
+    {
+      "epoch": 1.1539961013645224,
+      "grad_norm": 1.8002381324768066,
+      "learning_rate": 9.608463116858544e-07,
+      "loss": 88.9047,
+      "step": 74
+    },
+    {
+      "epoch": 1.1695906432748537,
+      "grad_norm": 1.9778398275375366,
+      "learning_rate": 8.930309757836517e-07,
+      "loss": 88.8734,
+      "step": 75
+    },
+    {
+      "epoch": 1.1851851851851851,
+      "grad_norm": 2.0487163066864014,
+      "learning_rate": 8.271734841028553e-07,
+      "loss": 88.8375,
+      "step": 76
+    },
+    {
+      "epoch": 1.2007797270955165,
+      "grad_norm": 2.143615484237671,
+      "learning_rate": 7.633540738525066e-07,
+      "loss": 88.8514,
+      "step": 77
+    },
+    {
+      "epoch": 1.2163742690058479,
+      "grad_norm": 2.540370225906372,
+      "learning_rate": 7.016504991533727e-07,
+      "loss": 88.7744,
+      "step": 78
+    },
+    {
+      "epoch": 1.2319688109161793,
+      "grad_norm": 3.210033416748047,
+      "learning_rate": 6.421379363065142e-07,
+      "loss": 88.7798,
+      "step": 79
+    },
+    {
+      "epoch": 1.2475633528265107,
+      "grad_norm": 5.062490463256836,
+      "learning_rate": 5.848888922025553e-07,
+      "loss": 88.712,
+      "step": 80
+    },
+    {
+      "epoch": 1.263157894736842,
+      "grad_norm": 2.2439610958099365,
+      "learning_rate": 5.299731159831953e-07,
+      "loss": 88.918,
+      "step": 81
+    },
+    {
+      "epoch": 1.2787524366471734,
+      "grad_norm": 1.4683839082717896,
+      "learning_rate": 4.774575140626317e-07,
+      "loss": 88.9326,
+      "step": 82
+    },
+    {
+      "epoch": 1.2943469785575048,
+      "grad_norm": 1.4734752178192139,
+      "learning_rate": 4.27406068612396e-07,
+      "loss": 88.9029,
+      "step": 83
+    },
+    {
+      "epoch": 1.3099415204678362,
+      "grad_norm": 1.5223207473754883,
+      "learning_rate": 3.798797596089351e-07,
+      "loss": 88.9191,
+      "step": 84
+    },
+    {
+      "epoch": 1.3255360623781676,
+      "grad_norm": 1.5164719820022583,
+      "learning_rate": 3.3493649053890325e-07,
+      "loss": 88.8715,
+      "step": 85
+    },
+    {
+      "epoch": 1.341130604288499,
+      "grad_norm": 1.6114667654037476,
+      "learning_rate": 2.9263101785268253e-07,
+      "loss": 88.8649,
+      "step": 86
+    },
+    {
+      "epoch": 1.3567251461988303,
+      "grad_norm": 1.7107863426208496,
+      "learning_rate": 2.53014884252083e-07,
+      "loss": 88.8973,
+      "step": 87
+    },
+    {
+      "epoch": 1.3723196881091617,
+      "grad_norm": 1.7419860363006592,
+      "learning_rate": 2.1613635589349756e-07,
+      "loss": 88.8746,
+      "step": 88
+    },
+    {
+      "epoch": 1.387914230019493,
+      "grad_norm": 1.6827164888381958,
+      "learning_rate": 1.8204036358303173e-07,
+      "loss": 88.8891,
+      "step": 89
+    },
+    {
+      "epoch": 1.4035087719298245,
+      "grad_norm": 1.8386940956115723,
+      "learning_rate": 1.507684480352292e-07,
+      "loss": 88.8503,
+      "step": 90
+    },
+    {
+      "epoch": 1.4191033138401559,
+      "grad_norm": 1.9322350025177002,
+      "learning_rate": 1.223587092621162e-07,
+      "loss": 88.8244,
+      "step": 91
+    },
+    {
+      "epoch": 1.4346978557504872,
+      "grad_norm": 1.968429684638977,
+      "learning_rate": 9.684576015420277e-08,
+      "loss": 88.882,
+      "step": 92
+    },
+    {
+      "epoch": 1.4502923976608186,
+      "grad_norm": 2.1462831497192383,
+      "learning_rate": 7.426068431000883e-08,
+      "loss": 88.8721,
+      "step": 93
+    },
+    {
+      "epoch": 1.46588693957115,
+      "grad_norm": 2.4528849124908447,
+      "learning_rate": 5.463099816548578e-08,
+      "loss": 88.8235,
+      "step": 94
+    },
+    {
+      "epoch": 1.4814814814814814,
+      "grad_norm": 2.8609659671783447,
+      "learning_rate": 3.798061746947995e-08,
+      "loss": 88.7827,
+      "step": 95
+    },
+    {
+      "epoch": 1.4970760233918128,
+      "grad_norm": 4.6707563400268555,
+      "learning_rate": 2.4329828146074096e-08,
+      "loss": 88.6615,
+      "step": 96
+    },
+    {
+      "epoch": 1.5126705653021442,
+      "grad_norm": 2.532287120819092,
+      "learning_rate": 1.3695261579316776e-08,
+      "loss": 88.8792,
+      "step": 97
+    },
+    {
+      "epoch": 1.5282651072124755,
+      "grad_norm": 1.508450984954834,
+      "learning_rate": 6.089874350439507e-09,
+      "loss": 88.9139,
+      "step": 98
+    },
+    {
+      "epoch": 1.543859649122807,
+      "grad_norm": 1.4835156202316284,
+      "learning_rate": 1.5229324522605949e-09,
+      "loss": 88.9422,
+      "step": 99
+    },
+    {
+      "epoch": 1.5594541910331383,
+      "grad_norm": 1.498764991760254,
+      "learning_rate": 0.0,
+      "loss": 88.9188,
+      "step": 100
+    },
+    {
+      "epoch": 1.5594541910331383,
+      "eval_loss": 11.107521057128906,
+      "eval_runtime": 0.3045,
+      "eval_samples_per_second": 354.702,
+      "eval_steps_per_second": 88.675,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8592815554560.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null