Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18aa5b9082738c47aa82582e18b6a0d3db14e1ca62110782d67d35f7651d6bb4
 size 289452128

 version https://git-lfs.github.com/spec/v1
+oid sha256:e093ea23c15c7271ff7e0084a5fb4a03ec601173c0450eedbccbcaed904eb42f
 size 289452128

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08a544f5a11ebb57eab41dadfea8761d7b143f0abe46d540bc6aebe615be1d3e
 size 147359892

 version https://git-lfs.github.com/spec/v1
+oid sha256:963ad2eb7f68f2d32e37acb84600628d848a1825d0bbee942d8d284c8d275bac
 size 147359892

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3c8687d9cf0e416074c6cb6a0f12d990c5bc1a3a2c1d07d702c17cf6cd21e47
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:bdfe133c1a6f6019cbe7ed1ccb35e2964bda7cab261ca5f4ab32375c8cefd0d7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.44630166888237,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.22530980097634248,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 33.674,
       "eval_steps_per_second": 8.433,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.980105357872333e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.427767276763916,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.3004130679684566,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 33.674,
       "eval_steps_per_second": 8.433,
       "step": 150
+    },
+    {
+      "epoch": 0.22681186631618475,
+      "grad_norm": 0.6761639714241028,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 0.556,
+      "step": 151
+    },
+    {
+      "epoch": 0.22831393165602704,
+      "grad_norm": 0.5771117806434631,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 0.5283,
+      "step": 152
+    },
+    {
+      "epoch": 0.22981599699586933,
+      "grad_norm": 0.4977632462978363,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 0.5066,
+      "step": 153
+    },
+    {
+      "epoch": 0.23131806233571162,
+      "grad_norm": 0.4167040288448334,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 0.4982,
+      "step": 154
+    },
+    {
+      "epoch": 0.23282012767555388,
+      "grad_norm": 0.37243950366973877,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 0.4843,
+      "step": 155
+    },
+    {
+      "epoch": 0.23432219301539617,
+      "grad_norm": 0.3721346855163574,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 0.4715,
+      "step": 156
+    },
+    {
+      "epoch": 0.23582425835523846,
+      "grad_norm": 0.3776877522468567,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 0.4811,
+      "step": 157
+    },
+    {
+      "epoch": 0.23732632369508075,
+      "grad_norm": 0.28867435455322266,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 0.4688,
+      "step": 158
+    },
+    {
+      "epoch": 0.238828389034923,
+      "grad_norm": 0.24486391246318817,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 0.4596,
+      "step": 159
+    },
+    {
+      "epoch": 0.2403304543747653,
+      "grad_norm": 0.2534550130367279,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 0.4498,
+      "step": 160
+    },
+    {
+      "epoch": 0.2418325197146076,
+      "grad_norm": 0.2360064685344696,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 0.4588,
+      "step": 161
+    },
+    {
+      "epoch": 0.24333458505444988,
+      "grad_norm": 0.2181614190340042,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.448,
+      "step": 162
+    },
+    {
+      "epoch": 0.24483665039429214,
+      "grad_norm": 0.23044590651988983,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 0.4488,
+      "step": 163
+    },
+    {
+      "epoch": 0.24633871573413443,
+      "grad_norm": 0.23173829913139343,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 0.4436,
+      "step": 164
+    },
+    {
+      "epoch": 0.24784078107397672,
+      "grad_norm": 0.2390686273574829,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 0.4539,
+      "step": 165
+    },
+    {
+      "epoch": 0.249342846413819,
+      "grad_norm": 0.24815574288368225,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 0.4399,
+      "step": 166
+    },
+    {
+      "epoch": 0.2508449117536613,
+      "grad_norm": 0.22618462145328522,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 0.4393,
+      "step": 167
+    },
+    {
+      "epoch": 0.25234697709350357,
+      "grad_norm": 0.27694129943847656,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 0.4605,
+      "step": 168
+    },
+    {
+      "epoch": 0.25384904243334583,
+      "grad_norm": 0.2610919773578644,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 0.4299,
+      "step": 169
+    },
+    {
+      "epoch": 0.25535110777318815,
+      "grad_norm": 0.2240491807460785,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 0.4194,
+      "step": 170
+    },
+    {
+      "epoch": 0.2568531731130304,
+      "grad_norm": 0.22954358160495758,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 0.4403,
+      "step": 171
+    },
+    {
+      "epoch": 0.2583552384528727,
+      "grad_norm": 0.2217157781124115,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 0.4206,
+      "step": 172
+    },
+    {
+      "epoch": 0.259857303792715,
+      "grad_norm": 0.232159823179245,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 0.4164,
+      "step": 173
+    },
+    {
+      "epoch": 0.26135936913255725,
+      "grad_norm": 0.19874323904514313,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 0.4237,
+      "step": 174
+    },
+    {
+      "epoch": 0.26286143447239957,
+      "grad_norm": 0.22404231131076813,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 0.4112,
+      "step": 175
+    },
+    {
+      "epoch": 0.26436349981224183,
+      "grad_norm": 0.23319314420223236,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 0.4042,
+      "step": 176
+    },
+    {
+      "epoch": 0.2658655651520841,
+      "grad_norm": 0.21007098257541656,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 0.4308,
+      "step": 177
+    },
+    {
+      "epoch": 0.2673676304919264,
+      "grad_norm": 0.22640475630760193,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 0.4037,
+      "step": 178
+    },
+    {
+      "epoch": 0.2688696958317687,
+      "grad_norm": 0.22629423439502716,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 0.4074,
+      "step": 179
+    },
+    {
+      "epoch": 0.270371761171611,
+      "grad_norm": 0.2365429401397705,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 0.4236,
+      "step": 180
+    },
+    {
+      "epoch": 0.27187382651145325,
+      "grad_norm": 0.2148093730211258,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.4224,
+      "step": 181
+    },
+    {
+      "epoch": 0.2733758918512955,
+      "grad_norm": 0.24300482869148254,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 0.412,
+      "step": 182
+    },
+    {
+      "epoch": 0.27487795719113783,
+      "grad_norm": 0.24461029469966888,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 0.4206,
+      "step": 183
+    },
+    {
+      "epoch": 0.2763800225309801,
+      "grad_norm": 0.23772835731506348,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 0.3985,
+      "step": 184
+    },
+    {
+      "epoch": 0.27788208787082236,
+      "grad_norm": 0.22865405678749084,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 0.3981,
+      "step": 185
+    },
+    {
+      "epoch": 0.2793841532106647,
+      "grad_norm": 0.23448914289474487,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 0.4014,
+      "step": 186
+    },
+    {
+      "epoch": 0.28088621855050694,
+      "grad_norm": 0.2333112210035324,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 0.3981,
+      "step": 187
+    },
+    {
+      "epoch": 0.28238828389034926,
+      "grad_norm": 0.23065517842769623,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 0.4051,
+      "step": 188
+    },
+    {
+      "epoch": 0.2838903492301915,
+      "grad_norm": 0.2554570138454437,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 0.4156,
+      "step": 189
+    },
+    {
+      "epoch": 0.2853924145700338,
+      "grad_norm": 0.24712638556957245,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 0.4041,
+      "step": 190
+    },
+    {
+      "epoch": 0.2868944799098761,
+      "grad_norm": 0.25269147753715515,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 0.4001,
+      "step": 191
+    },
+    {
+      "epoch": 0.28839654524971836,
+      "grad_norm": 0.27400457859039307,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 0.4021,
+      "step": 192
+    },
+    {
+      "epoch": 0.2898986105895606,
+      "grad_norm": 0.28076010942459106,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 0.3883,
+      "step": 193
+    },
+    {
+      "epoch": 0.29140067592940294,
+      "grad_norm": 0.2610340714454651,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 0.407,
+      "step": 194
+    },
+    {
+      "epoch": 0.2929027412692452,
+      "grad_norm": 0.28357601165771484,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 0.3873,
+      "step": 195
+    },
+    {
+      "epoch": 0.2944048066090875,
+      "grad_norm": 0.3212686777114868,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 0.4189,
+      "step": 196
+    },
+    {
+      "epoch": 0.2959068719489298,
+      "grad_norm": 0.34003758430480957,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 0.426,
+      "step": 197
+    },
+    {
+      "epoch": 0.29740893728877205,
+      "grad_norm": 0.3577791154384613,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 0.4195,
+      "step": 198
+    },
+    {
+      "epoch": 0.29891100262861436,
+      "grad_norm": 0.3521157503128052,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 0.4127,
+      "step": 199
+    },
+    {
+      "epoch": 0.3004130679684566,
+      "grad_norm": 0.45909836888313293,
+      "learning_rate": 0.0,
+      "loss": 0.4286,
+      "step": 200
+    },
+    {
+      "epoch": 0.3004130679684566,
+      "eval_loss": 0.427767276763916,
+      "eval_runtime": 33.3252,
+      "eval_samples_per_second": 33.668,
+      "eval_steps_per_second": 8.432,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.634625410321613e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null