Training in progress, step 342, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +306 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76ddad9fbeff874dda8ca1839e7bb1197b79f011a28582bf37a51641b4be011a
 size 1521616

 version https://git-lfs.github.com/spec/v1
+oid sha256:368e00a32e5e67904cdc68fad7b0d3ebbf6aa48a43908471978f2a2f044c71d6
 size 1521616

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae8d18d29e4ff91cb5bdd564154a39689a95358bc39817fda9038cf84430b7ea
 size 3108666

 version https://git-lfs.github.com/spec/v1
+oid sha256:13f99e35e1cd72dd67928139d82e8f270376e333cc058eeeca058766e79fd6da
 size 3108666

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38556c732afc59cb1a699286598dbc47ed9ce8c5e433b5847ecb635b371d31ad
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:b725e55dc7e3cfcd2470e0b973a56ed83568003bb225faa55cd6d2bf9770b30b
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0fccdf7b83d8154c1ea0818dc3f4385f34041c8c4464b2b411fb0c6aa67d41b4
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:f3322560e39a902081358f17ff4055da3283ebe3e2fa1e6b657dab85c86236f7
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c77677d38c53addb49d40cbf8a8c4cd2947a26f613e7431ff240294c5c6aea18
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea66678ca299dd2b3d155e3c39e632e66dc28be0c18e9664e7c6b137c3f76689
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f16ecca0e5c37740bc12b846283c8173114d94f01e5134b62290fc0f5663c8a
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:f314b3eeeb0bd949fe539d551bea83f6ef4f1ddb092c681433bf0279c6b7fe91
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96480ebcfed4000c6ecf039795063cb715ce6511f645a848a58f5db20e8ed45b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0aa55a29e9da8bc1ccf97b1a76ccd03130ba0539481d5a33300527a24e5c40c2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 6.472244739532471,
   "best_model_checkpoint": "miner_id_24/checkpoint-300",
-  "epoch": 0.07734388217948615,
   "eval_steps": 25,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2211,6 +2211,308 @@
       "eval_samples_per_second": 249.929,
       "eval_steps_per_second": 64.982,
       "step": 300
     }
   ],
   "logging_steps": 1,
@@ -2234,12 +2536,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2787247007989760.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 6.472244739532471,
   "best_model_checkpoint": "miner_id_24/checkpoint-300",
+  "epoch": 0.08817202568461421,
   "eval_steps": 25,
+  "global_step": 342,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 249.929,
       "eval_steps_per_second": 64.982,
       "step": 300
+    },
+    {
+      "epoch": 0.07760169512008443,
+      "grad_norm": 1.2554875612258911,
+      "learning_rate": 1.3404895720259053e-05,
+      "loss": 7.3701,
+      "step": 301
+    },
+    {
+      "epoch": 0.07785950806068272,
+      "grad_norm": 0.8463560342788696,
+      "learning_rate": 1.3242829026768597e-05,
+      "loss": 6.4277,
+      "step": 302
+    },
+    {
+      "epoch": 0.078117321001281,
+      "grad_norm": 0.644782304763794,
+      "learning_rate": 1.3084569796502682e-05,
+      "loss": 6.356,
+      "step": 303
+    },
+    {
+      "epoch": 0.0783751339418793,
+      "grad_norm": 0.7398347854614258,
+      "learning_rate": 1.293013245970609e-05,
+      "loss": 6.2324,
+      "step": 304
+    },
+    {
+      "epoch": 0.07863294688247759,
+      "grad_norm": 0.5900807976722717,
+      "learning_rate": 1.2779531098139333e-05,
+      "loss": 6.1167,
+      "step": 305
+    },
+    {
+      "epoch": 0.07889075982307588,
+      "grad_norm": 0.6693830490112305,
+      "learning_rate": 1.263277944379459e-05,
+      "loss": 6.2377,
+      "step": 306
+    },
+    {
+      "epoch": 0.07914857276367415,
+      "grad_norm": 0.6465635895729065,
+      "learning_rate": 1.248989087764366e-05,
+      "loss": 6.1853,
+      "step": 307
+    },
+    {
+      "epoch": 0.07940638570427244,
+      "grad_norm": 0.6336793303489685,
+      "learning_rate": 1.2350878428417839e-05,
+      "loss": 5.9979,
+      "step": 308
+    },
+    {
+      "epoch": 0.07966419864487073,
+      "grad_norm": 0.6897872090339661,
+      "learning_rate": 1.2215754771419997e-05,
+      "loss": 6.0263,
+      "step": 309
+    },
+    {
+      "epoch": 0.07992201158546902,
+      "grad_norm": 0.7997320294380188,
+      "learning_rate": 1.2084532227368761e-05,
+      "loss": 6.3959,
+      "step": 310
+    },
+    {
+      "epoch": 0.08017982452606731,
+      "grad_norm": 1.0460010766983032,
+      "learning_rate": 1.1957222761275149e-05,
+      "loss": 6.7064,
+      "step": 311
+    },
+    {
+      "epoch": 0.08043763746666559,
+      "grad_norm": 1.4831907749176025,
+      "learning_rate": 1.183383798135157e-05,
+      "loss": 6.8091,
+      "step": 312
+    },
+    {
+      "epoch": 0.08069545040726388,
+      "grad_norm": 2.0493836402893066,
+      "learning_rate": 1.171438913795338e-05,
+      "loss": 7.5173,
+      "step": 313
+    },
+    {
+      "epoch": 0.08095326334786217,
+      "grad_norm": 1.0544261932373047,
+      "learning_rate": 1.1598887122553061e-05,
+      "loss": 6.7366,
+      "step": 314
+    },
+    {
+      "epoch": 0.08121107628846046,
+      "grad_norm": 0.7413960099220276,
+      "learning_rate": 1.1487342466747112e-05,
+      "loss": 6.2798,
+      "step": 315
+    },
+    {
+      "epoch": 0.08146888922905875,
+      "grad_norm": 0.6017177700996399,
+      "learning_rate": 1.137976534129579e-05,
+      "loss": 6.3163,
+      "step": 316
+    },
+    {
+      "epoch": 0.08172670216965702,
+      "grad_norm": 0.6235430240631104,
+      "learning_rate": 1.127616555519573e-05,
+      "loss": 6.2398,
+      "step": 317
+    },
+    {
+      "epoch": 0.08198451511025531,
+      "grad_norm": 0.6497268676757812,
+      "learning_rate": 1.1176552554785504e-05,
+      "loss": 6.2297,
+      "step": 318
+    },
+    {
+      "epoch": 0.0822423280508536,
+      "grad_norm": 0.6185296177864075,
+      "learning_rate": 1.1080935422884358e-05,
+      "loss": 6.0099,
+      "step": 319
+    },
+    {
+      "epoch": 0.08250014099145189,
+      "grad_norm": 0.7389028668403625,
+      "learning_rate": 1.0989322877963985e-05,
+      "loss": 5.8818,
+      "step": 320
+    },
+    {
+      "epoch": 0.08275795393205018,
+      "grad_norm": 0.7540916204452515,
+      "learning_rate": 1.0901723273353597e-05,
+      "loss": 6.1902,
+      "step": 321
+    },
+    {
+      "epoch": 0.08301576687264846,
+      "grad_norm": 0.7044777274131775,
+      "learning_rate": 1.0818144596478224e-05,
+      "loss": 6.1747,
+      "step": 322
+    },
+    {
+      "epoch": 0.08327357981324675,
+      "grad_norm": 0.9389222264289856,
+      "learning_rate": 1.0738594468130452e-05,
+      "loss": 6.479,
+      "step": 323
+    },
+    {
+      "epoch": 0.08353139275384504,
+      "grad_norm": 1.1994636058807373,
+      "learning_rate": 1.0663080141775504e-05,
+      "loss": 6.6693,
+      "step": 324
+    },
+    {
+      "epoch": 0.08378920569444333,
+      "grad_norm": 3.2903223037719727,
+      "learning_rate": 1.0591608502889928e-05,
+      "loss": 6.1876,
+      "step": 325
+    },
+    {
+      "epoch": 0.08378920569444333,
+      "eval_loss": 6.461916446685791,
+      "eval_runtime": 0.1956,
+      "eval_samples_per_second": 255.68,
+      "eval_steps_per_second": 66.477,
+      "step": 325
+    },
+    {
+      "epoch": 0.08404701863504162,
+      "grad_norm": 1.2868388891220093,
+      "learning_rate": 1.0524186068333692e-05,
+      "loss": 7.4553,
+      "step": 326
+    },
+    {
+      "epoch": 0.0843048315756399,
+      "grad_norm": 0.8954585194587708,
+      "learning_rate": 1.046081898575604e-05,
+      "loss": 6.399,
+      "step": 327
+    },
+    {
+      "epoch": 0.08456264451623818,
+      "grad_norm": 0.612129807472229,
+      "learning_rate": 1.04015130330349e-05,
+      "loss": 6.4022,
+      "step": 328
+    },
+    {
+      "epoch": 0.08482045745683647,
+      "grad_norm": 0.6739881634712219,
+      "learning_rate": 1.0346273617750057e-05,
+      "loss": 6.0706,
+      "step": 329
+    },
+    {
+      "epoch": 0.08507827039743476,
+      "grad_norm": 0.7707552909851074,
+      "learning_rate": 1.0295105776690108e-05,
+      "loss": 6.0031,
+      "step": 330
+    },
+    {
+      "epoch": 0.08533608333803305,
+      "grad_norm": 0.7329960465431213,
+      "learning_rate": 1.0248014175393177e-05,
+      "loss": 6.1073,
+      "step": 331
+    },
+    {
+      "epoch": 0.08559389627863133,
+      "grad_norm": 0.7465705275535583,
+      "learning_rate": 1.0205003107721506e-05,
+      "loss": 6.0385,
+      "step": 332
+    },
+    {
+      "epoch": 0.08585170921922962,
+      "grad_norm": 0.6375886797904968,
+      "learning_rate": 1.0166076495469963e-05,
+      "loss": 6.0305,
+      "step": 333
+    },
+    {
+      "epoch": 0.08610952215982791,
+      "grad_norm": 0.7056543231010437,
+      "learning_rate": 1.0131237888008412e-05,
+      "loss": 6.1335,
+      "step": 334
+    },
+    {
+      "epoch": 0.0863673351004262,
+      "grad_norm": 0.8683858513832092,
+      "learning_rate": 1.0100490461958109e-05,
+      "loss": 6.3272,
+      "step": 335
+    },
+    {
+      "epoch": 0.08662514804102449,
+      "grad_norm": 0.9683743119239807,
+      "learning_rate": 1.0073837020902033e-05,
+      "loss": 6.5318,
+      "step": 336
+    },
+    {
+      "epoch": 0.08688296098162278,
+      "grad_norm": 1.4957187175750732,
+      "learning_rate": 1.0051279995129273e-05,
+      "loss": 6.8366,
+      "step": 337
+    },
+    {
+      "epoch": 0.08714077392222105,
+      "grad_norm": 1.5700278282165527,
+      "learning_rate": 1.0032821441413394e-05,
+      "loss": 7.7112,
+      "step": 338
+    },
+    {
+      "epoch": 0.08739858686281934,
+      "grad_norm": 1.0813219547271729,
+      "learning_rate": 1.0018463042824957e-05,
+      "loss": 6.923,
+      "step": 339
+    },
+    {
+      "epoch": 0.08765639980341763,
+      "grad_norm": 0.6960993409156799,
+      "learning_rate": 1.0008206108577992e-05,
+      "loss": 6.4607,
+      "step": 340
+    },
+    {
+      "epoch": 0.08791421274401592,
+      "grad_norm": 0.6256797313690186,
+      "learning_rate": 1.0002051573910671e-05,
+      "loss": 6.3162,
+      "step": 341
+    },
+    {
+      "epoch": 0.08817202568461421,
+      "grad_norm": 0.5889110565185547,
+      "learning_rate": 1e-05,
+      "loss": 6.0812,
+      "step": 342
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3177646474133504.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null