Training in progress, step 46, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +151 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9150886ce582d66496b414083ffc281de8b6d0d1b94fc24395d75005d48df296
 size 239135488

 version https://git-lfs.github.com/spec/v1
+oid sha256:39d73839fd17fbb3152712d62056dfad250b811cffec502c59277fbdad0c6426
 size 239135488

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:232adc57452effe2e8cbee1490481e1383a36444190d01599572f92e6f2a4490
 size 478528978

 version https://git-lfs.github.com/spec/v1
+oid sha256:3558693fed5e95a1cb01dfa90c09e073c04ff7781c90b33a0a1668c4ef104192
 size 478528978

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da124dfdd7288f41ef0267372fe0db647ed3b93cda51aef4c0cdf02543a1b56f
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad346de393b69930cd98c2d9dddc522df1b6f98a1fade4ed7cea875d94c705ed
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d48cdbb9d105adc3bb0ea6c176e5d727bd3d4503626c1854c5d652b11520e15
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:ac1fc96886b8b36c0c4e12b54b517ec1b3b53a13740b77a2b657ee0fd87838e4
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:641ab8398bda456359862a5efb58165dc81801155740fd2d4e5627c64144a7ba
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:08396b0649912549a5f42912420531e94ad2f2c261e62383dc76a8fae8784d6d
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:feb7d5312d25517ff2ded966d5b732b14796b9a2a0bc92939da1b2b91e25f992
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ec40c3e114ae92aa2ccf7d0cc201894f246f53e9c45c1c047cd9733d4254bf0
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43c559cbde9153c17d7a372e6ef412013941e6297ae86a5518b43caf90efbb1f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b7f1ee7e5310274a73405630534a716def8b331763fa23521c74926adf94696e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 9.99334716796875,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 1.6597510373443982,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,153 @@
       "eval_samples_per_second": 46.093,
       "eval_steps_per_second": 5.875,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +368,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.487279663087616e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 9.99334716796875,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 3.0539419087136928,
   "eval_steps": 25,
+  "global_step": 46,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 46.093,
       "eval_steps_per_second": 5.875,
       "step": 25
+    },
+    {
+      "epoch": 1.7261410788381744,
+      "grad_norm": 9312404.0,
+      "learning_rate": 4.288425808633575e-05,
+      "loss": 9.8302,
+      "step": 26
+    },
+    {
+      "epoch": 1.79253112033195,
+      "grad_norm": 1229930496000.0,
+      "learning_rate": 3.937173552235117e-05,
+      "loss": 10.3458,
+      "step": 27
+    },
+    {
+      "epoch": 1.8589211618257262,
+      "grad_norm": 39236595712.0,
+      "learning_rate": 3.591337215792852e-05,
+      "loss": 10.2447,
+      "step": 28
+    },
+    {
+      "epoch": 1.9253112033195021,
+      "grad_norm": 7421381120.0,
+      "learning_rate": 3.2526791020045086e-05,
+      "loss": 10.1415,
+      "step": 29
+    },
+    {
+      "epoch": 1.991701244813278,
+      "grad_norm": 57041700.0,
+      "learning_rate": 2.9229249349905684e-05,
+      "loss": 9.8004,
+      "step": 30
+    },
+    {
+      "epoch": 2.0580912863070537,
+      "grad_norm": 3407954575360.0,
+      "learning_rate": 2.603755066399718e-05,
+      "loss": 20.4544,
+      "step": 31
+    },
+    {
+      "epoch": 2.12448132780083,
+      "grad_norm": 2498396928.0,
+      "learning_rate": 2.296795912722014e-05,
+      "loss": 10.1161,
+      "step": 32
+    },
+    {
+      "epoch": 2.190871369294606,
+      "grad_norm": 738044672.0,
+      "learning_rate": 2.0036116674432654e-05,
+      "loss": 10.0431,
+      "step": 33
+    },
+    {
+      "epoch": 2.2572614107883817,
+      "grad_norm": 8632801049968640.0,
+      "learning_rate": 1.725696330273575e-05,
+      "loss": 9.9673,
+      "step": 34
+    },
+    {
+      "epoch": 2.323651452282158,
+      "grad_norm": 222836080640.0,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 10.3291,
+      "step": 35
+    },
+    {
+      "epoch": 2.3900414937759336,
+      "grad_norm": 264477792.0,
+      "learning_rate": 1.2212521282287092e-05,
+      "loss": 10.2066,
+      "step": 36
+    },
+    {
+      "epoch": 2.4564315352697097,
+      "grad_norm": 187083776.0,
+      "learning_rate": 9.972937953781986e-06,
+      "loss": 10.1565,
+      "step": 37
+    },
+    {
+      "epoch": 2.5228215767634854,
+      "grad_norm": 22868152811520.0,
+      "learning_rate": 7.937323358440935e-06,
+      "loss": 10.3219,
+      "step": 38
+    },
+    {
+      "epoch": 2.5892116182572615,
+      "grad_norm": 43378626560.0,
+      "learning_rate": 6.116050521637218e-06,
+      "loss": 10.4927,
+      "step": 39
+    },
+    {
+      "epoch": 2.6556016597510372,
+      "grad_norm": 13558889472.0,
+      "learning_rate": 4.5184002322740785e-06,
+      "loss": 9.9935,
+      "step": 40
+    },
+    {
+      "epoch": 2.7219917012448134,
+      "grad_norm": 194045920.0,
+      "learning_rate": 3.1525137500119207e-06,
+      "loss": 9.6793,
+      "step": 41
+    },
+    {
+      "epoch": 2.788381742738589,
+      "grad_norm": 1.1480066189112115e+17,
+      "learning_rate": 2.0253513192751373e-06,
+      "loss": 10.429,
+      "step": 42
+    },
+    {
+      "epoch": 2.854771784232365,
+      "grad_norm": 744684257280.0,
+      "learning_rate": 1.1426567014420297e-06,
+      "loss": 10.2057,
+      "step": 43
+    },
+    {
+      "epoch": 2.921161825726141,
+      "grad_norm": 233727787008.0,
+      "learning_rate": 5.089279059533658e-07,
+      "loss": 10.2095,
+      "step": 44
+    },
+    {
+      "epoch": 2.987551867219917,
+      "grad_norm": 51029136.0,
+      "learning_rate": 1.2739426948732424e-07,
+      "loss": 9.6324,
+      "step": 45
+    },
+    {
+      "epoch": 3.0539419087136928,
+      "grad_norm": 4598173196615680.0,
+      "learning_rate": 0.0,
+      "loss": 20.0993,
+      "step": 46
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.7365945800812134e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null