Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f719d2cd7ae2f40700ff0d22296f9f9afe8a848bfc9bb14b31a273d24832947
 size 48552

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba1bc6da0b2c055b7a4881cba4c607677da86475b9784d1e5d626ad9120cdd5b
 size 48552

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0154812155c2980a75ca0fe09a7b6716ccdb13475739a5c6fb12ff7641519755
 size 113994

 version https://git-lfs.github.com/spec/v1
+oid sha256:c596d6d4a9982f2b272a741835e3e8e335de667f20c02e23978bb7798e4b2280
 size 113994

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:460989c1ce4e043c1d1af37d05f8314713c0f512d011ad8cdd9994b784859659
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf6b48987ce1e1af83749656216c94984d12dba2a119ddbb0c292fc3f7f25fb0
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57f62b0656489bef0e6ce6744b4a5fa5eecd366c4d849a324eac56c36b3e4739
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:e5ff859256d3832944fca7c57b7c7a2747b7ae6ff47cd3df95234120cd73b1de
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c809d8822c392da35824e7574422d50cb20961e44292b32f112ffdc99246e51d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:75f9508004f4dd23969692d995988d56b44ddfabffb26a6058ad19c0e5409e07
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:470754ca2aa4570dbbeb50b02cea4d31628c903fed521104f9c33befb77c1720
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:c9699cee4399748d88508a47d0da926ec82cfeb09652d1df3dcdd199287a1d11
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.936127662658691,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.00647615963733506,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 356.492,
       "eval_steps_per_second": 92.688,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 521247129600.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 11.934417724609375,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.01295231927467012,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 356.492,
       "eval_steps_per_second": 92.688,
       "step": 25
+    },
+    {
+      "epoch": 0.006735206022828463,
+      "grad_norm": 0.06334749609231949,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 11.9275,
+      "step": 26
+    },
+    {
+      "epoch": 0.006994252408321865,
+      "grad_norm": 0.07618872076272964,
+      "learning_rate": 5.205685918464356e-05,
+      "loss": 11.9327,
+      "step": 27
+    },
+    {
+      "epoch": 0.007253298793815268,
+      "grad_norm": 0.07427621632814407,
+      "learning_rate": 4.912632135009769e-05,
+      "loss": 11.9458,
+      "step": 28
+    },
+    {
+      "epoch": 0.00751234517930867,
+      "grad_norm": 0.06637663394212723,
+      "learning_rate": 4.6220935509274235e-05,
+      "loss": 11.9414,
+      "step": 29
+    },
+    {
+      "epoch": 0.007771391564802072,
+      "grad_norm": 0.0809975415468216,
+      "learning_rate": 4.3353142970386564e-05,
+      "loss": 11.9279,
+      "step": 30
+    },
+    {
+      "epoch": 0.008030437950295476,
+      "grad_norm": 0.10032597184181213,
+      "learning_rate": 4.053522406135775e-05,
+      "loss": 11.9344,
+      "step": 31
+    },
+    {
+      "epoch": 0.008289484335788877,
+      "grad_norm": 0.08284123986959457,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 11.9403,
+      "step": 32
+    },
+    {
+      "epoch": 0.008548530721282279,
+      "grad_norm": 0.0779564306139946,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 11.9337,
+      "step": 33
+    },
+    {
+      "epoch": 0.008807577106775683,
+      "grad_norm": 0.08791735768318176,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 11.9397,
+      "step": 34
+    },
+    {
+      "epoch": 0.009066623492269084,
+      "grad_norm": 0.07594088464975357,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 11.9452,
+      "step": 35
+    },
+    {
+      "epoch": 0.009325669877762488,
+      "grad_norm": 0.09855371713638306,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 11.9331,
+      "step": 36
+    },
+    {
+      "epoch": 0.00958471626325589,
+      "grad_norm": 0.07780013233423233,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 11.9263,
+      "step": 37
+    },
+    {
+      "epoch": 0.009843762648749291,
+      "grad_norm": 0.07891754806041718,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 11.9286,
+      "step": 38
+    },
+    {
+      "epoch": 0.010102809034242695,
+      "grad_norm": 0.06667237728834152,
+      "learning_rate": 2.1167208663446025e-05,
+      "loss": 11.9329,
+      "step": 39
+    },
+    {
+      "epoch": 0.010361855419736096,
+      "grad_norm": 0.08543813973665237,
+      "learning_rate": 1.9299099686894423e-05,
+      "loss": 11.9436,
+      "step": 40
+    },
+    {
+      "epoch": 0.0106209018052295,
+      "grad_norm": 0.08076290041208267,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 11.9388,
+      "step": 41
+    },
+    {
+      "epoch": 0.010879948190722901,
+      "grad_norm": 0.0808703601360321,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 11.9286,
+      "step": 42
+    },
+    {
+      "epoch": 0.011138994576216303,
+      "grad_norm": 0.11797121912240982,
+      "learning_rate": 1.464072663102903e-05,
+      "loss": 11.9377,
+      "step": 43
+    },
+    {
+      "epoch": 0.011398040961709707,
+      "grad_norm": 0.09217866510152817,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 11.9271,
+      "step": 44
+    },
+    {
+      "epoch": 0.011657087347203108,
+      "grad_norm": 0.09391262382268906,
+      "learning_rate": 1.2388144172720251e-05,
+      "loss": 11.9372,
+      "step": 45
+    },
+    {
+      "epoch": 0.011916133732696512,
+      "grad_norm": 0.07499683648347855,
+      "learning_rate": 1.1533337816991932e-05,
+      "loss": 11.9463,
+      "step": 46
+    },
+    {
+      "epoch": 0.012175180118189913,
+      "grad_norm": 0.11021185666322708,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 11.9321,
+      "step": 47
+    },
+    {
+      "epoch": 0.012434226503683315,
+      "grad_norm": 0.09896716475486755,
+      "learning_rate": 1.0384981238178534e-05,
+      "loss": 11.9329,
+      "step": 48
+    },
+    {
+      "epoch": 0.012693272889176719,
+      "grad_norm": 0.13691703975200653,
+      "learning_rate": 1.0096348454262845e-05,
+      "loss": 11.9287,
+      "step": 49
+    },
+    {
+      "epoch": 0.01295231927467012,
+      "grad_norm": 0.0724516212940216,
+      "learning_rate": 1e-05,
+      "loss": 11.9391,
+      "step": 50
+    },
+    {
+      "epoch": 0.01295231927467012,
+      "eval_loss": 11.934417724609375,
+      "eval_runtime": 0.1395,
+      "eval_samples_per_second": 358.339,
+      "eval_steps_per_second": 93.168,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1042494259200.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null