Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +188 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1bcd237ec97f27cfa9a8d03f2a0f0eead799216ebda4990a0a797c550b679ed
 size 432223744

 version https://git-lfs.github.com/spec/v1
+oid sha256:c8d50e07fef5cd05e9fb5d3a4caaa55c28d51e6f6b580c3a294ed154f7466140
 size 432223744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9efbc021556258d06c51d06ec48fc1f21cbf915a409a9bc355e60625fdd5f238
 size 864785974

 version https://git-lfs.github.com/spec/v1
+oid sha256:432c3d9be7764aa256113a9b638ff1478eff07f0c7bf01dc330e43991392f0f0
 size 864785974

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:736ca8638e67612c27b462d7d0144f763a97f5dc7583efb653184d46d254b3a2
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d4deb9723d0ae9ab1702b5acae73e2feb343dbd78a72ce9126ee8ba5efc34bef
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d28400c5dd088e87ee26ebfbb25cd9f81aca15a870ddee0cf89e4bbf6ac787e4
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff17fd676ada7fe57f49151a5d3cad7799bab978baf67e871fc112b417053757
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:162db2f53516f98ad36b63c56f89ae4130220b6c3a8db8da39458d04744aaff3
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b17cace2ba47e5cd8666fd6dda94302ede6d17c3f19ab193b06d5116b0e09c3
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35421cced062138484cb21d06c92c44ab67c23449b3186ea55cd6ad8764f0a8c
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ff9c239e00f5c677c7701514db3f4540388ad28b86e5a94473469917dc11460
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.07115349173545837,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.04735409020954185,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 14.729,
       "eval_steps_per_second": 3.83,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -212,7 +395,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.315690861756416e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.07115349173545837,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 0.0947081804190837,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.729,
       "eval_steps_per_second": 3.83,
       "step": 25
+    },
+    {
+      "epoch": 0.049248253817923526,
+      "grad_norm": 0.4402880370616913,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 0.0597,
+      "step": 26
+    },
+    {
+      "epoch": 0.051142417426305195,
+      "grad_norm": 0.47798994183540344,
+      "learning_rate": 5.205685918464356e-05,
+      "loss": 0.0612,
+      "step": 27
+    },
+    {
+      "epoch": 0.05303658103468687,
+      "grad_norm": 0.46198028326034546,
+      "learning_rate": 4.912632135009769e-05,
+      "loss": 0.0659,
+      "step": 28
+    },
+    {
+      "epoch": 0.054930744643068546,
+      "grad_norm": 0.6352492570877075,
+      "learning_rate": 4.6220935509274235e-05,
+      "loss": 0.0778,
+      "step": 29
+    },
+    {
+      "epoch": 0.05682490825145022,
+      "grad_norm": 0.4305189847946167,
+      "learning_rate": 4.3353142970386564e-05,
+      "loss": 0.0541,
+      "step": 30
+    },
+    {
+      "epoch": 0.05871907185983189,
+      "grad_norm": 0.5531485676765442,
+      "learning_rate": 4.053522406135775e-05,
+      "loss": 0.049,
+      "step": 31
+    },
+    {
+      "epoch": 0.06061323546821357,
+      "grad_norm": 0.39484867453575134,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 0.0118,
+      "step": 32
+    },
+    {
+      "epoch": 0.06250739907659524,
+      "grad_norm": 0.8744118809700012,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 0.0918,
+      "step": 33
+    },
+    {
+      "epoch": 0.06440156268497692,
+      "grad_norm": 0.8326122164726257,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 0.0415,
+      "step": 34
+    },
+    {
+      "epoch": 0.06629572629335859,
+      "grad_norm": 0.5290285348892212,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 0.045,
+      "step": 35
+    },
+    {
+      "epoch": 0.06818988990174027,
+      "grad_norm": 0.649003803730011,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 0.0923,
+      "step": 36
+    },
+    {
+      "epoch": 0.07008405351012194,
+      "grad_norm": 0.729088544845581,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 0.0805,
+      "step": 37
+    },
+    {
+      "epoch": 0.0719782171185036,
+      "grad_norm": 0.4393479824066162,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 0.0542,
+      "step": 38
+    },
+    {
+      "epoch": 0.07387238072688529,
+      "grad_norm": 0.4222414791584015,
+      "learning_rate": 2.1167208663446025e-05,
+      "loss": 0.0428,
+      "step": 39
+    },
+    {
+      "epoch": 0.07576654433526696,
+      "grad_norm": 0.5799451470375061,
+      "learning_rate": 1.9299099686894423e-05,
+      "loss": 0.0808,
+      "step": 40
+    },
+    {
+      "epoch": 0.07766070794364863,
+      "grad_norm": 0.41754525899887085,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 0.0577,
+      "step": 41
+    },
+    {
+      "epoch": 0.07955487155203031,
+      "grad_norm": 0.46539899706840515,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 0.0377,
+      "step": 42
+    },
+    {
+      "epoch": 0.08144903516041198,
+      "grad_norm": 0.9608904123306274,
+      "learning_rate": 1.464072663102903e-05,
+      "loss": 0.0952,
+      "step": 43
+    },
+    {
+      "epoch": 0.08334319876879366,
+      "grad_norm": 0.5402638912200928,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 0.0921,
+      "step": 44
+    },
+    {
+      "epoch": 0.08523736237717533,
+      "grad_norm": 0.7494412660598755,
+      "learning_rate": 1.2388144172720251e-05,
+      "loss": 0.0793,
+      "step": 45
+    },
+    {
+      "epoch": 0.087131525985557,
+      "grad_norm": 0.40757372975349426,
+      "learning_rate": 1.1533337816991932e-05,
+      "loss": 0.0604,
+      "step": 46
+    },
+    {
+      "epoch": 0.08902568959393868,
+      "grad_norm": 0.6910350918769836,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 0.104,
+      "step": 47
+    },
+    {
+      "epoch": 0.09091985320232035,
+      "grad_norm": 0.6446797847747803,
+      "learning_rate": 1.0384981238178534e-05,
+      "loss": 0.0499,
+      "step": 48
+    },
+    {
+      "epoch": 0.09281401681070202,
+      "grad_norm": 0.7581434845924377,
+      "learning_rate": 1.0096348454262845e-05,
+      "loss": 0.0707,
+      "step": 49
+    },
+    {
+      "epoch": 0.0947081804190837,
+      "grad_norm": 0.5170426368713379,
+      "learning_rate": 1e-05,
+      "loss": 0.0702,
+      "step": 50
+    },
+    {
+      "epoch": 0.0947081804190837,
+      "eval_loss": 0.08390604704618454,
+      "eval_runtime": 3.3968,
+      "eval_samples_per_second": 14.72,
+      "eval_steps_per_second": 3.827,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.631381723512832e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null