Training in progress, step 50, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +81 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9257ac4bd5394e1a091d3849a60d9e88ad607d26e1c814afb826237b140b2fe9
 size 80013120

 version https://git-lfs.github.com/spec/v1
+oid sha256:61ffbe03e4e2afb49254b050b5db37395331ea153a803231a9febca8ae6c731c
 size 80013120

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83456b36b0dce72567d87510e714139be0044d8bc08e8e3ff02de707249fd696
 size 41119636

 version https://git-lfs.github.com/spec/v1
+oid sha256:5ff8ebcc58bb0d8a00287d72885e5d89d5f60f8f3b790e41d6e3ee7b7973eb7d
 size 41119636

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed392f1e42c4e7f65661970b1ec941968361613202f48a3141f9aacaa4003064
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0ba5d87c7444a833ee26f2d1c68d635206254e2a48012a055a0b86cb260dd27
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28418a35cb7e15ebbce37743b08fd366c25ee320167b307a3e449a74781d02de
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:439e51f57871ee9c2bc8b35458a0c03f9b948af7a0d15ffe5e1cf9789955c6c8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.007752708478282477,
   "eval_steps": 13,
-  "global_step": 39,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -312,6 +312,83 @@
       "eval_samples_per_second": 28.646,
       "eval_steps_per_second": 14.33,
       "step": 39
     }
   ],
   "logging_steps": 1,
@@ -326,12 +403,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6392777558458368.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.009939369843951893,
   "eval_steps": 13,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 28.646,
       "eval_steps_per_second": 14.33,
       "step": 39
+    },
+    {
+      "epoch": 0.007951495875161516,
+      "grad_norm": 3.0600802898406982,
+      "learning_rate": 7.3223304703363135e-06,
+      "loss": 12.3082,
+      "step": 40
+    },
+    {
+      "epoch": 0.008150283272040553,
+      "grad_norm": 2.5751516819000244,
+      "learning_rate": 5.989850859999227e-06,
+      "loss": 11.4527,
+      "step": 41
+    },
+    {
+      "epoch": 0.00834907066891959,
+      "grad_norm": 3.5589427947998047,
+      "learning_rate": 4.7745751406263165e-06,
+      "loss": 11.6761,
+      "step": 42
+    },
+    {
+      "epoch": 0.008547858065798629,
+      "grad_norm": 4.126989364624023,
+      "learning_rate": 3.6839958911476957e-06,
+      "loss": 11.1792,
+      "step": 43
+    },
+    {
+      "epoch": 0.008746645462677666,
+      "grad_norm": 2.9589903354644775,
+      "learning_rate": 2.7248368952908053e-06,
+      "loss": 10.7405,
+      "step": 44
+    },
+    {
+      "epoch": 0.008945432859556703,
+      "grad_norm": 3.088578224182129,
+      "learning_rate": 1.9030116872178316e-06,
+      "loss": 12.7116,
+      "step": 45
+    },
+    {
+      "epoch": 0.009144220256435742,
+      "grad_norm": 2.728550672531128,
+      "learning_rate": 1.2235870926211619e-06,
+      "loss": 11.8275,
+      "step": 46
+    },
+    {
+      "epoch": 0.00934300765331478,
+      "grad_norm": 3.458416223526001,
+      "learning_rate": 6.907519900580861e-07,
+      "loss": 11.6581,
+      "step": 47
+    },
+    {
+      "epoch": 0.009541795050193818,
+      "grad_norm": 3.6537857055664062,
+      "learning_rate": 3.077914851215585e-07,
+      "loss": 11.4921,
+      "step": 48
+    },
+    {
+      "epoch": 0.009740582447072856,
+      "grad_norm": 2.9352803230285645,
+      "learning_rate": 7.706665667180091e-08,
+      "loss": 11.6247,
+      "step": 49
+    },
+    {
+      "epoch": 0.009939369843951893,
+      "grad_norm": 3.011121988296509,
+      "learning_rate": 0.0,
+      "loss": 12.5026,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8184384007962624.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null