Training in progress, step 80, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e672f97b29a9edce6f91b2e15c39845acd8e5b4753ab13d398e905d5543c13a
 size 59827904

 version https://git-lfs.github.com/spec/v1
+oid sha256:f13ba07bc586558fb64f1b66825209f18db56ce572477ab8155107c3ff3f0a88
 size 59827904

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aea7773082e0f49ff710e5dd6702209f08402f1b7153c857b70854dc0dd63a98
 size 30875540

 version https://git-lfs.github.com/spec/v1
+oid sha256:030da15cbb81bbe486c293ede63079918942591b2c0aab7e2ba0cf09fe49d1a5
 size 30875540

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02cc5e0d782e790ab911b5caf87830bdc24d1c04827eeec883676aadfd733f51
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:646ade6023f73d01b10ebe8ac45df7f64238b06f8264b4a748bbde983b0bdd8f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52408658f8afed2d0ac64c41a3c3f93b4c8a8478fa6362941012d17f634f7dad
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4bc75fc1c14b28d29d31fa9d4252536c919fc25a390fac3a1e8c09d6575b4029
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.03667705849990831,
   "eval_steps": 34,
-  "global_step": 75,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -556,6 +556,41 @@
       "learning_rate": 0.00016736956436465573,
       "loss": 9.1275,
       "step": 75
     }
   ],
   "logging_steps": 1,
@@ -575,7 +610,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.653359756967936e+17,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.039122195733235526,
   "eval_steps": 34,
+  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00016736956436465573,
       "loss": 9.1275,
       "step": 75
+    },
+    {
+      "epoch": 0.03716608594657375,
+      "grad_norm": Infinity,
+      "learning_rate": 0.0001659924534878723,
+      "loss": 8.57,
+      "step": 76
+    },
+    {
+      "epoch": 0.037655113393239195,
+      "grad_norm": Infinity,
+      "learning_rate": 0.00016459280624867874,
+      "loss": 8.7525,
+      "step": 77
+    },
+    {
+      "epoch": 0.03814414083990464,
+      "grad_norm": 1.074333141353418e+19,
+      "learning_rate": 0.0001631711006253251,
+      "loss": 8.9513,
+      "step": 78
+    },
+    {
+      "epoch": 0.03863316828657008,
+      "grad_norm": Infinity,
+      "learning_rate": 0.0001617278221289793,
+      "loss": 8.9845,
+      "step": 79
+    },
+    {
+      "epoch": 0.039122195733235526,
+      "grad_norm": Infinity,
+      "learning_rate": 0.00016026346363792567,
+      "loss": 9.2195,
+      "step": 80
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.7635837407657984e+17,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null