Training in progress, step 28500, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddaa72deb8ab44e6b062f6762445853e6cbb361c09cd5f5ca7250588ef827f6a
 size 242041896

 version https://git-lfs.github.com/spec/v1
+oid sha256:cbbf965be7942c0e27651f08aa3191ec2be85d55d412a0d1c519a5ca39ba3f93
 size 242041896

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa38d7628f4b5efece14b6017eb194c8d0f5bb07dd80fa4cb3a1b986649e1b3c
 size 484163514

 version https://git-lfs.github.com/spec/v1
+oid sha256:446eea2f164ee2cfcbd190b9ccf3436cb46689a0e0520c77c30c25096c4ca56c
 size 484163514

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bad8f074d634b0b7f7ecbd77330c361a45090adb1753e3cd2b0453d620c49cb
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:11c8922e25c3188be38e2c57336530b528daa01c5a2d6eff2a6088151699e8c2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eacef275294161468a385a147f2879c9e08ddcbb47ca8cc9e49fdd2dffa3b0be
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cd4bc404586b9c1e5f5df9057ed7bbfb2604dbfed03f22beda6e7caf655bf091
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.297114794352363,
   "eval_steps": 500,
-  "global_step": 28000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -383,13 +383,19 @@
       "learning_rate": 0.00014094536525475752,
       "loss": 0.4547,
       "step": 28000
     }
   ],
   "logging_steps": 500,
   "max_steps": 32580,
   "num_train_epochs": 5,
   "save_steps": 500,
-  "total_flos": 1.4845312307822592e+16,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.373848987108656,
   "eval_steps": 500,
+  "global_step": 28500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00014094536525475752,
       "loss": 0.4547,
       "step": 28000
+    },
+    {
+      "epoch": 4.37,
+      "learning_rate": 0.00012559852670349908,
+      "loss": 0.4588,
+      "step": 28500
     }
   ],
   "logging_steps": 500,
   "max_steps": 32580,
   "num_train_epochs": 5,
   "save_steps": 500,
+  "total_flos": 1.5109964741738496e+16,
   "trial_name": null,
   "trial_params": null
 }