Training in progress, step 9, checkpoint

Files changed (5) hide show

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec47f99dec8473345e8df03a493894e64c88f4f6d92f565331342be81f948ea7
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:4cdd1fadf4861107e876284fc76126b27363e928e424ba4c41bf13a06eb55c86
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c658c881f46e445a824f6ddc3c9235cdbb1cac8d2ce518e9bbce72b92c4aafe7
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:eb1680d6054963db1bb6e4e92ac6a6242d5457f4dd0a8b34b36e385d38394429
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b55c13f9f5104f2999a6506d1f065ddf7c993442a9065ce2d8edf832a642877e
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:272e2c8beff4e70c47fbbe3b0c3808da37a706bb9051b90bf4c439d64b5bcd5c
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f159f53630400a152bc7ff7c4bd91e8254f975ab294b8b6f5b7111fae6f5aced
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:71b7a593ab14dcd892c315f7df33b2772361aca7df55be4a25872d1fbb24ae3a
 size 15024

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.011054813450023031,
   "eval_steps": 3,
-  "global_step": 6,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -73,6 +73,35 @@
       "eval_samples_per_second": 22.685,
       "eval_steps_per_second": 5.709,
       "step": 6
     }
   ],
   "logging_steps": 1,
@@ -92,7 +121,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8853171795394560.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.016582220175034548,
   "eval_steps": 3,
+  "global_step": 9,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.685,
       "eval_steps_per_second": 5.709,
       "step": 6
+    },
+    {
+      "epoch": 0.012897282358360202,
+      "grad_norm": NaN,
+      "learning_rate": 0.0,
+      "loss": 11.4302,
+      "step": 7
+    },
+    {
+      "epoch": 0.014739751266697375,
+      "grad_norm": NaN,
+      "learning_rate": 0.0,
+      "loss": 10.3072,
+      "step": 8
+    },
+    {
+      "epoch": 0.016582220175034548,
+      "grad_norm": NaN,
+      "learning_rate": 0.0,
+      "loss": 11.2267,
+      "step": 9
+    },
+    {
+      "epoch": 0.016582220175034548,
+      "eval_loss": 10.444669723510742,
+      "eval_runtime": 20.1358,
+      "eval_samples_per_second": 22.696,
+      "eval_steps_per_second": 5.711,
+      "step": 9
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.327975769309184e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null