Training in progress, step 26, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +102 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89a03036b254b2c179b14794d53f184786a397f18d41c697622961f9d8a72129
 size 80013120

 version https://git-lfs.github.com/spec/v1
+oid sha256:f50801ca7fdea39ab8dffd8246fb170afc53418cabe1fa04245417373645482a
 size 80013120

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3da90b2b1ae8a31883fd0e90f326f8950471e643d70eff75244a0f4a60803c01
 size 41119636

 version https://git-lfs.github.com/spec/v1
+oid sha256:b6ee54638816f60627fe6da0187befe39f9d5e4113122bcd4fd88bb1acb59217
 size 41119636

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a5b67d013f15705cfe81f9c786848554d1fdef3768c4bbc0fd1b39326d74c4a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e95801a4c403b551085cb43e2962db50d33a2ed7cc9363c38784e3077ff4975
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea3a68034a6fca5f7c1565a9344e41aadbdff3752a8799ae94336c8c84c6d63d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa0fde01e60897f55f4ebaca5a490a1a483a2c53b2ae8e7543860a62c5b3298c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.011056772273017223,
   "eval_steps": 13,
-  "global_step": 13,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -114,6 +114,105 @@
       "eval_samples_per_second": 27.228,
       "eval_steps_per_second": 13.642,
       "step": 13
     }
   ],
   "logging_steps": 1,
@@ -133,7 +232,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2158071405084672.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.022113544546034446,
   "eval_steps": 13,
+  "global_step": 26,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 27.228,
       "eval_steps_per_second": 13.642,
       "step": 13
+    },
+    {
+      "epoch": 0.011907293217095471,
+      "grad_norm": 3.9054300785064697,
+      "learning_rate": 4.877641290737884e-05,
+      "loss": 7.6945,
+      "step": 14
+    },
+    {
+      "epoch": 0.01275781416117372,
+      "grad_norm": 6.3420305252075195,
+      "learning_rate": 4.8096988312782174e-05,
+      "loss": 8.6295,
+      "step": 15
+    },
+    {
+      "epoch": 0.013608335105251966,
+      "grad_norm": 6.098008155822754,
+      "learning_rate": 4.72751631047092e-05,
+      "loss": 7.3762,
+      "step": 16
+    },
+    {
+      "epoch": 0.014458856049330214,
+      "grad_norm": 6.397308826446533,
+      "learning_rate": 4.6316004108852305e-05,
+      "loss": 8.6222,
+      "step": 17
+    },
+    {
+      "epoch": 0.015309376993408463,
+      "grad_norm": 4.59157133102417,
+      "learning_rate": 4.522542485937369e-05,
+      "loss": 7.8273,
+      "step": 18
+    },
+    {
+      "epoch": 0.01615989793748671,
+      "grad_norm": 5.998552322387695,
+      "learning_rate": 4.401014914000078e-05,
+      "loss": 9.8392,
+      "step": 19
+    },
+    {
+      "epoch": 0.01701041888156496,
+      "grad_norm": 4.757502555847168,
+      "learning_rate": 4.267766952966369e-05,
+      "loss": 8.913,
+      "step": 20
+    },
+    {
+      "epoch": 0.017860939825643206,
+      "grad_norm": 5.794950485229492,
+      "learning_rate": 4.123620120825459e-05,
+      "loss": 8.4942,
+      "step": 21
+    },
+    {
+      "epoch": 0.018711460769721456,
+      "grad_norm": 7.268848419189453,
+      "learning_rate": 3.969463130731183e-05,
+      "loss": 9.6959,
+      "step": 22
+    },
+    {
+      "epoch": 0.019561981713799702,
+      "grad_norm": 4.623784065246582,
+      "learning_rate": 3.8062464117898724e-05,
+      "loss": 7.3824,
+      "step": 23
+    },
+    {
+      "epoch": 0.02041250265787795,
+      "grad_norm": 4.572867393493652,
+      "learning_rate": 3.634976249348867e-05,
+      "loss": 8.0812,
+      "step": 24
+    },
+    {
+      "epoch": 0.0212630236019562,
+      "grad_norm": 6.01804780960083,
+      "learning_rate": 3.456708580912725e-05,
+      "loss": 8.0338,
+      "step": 25
+    },
+    {
+      "epoch": 0.022113544546034446,
+      "grad_norm": 4.490617275238037,
+      "learning_rate": 3.272542485937369e-05,
+      "loss": 7.893,
+      "step": 26
+    },
+    {
+      "epoch": 0.022113544546034446,
+      "eval_loss": 1.961138367652893,
+      "eval_runtime": 18.1047,
+      "eval_samples_per_second": 27.341,
+      "eval_steps_per_second": 13.698,
+      "step": 26
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4316142810169344.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null