Training in progress, step 26, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +102 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48d0f5c06b15d2287468433011bb8d33fb6fa8615006cb2aef854693bcd667dd
 size 45118424

 version https://git-lfs.github.com/spec/v1
+oid sha256:443e4e4f937d73e0119310938caeeaeb381794e87ba4c05deb852213a5fa4f98
 size 45118424

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9335b7988d854204003dfd2d754bb20a6a1235830f7cb713558251e3291f93d8
 size 23159290

 version https://git-lfs.github.com/spec/v1
+oid sha256:f40a11bcf1f0c7118ef3e1efd4e6c078e8c6b74b94cd2e415a2af1297f7ed79f
 size 23159290

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29f1d72b6c239612cdd5f3fa99a08419a8c573c441fb60cde5f8c820aea0be58
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb90d719e937dd8276373ed7d70fad2369f124f7cf828fbeecbc84855fde6133
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9cc97b0c454fb2242f4c07e8b154a05c758918a1383f9c98000cd4ccc34580b5
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:1977bb9f4ad57391ef2df027cb00edafbd5be714333f455abd93cefc987f397c
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5adea6def46a60dd5782726d51a89bdf0f30226cd791e511d8af09a5644f99e2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e65bdb10468d12c8b6afa89fe8730e38a94a4f704431de04b0c1bf27440afce
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0030924999442457943,
   "eval_steps": 13,
-  "global_step": 13,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -114,6 +114,105 @@
       "eval_samples_per_second": 39.826,
       "eval_steps_per_second": 9.957,
       "step": 13
     }
   ],
   "logging_steps": 1,
@@ -133,7 +232,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.025725889386906e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0061849998884915885,
   "eval_steps": 13,
+  "global_step": 26,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 39.826,
       "eval_steps_per_second": 9.957,
       "step": 13
+    },
+    {
+      "epoch": 0.0033303845553416247,
+      "grad_norm": 5.04780387878418,
+      "learning_rate": 8.535533905932738e-05,
+      "loss": 0.131,
+      "step": 14
+    },
+    {
+      "epoch": 0.0035682691664374547,
+      "grad_norm": 1.841731309890747,
+      "learning_rate": 8.296729075500344e-05,
+      "loss": 0.0827,
+      "step": 15
+    },
+    {
+      "epoch": 0.003806153777533285,
+      "grad_norm": 1.6053825616836548,
+      "learning_rate": 8.043807145043604e-05,
+      "loss": 0.0752,
+      "step": 16
+    },
+    {
+      "epoch": 0.004044038388629116,
+      "grad_norm": 1.511683702468872,
+      "learning_rate": 7.777851165098012e-05,
+      "loss": 0.0741,
+      "step": 17
+    },
+    {
+      "epoch": 0.004281922999724946,
+      "grad_norm": 5.841466903686523,
+      "learning_rate": 7.500000000000001e-05,
+      "loss": 0.0867,
+      "step": 18
+    },
+    {
+      "epoch": 0.004519807610820777,
+      "grad_norm": 5.441026210784912,
+      "learning_rate": 7.211443451095007e-05,
+      "loss": 0.0663,
+      "step": 19
+    },
+    {
+      "epoch": 0.004757692221916607,
+      "grad_norm": 1.9340760707855225,
+      "learning_rate": 6.91341716182545e-05,
+      "loss": 0.0498,
+      "step": 20
+    },
+    {
+      "epoch": 0.004995576833012437,
+      "grad_norm": 1.7755597829818726,
+      "learning_rate": 6.607197326515808e-05,
+      "loss": 0.0431,
+      "step": 21
+    },
+    {
+      "epoch": 0.0052334614441082675,
+      "grad_norm": 0.8990055918693542,
+      "learning_rate": 6.294095225512603e-05,
+      "loss": 0.0285,
+      "step": 22
+    },
+    {
+      "epoch": 0.0054713460552040976,
+      "grad_norm": 1.0627079010009766,
+      "learning_rate": 5.9754516100806423e-05,
+      "loss": 0.0427,
+      "step": 23
+    },
+    {
+      "epoch": 0.005709230666299928,
+      "grad_norm": 0.7508691549301147,
+      "learning_rate": 5.6526309611002594e-05,
+      "loss": 0.0114,
+      "step": 24
+    },
+    {
+      "epoch": 0.0059471152773957585,
+      "grad_norm": 0.787846028804779,
+      "learning_rate": 5.327015646150716e-05,
+      "loss": 0.0305,
+      "step": 25
+    },
+    {
+      "epoch": 0.0061849998884915885,
+      "grad_norm": 1.2530877590179443,
+      "learning_rate": 5e-05,
+      "loss": 0.0366,
+      "step": 26
+    },
+    {
+      "epoch": 0.0061849998884915885,
+      "eval_loss": 0.02799982577562332,
+      "eval_runtime": 711.2213,
+      "eval_samples_per_second": 39.819,
+      "eval_steps_per_second": 9.955,
+      "step": 26
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 8.051451778773811e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null