Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +82 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f9d3a455b48f6346461c9f928d1610c220bf18f1cefcc0e15177b76d372a6b4
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a79d6468f9a8226984a35f278d2afc4996fc80bf229f9e9f4696b88b0244d70
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd021e5e05e7646b803bdd3cb4aca11c66b541e5f7ae4070bb9dd51104cce573
 size 170920084

 version https://git-lfs.github.com/spec/v1
+oid sha256:eed3ef08c89e9a07cd3218a687a41dd717cf80833936a581ab6db0cd1f2aabdf
 size 170920084

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9be6a0bd48f6bc89b56368d3add1033bdc63312da525cee5e56fe50f3a009c4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:6fa0933e237e365bbcbd10bdc7014a011ec526d46395768542a06f4bd5bcda7b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23b27ab0ae2b9af6f3d4c84cdaf8b0fc887acf71f8f726b270a3bce2845000a9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5c84ec0ff3c8c6aa13b25568668096db118f67ce80a9fa015a625446606f15d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.2894105911254883,
   "best_model_checkpoint": "miner_id_24/checkpoint-80",
-  "epoch": 0.025414754677020826,
   "eval_steps": 10,
-  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -717,6 +717,84 @@
       "eval_samples_per_second": 1.774,
       "eval_steps_per_second": 1.774,
       "step": 90
     }
   ],
   "logging_steps": 1,
@@ -731,7 +809,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -745,7 +823,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.714094069481472e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.2894105911254883,
   "best_model_checkpoint": "miner_id_24/checkpoint-80",
+  "epoch": 0.028238616307800918,
   "eval_steps": 10,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1.774,
       "eval_steps_per_second": 1.774,
       "step": 90
+    },
+    {
+      "epoch": 0.025697140840098835,
+      "grad_norm": 0.922839343547821,
+      "learning_rate": 0.00018681546242521786,
+      "loss": 2.2968,
+      "step": 91
+    },
+    {
+      "epoch": 0.025979527003176843,
+      "grad_norm": 0.9457669258117676,
+      "learning_rate": 0.00018649548579446936,
+      "loss": 1.5159,
+      "step": 92
+    },
+    {
+      "epoch": 0.026261913166254855,
+      "grad_norm": 1.3212480545043945,
+      "learning_rate": 0.0001861719536730795,
+      "loss": 1.8084,
+      "step": 93
+    },
+    {
+      "epoch": 0.026544299329332863,
+      "grad_norm": 1.1744962930679321,
+      "learning_rate": 0.00018584487936018661,
+      "loss": 2.3983,
+      "step": 94
+    },
+    {
+      "epoch": 0.026826685492410872,
+      "grad_norm": 0.9709725379943848,
+      "learning_rate": 0.00018551427630053463,
+      "loss": 1.8034,
+      "step": 95
+    },
+    {
+      "epoch": 0.02710907165548888,
+      "grad_norm": 0.878976047039032,
+      "learning_rate": 0.00018518015808392045,
+      "loss": 1.2276,
+      "step": 96
+    },
+    {
+      "epoch": 0.02739145781856689,
+      "grad_norm": 1.220984935760498,
+      "learning_rate": 0.00018484253844463526,
+      "loss": 1.0919,
+      "step": 97
+    },
+    {
+      "epoch": 0.0276738439816449,
+      "grad_norm": 0.41714727878570557,
+      "learning_rate": 0.00018450143126090015,
+      "loss": 1.0902,
+      "step": 98
+    },
+    {
+      "epoch": 0.02795623014472291,
+      "grad_norm": 0.9148246049880981,
+      "learning_rate": 0.00018415685055429533,
+      "loss": 2.1429,
+      "step": 99
+    },
+    {
+      "epoch": 0.028238616307800918,
+      "grad_norm": 0.8029168844223022,
+      "learning_rate": 0.00018380881048918405,
+      "loss": 1.5813,
+      "step": 100
+    },
+    {
+      "epoch": 0.028238616307800918,
+      "eval_loss": 1.3071253299713135,
+      "eval_runtime": 421.1658,
+      "eval_samples_per_second": 1.771,
+      "eval_steps_per_second": 1.771,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 7.46010452164608e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null