Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4145acb363744983f844a9a5251e587388fcc8de8c64dfe169e4ad837e165a0
 size 432223744

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5375ec881aa698754f90c40cdf5359944bb360e2ddaa4cb40ed758345651d51
 size 432223744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e806e0abe2c0811deaa59aa2f21fe09c55f3ca1dc18cfc03d208fc1c9f5d0918
 size 864785974

 version https://git-lfs.github.com/spec/v1
+oid sha256:683605dd3c1c97a7efc3494a31d1074f78502fab5e08f1454c801a606f9269f3
 size 864785974

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5076b25f39846e460066a033bfb8f433a2290c8bf5209af10bbe14ddf5fc2272
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:aab8ed90bc42e4702ed3cc2cec62f1dc813167f233f6c7658a7cf647d677d716
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f3ca288b385f92a814663316dd9235e744fb8c340e52eced7ed45314d528386
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:266270fc9abcf187991b228fc967bc66902354a93bc655b3b2f55f88f3d109cf
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:532d7aa98ba518b9fa43956bbc74bed5c56425009226efddebd4f0f4dc43545d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:37550849c1b1d45102ffdbcd8f3d5a4f9eaafd1911068339b8b4168535e59545
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b60ea2bc5a28663aeb0ba38eb289edeed1ac122a7b3924f3a749852f0b01831
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:42c253ce38c157f94eed8e6df903bbc231c08ed9740c40daaea0ff634f30cce9
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.5360515117645264,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.121765601217656,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 19.353,
       "eval_steps_per_second": 2.421,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.317763172739318e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.3633440434932709,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.243531202435312,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 19.353,
       "eval_steps_per_second": 2.421,
       "step": 25
+    },
+    {
+      "epoch": 0.12663622526636226,
+      "grad_norm": 2.587963819503784,
+      "learning_rate": 5e-05,
+      "loss": 0.4985,
+      "step": 26
+    },
+    {
+      "epoch": 0.13150684931506848,
+      "grad_norm": 2.144798994064331,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 0.4112,
+      "step": 27
+    },
+    {
+      "epoch": 0.13637747336377473,
+      "grad_norm": 2.472270965576172,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 0.4643,
+      "step": 28
+    },
+    {
+      "epoch": 0.14124809741248098,
+      "grad_norm": 2.4505295753479004,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 0.4096,
+      "step": 29
+    },
+    {
+      "epoch": 0.1461187214611872,
+      "grad_norm": 3.142169237136841,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 0.5782,
+      "step": 30
+    },
+    {
+      "epoch": 0.15098934550989346,
+      "grad_norm": 2.252835988998413,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 0.3989,
+      "step": 31
+    },
+    {
+      "epoch": 0.1558599695585997,
+      "grad_norm": 2.8984851837158203,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 0.4837,
+      "step": 32
+    },
+    {
+      "epoch": 0.16073059360730593,
+      "grad_norm": 2.4451897144317627,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 0.5014,
+      "step": 33
+    },
+    {
+      "epoch": 0.16560121765601218,
+      "grad_norm": 3.045475721359253,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.5486,
+      "step": 34
+    },
+    {
+      "epoch": 0.1704718417047184,
+      "grad_norm": 2.2585620880126953,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 0.4324,
+      "step": 35
+    },
+    {
+      "epoch": 0.17534246575342466,
+      "grad_norm": 2.714506149291992,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 0.4549,
+      "step": 36
+    },
+    {
+      "epoch": 0.1802130898021309,
+      "grad_norm": 3.031651258468628,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 0.5828,
+      "step": 37
+    },
+    {
+      "epoch": 0.18508371385083713,
+      "grad_norm": 3.5300300121307373,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.4747,
+      "step": 38
+    },
+    {
+      "epoch": 0.18995433789954339,
+      "grad_norm": 1.8795702457427979,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 0.3026,
+      "step": 39
+    },
+    {
+      "epoch": 0.1948249619482496,
+      "grad_norm": 1.9291534423828125,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 0.2992,
+      "step": 40
+    },
+    {
+      "epoch": 0.19969558599695586,
+      "grad_norm": 2.1998214721679688,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.3774,
+      "step": 41
+    },
+    {
+      "epoch": 0.2045662100456621,
+      "grad_norm": 2.155195474624634,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.3231,
+      "step": 42
+    },
+    {
+      "epoch": 0.20943683409436833,
+      "grad_norm": 2.3852322101593018,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.3971,
+      "step": 43
+    },
+    {
+      "epoch": 0.2143074581430746,
+      "grad_norm": 1.8780982494354248,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.2854,
+      "step": 44
+    },
+    {
+      "epoch": 0.2191780821917808,
+      "grad_norm": 2.590459108352661,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.4148,
+      "step": 45
+    },
+    {
+      "epoch": 0.22404870624048706,
+      "grad_norm": 2.508012533187866,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.3513,
+      "step": 46
+    },
+    {
+      "epoch": 0.2289193302891933,
+      "grad_norm": 2.587348461151123,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.3913,
+      "step": 47
+    },
+    {
+      "epoch": 0.23378995433789954,
+      "grad_norm": 2.522669792175293,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.3819,
+      "step": 48
+    },
+    {
+      "epoch": 0.2386605783866058,
+      "grad_norm": 2.7458508014678955,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 0.4454,
+      "step": 49
+    },
+    {
+      "epoch": 0.243531202435312,
+      "grad_norm": 3.5217461585998535,
+      "learning_rate": 0.0,
+      "loss": 0.6224,
+      "step": 50
+    },
+    {
+      "epoch": 0.243531202435312,
+      "eval_loss": 0.3633440434932709,
+      "eval_runtime": 71.4669,
+      "eval_samples_per_second": 19.352,
+      "eval_steps_per_second": 2.421,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.633454034495734e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null