Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:864af0cc3ce39c8ddf96fb0943cfcb694123fc73c223b009c56e1b9516c05d24
 size 191968

 version https://git-lfs.github.com/spec/v1
+oid sha256:93517d21005be70e4c463b84e7c3e68f8c431eadd1fd10f1058fcc8d328f0fa5
 size 191968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5843e9c3965b9c400f8cdff7a29eca288c1f3812e31f7cc55b243d75d4295414
 size 253144

 version https://git-lfs.github.com/spec/v1
+oid sha256:53d6c512d08cdfcc30f40caab3dc36006165eca1e237c4d6b8f2251cd5911b1f
 size 253144

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3fd520aa016ab6b2b4fb3d5f77690bcead8650f76330a74ec630e62e2557831
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:330e5faaa3878010c9991fb905084cef46b2217cbfd5607ef9941420efefa448
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:058219947ff711529dad66039b98666e7f25c784536319d0c623dbf17121adfa
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c535b91583e044007e2d58e4865f12783f75470fac48c23be105e95a5f5108d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.300241470336914,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 1.9138755980861244,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,364 @@
       "eval_samples_per_second": 427.459,
       "eval_steps_per_second": 106.865,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -759,7 +1117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 11155066060800.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.294066429138184,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 2.8708133971291865,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 427.459,
       "eval_steps_per_second": 106.865,
       "step": 100
+    },
+    {
+      "epoch": 1.9330143540669855,
+      "grad_norm": 0.14589062333106995,
+      "learning_rate": 3.173294878168025e-05,
+      "loss": 10.4054,
+      "step": 101
+    },
+    {
+      "epoch": 1.9521531100478469,
+      "grad_norm": 0.13475301861763,
+      "learning_rate": 3.074249318355046e-05,
+      "loss": 10.1998,
+      "step": 102
+    },
+    {
+      "epoch": 1.9712918660287082,
+      "grad_norm": 0.14838005602359772,
+      "learning_rate": 2.976083284388031e-05,
+      "loss": 10.3857,
+      "step": 103
+    },
+    {
+      "epoch": 1.9904306220095693,
+      "grad_norm": 0.15033847093582153,
+      "learning_rate": 2.8788416105048122e-05,
+      "loss": 10.591,
+      "step": 104
+    },
+    {
+      "epoch": 2.0095693779904304,
+      "grad_norm": 0.22326327860355377,
+      "learning_rate": 2.7825687087709328e-05,
+      "loss": 18.1498,
+      "step": 105
+    },
+    {
+      "epoch": 2.028708133971292,
+      "grad_norm": 0.14351670444011688,
+      "learning_rate": 2.687308548795825e-05,
+      "loss": 9.8884,
+      "step": 106
+    },
+    {
+      "epoch": 2.047846889952153,
+      "grad_norm": 0.12684540450572968,
+      "learning_rate": 2.5931046376510877e-05,
+      "loss": 10.2986,
+      "step": 107
+    },
+    {
+      "epoch": 2.0669856459330145,
+      "grad_norm": 0.14553603529930115,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 10.2596,
+      "step": 108
+    },
+    {
+      "epoch": 2.0861244019138754,
+      "grad_norm": 0.13355287909507751,
+      "learning_rate": 2.4080371584473748e-05,
+      "loss": 10.2515,
+      "step": 109
+    },
+    {
+      "epoch": 2.1052631578947367,
+      "grad_norm": 0.1391756683588028,
+      "learning_rate": 2.317258114118686e-05,
+      "loss": 10.3434,
+      "step": 110
+    },
+    {
+      "epoch": 2.124401913875598,
+      "grad_norm": 0.1632007211446762,
+      "learning_rate": 2.2277043274773857e-05,
+      "loss": 10.3774,
+      "step": 111
+    },
+    {
+      "epoch": 2.1435406698564594,
+      "grad_norm": 0.11006581783294678,
+      "learning_rate": 2.139416699389153e-05,
+      "loss": 10.237,
+      "step": 112
+    },
+    {
+      "epoch": 2.1626794258373208,
+      "grad_norm": 0.11960204690694809,
+      "learning_rate": 2.0524355524417017e-05,
+      "loss": 10.2388,
+      "step": 113
+    },
+    {
+      "epoch": 2.1818181818181817,
+      "grad_norm": 0.13326919078826904,
+      "learning_rate": 1.966800612528723e-05,
+      "loss": 10.3773,
+      "step": 114
+    },
+    {
+      "epoch": 2.200956937799043,
+      "grad_norm": 0.12038934975862503,
+      "learning_rate": 1.8825509907063327e-05,
+      "loss": 10.2409,
+      "step": 115
+    },
+    {
+      "epoch": 2.2200956937799043,
+      "grad_norm": 0.15435650944709778,
+      "learning_rate": 1.7997251653303248e-05,
+      "loss": 10.3346,
+      "step": 116
+    },
+    {
+      "epoch": 2.2392344497607657,
+      "grad_norm": 0.14944836497306824,
+      "learning_rate": 1.7183609644824096e-05,
+      "loss": 10.6496,
+      "step": 117
+    },
+    {
+      "epoch": 2.258373205741627,
+      "grad_norm": 0.12455789744853973,
+      "learning_rate": 1.6384955486934156e-05,
+      "loss": 10.3332,
+      "step": 118
+    },
+    {
+      "epoch": 2.277511961722488,
+      "grad_norm": 0.11132438480854034,
+      "learning_rate": 1.5601653939714074e-05,
+      "loss": 9.8656,
+      "step": 119
+    },
+    {
+      "epoch": 2.2966507177033493,
+      "grad_norm": 0.13063865900039673,
+      "learning_rate": 1.4834062751424015e-05,
+      "loss": 10.3357,
+      "step": 120
+    },
+    {
+      "epoch": 2.3157894736842106,
+      "grad_norm": 0.11636406928300858,
+      "learning_rate": 1.4082532495113626e-05,
+      "loss": 10.2896,
+      "step": 121
+    },
+    {
+      "epoch": 2.334928229665072,
+      "grad_norm": 0.1426575481891632,
+      "learning_rate": 1.3347406408508695e-05,
+      "loss": 10.2307,
+      "step": 122
+    },
+    {
+      "epoch": 2.354066985645933,
+      "grad_norm": 0.14399978518486023,
+      "learning_rate": 1.262902023724824e-05,
+      "loss": 10.3345,
+      "step": 123
+    },
+    {
+      "epoch": 2.373205741626794,
+      "grad_norm": 0.1384967416524887,
+      "learning_rate": 1.1927702081543279e-05,
+      "loss": 10.3376,
+      "step": 124
+    },
+    {
+      "epoch": 2.3923444976076556,
+      "grad_norm": 0.10496751964092255,
+      "learning_rate": 1.1243772246327416e-05,
+      "loss": 10.221,
+      "step": 125
+    },
+    {
+      "epoch": 2.411483253588517,
+      "grad_norm": 0.13061700761318207,
+      "learning_rate": 1.0577543094967612e-05,
+      "loss": 10.3965,
+      "step": 126
+    },
+    {
+      "epoch": 2.430622009569378,
+      "grad_norm": 0.12669506669044495,
+      "learning_rate": 9.929318906602175e-06,
+      "loss": 10.2988,
+      "step": 127
+    },
+    {
+      "epoch": 2.449760765550239,
+      "grad_norm": 0.10814495384693146,
+      "learning_rate": 9.299395737170757e-06,
+      "loss": 10.2391,
+      "step": 128
+    },
+    {
+      "epoch": 2.4688995215311005,
+      "grad_norm": 0.12308809906244278,
+      "learning_rate": 8.688061284200266e-06,
+      "loss": 10.328,
+      "step": 129
+    },
+    {
+      "epoch": 2.488038277511962,
+      "grad_norm": 0.14173907041549683,
+      "learning_rate": 8.09559475540797e-06,
+      "loss": 10.8135,
+      "step": 130
+    },
+    {
+      "epoch": 2.507177033492823,
+      "grad_norm": 0.12133615463972092,
+      "learning_rate": 7.522266741182305e-06,
+      "loss": 10.2866,
+      "step": 131
+    },
+    {
+      "epoch": 2.526315789473684,
+      "grad_norm": 0.10576584190130234,
+      "learning_rate": 6.968339090999187e-06,
+      "loss": 9.7405,
+      "step": 132
+    },
+    {
+      "epoch": 2.5454545454545454,
+      "grad_norm": 0.10492077469825745,
+      "learning_rate": 6.43406479383053e-06,
+      "loss": 10.3311,
+      "step": 133
+    },
+    {
+      "epoch": 2.5645933014354068,
+      "grad_norm": 0.13342593610286713,
+      "learning_rate": 5.919687862599549e-06,
+      "loss": 10.2719,
+      "step": 134
+    },
+    {
+      "epoch": 2.583732057416268,
+      "grad_norm": 0.12062268704175949,
+      "learning_rate": 5.425443222735527e-06,
+      "loss": 10.2956,
+      "step": 135
+    },
+    {
+      "epoch": 2.6028708133971294,
+      "grad_norm": 0.11691385507583618,
+      "learning_rate": 4.951556604879048e-06,
+      "loss": 10.4012,
+      "step": 136
+    },
+    {
+      "epoch": 2.6220095693779903,
+      "grad_norm": 0.1241801381111145,
+      "learning_rate": 4.498244441786675e-06,
+      "loss": 10.2174,
+      "step": 137
+    },
+    {
+      "epoch": 2.6411483253588517,
+      "grad_norm": 0.09667439758777618,
+      "learning_rate": 4.065713769482082e-06,
+      "loss": 10.2467,
+      "step": 138
+    },
+    {
+      "epoch": 2.660287081339713,
+      "grad_norm": 0.09752865880727768,
+      "learning_rate": 3.654162132698918e-06,
+      "loss": 10.3165,
+      "step": 139
+    },
+    {
+      "epoch": 2.679425837320574,
+      "grad_norm": 0.1245708093047142,
+      "learning_rate": 3.2637774946584486e-06,
+      "loss": 10.3543,
+      "step": 140
+    },
+    {
+      "epoch": 2.6985645933014353,
+      "grad_norm": 0.12063057720661163,
+      "learning_rate": 2.894738151223331e-06,
+      "loss": 10.2164,
+      "step": 141
+    },
+    {
+      "epoch": 2.7177033492822966,
+      "grad_norm": 0.14080384373664856,
+      "learning_rate": 2.547212649466568e-06,
+      "loss": 10.3849,
+      "step": 142
+    },
+    {
+      "epoch": 2.736842105263158,
+      "grad_norm": 0.13528694212436676,
+      "learning_rate": 2.221359710692961e-06,
+      "loss": 10.7423,
+      "step": 143
+    },
+    {
+      "epoch": 2.7559808612440193,
+      "grad_norm": 0.10822786390781403,
+      "learning_rate": 1.9173281579481892e-06,
+      "loss": 10.0585,
+      "step": 144
+    },
+    {
+      "epoch": 2.77511961722488,
+      "grad_norm": 0.10477904230356216,
+      "learning_rate": 1.6352568480485276e-06,
+      "loss": 10.0791,
+      "step": 145
+    },
+    {
+      "epoch": 2.7942583732057416,
+      "grad_norm": 0.1134805977344513,
+      "learning_rate": 1.3752746081624467e-06,
+      "loss": 10.2513,
+      "step": 146
+    },
+    {
+      "epoch": 2.813397129186603,
+      "grad_norm": 0.14501194655895233,
+      "learning_rate": 1.1375001769727999e-06,
+      "loss": 10.2943,
+      "step": 147
+    },
+    {
+      "epoch": 2.8325358851674642,
+      "grad_norm": 0.11421407014131546,
+      "learning_rate": 9.220421504467281e-07,
+      "loss": 10.2723,
+      "step": 148
+    },
+    {
+      "epoch": 2.8516746411483256,
+      "grad_norm": 0.12981349229812622,
+      "learning_rate": 7.289989322378732e-07,
+      "loss": 10.4036,
+      "step": 149
+    },
+    {
+      "epoch": 2.8708133971291865,
+      "grad_norm": 0.1481410712003708,
+      "learning_rate": 5.584586887435739e-07,
+      "loss": 10.2066,
+      "step": 150
+    },
+    {
+      "epoch": 2.8708133971291865,
+      "eval_loss": 10.294066429138184,
+      "eval_runtime": 0.2055,
+      "eval_samples_per_second": 428.249,
+      "eval_steps_per_second": 107.062,
+      "step": 150
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 16732599091200.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null