Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +371 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f62b5f2468f1a4794e105e0919ce798a67a1bfe01a19ad62be9eea0837ddea8c
 size 26008

 version https://git-lfs.github.com/spec/v1
+oid sha256:8cfa960e26e7313f45ed66175129ede040128a91bbe7eefc552edd0291d0afb9
 size 26008

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca2476b2e9e85fd039e34ab77ba2fc189947c495e1edc8c124467f93ad7f5037
 size 68874

 version https://git-lfs.github.com/spec/v1
+oid sha256:1acd47c782af25ea71657f1cd0cd0ead4d496b32e21378acfb48c4ae9d10bf8e
 size 68874

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19548bada73dc88e3b9c7d8f5ee2a174119be2d6de8a7d12c7595fb21d905767
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e745240ea492d302e6e50e9ee4c5ad0725ded989d45924a41c0064c4e363c2e0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ad2841b888ce0ae948634757c3fcacf0119c249e0fec8f3ca61ea266369ef92
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5d2a6c6aafc669cea03b9634666f204de949a3d45ce2f48a07e7e3eaf18c715
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.91733169555664,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.11019283746556474,
   "eval_steps": 25,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -381,6 +381,372 @@
       "eval_samples_per_second": 189.053,
       "eval_steps_per_second": 26.467,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -409,7 +775,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 79631155200.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 11.898636817932129,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.22038567493112948,
   "eval_steps": 25,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 189.053,
       "eval_steps_per_second": 26.467,
       "step": 50
+    },
+    {
+      "epoch": 0.11239669421487604,
+      "grad_norm": 0.09227705001831055,
+      "learning_rate": 0.0002668315918143169,
+      "loss": 11.9309,
+      "step": 51
+    },
+    {
+      "epoch": 0.11460055096418732,
+      "grad_norm": 0.1322363317012787,
+      "learning_rate": 0.00026526016662852886,
+      "loss": 11.9193,
+      "step": 52
+    },
+    {
+      "epoch": 0.11680440771349862,
+      "grad_norm": 0.12950630486011505,
+      "learning_rate": 0.00026365723046405023,
+      "loss": 11.914,
+      "step": 53
+    },
+    {
+      "epoch": 0.11900826446280992,
+      "grad_norm": 0.14015230536460876,
+      "learning_rate": 0.0002620232215476231,
+      "loss": 11.9153,
+      "step": 54
+    },
+    {
+      "epoch": 0.12121212121212122,
+      "grad_norm": 0.14919088780879974,
+      "learning_rate": 0.0002603585866009697,
+      "loss": 11.9099,
+      "step": 55
+    },
+    {
+      "epoch": 0.12341597796143251,
+      "grad_norm": 0.11809884756803513,
+      "learning_rate": 0.00025866378071866334,
+      "loss": 11.9087,
+      "step": 56
+    },
+    {
+      "epoch": 0.1256198347107438,
+      "grad_norm": 0.13996019959449768,
+      "learning_rate": 0.00025693926724370956,
+      "loss": 11.9073,
+      "step": 57
+    },
+    {
+      "epoch": 0.1278236914600551,
+      "grad_norm": 0.13844236731529236,
+      "learning_rate": 0.00025518551764087326,
+      "loss": 11.9084,
+      "step": 58
+    },
+    {
+      "epoch": 0.13002754820936638,
+      "grad_norm": 0.1513601541519165,
+      "learning_rate": 0.00025340301136778483,
+      "loss": 11.9072,
+      "step": 59
+    },
+    {
+      "epoch": 0.1322314049586777,
+      "grad_norm": 0.15952962636947632,
+      "learning_rate": 0.00025159223574386114,
+      "loss": 11.9082,
+      "step": 60
+    },
+    {
+      "epoch": 0.13443526170798897,
+      "grad_norm": 0.11324327439069748,
+      "learning_rate": 0.0002497536858170772,
+      "loss": 11.8976,
+      "step": 61
+    },
+    {
+      "epoch": 0.13663911845730028,
+      "grad_norm": 0.1319524496793747,
+      "learning_rate": 0.00024788786422862526,
+      "loss": 11.8988,
+      "step": 62
+    },
+    {
+      "epoch": 0.13884297520661157,
+      "grad_norm": 0.13562187552452087,
+      "learning_rate": 0.00024599528107549745,
+      "loss": 11.9001,
+      "step": 63
+    },
+    {
+      "epoch": 0.14104683195592285,
+      "grad_norm": 0.10524652153253555,
+      "learning_rate": 0.00024407645377103054,
+      "loss": 11.9006,
+      "step": 64
+    },
+    {
+      "epoch": 0.14325068870523416,
+      "grad_norm": 0.13076205551624298,
+      "learning_rate": 0.00024213190690345018,
+      "loss": 11.9137,
+      "step": 65
+    },
+    {
+      "epoch": 0.14545454545454545,
+      "grad_norm": 0.14588449895381927,
+      "learning_rate": 0.00024016217209245374,
+      "loss": 11.9071,
+      "step": 66
+    },
+    {
+      "epoch": 0.14765840220385676,
+      "grad_norm": 0.16398610174655914,
+      "learning_rate": 0.00023816778784387094,
+      "loss": 11.9,
+      "step": 67
+    },
+    {
+      "epoch": 0.14986225895316804,
+      "grad_norm": 0.13749021291732788,
+      "learning_rate": 0.0002361492994024415,
+      "loss": 11.8945,
+      "step": 68
+    },
+    {
+      "epoch": 0.15206611570247933,
+      "grad_norm": 0.13079959154129028,
+      "learning_rate": 0.0002341072586027509,
+      "loss": 11.8949,
+      "step": 69
+    },
+    {
+      "epoch": 0.15426997245179064,
+      "grad_norm": 0.10493382811546326,
+      "learning_rate": 0.00023204222371836405,
+      "loss": 11.8943,
+      "step": 70
+    },
+    {
+      "epoch": 0.15647382920110192,
+      "grad_norm": 0.10082471370697021,
+      "learning_rate": 0.00022995475930919905,
+      "loss": 11.8976,
+      "step": 71
+    },
+    {
+      "epoch": 0.15867768595041323,
+      "grad_norm": 0.138931542634964,
+      "learning_rate": 0.00022784543606718227,
+      "loss": 11.9008,
+      "step": 72
+    },
+    {
+      "epoch": 0.16088154269972452,
+      "grad_norm": 0.16665118932724,
+      "learning_rate": 0.00022571483066022657,
+      "loss": 11.8992,
+      "step": 73
+    },
+    {
+      "epoch": 0.1630853994490358,
+      "grad_norm": 0.09672952443361282,
+      "learning_rate": 0.0002235635255745762,
+      "loss": 11.8893,
+      "step": 74
+    },
+    {
+      "epoch": 0.1652892561983471,
+      "grad_norm": 0.0886247456073761,
+      "learning_rate": 0.00022139210895556104,
+      "loss": 11.8885,
+      "step": 75
+    },
+    {
+      "epoch": 0.1652892561983471,
+      "eval_loss": 11.899949073791504,
+      "eval_runtime": 0.2633,
+      "eval_samples_per_second": 189.892,
+      "eval_steps_per_second": 26.585,
+      "step": 75
+    },
+    {
+      "epoch": 0.1674931129476584,
+      "grad_norm": 0.09242913126945496,
+      "learning_rate": 0.00021920117444680317,
+      "loss": 11.8931,
+      "step": 76
+    },
+    {
+      "epoch": 0.1696969696969697,
+      "grad_norm": 0.09106940031051636,
+      "learning_rate": 0.00021699132102792097,
+      "loss": 11.8972,
+      "step": 77
+    },
+    {
+      "epoch": 0.171900826446281,
+      "grad_norm": 0.09342004358768463,
+      "learning_rate": 0.0002147631528507739,
+      "loss": 11.8948,
+      "step": 78
+    },
+    {
+      "epoch": 0.17410468319559227,
+      "grad_norm": 0.1103108748793602,
+      "learning_rate": 0.00021251727907429355,
+      "loss": 11.8963,
+      "step": 79
+    },
+    {
+      "epoch": 0.1763085399449036,
+      "grad_norm": 0.14813131093978882,
+      "learning_rate": 0.0002102543136979454,
+      "loss": 11.8954,
+      "step": 80
+    },
+    {
+      "epoch": 0.17851239669421487,
+      "grad_norm": 0.08995470404624939,
+      "learning_rate": 0.0002079748753938678,
+      "loss": 11.8883,
+      "step": 81
+    },
+    {
+      "epoch": 0.18071625344352618,
+      "grad_norm": 0.08261939138174057,
+      "learning_rate": 0.0002056795873377331,
+      "loss": 11.8938,
+      "step": 82
+    },
+    {
+      "epoch": 0.18292011019283747,
+      "grad_norm": 0.08036128431558609,
+      "learning_rate": 0.00020336907703837748,
+      "loss": 11.8852,
+      "step": 83
+    },
+    {
+      "epoch": 0.18512396694214875,
+      "grad_norm": 0.122383251786232,
+      "learning_rate": 0.00020104397616624645,
+      "loss": 11.8976,
+      "step": 84
+    },
+    {
+      "epoch": 0.18732782369146006,
+      "grad_norm": 0.06162508204579353,
+      "learning_rate": 0.00019870492038070252,
+      "loss": 11.8828,
+      "step": 85
+    },
+    {
+      "epoch": 0.18953168044077134,
+      "grad_norm": 0.05778353661298752,
+      "learning_rate": 0.0001963525491562421,
+      "loss": 11.8874,
+      "step": 86
+    },
+    {
+      "epoch": 0.19173553719008266,
+      "grad_norm": 0.0797528401017189,
+      "learning_rate": 0.0001939875056076697,
+      "loss": 11.8865,
+      "step": 87
+    },
+    {
+      "epoch": 0.19393939393939394,
+      "grad_norm": 0.0552869513630867,
+      "learning_rate": 0.00019161043631427666,
+      "loss": 11.8921,
+      "step": 88
+    },
+    {
+      "epoch": 0.19614325068870522,
+      "grad_norm": 0.07009490579366684,
+      "learning_rate": 0.00018922199114307294,
+      "loss": 11.8908,
+      "step": 89
+    },
+    {
+      "epoch": 0.19834710743801653,
+      "grad_norm": 0.11296062171459198,
+      "learning_rate": 0.00018682282307111987,
+      "loss": 11.8894,
+      "step": 90
+    },
+    {
+      "epoch": 0.20055096418732782,
+      "grad_norm": 0.046599071472883224,
+      "learning_rate": 0.00018441358800701273,
+      "loss": 11.879,
+      "step": 91
+    },
+    {
+      "epoch": 0.20275482093663913,
+      "grad_norm": 0.06188511103391647,
+      "learning_rate": 0.00018199494461156203,
+      "loss": 11.8923,
+      "step": 92
+    },
+    {
+      "epoch": 0.2049586776859504,
+      "grad_norm": 0.08621121942996979,
+      "learning_rate": 0.000179567554117722,
+      "loss": 11.8869,
+      "step": 93
+    },
+    {
+      "epoch": 0.2071625344352617,
+      "grad_norm": 0.11004097014665604,
+      "learning_rate": 0.00017713208014981648,
+      "loss": 11.8952,
+      "step": 94
+    },
+    {
+      "epoch": 0.209366391184573,
+      "grad_norm": 0.07841908186674118,
+      "learning_rate": 0.00017468918854211007,
+      "loss": 11.8892,
+      "step": 95
+    },
+    {
+      "epoch": 0.2115702479338843,
+      "grad_norm": 0.11397922039031982,
+      "learning_rate": 0.00017223954715677627,
+      "loss": 11.8855,
+      "step": 96
+    },
+    {
+      "epoch": 0.2137741046831956,
+      "grad_norm": 0.1289277821779251,
+      "learning_rate": 0.00016978382570131034,
+      "loss": 11.8875,
+      "step": 97
+    },
+    {
+      "epoch": 0.2159779614325069,
+      "grad_norm": 0.08124374598264694,
+      "learning_rate": 0.00016732269554543794,
+      "loss": 11.8823,
+      "step": 98
+    },
+    {
+      "epoch": 0.21818181818181817,
+      "grad_norm": 0.09473321586847305,
+      "learning_rate": 0.00016485682953756942,
+      "loss": 11.8845,
+      "step": 99
+    },
+    {
+      "epoch": 0.22038567493112948,
+      "grad_norm": 0.34638768434524536,
+      "learning_rate": 0.00016238690182084986,
+      "loss": 11.8753,
+      "step": 100
+    },
+    {
+      "epoch": 0.22038567493112948,
+      "eval_loss": 11.898636817932129,
+      "eval_runtime": 0.2629,
+      "eval_samples_per_second": 190.203,
+      "eval_steps_per_second": 26.628,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 159262310400.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null