Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8dedffd8550efe6cef1264ca534f2ebfdd3e9298833032c7f316796eeb1b1f05
 size 140815952

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab05564fb44196c5cba92d365a858ab95e05f5d8bc72db4656a1e1bd23149531
 size 140815952

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea7c19235bf4e5ff6a997efbd1d3c4b826ba06a3cca2fe0892904b272c2b13b6
 size 281824770

 version https://git-lfs.github.com/spec/v1
+oid sha256:f54111d2ccd24bd89866b0d094f10aff1f4f7a249c164dd7bad23f974ae3120d
 size 281824770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00227713f699917b1c3ffed83e964fa9547c9b417b55a6adb09048496d05dd32
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:59eafd554f9dd23e02f02cfe6313d07221fb83f041422662ed1b942860089ced
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5335737b4b4d079cdec612fe6ee661ed4d29f9e0e55eb4eada44a9677240228
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e0244c146d76bf610ae39789eea36d0bff336b81d211db008e020e66921060c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 3.3963160514831543,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.01858045336306206,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 41.146,
       "eval_steps_per_second": 20.573,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3883562421977088.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 3.239711284637451,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.03716090672612412,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 41.146,
       "eval_steps_per_second": 20.573,
       "step": 50
+    },
+    {
+      "epoch": 0.0189520624303233,
+      "grad_norm": 3.2213802337646484,
+      "learning_rate": 9.285836503510562e-05,
+      "loss": 3.7682,
+      "step": 51
+    },
+    {
+      "epoch": 0.01932367149758454,
+      "grad_norm": 2.897183418273926,
+      "learning_rate": 9.24024048078213e-05,
+      "loss": 3.6962,
+      "step": 52
+    },
+    {
+      "epoch": 0.019695280564845784,
+      "grad_norm": 2.333893299102783,
+      "learning_rate": 9.193352839727121e-05,
+      "loss": 3.55,
+      "step": 53
+    },
+    {
+      "epoch": 0.020066889632107024,
+      "grad_norm": 2.0622718334198,
+      "learning_rate": 9.145187862775209e-05,
+      "loss": 3.6484,
+      "step": 54
+    },
+    {
+      "epoch": 0.020438498699368264,
+      "grad_norm": 1.9058257341384888,
+      "learning_rate": 9.09576022144496e-05,
+      "loss": 3.5701,
+      "step": 55
+    },
+    {
+      "epoch": 0.020810107766629504,
+      "grad_norm": 1.4503419399261475,
+      "learning_rate": 9.045084971874738e-05,
+      "loss": 3.4605,
+      "step": 56
+    },
+    {
+      "epoch": 0.021181716833890748,
+      "grad_norm": 1.41510808467865,
+      "learning_rate": 8.993177550236464e-05,
+      "loss": 3.3843,
+      "step": 57
+    },
+    {
+      "epoch": 0.021553325901151988,
+      "grad_norm": 1.5639384984970093,
+      "learning_rate": 8.940053768033609e-05,
+      "loss": 3.2906,
+      "step": 58
+    },
+    {
+      "epoch": 0.021924934968413228,
+      "grad_norm": 1.5347603559494019,
+      "learning_rate": 8.885729807284856e-05,
+      "loss": 3.586,
+      "step": 59
+    },
+    {
+      "epoch": 0.022296544035674472,
+      "grad_norm": 1.5613843202590942,
+      "learning_rate": 8.83022221559489e-05,
+      "loss": 3.3605,
+      "step": 60
+    },
+    {
+      "epoch": 0.022668153102935712,
+      "grad_norm": 1.5832593441009521,
+      "learning_rate": 8.773547901113862e-05,
+      "loss": 3.415,
+      "step": 61
+    },
+    {
+      "epoch": 0.023039762170196952,
+      "grad_norm": 1.6786587238311768,
+      "learning_rate": 8.715724127386972e-05,
+      "loss": 3.3806,
+      "step": 62
+    },
+    {
+      "epoch": 0.023411371237458192,
+      "grad_norm": 1.6316285133361816,
+      "learning_rate": 8.656768508095853e-05,
+      "loss": 3.4167,
+      "step": 63
+    },
+    {
+      "epoch": 0.023782980304719436,
+      "grad_norm": 1.6437264680862427,
+      "learning_rate": 8.596699001693255e-05,
+      "loss": 3.4412,
+      "step": 64
+    },
+    {
+      "epoch": 0.024154589371980676,
+      "grad_norm": 1.5929906368255615,
+      "learning_rate": 8.535533905932738e-05,
+      "loss": 3.379,
+      "step": 65
+    },
+    {
+      "epoch": 0.024526198439241916,
+      "grad_norm": 1.6711010932922363,
+      "learning_rate": 8.473291852294987e-05,
+      "loss": 3.2288,
+      "step": 66
+    },
+    {
+      "epoch": 0.02489780750650316,
+      "grad_norm": 1.816035509109497,
+      "learning_rate": 8.409991800312493e-05,
+      "loss": 3.2581,
+      "step": 67
+    },
+    {
+      "epoch": 0.0252694165737644,
+      "grad_norm": 1.5691595077514648,
+      "learning_rate": 8.345653031794292e-05,
+      "loss": 3.3002,
+      "step": 68
+    },
+    {
+      "epoch": 0.02564102564102564,
+      "grad_norm": 1.7285726070404053,
+      "learning_rate": 8.280295144952536e-05,
+      "loss": 3.3853,
+      "step": 69
+    },
+    {
+      "epoch": 0.026012634708286884,
+      "grad_norm": 1.6965199708938599,
+      "learning_rate": 8.213938048432697e-05,
+      "loss": 3.1998,
+      "step": 70
+    },
+    {
+      "epoch": 0.026384243775548124,
+      "grad_norm": 1.7152042388916016,
+      "learning_rate": 8.146601955249188e-05,
+      "loss": 3.304,
+      "step": 71
+    },
+    {
+      "epoch": 0.026755852842809364,
+      "grad_norm": 1.8366388082504272,
+      "learning_rate": 8.07830737662829e-05,
+      "loss": 3.1231,
+      "step": 72
+    },
+    {
+      "epoch": 0.027127461910070604,
+      "grad_norm": 1.8617119789123535,
+      "learning_rate": 8.009075115760243e-05,
+      "loss": 3.3145,
+      "step": 73
+    },
+    {
+      "epoch": 0.027499070977331848,
+      "grad_norm": 1.8905341625213623,
+      "learning_rate": 7.938926261462366e-05,
+      "loss": 3.3134,
+      "step": 74
+    },
+    {
+      "epoch": 0.027870680044593088,
+      "grad_norm": 1.882635474205017,
+      "learning_rate": 7.86788218175523e-05,
+      "loss": 3.3169,
+      "step": 75
+    },
+    {
+      "epoch": 0.028242289111854328,
+      "grad_norm": 2.0442984104156494,
+      "learning_rate": 7.795964517353735e-05,
+      "loss": 3.1649,
+      "step": 76
+    },
+    {
+      "epoch": 0.028613898179115572,
+      "grad_norm": 1.940576434135437,
+      "learning_rate": 7.723195175075136e-05,
+      "loss": 3.1936,
+      "step": 77
+    },
+    {
+      "epoch": 0.028985507246376812,
+      "grad_norm": 2.0615429878234863,
+      "learning_rate": 7.649596321166024e-05,
+      "loss": 3.3171,
+      "step": 78
+    },
+    {
+      "epoch": 0.029357116313638052,
+      "grad_norm": 2.0494372844696045,
+      "learning_rate": 7.575190374550272e-05,
+      "loss": 3.2035,
+      "step": 79
+    },
+    {
+      "epoch": 0.029728725380899292,
+      "grad_norm": 2.0333774089813232,
+      "learning_rate": 7.500000000000001e-05,
+      "loss": 3.1547,
+      "step": 80
+    },
+    {
+      "epoch": 0.030100334448160536,
+      "grad_norm": 2.141954183578491,
+      "learning_rate": 7.424048101231686e-05,
+      "loss": 2.9143,
+      "step": 81
+    },
+    {
+      "epoch": 0.030471943515421776,
+      "grad_norm": 2.22705340385437,
+      "learning_rate": 7.347357813929454e-05,
+      "loss": 3.1911,
+      "step": 82
+    },
+    {
+      "epoch": 0.030843552582683016,
+      "grad_norm": 2.196378231048584,
+      "learning_rate": 7.269952498697734e-05,
+      "loss": 3.2222,
+      "step": 83
+    },
+    {
+      "epoch": 0.03121516164994426,
+      "grad_norm": 2.2278859615325928,
+      "learning_rate": 7.191855733945387e-05,
+      "loss": 3.0329,
+      "step": 84
+    },
+    {
+      "epoch": 0.031586770717205497,
+      "grad_norm": 2.3488399982452393,
+      "learning_rate": 7.113091308703498e-05,
+      "loss": 2.9382,
+      "step": 85
+    },
+    {
+      "epoch": 0.031958379784466744,
+      "grad_norm": 2.3422460556030273,
+      "learning_rate": 7.033683215379002e-05,
+      "loss": 3.1308,
+      "step": 86
+    },
+    {
+      "epoch": 0.032329988851727984,
+      "grad_norm": 2.4222495555877686,
+      "learning_rate": 6.953655642446368e-05,
+      "loss": 2.9883,
+      "step": 87
+    },
+    {
+      "epoch": 0.032701597918989224,
+      "grad_norm": 2.425354242324829,
+      "learning_rate": 6.873032967079561e-05,
+      "loss": 3.1242,
+      "step": 88
+    },
+    {
+      "epoch": 0.033073206986250464,
+      "grad_norm": 2.56612491607666,
+      "learning_rate": 6.7918397477265e-05,
+      "loss": 3.0771,
+      "step": 89
+    },
+    {
+      "epoch": 0.033444816053511704,
+      "grad_norm": 2.566655397415161,
+      "learning_rate": 6.710100716628344e-05,
+      "loss": 2.9399,
+      "step": 90
+    },
+    {
+      "epoch": 0.033816425120772944,
+      "grad_norm": 2.3641583919525146,
+      "learning_rate": 6.627840772285784e-05,
+      "loss": 2.9741,
+      "step": 91
+    },
+    {
+      "epoch": 0.03418803418803419,
+      "grad_norm": 2.47420334815979,
+      "learning_rate": 6.545084971874738e-05,
+      "loss": 2.7321,
+      "step": 92
+    },
+    {
+      "epoch": 0.03455964325529543,
+      "grad_norm": 2.7138381004333496,
+      "learning_rate": 6.461858523613684e-05,
+      "loss": 3.027,
+      "step": 93
+    },
+    {
+      "epoch": 0.03493125232255667,
+      "grad_norm": 2.655938148498535,
+      "learning_rate": 6.378186779084995e-05,
+      "loss": 2.9049,
+      "step": 94
+    },
+    {
+      "epoch": 0.03530286138981791,
+      "grad_norm": 2.87831974029541,
+      "learning_rate": 6.294095225512603e-05,
+      "loss": 3.2944,
+      "step": 95
+    },
+    {
+      "epoch": 0.03567447045707915,
+      "grad_norm": 2.891333818435669,
+      "learning_rate": 6.209609477998338e-05,
+      "loss": 3.2566,
+      "step": 96
+    },
+    {
+      "epoch": 0.03604607952434039,
+      "grad_norm": 2.8243558406829834,
+      "learning_rate": 6.124755271719325e-05,
+      "loss": 3.1022,
+      "step": 97
+    },
+    {
+      "epoch": 0.03641768859160163,
+      "grad_norm": 3.1256723403930664,
+      "learning_rate": 6.0395584540887963e-05,
+      "loss": 3.3878,
+      "step": 98
+    },
+    {
+      "epoch": 0.03678929765886288,
+      "grad_norm": 3.284562349319458,
+      "learning_rate": 5.9540449768827246e-05,
+      "loss": 3.376,
+      "step": 99
+    },
+    {
+      "epoch": 0.03716090672612412,
+      "grad_norm": 3.2456462383270264,
+      "learning_rate": 5.868240888334653e-05,
+      "loss": 3.7382,
+      "step": 100
+    },
+    {
+      "epoch": 0.03716090672612412,
+      "eval_loss": 3.239711284637451,
+      "eval_runtime": 109.8084,
+      "eval_samples_per_second": 41.272,
+      "eval_steps_per_second": 20.636,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7747803637874688.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null