{ "best_metric": 6.85532283782959, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 1.443089430894309, "eval_steps": 25, "global_step": 71, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02032520325203252, "grad_norm": 0.19118568301200867, "learning_rate": 0.00015, "loss": 6.9281, "step": 1 }, { "epoch": 0.02032520325203252, "eval_loss": 6.930131435394287, "eval_runtime": 0.0812, "eval_samples_per_second": 615.967, "eval_steps_per_second": 36.958, "step": 1 }, { "epoch": 0.04065040650406504, "grad_norm": 0.1888657510280609, "learning_rate": 0.0003, "loss": 6.928, "step": 2 }, { "epoch": 0.06097560975609756, "grad_norm": 0.18045403063297272, "learning_rate": 0.0002998600959423082, "loss": 6.9274, "step": 3 }, { "epoch": 0.08130081300813008, "grad_norm": 0.18127678334712982, "learning_rate": 0.0002994406737417567, "loss": 6.9245, "step": 4 }, { "epoch": 0.1016260162601626, "grad_norm": 0.18910805881023407, "learning_rate": 0.00029874260271490463, "loss": 6.9186, "step": 5 }, { "epoch": 0.12195121951219512, "grad_norm": 0.18972323834896088, "learning_rate": 0.00029776732972055516, "loss": 6.9156, "step": 6 }, { "epoch": 0.14227642276422764, "grad_norm": 0.19272679090499878, "learning_rate": 0.0002965168761609197, "loss": 6.9113, "step": 7 }, { "epoch": 0.16260162601626016, "grad_norm": 0.19531318545341492, "learning_rate": 0.0002949938337919529, "loss": 6.9078, "step": 8 }, { "epoch": 0.18292682926829268, "grad_norm": 0.19558368623256683, "learning_rate": 0.0002932013593515431, "loss": 6.904, "step": 9 }, { "epoch": 0.2032520325203252, "grad_norm": 0.19056282937526703, "learning_rate": 0.00029114316801669057, "loss": 6.9027, "step": 10 }, { "epoch": 0.22357723577235772, "grad_norm": 0.19243130087852478, "learning_rate": 0.00028882352570323616, "loss": 6.9009, "step": 11 }, { "epoch": 0.24390243902439024, "grad_norm": 0.2011752426624298, "learning_rate": 0.00028624724022409897, "loss": 6.8905, "step": 12 }, { "epoch": 0.26422764227642276, "grad_norm": 0.18933506309986115, "learning_rate": 0.0002834196513243502, "loss": 6.8896, "step": 13 }, { "epoch": 0.2845528455284553, "grad_norm": 0.18284353613853455, "learning_rate": 0.0002803466196137759, "loss": 6.8857, "step": 14 }, { "epoch": 0.3048780487804878, "grad_norm": 0.1695403903722763, "learning_rate": 0.00027703451441986836, "loss": 6.8842, "step": 15 }, { "epoch": 0.3252032520325203, "grad_norm": 0.15177474915981293, "learning_rate": 0.000273490200586422, "loss": 6.8813, "step": 16 }, { "epoch": 0.34552845528455284, "grad_norm": 0.13989834487438202, "learning_rate": 0.00026972102424509665, "loss": 6.8778, "step": 17 }, { "epoch": 0.36585365853658536, "grad_norm": 0.12882831692695618, "learning_rate": 0.00026573479758943753, "loss": 6.8743, "step": 18 }, { "epoch": 0.3861788617886179, "grad_norm": 0.1219935417175293, "learning_rate": 0.0002615397826829114, "loss": 6.8712, "step": 19 }, { "epoch": 0.4065040650406504, "grad_norm": 0.11544951051473618, "learning_rate": 0.0002571446743345183, "loss": 6.8704, "step": 20 }, { "epoch": 0.4268292682926829, "grad_norm": 0.10308007150888443, "learning_rate": 0.00025255858207747205, "loss": 6.8684, "step": 21 }, { "epoch": 0.44715447154471544, "grad_norm": 0.09864865243434906, "learning_rate": 0.0002477910112883017, "loss": 6.8696, "step": 22 }, { "epoch": 0.46747967479674796, "grad_norm": 0.09344462305307388, "learning_rate": 0.00024285184348550706, "loss": 6.8741, "step": 23 }, { "epoch": 0.4878048780487805, "grad_norm": 0.09744031727313995, "learning_rate": 0.0002377513158486027, "loss": 6.8679, "step": 24 }, { "epoch": 0.508130081300813, "grad_norm": 0.10219009965658188, "learning_rate": 0.00023249999999999999, "loss": 6.8657, "step": 25 }, { "epoch": 0.508130081300813, "eval_loss": 6.868204116821289, "eval_runtime": 0.0803, "eval_samples_per_second": 622.86, "eval_steps_per_second": 37.372, "step": 25 }, { "epoch": 0.5284552845528455, "grad_norm": 0.10324902832508087, "learning_rate": 0.00022710878009370554, "loss": 6.8658, "step": 26 }, { "epoch": 0.5487804878048781, "grad_norm": 0.0966915562748909, "learning_rate": 0.00022158883025624965, "loss": 6.8638, "step": 27 }, { "epoch": 0.5691056910569106, "grad_norm": 0.08991527557373047, "learning_rate": 0.0002159515914266029, "loss": 6.8639, "step": 28 }, { "epoch": 0.5894308943089431, "grad_norm": 0.09582707285881042, "learning_rate": 0.0002102087476430831, "loss": 6.8622, "step": 29 }, { "epoch": 0.6097560975609756, "grad_norm": 0.09370791167020798, "learning_rate": 0.00020437220182640135, "loss": 6.8624, "step": 30 }, { "epoch": 0.6300813008130082, "grad_norm": 0.0968514159321785, "learning_rate": 0.00019845405110904146, "loss": 6.8602, "step": 31 }, { "epoch": 0.6504065040650406, "grad_norm": 0.09518780559301376, "learning_rate": 0.00019246656176210558, "loss": 6.859, "step": 32 }, { "epoch": 0.6707317073170732, "grad_norm": 0.09533347189426422, "learning_rate": 0.0001864221437715939, "loss": 6.8596, "step": 33 }, { "epoch": 0.6910569105691057, "grad_norm": 0.08625080436468124, "learning_rate": 0.0001803333251168141, "loss": 6.8606, "step": 34 }, { "epoch": 0.7113821138211383, "grad_norm": 0.08176233619451523, "learning_rate": 0.00017421272580423058, "loss": 6.8639, "step": 35 }, { "epoch": 0.7317073170731707, "grad_norm": 0.0815698653459549, "learning_rate": 0.00016807303171057425, "loss": 6.8624, "step": 36 }, { "epoch": 0.7520325203252033, "grad_norm": 0.0918528214097023, "learning_rate": 0.00016192696828942573, "loss": 6.8561, "step": 37 }, { "epoch": 0.7723577235772358, "grad_norm": 0.0833517462015152, "learning_rate": 0.00015578727419576942, "loss": 6.8576, "step": 38 }, { "epoch": 0.7926829268292683, "grad_norm": 0.07739175111055374, "learning_rate": 0.00014966667488318586, "loss": 6.8582, "step": 39 }, { "epoch": 0.8130081300813008, "grad_norm": 0.07285133749246597, "learning_rate": 0.00014357785622840606, "loss": 6.8576, "step": 40 }, { "epoch": 0.8333333333333334, "grad_norm": 0.07432258129119873, "learning_rate": 0.00013753343823789445, "loss": 6.8563, "step": 41 }, { "epoch": 0.8536585365853658, "grad_norm": 0.07176032662391663, "learning_rate": 0.00013154594889095854, "loss": 6.8558, "step": 42 }, { "epoch": 0.8739837398373984, "grad_norm": 0.07529640197753906, "learning_rate": 0.00012562779817359865, "loss": 6.8532, "step": 43 }, { "epoch": 0.8943089430894309, "grad_norm": 0.07157547771930695, "learning_rate": 0.00011979125235691685, "loss": 6.8528, "step": 44 }, { "epoch": 0.9146341463414634, "grad_norm": 0.07007233798503876, "learning_rate": 0.00011404840857339706, "loss": 6.8542, "step": 45 }, { "epoch": 0.9349593495934959, "grad_norm": 0.07017088681459427, "learning_rate": 0.0001084111697437504, "loss": 6.8531, "step": 46 }, { "epoch": 0.9552845528455285, "grad_norm": 0.07130666822195053, "learning_rate": 0.00010289121990629447, "loss": 6.8574, "step": 47 }, { "epoch": 0.975609756097561, "grad_norm": 0.07272395491600037, "learning_rate": 9.750000000000003e-05, "loss": 6.8621, "step": 48 }, { "epoch": 0.9959349593495935, "grad_norm": 0.08354395627975464, "learning_rate": 9.22486841513973e-05, "loss": 6.8516, "step": 49 }, { "epoch": 1.016260162601626, "grad_norm": 0.13025087118148804, "learning_rate": 8.714815651449293e-05, "loss": 12.4624, "step": 50 }, { "epoch": 1.016260162601626, "eval_loss": 6.85532283782959, "eval_runtime": 0.0806, "eval_samples_per_second": 620.406, "eval_steps_per_second": 37.224, "step": 50 }, { "epoch": 1.0365853658536586, "grad_norm": 0.0706627145409584, "learning_rate": 8.220898871169827e-05, "loss": 6.7974, "step": 51 }, { "epoch": 1.056910569105691, "grad_norm": 0.06643562018871307, "learning_rate": 7.744141792252794e-05, "loss": 6.8372, "step": 52 }, { "epoch": 1.0772357723577235, "grad_norm": 0.06472016870975494, "learning_rate": 7.285532566548172e-05, "loss": 6.8331, "step": 53 }, { "epoch": 1.0975609756097562, "grad_norm": 0.06576112657785416, "learning_rate": 6.846021731708856e-05, "loss": 6.8458, "step": 54 }, { "epoch": 1.1178861788617886, "grad_norm": 0.06294548511505127, "learning_rate": 6.426520241056245e-05, "loss": 6.8476, "step": 55 }, { "epoch": 1.1382113821138211, "grad_norm": 0.06356498599052429, "learning_rate": 6.0278975754903317e-05, "loss": 6.846, "step": 56 }, { "epoch": 1.1585365853658536, "grad_norm": 0.06702162325382233, "learning_rate": 5.6509799413577934e-05, "loss": 6.8533, "step": 57 }, { "epoch": 1.1788617886178863, "grad_norm": 0.06253218650817871, "learning_rate": 5.296548558013161e-05, "loss": 6.8537, "step": 58 }, { "epoch": 1.1991869918699187, "grad_norm": 0.06217321753501892, "learning_rate": 4.9653380386224046e-05, "loss": 6.8631, "step": 59 }, { "epoch": 1.2195121951219512, "grad_norm": 0.06489800661802292, "learning_rate": 4.658034867564977e-05, "loss": 6.9336, "step": 60 }, { "epoch": 1.2398373983739837, "grad_norm": 0.07119685411453247, "learning_rate": 4.375275977590104e-05, "loss": 6.8487, "step": 61 }, { "epoch": 1.2601626016260163, "grad_norm": 0.06628692150115967, "learning_rate": 4.117647429676387e-05, "loss": 6.5646, "step": 62 }, { "epoch": 1.2804878048780488, "grad_norm": 0.07155509293079376, "learning_rate": 3.885683198330941e-05, "loss": 7.0993, "step": 63 }, { "epoch": 1.3008130081300813, "grad_norm": 0.06444606184959412, "learning_rate": 3.679864064845691e-05, "loss": 6.8313, "step": 64 }, { "epoch": 1.321138211382114, "grad_norm": 0.061461322009563446, "learning_rate": 3.500616620804712e-05, "loss": 6.8289, "step": 65 }, { "epoch": 1.3414634146341464, "grad_norm": 0.06256741285324097, "learning_rate": 3.348312383908033e-05, "loss": 6.8465, "step": 66 }, { "epoch": 1.3617886178861789, "grad_norm": 0.06616440415382385, "learning_rate": 3.223267027944483e-05, "loss": 6.8441, "step": 67 }, { "epoch": 1.3821138211382114, "grad_norm": 0.06352795660495758, "learning_rate": 3.125739728509535e-05, "loss": 6.8487, "step": 68 }, { "epoch": 1.4024390243902438, "grad_norm": 0.0632796511054039, "learning_rate": 3.055932625824328e-05, "loss": 6.8438, "step": 69 }, { "epoch": 1.4227642276422765, "grad_norm": 0.06695584207773209, "learning_rate": 3.0139904057691777e-05, "loss": 6.8565, "step": 70 }, { "epoch": 1.443089430894309, "grad_norm": 0.06280119717121124, "learning_rate": 2.9999999999999997e-05, "loss": 6.8591, "step": 71 } ], "logging_steps": 1, "max_steps": 71, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 15192111513600.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }