{ "best_metric": 0.7373384237289429, "best_model_checkpoint": "miner_id_24/checkpoint-30", "epoch": 0.07159904534606205, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002386634844868735, "grad_norm": 0.8340703845024109, "learning_rate": 2e-05, "loss": 0.8038, "step": 1 }, { "epoch": 0.002386634844868735, "eval_loss": 0.8742901682853699, "eval_runtime": 88.58, "eval_samples_per_second": 1.998, "eval_steps_per_second": 1.005, "step": 1 }, { "epoch": 0.00477326968973747, "grad_norm": 0.2642470896244049, "learning_rate": 4e-05, "loss": 0.7002, "step": 2 }, { "epoch": 0.007159904534606206, "grad_norm": 0.3332902491092682, "learning_rate": 6e-05, "loss": 0.7867, "step": 3 }, { "epoch": 0.00954653937947494, "grad_norm": 0.3536009192466736, "learning_rate": 8e-05, "loss": 0.6321, "step": 4 }, { "epoch": 0.011933174224343675, "grad_norm": 0.3561478853225708, "learning_rate": 0.0001, "loss": 0.6547, "step": 5 }, { "epoch": 0.011933174224343675, "eval_loss": 0.8676325082778931, "eval_runtime": 88.5868, "eval_samples_per_second": 1.998, "eval_steps_per_second": 1.005, "step": 5 }, { "epoch": 0.014319809069212411, "grad_norm": 0.48244509100914, "learning_rate": 0.00012, "loss": 0.7607, "step": 6 }, { "epoch": 0.016706443914081145, "grad_norm": 0.48935917019844055, "learning_rate": 0.00014, "loss": 0.6955, "step": 7 }, { "epoch": 0.01909307875894988, "grad_norm": 0.42430683970451355, "learning_rate": 0.00016, "loss": 0.9223, "step": 8 }, { "epoch": 0.021479713603818614, "grad_norm": 0.31512251496315, "learning_rate": 0.00018, "loss": 0.5808, "step": 9 }, { "epoch": 0.02386634844868735, "grad_norm": 0.3201429843902588, "learning_rate": 0.0002, "loss": 0.6558, "step": 10 }, { "epoch": 0.02386634844868735, "eval_loss": 0.7947567105293274, "eval_runtime": 88.634, "eval_samples_per_second": 1.997, "eval_steps_per_second": 1.004, "step": 10 }, { "epoch": 0.026252983293556086, "grad_norm": 1.7070752382278442, "learning_rate": 0.0001999979446958366, "loss": 0.8117, "step": 11 }, { "epoch": 0.028639618138424822, "grad_norm": 0.6111428737640381, "learning_rate": 0.00019999177886783194, "loss": 0.9051, "step": 12 }, { "epoch": 0.031026252983293555, "grad_norm": 0.7262500524520874, "learning_rate": 0.00019998150276943902, "loss": 0.6746, "step": 13 }, { "epoch": 0.03341288782816229, "grad_norm": 0.4517431855201721, "learning_rate": 0.000199967116823068, "loss": 0.5843, "step": 14 }, { "epoch": 0.03579952267303103, "grad_norm": 0.5962727069854736, "learning_rate": 0.0001999486216200688, "loss": 0.7573, "step": 15 }, { "epoch": 0.03579952267303103, "eval_loss": 0.7667846083641052, "eval_runtime": 88.6553, "eval_samples_per_second": 1.996, "eval_steps_per_second": 1.004, "step": 15 }, { "epoch": 0.03818615751789976, "grad_norm": 0.35535097122192383, "learning_rate": 0.00019992601792070679, "loss": 0.6861, "step": 16 }, { "epoch": 0.0405727923627685, "grad_norm": 0.37387725710868835, "learning_rate": 0.00019989930665413147, "loss": 0.7648, "step": 17 }, { "epoch": 0.04295942720763723, "grad_norm": 0.42131108045578003, "learning_rate": 0.00019986848891833845, "loss": 0.8014, "step": 18 }, { "epoch": 0.045346062052505964, "grad_norm": 0.35316216945648193, "learning_rate": 0.0001998335659801241, "loss": 0.5586, "step": 19 }, { "epoch": 0.0477326968973747, "grad_norm": 0.664710283279419, "learning_rate": 0.00019979453927503364, "loss": 0.625, "step": 20 }, { "epoch": 0.0477326968973747, "eval_loss": 0.756880521774292, "eval_runtime": 88.6671, "eval_samples_per_second": 1.996, "eval_steps_per_second": 1.004, "step": 20 }, { "epoch": 0.050119331742243436, "grad_norm": 0.7394424676895142, "learning_rate": 0.00019975141040730207, "loss": 0.7499, "step": 21 }, { "epoch": 0.05250596658711217, "grad_norm": 0.4532185196876526, "learning_rate": 0.0001997041811497882, "loss": 0.4935, "step": 22 }, { "epoch": 0.05489260143198091, "grad_norm": 0.3705839514732361, "learning_rate": 0.00019965285344390184, "loss": 0.7115, "step": 23 }, { "epoch": 0.057279236276849645, "grad_norm": 0.3674144446849823, "learning_rate": 0.00019959742939952392, "loss": 0.6629, "step": 24 }, { "epoch": 0.059665871121718374, "grad_norm": 0.9410332441329956, "learning_rate": 0.00019953791129491983, "loss": 0.6523, "step": 25 }, { "epoch": 0.059665871121718374, "eval_loss": 0.7463199496269226, "eval_runtime": 89.0491, "eval_samples_per_second": 1.988, "eval_steps_per_second": 0.999, "step": 25 }, { "epoch": 0.06205250596658711, "grad_norm": 0.4582875669002533, "learning_rate": 0.00019947430157664576, "loss": 0.8828, "step": 26 }, { "epoch": 0.06443914081145585, "grad_norm": 1.0290837287902832, "learning_rate": 0.00019940660285944803, "loss": 0.7447, "step": 27 }, { "epoch": 0.06682577565632458, "grad_norm": 0.24490249156951904, "learning_rate": 0.00019933481792615583, "loss": 0.5415, "step": 28 }, { "epoch": 0.06921241050119331, "grad_norm": 0.6698419451713562, "learning_rate": 0.0001992589497275665, "loss": 0.7669, "step": 29 }, { "epoch": 0.07159904534606205, "grad_norm": 1.0689176321029663, "learning_rate": 0.0001991790013823246, "loss": 1.0212, "step": 30 }, { "epoch": 0.07159904534606205, "eval_loss": 0.7373384237289429, "eval_runtime": 88.6141, "eval_samples_per_second": 1.997, "eval_steps_per_second": 1.004, "step": 30 } ], "logging_steps": 1, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.476062712987648e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }