{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 25, "global_step": 206, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.999712746672285e-05, "loss": 3.1322, "step": 5 }, { "epoch": 0.05, "learning_rate": 4.998844107474698e-05, "loss": 0.2475, "step": 10 }, { "epoch": 0.07, "learning_rate": 4.997394273973373e-05, "loss": 0.1435, "step": 15 }, { "epoch": 0.1, "learning_rate": 4.9953635833782084e-05, "loss": 0.1028, "step": 20 }, { "epoch": 0.12, "learning_rate": 4.992752507997904e-05, "loss": 0.0952, "step": 25 }, { "epoch": 0.12, "eval_loss": 0.09023847430944443, "eval_runtime": 0.122, "eval_samples_per_second": 3368.392, "eval_steps_per_second": 106.543, "step": 25 }, { "epoch": 0.15, "learning_rate": 4.989561655130103e-05, "loss": 0.1171, "step": 30 }, { "epoch": 0.17, "learning_rate": 4.9857917669201445e-05, "loss": 0.0777, "step": 35 }, { "epoch": 0.19, "learning_rate": 4.981443720188455e-05, "loss": 0.1283, "step": 40 }, { "epoch": 0.22, "learning_rate": 4.97651852622661e-05, "loss": 0.0895, "step": 45 }, { "epoch": 0.24, "learning_rate": 4.9710173305621214e-05, "loss": 0.0873, "step": 50 }, { "epoch": 0.24, "eval_loss": 0.07891710102558136, "eval_runtime": 0.1169, "eval_samples_per_second": 3515.748, "eval_steps_per_second": 111.204, "step": 50 }, { "epoch": 0.27, "learning_rate": 4.964941412692007e-05, "loss": 0.0729, "step": 55 }, { "epoch": 0.29, "learning_rate": 4.9582921857851984e-05, "loss": 0.0668, "step": 60 }, { "epoch": 0.32, "learning_rate": 4.951071196353857e-05, "loss": 0.0755, "step": 65 }, { "epoch": 0.34, "learning_rate": 4.9432801238936795e-05, "loss": 0.1041, "step": 70 }, { "epoch": 0.36, "learning_rate": 4.9349207804932704e-05, "loss": 0.0446, "step": 75 }, { "epoch": 0.36, "eval_loss": 0.0668846070766449, "eval_runtime": 0.1152, "eval_samples_per_second": 3569.236, "eval_steps_per_second": 112.896, "step": 75 }, { "epoch": 0.39, "learning_rate": 4.925995110412681e-05, "loss": 0.0687, "step": 80 }, { "epoch": 0.41, "learning_rate": 4.916505189631196e-05, "loss": 0.0395, "step": 85 }, { "epoch": 0.44, "learning_rate": 4.906453225364499e-05, "loss": 0.0801, "step": 90 }, { "epoch": 0.46, "learning_rate": 4.8958415555513e-05, "loss": 0.0977, "step": 95 }, { "epoch": 0.49, "learning_rate": 4.884672648309572e-05, "loss": 0.0902, "step": 100 }, { "epoch": 0.49, "eval_loss": 0.06855440139770508, "eval_runtime": 0.1158, "eval_samples_per_second": 3547.794, "eval_steps_per_second": 112.217, "step": 100 }, { "epoch": 0.51, "learning_rate": 4.8729491013624956e-05, "loss": 0.0672, "step": 105 }, { "epoch": 0.53, "learning_rate": 4.860673641434275e-05, "loss": 0.0433, "step": 110 }, { "epoch": 0.56, "learning_rate": 4.847849123615934e-05, "loss": 0.0619, "step": 115 }, { "epoch": 0.58, "learning_rate": 4.834478530701271e-05, "loss": 0.0422, "step": 120 }, { "epoch": 0.61, "learning_rate": 4.820564972493101e-05, "loss": 0.1589, "step": 125 }, { "epoch": 0.61, "eval_loss": 0.15087510645389557, "eval_runtime": 0.1202, "eval_samples_per_second": 3418.754, "eval_steps_per_second": 108.136, "step": 125 }, { "epoch": 0.63, "learning_rate": 4.8061116850799625e-05, "loss": 0.147, "step": 130 }, { "epoch": 0.66, "learning_rate": 4.79112203008345e-05, "loss": 0.1062, "step": 135 }, { "epoch": 0.68, "learning_rate": 4.775599493876354e-05, "loss": 0.052, "step": 140 }, { "epoch": 0.7, "learning_rate": 4.759547686771774e-05, "loss": 0.0462, "step": 145 }, { "epoch": 0.73, "learning_rate": 4.742970342183424e-05, "loss": 0.1183, "step": 150 }, { "epoch": 0.73, "eval_loss": 0.0662284642457962, "eval_runtime": 0.1095, "eval_samples_per_second": 3755.038, "eval_steps_per_second": 118.772, "step": 150 }, { "epoch": 0.75, "learning_rate": 4.7258713157572866e-05, "loss": 0.0695, "step": 155 }, { "epoch": 0.78, "learning_rate": 4.70825458447485e-05, "loss": 0.049, "step": 160 }, { "epoch": 0.8, "learning_rate": 4.690124245728121e-05, "loss": 0.0352, "step": 165 }, { "epoch": 0.83, "learning_rate": 4.671484516366633e-05, "loss": 0.1032, "step": 170 }, { "epoch": 0.85, "learning_rate": 4.652339731716663e-05, "loss": 0.0627, "step": 175 }, { "epoch": 0.85, "eval_loss": 0.05767974629998207, "eval_runtime": 0.1141, "eval_samples_per_second": 3600.998, "eval_steps_per_second": 113.9, "step": 175 }, { "epoch": 0.87, "learning_rate": 4.6326943445729075e-05, "loss": 0.0352, "step": 180 }, { "epoch": 0.9, "learning_rate": 4.612552924162822e-05, "loss": 0.0294, "step": 185 }, { "epoch": 0.92, "learning_rate": 4.591920155083887e-05, "loss": 0.0485, "step": 190 }, { "epoch": 0.95, "learning_rate": 4.570800836214041e-05, "loss": 0.0332, "step": 195 }, { "epoch": 0.97, "learning_rate": 4.5491998795955314e-05, "loss": 0.0741, "step": 200 }, { "epoch": 0.97, "eval_loss": 0.06550680845975876, "eval_runtime": 0.1437, "eval_samples_per_second": 2859.8, "eval_steps_per_second": 90.456, "step": 200 }, { "epoch": 1.0, "learning_rate": 4.527122309292447e-05, "loss": 0.0169, "step": 205 } ], "logging_steps": 5, "max_steps": 1030, "num_train_epochs": 5, "save_steps": 500, "total_flos": 91858718883840.0, "trial_name": null, "trial_params": null }