{ "best_metric": 0.15162613987922668, "best_model_checkpoint": "output/eminem/checkpoint-455", "epoch": 1.0, "global_step": 455, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0001315617372315331, "loss": 0.3859, "step": 5 }, { "epoch": 0.02, "learning_rate": 0.0001305819167834288, "loss": 0.4039, "step": 10 }, { "epoch": 0.03, "learning_rate": 0.0001295279055913259, "loss": 0.3636, "step": 15 }, { "epoch": 0.04, "learning_rate": 0.00012840096527924066, "loss": 0.365, "step": 20 }, { "epoch": 0.06, "learning_rate": 0.00012720244476544407, "loss": 0.4157, "step": 25 }, { "epoch": 0.07, "learning_rate": 0.00012593377864785258, "loss": 0.3804, "step": 30 }, { "epoch": 0.08, "learning_rate": 0.00012459648548684795, "loss": 0.3737, "step": 35 }, { "epoch": 0.09, "learning_rate": 0.00012319216598759544, "loss": 0.4087, "step": 40 }, { "epoch": 0.1, "learning_rate": 0.0001217225010840549, "loss": 0.3864, "step": 45 }, { "epoch": 0.11, "learning_rate": 0.00012018924992692434, "loss": 0.3832, "step": 50 }, { "epoch": 0.12, "learning_rate": 0.0001185942477780086, "loss": 0.3928, "step": 55 }, { "epoch": 0.13, "learning_rate": 0.00011693940381343527, "loss": 0.3682, "step": 60 }, { "epoch": 0.14, "learning_rate": 0.00011522669883843705, "loss": 0.3798, "step": 65 }, { "epoch": 0.15, "learning_rate": 0.00011345818291636727, "loss": 0.3447, "step": 70 }, { "epoch": 0.17, "learning_rate": 0.00011163597291483855, "loss": 0.3718, "step": 75 }, { "epoch": 0.18, "learning_rate": 0.00010976224997187412, "loss": 0.4333, "step": 80 }, { "epoch": 0.19, "learning_rate": 0.00010783925688516201, "loss": 0.43, "step": 85 }, { "epoch": 0.2, "learning_rate": 0.00010586929542745496, "loss": 0.3813, "step": 90 }, { "epoch": 0.21, "learning_rate": 0.00010385472359144299, "loss": 0.3779, "step": 95 }, { "epoch": 0.22, "learning_rate": 0.00010179795276726699, "loss": 0.3846, "step": 100 }, { "epoch": 0.23, "learning_rate": 9.970144485616674e-05, "loss": 0.3528, "step": 105 }, { "epoch": 0.24, "learning_rate": 9.756770932364287e-05, "loss": 0.3909, "step": 110 }, { "epoch": 0.25, "learning_rate": 9.53993001956845e-05, "loss": 0.4177, "step": 115 }, { "epoch": 0.26, "learning_rate": 9.319881300168782e-05, "loss": 0.4063, "step": 120 }, { "epoch": 0.28, "learning_rate": 9.09688816676409e-05, "loss": 0.4443, "step": 125 }, { "epoch": 0.29, "learning_rate": 8.871217536342237e-05, "loss": 0.4231, "step": 130 }, { "epoch": 0.3, "learning_rate": 8.643139530783983e-05, "loss": 0.3991, "step": 135 }, { "epoch": 0.31, "learning_rate": 8.412927153536512e-05, "loss": 0.3839, "step": 140 }, { "epoch": 0.32, "learning_rate": 8.180855962833708e-05, "loss": 0.3923, "step": 145 }, { "epoch": 0.33, "learning_rate": 7.947203741861626e-05, "loss": 0.4375, "step": 150 }, { "epoch": 0.34, "learning_rate": 7.712250166257713e-05, "loss": 0.4036, "step": 155 }, { "epoch": 0.35, "learning_rate": 7.476276469347862e-05, "loss": 0.438, "step": 160 }, { "epoch": 0.36, "learning_rate": 7.239565105515707e-05, "loss": 0.3947, "step": 165 }, { "epoch": 0.37, "learning_rate": 7.002399412114568e-05, "loss": 0.3352, "step": 170 }, { "epoch": 0.39, "learning_rate": 6.765063270316318e-05, "loss": 0.3922, "step": 175 }, { "epoch": 0.4, "learning_rate": 6.527840765317078e-05, "loss": 0.3592, "step": 180 }, { "epoch": 0.41, "learning_rate": 6.291015846291898e-05, "loss": 0.4318, "step": 185 }, { "epoch": 0.42, "learning_rate": 6.054871986515355e-05, "loss": 0.4418, "step": 190 }, { "epoch": 0.43, "learning_rate": 5.819691844052574e-05, "loss": 0.3699, "step": 195 }, { "epoch": 0.44, "learning_rate": 5.585756923423138e-05, "loss": 0.3992, "step": 200 }, { "epoch": 0.45, "learning_rate": 5.353347238649785e-05, "loss": 0.3905, "step": 205 }, { "epoch": 0.46, "learning_rate": 5.122740978086224e-05, "loss": 0.3767, "step": 210 }, { "epoch": 0.47, "learning_rate": 4.894214171435194e-05, "loss": 0.3906, "step": 215 }, { "epoch": 0.48, "learning_rate": 4.6680403593463696e-05, "loss": 0.3188, "step": 220 }, { "epoch": 0.5, "learning_rate": 4.4444902659968225e-05, "loss": 0.3562, "step": 225 }, { "epoch": 0.51, "learning_rate": 4.22383147503972e-05, "loss": 0.406, "step": 230 }, { "epoch": 0.52, "learning_rate": 4.006328109315114e-05, "loss": 0.3462, "step": 235 }, { "epoch": 0.53, "learning_rate": 3.7922405147001485e-05, "loss": 0.3237, "step": 240 }, { "epoch": 0.54, "learning_rate": 3.581824948484096e-05, "loss": 0.3355, "step": 245 }, { "epoch": 0.55, "learning_rate": 3.375333272631733e-05, "loss": 0.3852, "step": 250 }, { "epoch": 0.56, "learning_rate": 3.1730126523147464e-05, "loss": 0.3729, "step": 255 }, { "epoch": 0.57, "learning_rate": 2.9751052600590164e-05, "loss": 0.3384, "step": 260 }, { "epoch": 0.58, "learning_rate": 2.7818479858707847e-05, "loss": 0.3507, "step": 265 }, { "epoch": 0.59, "learning_rate": 2.593472153686425e-05, "loss": 0.3956, "step": 270 }, { "epoch": 0.61, "learning_rate": 2.4102032444822844e-05, "loss": 0.3913, "step": 275 }, { "epoch": 0.62, "learning_rate": 2.232260626381796e-05, "loss": 0.3743, "step": 280 }, { "epoch": 0.63, "learning_rate": 2.0598572920747012e-05, "loss": 0.4364, "step": 285 }, { "epoch": 0.64, "learning_rate": 1.8931996038735562e-05, "loss": 0.3568, "step": 290 }, { "epoch": 0.65, "learning_rate": 1.7324870467006735e-05, "loss": 0.3423, "step": 295 }, { "epoch": 0.66, "learning_rate": 1.577911989311673e-05, "loss": 0.3864, "step": 300 }, { "epoch": 0.67, "learning_rate": 1.429659454033877e-05, "loss": 0.4152, "step": 305 }, { "epoch": 0.68, "learning_rate": 1.2879068953004088e-05, "loss": 0.3684, "step": 310 }, { "epoch": 0.69, "learning_rate": 1.1528239872408715e-05, "loss": 0.4387, "step": 315 }, { "epoch": 0.7, "learning_rate": 1.0245724205867534e-05, "loss": 0.4097, "step": 320 }, { "epoch": 0.72, "learning_rate": 9.033057091308445e-06, "loss": 0.3489, "step": 325 }, { "epoch": 0.73, "learning_rate": 7.891690059763513e-06, "loss": 0.3714, "step": 330 }, { "epoch": 0.74, "learning_rate": 6.822989297914713e-06, "loss": 0.3657, "step": 335 }, { "epoch": 0.75, "learning_rate": 5.828234012805588e-06, "loss": 0.3898, "step": 340 }, { "epoch": 0.76, "learning_rate": 4.90861490066865e-06, "loss": 0.3966, "step": 345 }, { "epoch": 0.77, "learning_rate": 4.065232721685712e-06, "loss": 0.3573, "step": 350 }, { "epoch": 0.78, "learning_rate": 3.299096982415909e-06, "loss": 0.3632, "step": 355 }, { "epoch": 0.79, "learning_rate": 2.6111247274298776e-06, "loss": 0.37, "step": 360 }, { "epoch": 0.8, "learning_rate": 2.0021394416451063e-06, "loss": 0.3912, "step": 365 }, { "epoch": 0.81, "learning_rate": 1.472870064624613e-06, "loss": 0.3434, "step": 370 }, { "epoch": 0.83, "learning_rate": 1.0239501180622683e-06, "loss": 0.4036, "step": 375 }, { "epoch": 0.84, "learning_rate": 6.559169474696805e-07, "loss": 0.3656, "step": 380 }, { "epoch": 0.85, "learning_rate": 3.6921107899052446e-07, "loss": 0.333, "step": 385 }, { "epoch": 0.86, "learning_rate": 1.6417569209981867e-07, "loss": 0.4543, "step": 390 }, { "epoch": 0.87, "learning_rate": 4.1056208828832076e-08, "loss": 0.3396, "step": 395 }, { "epoch": 0.88, "learning_rate": 0.0, "loss": 0.3405, "step": 400 }, { "epoch": 0.89, "learning_rate": 4.105620882877876e-08, "loss": 0.3422, "step": 405 }, { "epoch": 0.9, "learning_rate": 1.6417569209970443e-07, "loss": 0.3717, "step": 410 }, { "epoch": 0.91, "learning_rate": 3.692110789903493e-07, "loss": 0.3561, "step": 415 }, { "epoch": 0.93, "learning_rate": 6.559169474694444e-07, "loss": 0.4, "step": 420 }, { "epoch": 0.94, "learning_rate": 1.0239501180619788e-06, "loss": 0.3807, "step": 425 }, { "epoch": 0.95, "learning_rate": 1.4728700646242702e-06, "loss": 0.3506, "step": 430 }, { "epoch": 0.96, "learning_rate": 2.0021394416451673e-06, "loss": 0.3554, "step": 435 }, { "epoch": 0.97, "learning_rate": 2.6111247274294206e-06, "loss": 0.3542, "step": 440 }, { "epoch": 0.98, "learning_rate": 3.299096982415391e-06, "loss": 0.3452, "step": 445 }, { "epoch": 0.99, "learning_rate": 4.065232721685804e-06, "loss": 0.3457, "step": 450 }, { "epoch": 1.0, "eval_loss": 0.17592251300811768, "eval_runtime": 14.827, "eval_samples_per_second": 43.502, "eval_steps_per_second": 5.463, "step": 454 }, { "epoch": 1.0, "learning_rate": 4.181858176201967e-07, "loss": 0.3187, "step": 455 }, { "epoch": 1.0, "eval_loss": 0.15162613987922668, "eval_runtime": 13.378, "eval_samples_per_second": 48.139, "eval_steps_per_second": 6.055, "step": 455 } ], "max_steps": 910, "num_train_epochs": 2, "total_flos": 474636976128000.0, "trial_name": null, "trial_params": null }