{ "best_metric": 0.2455403357744217, "best_model_checkpoint": "output/eminem/checkpoint-460", "epoch": 1.0, "global_step": 460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.00010098945265056683, "loss": 0.4239, "step": 5 }, { "epoch": 0.02, "learning_rate": 9.890598330141942e-05, "loss": 0.484, "step": 10 }, { "epoch": 0.03, "learning_rate": 9.678717853140213e-05, "loss": 0.4876, "step": 15 }, { "epoch": 0.04, "learning_rate": 9.46355087720941e-05, "loss": 0.4738, "step": 20 }, { "epoch": 0.05, "learning_rate": 9.245348277418726e-05, "loss": 0.4579, "step": 25 }, { "epoch": 0.07, "learning_rate": 9.024364468238985e-05, "loss": 0.4577, "step": 30 }, { "epoch": 0.08, "learning_rate": 8.800857106906198e-05, "loss": 0.4851, "step": 35 }, { "epoch": 0.09, "learning_rate": 8.575086793006042e-05, "loss": 0.4626, "step": 40 }, { "epoch": 0.1, "learning_rate": 8.34731676462491e-05, "loss": 0.4782, "step": 45 }, { "epoch": 0.11, "learning_rate": 8.117812591427207e-05, "loss": 0.5103, "step": 50 }, { "epoch": 0.12, "learning_rate": 7.886841865013191e-05, "loss": 0.5388, "step": 55 }, { "epoch": 0.13, "learning_rate": 7.654673886917425e-05, "loss": 0.4255, "step": 60 }, { "epoch": 0.14, "learning_rate": 7.421579354617336e-05, "loss": 0.4845, "step": 65 }, { "epoch": 0.15, "learning_rate": 7.187830045910335e-05, "loss": 0.4882, "step": 70 }, { "epoch": 0.16, "learning_rate": 6.953698502034407e-05, "loss": 0.4911, "step": 75 }, { "epoch": 0.17, "learning_rate": 6.719457709896656e-05, "loss": 0.445, "step": 80 }, { "epoch": 0.18, "learning_rate": 6.485380783782185e-05, "loss": 0.4824, "step": 85 }, { "epoch": 0.2, "learning_rate": 6.251740646916227e-05, "loss": 0.478, "step": 90 }, { "epoch": 0.21, "learning_rate": 6.0188097132472896e-05, "loss": 0.5299, "step": 95 }, { "epoch": 0.22, "learning_rate": 5.786859569824095e-05, "loss": 0.5087, "step": 100 }, { "epoch": 0.23, "learning_rate": 5.556160660138586e-05, "loss": 0.4854, "step": 105 }, { "epoch": 0.24, "learning_rate": 5.326981968799179e-05, "loss": 0.5017, "step": 110 }, { "epoch": 0.25, "learning_rate": 5.099590707908848e-05, "loss": 0.4689, "step": 115 }, { "epoch": 0.26, "learning_rate": 4.874252005506042e-05, "loss": 0.4921, "step": 120 }, { "epoch": 0.27, "learning_rate": 4.651228596437459e-05, "loss": 0.4873, "step": 125 }, { "epoch": 0.28, "learning_rate": 4.4307805160220805e-05, "loss": 0.4682, "step": 130 }, { "epoch": 0.29, "learning_rate": 4.2131647968601464e-05, "loss": 0.5228, "step": 135 }, { "epoch": 0.3, "learning_rate": 3.998635169145922e-05, "loss": 0.4468, "step": 140 }, { "epoch": 0.32, "learning_rate": 3.7874417648290806e-05, "loss": 0.452, "step": 145 }, { "epoch": 0.33, "learning_rate": 3.57983082597153e-05, "loss": 0.4363, "step": 150 }, { "epoch": 0.34, "learning_rate": 3.376044417641229e-05, "loss": 0.4868, "step": 155 }, { "epoch": 0.35, "learning_rate": 3.176320145674611e-05, "loss": 0.4906, "step": 160 }, { "epoch": 0.36, "learning_rate": 2.98089087963816e-05, "loss": 0.5023, "step": 165 }, { "epoch": 0.37, "learning_rate": 2.7899844813147157e-05, "loss": 0.4712, "step": 170 }, { "epoch": 0.38, "learning_rate": 2.603823539024549e-05, "loss": 0.4571, "step": 175 }, { "epoch": 0.39, "learning_rate": 2.422625108099115e-05, "loss": 0.4262, "step": 180 }, { "epoch": 0.4, "learning_rate": 2.2466004578035872e-05, "loss": 0.455, "step": 185 }, { "epoch": 0.41, "learning_rate": 2.0759548250059194e-05, "loss": 0.4506, "step": 190 }, { "epoch": 0.42, "learning_rate": 1.910887174880859e-05, "loss": 0.4219, "step": 195 }, { "epoch": 0.43, "learning_rate": 1.7515899689243134e-05, "loss": 0.4164, "step": 200 }, { "epoch": 0.45, "learning_rate": 1.5982489405528287e-05, "loss": 0.4133, "step": 205 }, { "epoch": 0.46, "learning_rate": 1.4510428785463036e-05, "loss": 0.4375, "step": 210 }, { "epoch": 0.47, "learning_rate": 1.3101434185879184e-05, "loss": 0.4882, "step": 215 }, { "epoch": 0.48, "learning_rate": 1.175714843145347e-05, "loss": 0.445, "step": 220 }, { "epoch": 0.49, "learning_rate": 1.0479138899242488e-05, "loss": 0.46, "step": 225 }, { "epoch": 0.5, "learning_rate": 9.268895691192436e-06, "loss": 0.5074, "step": 230 }, { "epoch": 0.51, "learning_rate": 8.127829896747804e-06, "loss": 0.4527, "step": 235 }, { "epoch": 0.52, "learning_rate": 7.057271947580458e-06, "loss": 0.4939, "step": 240 }, { "epoch": 0.53, "learning_rate": 6.058470066371539e-06, "loss": 0.5338, "step": 245 }, { "epoch": 0.54, "learning_rate": 5.132588811436518e-06, "loss": 0.4298, "step": 250 }, { "epoch": 0.55, "learning_rate": 4.280707718900271e-06, "loss": 0.4572, "step": 255 }, { "epoch": 0.57, "learning_rate": 3.5038200440114727e-06, "loss": 0.4096, "step": 260 }, { "epoch": 0.58, "learning_rate": 2.802831603045737e-06, "loss": 0.5071, "step": 265 }, { "epoch": 0.59, "learning_rate": 2.178559717169051e-06, "loss": 0.444, "step": 270 }, { "epoch": 0.6, "learning_rate": 1.6317322594756245e-06, "loss": 0.4421, "step": 275 }, { "epoch": 0.61, "learning_rate": 1.162986806318977e-06, "loss": 0.4878, "step": 280 }, { "epoch": 0.62, "learning_rate": 7.72869893930971e-07, "loss": 0.4151, "step": 285 }, { "epoch": 0.63, "learning_rate": 4.6183638117953145e-07, "loss": 0.4066, "step": 290 }, { "epoch": 0.64, "learning_rate": 2.3024891922762553e-07, "loss": 0.4453, "step": 295 }, { "epoch": 0.65, "learning_rate": 7.837752869653465e-08, "loss": 0.5799, "step": 300 }, { "epoch": 0.66, "learning_rate": 6.399284833793284e-09, "loss": 0.4665, "step": 305 }, { "epoch": 0.67, "learning_rate": 1.4398111052152874e-08, "loss": 0.4856, "step": 310 }, { "epoch": 0.68, "learning_rate": 1.0236468107794785e-07, "loss": 0.4853, "step": 315 }, { "epoch": 0.7, "learning_rate": 2.7019642982561925e-07, "loss": 0.5332, "step": 320 }, { "epoch": 0.71, "learning_rate": 5.176976729833616e-07, "loss": 0.4575, "step": 325 }, { "epoch": 0.72, "learning_rate": 8.445798351733662e-07, "loss": 0.4864, "step": 330 }, { "epoch": 0.73, "learning_rate": 1.2504617864165704e-06, "loss": 0.442, "step": 335 }, { "epoch": 0.74, "learning_rate": 1.7348702865138632e-06, "loss": 0.4695, "step": 340 }, { "epoch": 0.75, "learning_rate": 2.297240536822864e-06, "loss": 0.5056, "step": 345 }, { "epoch": 0.76, "learning_rate": 2.936916838787285e-06, "loss": 0.4522, "step": 350 }, { "epoch": 0.77, "learning_rate": 3.6531533584543354e-06, "loss": 0.4949, "step": 355 }, { "epoch": 0.78, "learning_rate": 4.445114996080512e-06, "loss": 0.4966, "step": 360 }, { "epoch": 0.79, "learning_rate": 5.3118783598219064e-06, "loss": 0.493, "step": 365 }, { "epoch": 0.8, "learning_rate": 6.252432842370185e-06, "loss": 0.4831, "step": 370 }, { "epoch": 0.82, "learning_rate": 7.265681799270346e-06, "loss": 0.4715, "step": 375 }, { "epoch": 0.83, "learning_rate": 8.350443827565338e-06, "loss": 0.4506, "step": 380 }, { "epoch": 0.84, "learning_rate": 9.505454143253312e-06, "loss": 0.4343, "step": 385 }, { "epoch": 0.85, "learning_rate": 1.072936605597144e-05, "loss": 0.4487, "step": 390 }, { "epoch": 0.86, "learning_rate": 1.2020752539181091e-05, "loss": 0.5072, "step": 395 }, { "epoch": 0.87, "learning_rate": 1.3378107894005304e-05, "loss": 0.447, "step": 400 }, { "epoch": 0.88, "learning_rate": 1.479984950482134e-05, "loss": 0.4754, "step": 405 }, { "epoch": 0.89, "learning_rate": 1.6284319684507726e-05, "loss": 0.4414, "step": 410 }, { "epoch": 0.9, "learning_rate": 1.782978760723747e-05, "loss": 0.4684, "step": 415 }, { "epoch": 0.91, "learning_rate": 1.9434451326547662e-05, "loss": 0.5108, "step": 420 }, { "epoch": 0.92, "learning_rate": 2.109643987632069e-05, "loss": 0.4894, "step": 425 }, { "epoch": 0.93, "learning_rate": 2.2813815452250717e-05, "loss": 0.5156, "step": 430 }, { "epoch": 0.95, "learning_rate": 2.4584575671241315e-05, "loss": 0.4643, "step": 435 }, { "epoch": 0.96, "learning_rate": 2.640665590608444e-05, "loss": 0.457, "step": 440 }, { "epoch": 0.97, "learning_rate": 2.8277931692735254e-05, "loss": 0.4539, "step": 445 }, { "epoch": 0.98, "learning_rate": 3.0196221207331556e-05, "loss": 0.4085, "step": 450 }, { "epoch": 0.99, "learning_rate": 3.215928781010894e-05, "loss": 0.4488, "step": 455 }, { "epoch": 1.0, "learning_rate": 3.416484265322113e-05, "loss": 0.4469, "step": 460 }, { "epoch": 1.0, "eval_loss": 0.2455403357744217, "eval_runtime": 13.8337, "eval_samples_per_second": 43.661, "eval_steps_per_second": 5.494, "step": 460 } ], "max_steps": 920, "num_train_epochs": 2, "total_flos": 479993462784000.0, "trial_name": null, "trial_params": null }