{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.914529914529915, "eval_steps": 500, "global_step": 290, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "learning_rate": 4.996333534627809e-05, "loss": 0.4005, "step": 5 }, { "epoch": 0.34, "learning_rate": 4.985344892885899e-05, "loss": 0.3409, "step": 10 }, { "epoch": 0.51, "learning_rate": 4.967066306353816e-05, "loss": 0.3372, "step": 15 }, { "epoch": 0.68, "learning_rate": 4.941551389275217e-05, "loss": 0.334, "step": 20 }, { "epoch": 0.85, "learning_rate": 4.908874981298057e-05, "loss": 0.348, "step": 25 }, { "epoch": 1.03, "learning_rate": 4.869132927957007e-05, "loss": 0.3413, "step": 30 }, { "epoch": 1.2, "learning_rate": 4.822441799541979e-05, "loss": 0.2458, "step": 35 }, { "epoch": 1.37, "learning_rate": 4.768938549177393e-05, "loss": 0.2715, "step": 40 }, { "epoch": 1.54, "learning_rate": 4.708780111115057e-05, "loss": 0.2079, "step": 45 }, { "epoch": 1.71, "learning_rate": 4.642142940418973e-05, "loss": 0.2698, "step": 50 }, { "epoch": 1.88, "learning_rate": 4.5692224953922266e-05, "loss": 0.2543, "step": 55 }, { "epoch": 2.05, "learning_rate": 4.4902326642641095e-05, "loss": 0.2492, "step": 60 }, { "epoch": 2.22, "learning_rate": 4.4054051378190915e-05, "loss": 0.2308, "step": 65 }, { "epoch": 2.39, "learning_rate": 4.3149887298078276e-05, "loss": 0.2018, "step": 70 }, { "epoch": 2.56, "learning_rate": 4.2192486471335585e-05, "loss": 0.1668, "step": 75 }, { "epoch": 2.74, "learning_rate": 4.118465711954569e-05, "loss": 0.2045, "step": 80 }, { "epoch": 2.91, "learning_rate": 4.012935537984414e-05, "loss": 0.1961, "step": 85 }, { "epoch": 3.08, "learning_rate": 3.902967663405956e-05, "loss": 0.1978, "step": 90 }, { "epoch": 3.25, "learning_rate": 3.7888846429425546e-05, "loss": 0.1409, "step": 95 }, { "epoch": 3.42, "learning_rate": 3.671021101749476e-05, "loss": 0.1534, "step": 100 }, { "epoch": 3.59, "learning_rate": 3.5497227539006614e-05, "loss": 0.1634, "step": 105 }, { "epoch": 3.76, "learning_rate": 3.425345388349786e-05, "loss": 0.1796, "step": 110 }, { "epoch": 3.93, "learning_rate": 3.29825382533995e-05, "loss": 0.1492, "step": 115 }, { "epoch": 4.1, "learning_rate": 3.168820846323053e-05, "loss": 0.1366, "step": 120 }, { "epoch": 4.27, "learning_rate": 3.0374261005275607e-05, "loss": 0.1133, "step": 125 }, { "epoch": 4.44, "learning_rate": 2.9044549913819124e-05, "loss": 0.1561, "step": 130 }, { "epoch": 4.62, "learning_rate": 2.7702975460598547e-05, "loss": 0.1161, "step": 135 }, { "epoch": 4.79, "learning_rate": 2.635347271463544e-05, "loss": 0.137, "step": 140 }, { "epoch": 4.96, "learning_rate": 2.5e-05, "loss": 0.1271, "step": 145 }, { "epoch": 5.13, "learning_rate": 2.3646527285364565e-05, "loss": 0.1031, "step": 150 }, { "epoch": 5.3, "learning_rate": 2.2297024539401463e-05, "loss": 0.0881, "step": 155 }, { "epoch": 5.47, "learning_rate": 2.0955450086180882e-05, "loss": 0.094, "step": 160 }, { "epoch": 5.64, "learning_rate": 1.96257389947244e-05, "loss": 0.1292, "step": 165 }, { "epoch": 5.81, "learning_rate": 1.8311791536769483e-05, "loss": 0.1146, "step": 170 }, { "epoch": 5.98, "learning_rate": 1.7017461746600506e-05, "loss": 0.0954, "step": 175 }, { "epoch": 6.15, "learning_rate": 1.574654611650214e-05, "loss": 0.1114, "step": 180 }, { "epoch": 6.32, "learning_rate": 1.4502772460993385e-05, "loss": 0.0744, "step": 185 }, { "epoch": 6.5, "learning_rate": 1.328978898250525e-05, "loss": 0.0752, "step": 190 }, { "epoch": 6.67, "learning_rate": 1.2111153570574454e-05, "loss": 0.0957, "step": 195 }, { "epoch": 6.84, "learning_rate": 1.0970323365940444e-05, "loss": 0.0871, "step": 200 }, { "epoch": 7.01, "learning_rate": 9.870644620155877e-06, "loss": 0.087, "step": 205 }, { "epoch": 7.18, "learning_rate": 8.815342880454311e-06, "loss": 0.0768, "step": 210 }, { "epoch": 7.35, "learning_rate": 7.807513528664414e-06, "loss": 0.0738, "step": 215 }, { "epoch": 7.52, "learning_rate": 6.8501127019217346e-06, "loss": 0.0953, "step": 220 }, { "epoch": 7.69, "learning_rate": 5.945948621809091e-06, "loss": 0.0948, "step": 225 }, { "epoch": 7.86, "learning_rate": 5.097673357358907e-06, "loss": 0.075, "step": 230 }, { "epoch": 8.03, "learning_rate": 4.307775046077739e-06, "loss": 0.0638, "step": 235 }, { "epoch": 8.21, "learning_rate": 3.578570595810274e-06, "loss": 0.0898, "step": 240 }, { "epoch": 8.38, "learning_rate": 2.9121988888494297e-06, "loss": 0.0799, "step": 245 }, { "epoch": 8.55, "learning_rate": 2.310614508226078e-06, "loss": 0.0688, "step": 250 }, { "epoch": 8.72, "learning_rate": 1.7755820045802145e-06, "loss": 0.0676, "step": 255 }, { "epoch": 8.89, "learning_rate": 1.3086707204299414e-06, "loss": 0.0726, "step": 260 }, { "epoch": 9.06, "learning_rate": 9.112501870194273e-07, "loss": 0.072, "step": 265 }, { "epoch": 9.23, "learning_rate": 5.844861072478336e-07, "loss": 0.0584, "step": 270 }, { "epoch": 9.4, "learning_rate": 3.293369364618465e-07, "loss": 0.0617, "step": 275 }, { "epoch": 9.57, "learning_rate": 1.4655107114101007e-07, "loss": 0.0779, "step": 280 }, { "epoch": 9.74, "learning_rate": 3.666465372190453e-08, "loss": 0.0803, "step": 285 }, { "epoch": 9.91, "learning_rate": 0.0, "loss": 0.0796, "step": 290 }, { "epoch": 9.91, "step": 290, "total_flos": 2.384077839944909e+16, "train_loss": 0.15196624583211438, "train_runtime": 473.0573, "train_samples_per_second": 9.872, "train_steps_per_second": 0.613 } ], "logging_steps": 5, "max_steps": 290, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "total_flos": 2.384077839944909e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }