|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"global_step": 5600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9008928537368774, |
|
"eval_loss": 0.2425430864095688, |
|
"eval_runtime": 15.3273, |
|
"eval_samples_per_second": 73.072, |
|
"eval_steps_per_second": 9.134, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.0769035067318356e-05, |
|
"loss": 0.2497, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.90625, |
|
"eval_loss": 0.2671281397342682, |
|
"eval_runtime": 15.4851, |
|
"eval_samples_per_second": 72.327, |
|
"eval_steps_per_second": 9.041, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9285714030265808, |
|
"eval_loss": 0.37120091915130615, |
|
"eval_runtime": 15.3992, |
|
"eval_samples_per_second": 72.731, |
|
"eval_steps_per_second": 9.091, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.775246300189499e-05, |
|
"loss": 0.0886, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9357143044471741, |
|
"eval_loss": 0.3282666504383087, |
|
"eval_runtime": 15.4636, |
|
"eval_samples_per_second": 72.428, |
|
"eval_steps_per_second": 9.054, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9303571581840515, |
|
"eval_loss": 0.36973103880882263, |
|
"eval_runtime": 15.65, |
|
"eval_samples_per_second": 71.565, |
|
"eval_steps_per_second": 8.946, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 2.473589093647162e-05, |
|
"loss": 0.0277, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9303571581840515, |
|
"eval_loss": 0.4028499722480774, |
|
"eval_runtime": 15.6492, |
|
"eval_samples_per_second": 71.569, |
|
"eval_steps_per_second": 8.946, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9285714030265808, |
|
"eval_loss": 0.4771631956100464, |
|
"eval_runtime": 15.5073, |
|
"eval_samples_per_second": 72.224, |
|
"eval_steps_per_second": 9.028, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 2.1719318871048256e-05, |
|
"loss": 0.023, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9205357432365417, |
|
"eval_loss": 0.5438315868377686, |
|
"eval_runtime": 15.6912, |
|
"eval_samples_per_second": 71.378, |
|
"eval_steps_per_second": 8.922, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 1.8702746805624887e-05, |
|
"loss": 0.0229, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9276785850524902, |
|
"eval_loss": 0.51451176404953, |
|
"eval_runtime": 15.4278, |
|
"eval_samples_per_second": 72.596, |
|
"eval_steps_per_second": 9.075, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9223214387893677, |
|
"eval_loss": 0.4909174144268036, |
|
"eval_runtime": 15.4206, |
|
"eval_samples_per_second": 72.63, |
|
"eval_steps_per_second": 9.079, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 1.5686174740201518e-05, |
|
"loss": 0.0151, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9205357432365417, |
|
"eval_loss": 0.5145156979560852, |
|
"eval_runtime": 15.4916, |
|
"eval_samples_per_second": 72.297, |
|
"eval_steps_per_second": 9.037, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9241071343421936, |
|
"eval_loss": 0.5320212244987488, |
|
"eval_runtime": 15.5222, |
|
"eval_samples_per_second": 72.155, |
|
"eval_steps_per_second": 9.019, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 1.2669602674778149e-05, |
|
"loss": 0.0164, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.925000011920929, |
|
"eval_loss": 0.4845915138721466, |
|
"eval_runtime": 15.5092, |
|
"eval_samples_per_second": 72.215, |
|
"eval_steps_per_second": 9.027, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9232142567634583, |
|
"eval_loss": 0.5342178344726562, |
|
"eval_runtime": 15.4696, |
|
"eval_samples_per_second": 72.4, |
|
"eval_steps_per_second": 9.05, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 9.653030609354778e-06, |
|
"loss": 0.0114, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9241071343421936, |
|
"eval_loss": 0.5672621726989746, |
|
"eval_runtime": 15.5099, |
|
"eval_samples_per_second": 72.212, |
|
"eval_steps_per_second": 9.027, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9258928298950195, |
|
"eval_loss": 0.562305748462677, |
|
"eval_runtime": 15.6284, |
|
"eval_samples_per_second": 71.664, |
|
"eval_steps_per_second": 8.958, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 16.07, |
|
"learning_rate": 6.636458543931411e-06, |
|
"loss": 0.0087, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9258928298950195, |
|
"eval_loss": 0.5690274238586426, |
|
"eval_runtime": 15.6003, |
|
"eval_samples_per_second": 71.793, |
|
"eval_steps_per_second": 8.974, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"learning_rate": 3.619886478508042e-06, |
|
"loss": 0.0086, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.925000011920929, |
|
"eval_loss": 0.5821966528892517, |
|
"eval_runtime": 15.4277, |
|
"eval_samples_per_second": 72.597, |
|
"eval_steps_per_second": 9.075, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.925000011920929, |
|
"eval_loss": 0.5867368578910828, |
|
"eval_runtime": 15.4548, |
|
"eval_samples_per_second": 72.469, |
|
"eval_steps_per_second": 9.059, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 19.64, |
|
"learning_rate": 6.033144130846736e-07, |
|
"loss": 0.0067, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9258928298950195, |
|
"eval_loss": 0.5937979221343994, |
|
"eval_runtime": 15.5033, |
|
"eval_samples_per_second": 72.243, |
|
"eval_steps_per_second": 9.03, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 5600, |
|
"total_flos": 2.08752625385472e+16, |
|
"train_loss": 0.042873825313789506, |
|
"train_runtime": 6011.0282, |
|
"train_samples_per_second": 14.906, |
|
"train_steps_per_second": 0.932 |
|
} |
|
], |
|
"max_steps": 5600, |
|
"num_train_epochs": 20, |
|
"total_flos": 2.08752625385472e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|