|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"global_step": 5600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8955357074737549, |
|
"eval_loss": 0.33214062452316284, |
|
"eval_runtime": 7.5153, |
|
"eval_samples_per_second": 149.029, |
|
"eval_steps_per_second": 18.629, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.743006199096114e-05, |
|
"loss": 0.3501, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9142857193946838, |
|
"eval_loss": 0.29908323287963867, |
|
"eval_runtime": 7.7634, |
|
"eval_samples_per_second": 144.266, |
|
"eval_steps_per_second": 18.033, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9008928537368774, |
|
"eval_loss": 0.2781011164188385, |
|
"eval_runtime": 7.6341, |
|
"eval_samples_per_second": 146.71, |
|
"eval_steps_per_second": 18.339, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.474084022714142e-05, |
|
"loss": 0.1794, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9107142686843872, |
|
"eval_loss": 0.37903639674186707, |
|
"eval_runtime": 7.6349, |
|
"eval_samples_per_second": 146.696, |
|
"eval_steps_per_second": 18.337, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9214285612106323, |
|
"eval_loss": 0.40712061524391174, |
|
"eval_runtime": 7.4748, |
|
"eval_samples_per_second": 149.837, |
|
"eval_steps_per_second": 18.73, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 2.2051618463321703e-05, |
|
"loss": 0.0871, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9160714149475098, |
|
"eval_loss": 0.576979398727417, |
|
"eval_runtime": 7.7299, |
|
"eval_samples_per_second": 144.892, |
|
"eval_steps_per_second": 18.112, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9116071462631226, |
|
"eval_loss": 0.6520564556121826, |
|
"eval_runtime": 7.614, |
|
"eval_samples_per_second": 147.098, |
|
"eval_steps_per_second": 18.387, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.9362396699501985e-05, |
|
"loss": 0.0437, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9196428656578064, |
|
"eval_loss": 0.6204714179039001, |
|
"eval_runtime": 7.8377, |
|
"eval_samples_per_second": 142.899, |
|
"eval_steps_per_second": 17.862, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 1.6673174935682264e-05, |
|
"loss": 0.036, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9169642925262451, |
|
"eval_loss": 0.5968001484870911, |
|
"eval_runtime": 7.7266, |
|
"eval_samples_per_second": 144.953, |
|
"eval_steps_per_second": 18.119, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9214285612106323, |
|
"eval_loss": 0.6099338531494141, |
|
"eval_runtime": 7.5404, |
|
"eval_samples_per_second": 148.534, |
|
"eval_steps_per_second": 18.567, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 1.3983953171862545e-05, |
|
"loss": 0.0199, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9169642925262451, |
|
"eval_loss": 0.6376703381538391, |
|
"eval_runtime": 7.6099, |
|
"eval_samples_per_second": 147.177, |
|
"eval_steps_per_second": 18.397, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9205357432365417, |
|
"eval_loss": 0.6757161021232605, |
|
"eval_runtime": 7.6257, |
|
"eval_samples_per_second": 146.872, |
|
"eval_steps_per_second": 18.359, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 1.1294731408042823e-05, |
|
"loss": 0.0198, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9205357432365417, |
|
"eval_loss": 0.6298871636390686, |
|
"eval_runtime": 7.5908, |
|
"eval_samples_per_second": 147.548, |
|
"eval_steps_per_second": 18.443, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9125000238418579, |
|
"eval_loss": 0.693723201751709, |
|
"eval_runtime": 7.7616, |
|
"eval_samples_per_second": 144.301, |
|
"eval_steps_per_second": 18.038, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 8.605509644223102e-06, |
|
"loss": 0.0119, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9169642925262451, |
|
"eval_loss": 0.6261208653450012, |
|
"eval_runtime": 7.7494, |
|
"eval_samples_per_second": 144.528, |
|
"eval_steps_per_second": 18.066, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9160714149475098, |
|
"eval_loss": 0.7174915671348572, |
|
"eval_runtime": 7.862, |
|
"eval_samples_per_second": 142.457, |
|
"eval_steps_per_second": 17.807, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 16.07, |
|
"learning_rate": 5.916287880403384e-06, |
|
"loss": 0.0065, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9169642925262451, |
|
"eval_loss": 0.7007285952568054, |
|
"eval_runtime": 7.7124, |
|
"eval_samples_per_second": 145.221, |
|
"eval_steps_per_second": 18.153, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"learning_rate": 3.2270661165836636e-06, |
|
"loss": 0.0069, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.918749988079071, |
|
"eval_loss": 0.7041569352149963, |
|
"eval_runtime": 7.8136, |
|
"eval_samples_per_second": 143.339, |
|
"eval_steps_per_second": 17.917, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9107142686843872, |
|
"eval_loss": 0.732833981513977, |
|
"eval_runtime": 7.5262, |
|
"eval_samples_per_second": 148.814, |
|
"eval_steps_per_second": 18.602, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 19.64, |
|
"learning_rate": 5.378443527639439e-07, |
|
"loss": 0.0034, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9116071462631226, |
|
"eval_loss": 0.7286208271980286, |
|
"eval_runtime": 7.6221, |
|
"eval_samples_per_second": 146.941, |
|
"eval_steps_per_second": 18.368, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 5600, |
|
"total_flos": 5893793336524800.0, |
|
"train_loss": 0.06829224105924368, |
|
"train_runtime": 4007.6255, |
|
"train_samples_per_second": 22.357, |
|
"train_steps_per_second": 1.397 |
|
} |
|
], |
|
"max_steps": 5600, |
|
"num_train_epochs": 20, |
|
"total_flos": 5893793336524800.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|