|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 30.0, |
|
"global_step": 960, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 7.5e-05, |
|
"loss": 13.0978, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 13.780136108398438, |
|
"eval_runtime": 82.8799, |
|
"eval_samples_per_second": 7.891, |
|
"eval_wer": 1.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.00015, |
|
"loss": 7.3093, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_loss": 3.198237419128418, |
|
"eval_runtime": 81.5893, |
|
"eval_samples_per_second": 8.016, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 0.000225, |
|
"loss": 3.0745, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"eval_loss": 3.1082892417907715, |
|
"eval_runtime": 82.4037, |
|
"eval_samples_per_second": 7.937, |
|
"eval_wer": 1.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.0003, |
|
"loss": 3.0551, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"eval_loss": 3.0994772911071777, |
|
"eval_runtime": 82.7226, |
|
"eval_samples_per_second": 7.906, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 0.00028026315789473683, |
|
"loss": 3.0632, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"eval_loss": 3.0916755199432373, |
|
"eval_runtime": 83.5323, |
|
"eval_samples_per_second": 7.829, |
|
"eval_wer": 1.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 0.0002605263157894737, |
|
"loss": 3.0391, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"eval_loss": 3.0707435607910156, |
|
"eval_runtime": 82.7328, |
|
"eval_samples_per_second": 7.905, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"learning_rate": 0.00024078947368421052, |
|
"loss": 3.0321, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"eval_loss": 3.0443670749664307, |
|
"eval_runtime": 84.1437, |
|
"eval_samples_per_second": 7.772, |
|
"eval_wer": 1.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 0.00022105263157894733, |
|
"loss": 3.0069, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_loss": 2.998474359512329, |
|
"eval_runtime": 83.9178, |
|
"eval_samples_per_second": 7.793, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 0.0002013157894736842, |
|
"loss": 2.9623, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"eval_loss": 2.866849184036255, |
|
"eval_runtime": 82.5906, |
|
"eval_samples_per_second": 7.919, |
|
"eval_wer": 1.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 0.00018157894736842105, |
|
"loss": 2.4771, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"eval_loss": 1.5367902517318726, |
|
"eval_runtime": 85.6456, |
|
"eval_samples_per_second": 7.636, |
|
"eval_wer": 0.9838912133891213, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"learning_rate": 0.00016184210526315788, |
|
"loss": 1.0561, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"eval_loss": 0.6924143433570862, |
|
"eval_runtime": 85.1658, |
|
"eval_samples_per_second": 7.679, |
|
"eval_wer": 0.7548117154811715, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"learning_rate": 0.0001421052631578947, |
|
"loss": 0.5288, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"eval_loss": 0.5334728956222534, |
|
"eval_runtime": 83.737, |
|
"eval_samples_per_second": 7.81, |
|
"eval_wer": 0.6569037656903766, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 20.31, |
|
"learning_rate": 0.00012236842105263157, |
|
"loss": 0.3581, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 20.31, |
|
"eval_loss": 0.48591092228889465, |
|
"eval_runtime": 86.2479, |
|
"eval_samples_per_second": 7.583, |
|
"eval_wer": 0.605857740585774, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 21.88, |
|
"learning_rate": 0.00010263157894736841, |
|
"loss": 0.2638, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 21.88, |
|
"eval_loss": 0.4631027579307556, |
|
"eval_runtime": 84.0825, |
|
"eval_samples_per_second": 7.778, |
|
"eval_wer": 0.5648535564853556, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 23.44, |
|
"learning_rate": 8.289473684210526e-05, |
|
"loss": 0.2284, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 23.44, |
|
"eval_loss": 0.4597685933113098, |
|
"eval_runtime": 86.122, |
|
"eval_samples_per_second": 7.594, |
|
"eval_wer": 0.5594142259414226, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 6.315789473684209e-05, |
|
"loss": 0.1965, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 0.4614764153957367, |
|
"eval_runtime": 86.0272, |
|
"eval_samples_per_second": 7.602, |
|
"eval_wer": 0.5535564853556485, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 26.56, |
|
"learning_rate": 4.342105263157895e-05, |
|
"loss": 0.1837, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 26.56, |
|
"eval_loss": 0.4499300718307495, |
|
"eval_runtime": 89.3292, |
|
"eval_samples_per_second": 7.321, |
|
"eval_wer": 0.5349372384937239, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 28.12, |
|
"learning_rate": 2.3684210526315787e-05, |
|
"loss": 0.187, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 28.12, |
|
"eval_loss": 0.45425695180892944, |
|
"eval_runtime": 85.6275, |
|
"eval_samples_per_second": 7.638, |
|
"eval_wer": 0.5345188284518828, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 29.69, |
|
"learning_rate": 3.947368421052631e-06, |
|
"loss": 0.1568, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 29.69, |
|
"eval_loss": 0.4458238184452057, |
|
"eval_runtime": 84.9753, |
|
"eval_samples_per_second": 7.696, |
|
"eval_wer": 0.5290794979079498, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 960, |
|
"total_flos": 8.556740517881789e+18, |
|
"train_runtime": 8781.3793, |
|
"train_samples_per_second": 0.109 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.4455166161060333, |
|
"eval_runtime": 81.4513, |
|
"eval_samples_per_second": 8.029, |
|
"eval_wer": 0.5288702928870292, |
|
"step": 960 |
|
} |
|
], |
|
"max_steps": 960, |
|
"num_train_epochs": 30, |
|
"total_flos": 8.556740517881789e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|