|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 59.96969696969697, |
|
"global_step": 960, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 3.88e-06, |
|
"loss": 8.6025, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 7.88e-06, |
|
"loss": 2.9845, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 18.73, |
|
"learning_rate": 1.188e-05, |
|
"loss": 2.8172, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 24.97, |
|
"learning_rate": 1.588e-05, |
|
"loss": 2.1658, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 31.24, |
|
"learning_rate": 1.9880000000000003e-05, |
|
"loss": 1.7135, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 31.24, |
|
"eval_loss": 0.9609270691871643, |
|
"eval_runtime": 24.5357, |
|
"eval_samples_per_second": 20.745, |
|
"eval_steps_per_second": 1.304, |
|
"eval_wer": 0.692578468759167, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 37.48, |
|
"learning_rate": 1.5782608695652177e-05, |
|
"loss": 1.4495, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 43.73, |
|
"learning_rate": 1.1434782608695654e-05, |
|
"loss": 1.253, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 49.97, |
|
"learning_rate": 7.086956521739131e-06, |
|
"loss": 1.1265, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 56.24, |
|
"learning_rate": 2.7391304347826087e-06, |
|
"loss": 1.0423, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 59.97, |
|
"step": 960, |
|
"total_flos": 2.336891900871078e+19, |
|
"train_loss": 2.4739840825398765, |
|
"train_runtime": 4674.7861, |
|
"train_samples_per_second": 13.425, |
|
"train_steps_per_second": 0.205 |
|
} |
|
], |
|
"max_steps": 960, |
|
"num_train_epochs": 60, |
|
"total_flos": 2.336891900871078e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|