|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.2938090241343127, |
|
"global_step": 2800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.7687216869069928, |
|
"eval_f1": 0.7665424923390772, |
|
"eval_loss": 0.9364227652549744, |
|
"eval_runtime": 684.6631, |
|
"eval_samples_per_second": 197.963, |
|
"eval_steps_per_second": 0.774, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.994774396642183e-06, |
|
"loss": 1.2511, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.8296492496569228, |
|
"eval_f1": 0.8293241244922317, |
|
"eval_loss": 0.8166272044181824, |
|
"eval_runtime": 684.0816, |
|
"eval_samples_per_second": 198.131, |
|
"eval_steps_per_second": 0.775, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.989538300104932e-06, |
|
"loss": 0.8696, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.846795732562086, |
|
"eval_f1": 0.8476031571339556, |
|
"eval_loss": 0.7674374580383301, |
|
"eval_runtime": 684.9072, |
|
"eval_samples_per_second": 197.892, |
|
"eval_steps_per_second": 0.774, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.98429171038825e-06, |
|
"loss": 0.8001, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.8562469565730644, |
|
"eval_f1": 0.8555885462291756, |
|
"eval_loss": 0.7412897348403931, |
|
"eval_runtime": 685.4288, |
|
"eval_samples_per_second": 197.742, |
|
"eval_steps_per_second": 0.773, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.97906610703043e-06, |
|
"loss": 0.7653, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.8584087119479408, |
|
"eval_f1": 0.8589656756888883, |
|
"eval_loss": 0.7346311807632446, |
|
"eval_runtime": 1253.8639, |
|
"eval_samples_per_second": 108.096, |
|
"eval_steps_per_second": 0.423, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.8606590033791262, |
|
"eval_f1": 0.8613764873641668, |
|
"eval_loss": 0.7210016846656799, |
|
"eval_runtime": 749.1336, |
|
"eval_samples_per_second": 180.926, |
|
"eval_steps_per_second": 0.707, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.973840503672613e-06, |
|
"loss": 0.7517, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.8668491493160589, |
|
"eval_f1": 0.8662439909072627, |
|
"eval_loss": 0.7115087509155273, |
|
"eval_runtime": 748.5848, |
|
"eval_samples_per_second": 181.059, |
|
"eval_steps_per_second": 0.708, |
|
"step": 2800 |
|
} |
|
], |
|
"max_steps": 953000, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.1748750372701952e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|