|
{ |
|
"best_metric": 70.29279085912718, |
|
"best_model_checkpoint": "/data/jcanete/all_results/mlqa/albeto_base/epochs_4_bs_64_lr_5e-5/checkpoint-4200", |
|
"epoch": 3.9996101364522416, |
|
"global_step": 5128, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23, |
|
"eval_exact_match": 36.4, |
|
"eval_f1": 61.84101912660733, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.517355694227769e-05, |
|
"loss": 2.3283, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_exact_match": 38.6, |
|
"eval_f1": 64.05444583625557, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_exact_match": 39.8, |
|
"eval_f1": 64.38318427781859, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.030811232449298e-05, |
|
"loss": 1.8958, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_exact_match": 42.6, |
|
"eval_f1": 67.53463785062704, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.5432917316692674e-05, |
|
"loss": 1.7304, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_exact_match": 41.6, |
|
"eval_f1": 67.26857117036066, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_exact_match": 40.8, |
|
"eval_f1": 68.19818328108937, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 3.055772230889236e-05, |
|
"loss": 1.606, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_exact_match": 42.6, |
|
"eval_f1": 68.13227861379248, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_exact_match": 45.2, |
|
"eval_f1": 69.37715961143962, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2.5682527301092048e-05, |
|
"loss": 1.5968, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_exact_match": 42.4, |
|
"eval_f1": 68.23125120786263, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.080733229329173e-05, |
|
"loss": 1.4139, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_exact_match": 43.2, |
|
"eval_f1": 69.28256897389873, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_exact_match": 43.8, |
|
"eval_f1": 69.8210897722378, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.593213728549142e-05, |
|
"loss": 1.3802, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_exact_match": 44.0, |
|
"eval_f1": 69.9704820053898, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_exact_match": 43.6, |
|
"eval_f1": 70.21665964298872, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.1056942277691108e-05, |
|
"loss": 1.3413, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_exact_match": 44.0, |
|
"eval_f1": 70.29279085912718, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 6.1817472698907965e-06, |
|
"loss": 1.2214, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_exact_match": 43.2, |
|
"eval_f1": 69.84257510948594, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_exact_match": 44.2, |
|
"eval_f1": 69.60819358399327, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 1.3065522620904836e-06, |
|
"loss": 1.2215, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"eval_exact_match": 43.4, |
|
"eval_f1": 69.19469528834327, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 5128, |
|
"total_flos": 5575305579646752.0, |
|
"train_loss": 1.5647673971381462, |
|
"train_runtime": 3554.3046, |
|
"train_samples_per_second": 92.34, |
|
"train_steps_per_second": 1.443 |
|
} |
|
], |
|
"max_steps": 5128, |
|
"num_train_epochs": 4, |
|
"total_flos": 5575305579646752.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|