|
{ |
|
"best_metric": 80.33013168664009, |
|
"best_model_checkpoint": "/root/turkic_qa/ru_uzn_models/ru_uzn_xlm_roberta_large_squad_model/checkpoint-2790", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 2790, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 558, |
|
"train_exact_match": 64.83516483516483, |
|
"train_f1": 83.08204401574274, |
|
"train_runtime": 24.0955, |
|
"train_samples_per_second": 43.826, |
|
"train_steps_per_second": 1.577 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 80.57772827148438, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2698, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 61.25, |
|
"eval_f1": 79.38877529103148, |
|
"eval_runtime": 76.3721, |
|
"eval_samples_per_second": 43.982, |
|
"eval_steps_per_second": 1.571, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1116, |
|
"train_exact_match": 74.82517482517483, |
|
"train_f1": 89.2404305639639, |
|
"train_runtime": 24.2914, |
|
"train_samples_per_second": 42.855, |
|
"train_steps_per_second": 1.564 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 80.02055358886719, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.8886, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 62.3125, |
|
"eval_f1": 80.2126480616015, |
|
"eval_runtime": 77.9968, |
|
"eval_samples_per_second": 43.066, |
|
"eval_steps_per_second": 1.539, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1674, |
|
"train_exact_match": 78.52147852147853, |
|
"train_f1": 92.40489073606565, |
|
"train_runtime": 25.6503, |
|
"train_samples_per_second": 41.013, |
|
"train_steps_per_second": 1.481 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 28.389789581298828, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6286, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 61.9375, |
|
"eval_f1": 80.06485926873198, |
|
"eval_runtime": 83.2186, |
|
"eval_samples_per_second": 40.364, |
|
"eval_steps_per_second": 1.442, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 2232, |
|
"train_exact_match": 82.31768231768231, |
|
"train_f1": 93.82612819903538, |
|
"train_runtime": 23.9529, |
|
"train_samples_per_second": 43.669, |
|
"train_steps_per_second": 1.586 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 21.804250717163086, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.4725, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 62.625, |
|
"eval_f1": 80.25175708657538, |
|
"eval_runtime": 77.0148, |
|
"eval_samples_per_second": 43.615, |
|
"eval_steps_per_second": 1.558, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 2790, |
|
"train_exact_match": 85.21478521478521, |
|
"train_f1": 94.39980772824222, |
|
"train_runtime": 24.3057, |
|
"train_samples_per_second": 42.871, |
|
"train_steps_per_second": 1.563 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 48.04233932495117, |
|
"learning_rate": 0.0, |
|
"loss": 0.378, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 62.46875, |
|
"eval_f1": 80.33013168664009, |
|
"eval_runtime": 78.9765, |
|
"eval_samples_per_second": 42.532, |
|
"eval_steps_per_second": 1.519, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 2790, |
|
"total_flos": 5.432587638826752e+16, |
|
"train_loss": 0.7275064119728663, |
|
"train_runtime": 5090.5681, |
|
"train_samples_per_second": 15.321, |
|
"train_steps_per_second": 0.548 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2790, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 5.432587638826752e+16, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|