|
{ |
|
"best_metric": 0.7025796661608499, |
|
"best_model_checkpoint": "./results/checkpoint-4434", |
|
"epoch": 15.0, |
|
"global_step": 22170, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2e-06, |
|
"loss": 0.4736, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8308534531162268, |
|
"eval_f1": 0.682141915061989, |
|
"eval_loss": 0.37968066334724426, |
|
"eval_precision": 0.6996753246753247, |
|
"eval_recall": 0.6654657745753989, |
|
"eval_runtime": 2.7102, |
|
"eval_samples_per_second": 68.261, |
|
"eval_steps_per_second": 68.261, |
|
"step": 1478 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.9055266887104394e-06, |
|
"loss": 0.3656, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.826221224031443, |
|
"eval_f1": 0.6371629542790153, |
|
"eval_loss": 0.37435096502304077, |
|
"eval_precision": 0.7399591558883595, |
|
"eval_recall": 0.5594441585177561, |
|
"eval_runtime": 2.6167, |
|
"eval_samples_per_second": 70.701, |
|
"eval_steps_per_second": 70.701, |
|
"step": 2956 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.8110533774208785e-06, |
|
"loss": 0.3599, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.716580066131318e-06, |
|
"loss": 0.3458, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8349241998877035, |
|
"eval_f1": 0.7025796661608499, |
|
"eval_loss": 0.36284855008125305, |
|
"eval_precision": 0.6907011437095972, |
|
"eval_recall": 0.7148739063304169, |
|
"eval_runtime": 2.6495, |
|
"eval_samples_per_second": 69.825, |
|
"eval_steps_per_second": 69.825, |
|
"step": 4434 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.622106754841757e-06, |
|
"loss": 0.337, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8281864121280179, |
|
"eval_f1": 0.651678998292544, |
|
"eval_loss": 0.37390556931495667, |
|
"eval_precision": 0.728835136855506, |
|
"eval_recall": 0.5892949047864128, |
|
"eval_runtime": 2.6125, |
|
"eval_samples_per_second": 70.813, |
|
"eval_steps_per_second": 70.813, |
|
"step": 5912 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 1.5276334435521965e-06, |
|
"loss": 0.3239, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 1.4331601322626356e-06, |
|
"loss": 0.3015, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8357664233576643, |
|
"eval_f1": 0.6978305785123967, |
|
"eval_loss": 0.36176469922065735, |
|
"eval_precision": 0.7003628823224468, |
|
"eval_recall": 0.6953165208440556, |
|
"eval_runtime": 2.8697, |
|
"eval_samples_per_second": 64.467, |
|
"eval_steps_per_second": 64.467, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 1.338686820973075e-06, |
|
"loss": 0.2884, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.833941605839416, |
|
"eval_f1": 0.6902330452998167, |
|
"eval_loss": 0.37303170561790466, |
|
"eval_precision": 0.7025586353944563, |
|
"eval_recall": 0.6783324755532681, |
|
"eval_runtime": 2.7072, |
|
"eval_samples_per_second": 68.337, |
|
"eval_steps_per_second": 68.337, |
|
"step": 8868 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 1.2442135096835144e-06, |
|
"loss": 0.264, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 1.1497401983939536e-06, |
|
"loss": 0.2324, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8322571588994947, |
|
"eval_f1": 0.6844467916556641, |
|
"eval_loss": 0.3992096483707428, |
|
"eval_precision": 0.702819956616052, |
|
"eval_recall": 0.6670097786927431, |
|
"eval_runtime": 2.6272, |
|
"eval_samples_per_second": 70.416, |
|
"eval_steps_per_second": 70.416, |
|
"step": 10346 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 1.0552668871043931e-06, |
|
"loss": 0.1965, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8250982594048287, |
|
"eval_f1": 0.6525376464026771, |
|
"eval_loss": 0.44216522574424744, |
|
"eval_precision": 0.7121119902617163, |
|
"eval_recall": 0.602161605764282, |
|
"eval_runtime": 2.622, |
|
"eval_samples_per_second": 70.557, |
|
"eval_steps_per_second": 70.557, |
|
"step": 11824 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 9.607935758148322e-07, |
|
"loss": 0.182, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 8.663202645252716e-07, |
|
"loss": 0.1405, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.823694553621561, |
|
"eval_f1": 0.6694736842105264, |
|
"eval_loss": 0.49459609389305115, |
|
"eval_precision": 0.6849757673667205, |
|
"eval_recall": 0.6546577457539887, |
|
"eval_runtime": 2.6563, |
|
"eval_samples_per_second": 69.645, |
|
"eval_steps_per_second": 69.645, |
|
"step": 13302 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 7.718469532357109e-07, |
|
"loss": 0.1149, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8238349241998877, |
|
"eval_f1": 0.6766297346044834, |
|
"eval_loss": 0.5486593842506409, |
|
"eval_precision": 0.6775025799793601, |
|
"eval_recall": 0.6757591353576943, |
|
"eval_runtime": 2.6698, |
|
"eval_samples_per_second": 69.293, |
|
"eval_steps_per_second": 69.293, |
|
"step": 14780 |
|
}, |
|
{ |
|
"epoch": 10.15, |
|
"learning_rate": 6.773736419461502e-07, |
|
"loss": 0.1013, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 5.829003306565895e-07, |
|
"loss": 0.0808, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8252386299831556, |
|
"eval_f1": 0.6767073487405868, |
|
"eval_loss": 0.6354735493659973, |
|
"eval_precision": 0.6829140461215933, |
|
"eval_recall": 0.6706124549665465, |
|
"eval_runtime": 2.7422, |
|
"eval_samples_per_second": 67.465, |
|
"eval_steps_per_second": 67.465, |
|
"step": 16258 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 4.884270193670288e-07, |
|
"loss": 0.0627, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8260808534531162, |
|
"eval_f1": 0.6648634027589938, |
|
"eval_loss": 0.6998042464256287, |
|
"eval_precision": 0.7006841505131128, |
|
"eval_recall": 0.6325270200720535, |
|
"eval_runtime": 2.7874, |
|
"eval_samples_per_second": 66.371, |
|
"eval_steps_per_second": 66.371, |
|
"step": 17736 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 3.939537080774681e-07, |
|
"loss": 0.0496, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 2.9948039678790744e-07, |
|
"loss": 0.0399, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8258001122964627, |
|
"eval_f1": 0.6703851261620186, |
|
"eval_loss": 0.7429465651512146, |
|
"eval_precision": 0.6926454445664105, |
|
"eval_recall": 0.649511065362841, |
|
"eval_runtime": 2.7701, |
|
"eval_samples_per_second": 66.784, |
|
"eval_steps_per_second": 66.784, |
|
"step": 19214 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"learning_rate": 2.0500708549834672e-07, |
|
"loss": 0.032, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8218697361033127, |
|
"eval_f1": 0.6556309362279512, |
|
"eval_loss": 0.7868731617927551, |
|
"eval_precision": 0.6934557979334098, |
|
"eval_recall": 0.6217189912506433, |
|
"eval_runtime": 2.7709, |
|
"eval_samples_per_second": 66.766, |
|
"eval_steps_per_second": 66.766, |
|
"step": 20692 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 1.1053377420878601e-07, |
|
"loss": 0.0274, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 1.606046291922532e-08, |
|
"loss": 0.0233, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8235541830432341, |
|
"eval_f1": 0.6621875839828004, |
|
"eval_loss": 0.7936236262321472, |
|
"eval_precision": 0.6929133858267716, |
|
"eval_recall": 0.6340710241893979, |
|
"eval_runtime": 2.7604, |
|
"eval_samples_per_second": 67.019, |
|
"eval_steps_per_second": 67.019, |
|
"step": 22170 |
|
} |
|
], |
|
"max_steps": 22170, |
|
"num_train_epochs": 15, |
|
"total_flos": 7912225034580240.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|