|
{ |
|
"best_metric": 0.7672541719320296, |
|
"best_model_checkpoint": "./CARES/checkpoints/roberta-stratified/run-6/checkpoint-3550", |
|
"epoch": 50.0, |
|
"global_step": 3550, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.24808131158351898, |
|
"eval_macro_f1": 0.05481647030175482, |
|
"eval_macro_precision": 0.17800453514739228, |
|
"eval_macro_recall": 0.03330847588023425, |
|
"eval_micro_f1": 0.13594611145131658, |
|
"eval_micro_precision": 0.9568965517241379, |
|
"eval_micro_recall": 0.07317073170731707, |
|
"eval_runtime": 2.8186, |
|
"eval_samples_per_second": 342.724, |
|
"eval_steps_per_second": 21.642, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.16814221441745758, |
|
"eval_macro_f1": 0.37346208328620767, |
|
"eval_macro_precision": 0.4376472360907343, |
|
"eval_macro_recall": 0.3342883491414155, |
|
"eval_micro_f1": 0.7248062015503876, |
|
"eval_micro_precision": 0.8795860771401693, |
|
"eval_micro_recall": 0.6163480553724456, |
|
"eval_runtime": 2.8192, |
|
"eval_samples_per_second": 342.653, |
|
"eval_steps_per_second": 21.638, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.13150478899478912, |
|
"eval_macro_f1": 0.4319986220599845, |
|
"eval_macro_precision": 0.6389953058630822, |
|
"eval_macro_recall": 0.37598442828818834, |
|
"eval_micro_f1": 0.7641723356009071, |
|
"eval_micro_precision": 0.895482728077945, |
|
"eval_micro_recall": 0.6664469347396177, |
|
"eval_runtime": 2.8191, |
|
"eval_samples_per_second": 342.658, |
|
"eval_steps_per_second": 21.638, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 2.8662355855330125e-05, |
|
"loss": 0.2172, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.1121998205780983, |
|
"eval_macro_f1": 0.5386615940425785, |
|
"eval_macro_precision": 0.7631201629660762, |
|
"eval_macro_recall": 0.46714612883136086, |
|
"eval_micro_f1": 0.8077485380116959, |
|
"eval_micro_precision": 0.9064807219031994, |
|
"eval_micro_recall": 0.7284113381674358, |
|
"eval_runtime": 2.8196, |
|
"eval_samples_per_second": 342.598, |
|
"eval_steps_per_second": 21.634, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.10608664900064468, |
|
"eval_macro_f1": 0.5851323545796894, |
|
"eval_macro_precision": 0.7572045278273729, |
|
"eval_macro_recall": 0.5132727928856573, |
|
"eval_micro_f1": 0.8146802325581396, |
|
"eval_micro_precision": 0.9076923076923077, |
|
"eval_micro_recall": 0.7389584706657878, |
|
"eval_runtime": 2.8204, |
|
"eval_samples_per_second": 342.507, |
|
"eval_steps_per_second": 21.628, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.100788913667202, |
|
"eval_macro_f1": 0.628825318779795, |
|
"eval_macro_precision": 0.750817062947988, |
|
"eval_macro_recall": 0.5578116563013593, |
|
"eval_micro_f1": 0.8245363766048501, |
|
"eval_micro_precision": 0.8982128982128982, |
|
"eval_micro_recall": 0.7620303230059328, |
|
"eval_runtime": 2.8211, |
|
"eval_samples_per_second": 342.425, |
|
"eval_steps_per_second": 21.623, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.09952697157859802, |
|
"eval_macro_f1": 0.648367052483795, |
|
"eval_macro_precision": 0.7408904755242313, |
|
"eval_macro_recall": 0.5923039028415692, |
|
"eval_micro_f1": 0.8379418970948547, |
|
"eval_micro_precision": 0.8932835820895523, |
|
"eval_micro_recall": 0.7890573500329597, |
|
"eval_runtime": 2.8203, |
|
"eval_samples_per_second": 342.519, |
|
"eval_steps_per_second": 21.629, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 2.6490965260229357e-05, |
|
"loss": 0.0388, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.10003374516963959, |
|
"eval_macro_f1": 0.7100297932545989, |
|
"eval_macro_precision": 0.8731926397728601, |
|
"eval_macro_recall": 0.6387457691830838, |
|
"eval_micro_f1": 0.837847344354555, |
|
"eval_micro_precision": 0.8981900452488688, |
|
"eval_micro_recall": 0.7851021753460777, |
|
"eval_runtime": 2.8204, |
|
"eval_samples_per_second": 342.505, |
|
"eval_steps_per_second": 21.628, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.10116878896951675, |
|
"eval_macro_f1": 0.7102542897490418, |
|
"eval_macro_precision": 0.8708979085725551, |
|
"eval_macro_recall": 0.6278706438272531, |
|
"eval_micro_f1": 0.8369795342272407, |
|
"eval_micro_precision": 0.9005315110098709, |
|
"eval_micro_recall": 0.7818061964403428, |
|
"eval_runtime": 2.821, |
|
"eval_samples_per_second": 342.431, |
|
"eval_steps_per_second": 21.624, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.10277832299470901, |
|
"eval_macro_f1": 0.7155479835258449, |
|
"eval_macro_precision": 0.8815899776220457, |
|
"eval_macro_recall": 0.635461059766739, |
|
"eval_micro_f1": 0.8361702127659574, |
|
"eval_micro_precision": 0.9048349961627015, |
|
"eval_micro_recall": 0.7771918259723137, |
|
"eval_runtime": 2.8201, |
|
"eval_samples_per_second": 342.543, |
|
"eval_steps_per_second": 21.631, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 21.13, |
|
"learning_rate": 2.431957466512859e-05, |
|
"loss": 0.0147, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.10351855307817459, |
|
"eval_macro_f1": 0.7203873361325647, |
|
"eval_macro_precision": 0.87817690487536, |
|
"eval_macro_recall": 0.6443209986334517, |
|
"eval_micro_f1": 0.837389770723104, |
|
"eval_micro_precision": 0.9006069802731411, |
|
"eval_micro_recall": 0.7824653922214898, |
|
"eval_runtime": 2.8203, |
|
"eval_samples_per_second": 342.52, |
|
"eval_steps_per_second": 21.629, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.10557578504085541, |
|
"eval_macro_f1": 0.7298035823714943, |
|
"eval_macro_precision": 0.8804692132123697, |
|
"eval_macro_recall": 0.6544575680820387, |
|
"eval_micro_f1": 0.8399153737658673, |
|
"eval_micro_precision": 0.9029567854435178, |
|
"eval_micro_recall": 0.7851021753460777, |
|
"eval_runtime": 2.8192, |
|
"eval_samples_per_second": 342.647, |
|
"eval_steps_per_second": 21.637, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 0.10698197782039642, |
|
"eval_macro_f1": 0.719671126038702, |
|
"eval_macro_precision": 0.8799359996888543, |
|
"eval_macro_recall": 0.6422398711062759, |
|
"eval_micro_f1": 0.8375706214689265, |
|
"eval_micro_precision": 0.9019011406844106, |
|
"eval_micro_recall": 0.7818061964403428, |
|
"eval_runtime": 2.8198, |
|
"eval_samples_per_second": 342.58, |
|
"eval_steps_per_second": 21.633, |
|
"step": 1846 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 0.10863872617483139, |
|
"eval_macro_f1": 0.7318024399797272, |
|
"eval_macro_precision": 0.8781101314061108, |
|
"eval_macro_recall": 0.6548756753680312, |
|
"eval_micro_f1": 0.8389143461402891, |
|
"eval_micro_precision": 0.9015151515151515, |
|
"eval_micro_recall": 0.7844429795649308, |
|
"eval_runtime": 2.8198, |
|
"eval_samples_per_second": 342.583, |
|
"eval_steps_per_second": 21.633, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 28.17, |
|
"learning_rate": 2.214818407002782e-05, |
|
"loss": 0.0082, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.1103406548500061, |
|
"eval_macro_f1": 0.728656455732704, |
|
"eval_macro_precision": 0.8784457053881349, |
|
"eval_macro_recall": 0.6552278977528909, |
|
"eval_micro_f1": 0.8400702987697715, |
|
"eval_micro_precision": 0.8998493975903614, |
|
"eval_micro_recall": 0.7877389584706658, |
|
"eval_runtime": 2.8227, |
|
"eval_samples_per_second": 342.231, |
|
"eval_steps_per_second": 21.611, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 0.11085448414087296, |
|
"eval_macro_f1": 0.7344651790603305, |
|
"eval_macro_precision": 0.8647882525531505, |
|
"eval_macro_recall": 0.6579821769650391, |
|
"eval_micro_f1": 0.8402116402116402, |
|
"eval_micro_precision": 0.9036418816388467, |
|
"eval_micro_recall": 0.7851021753460777, |
|
"eval_runtime": 2.8204, |
|
"eval_samples_per_second": 342.508, |
|
"eval_steps_per_second": 21.628, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 0.11268670856952667, |
|
"eval_macro_f1": 0.7371526225457369, |
|
"eval_macro_precision": 0.8672638823695995, |
|
"eval_macro_recall": 0.6591834104386446, |
|
"eval_micro_f1": 0.8397323001056711, |
|
"eval_micro_precision": 0.9016641452344932, |
|
"eval_micro_recall": 0.7857613711272248, |
|
"eval_runtime": 2.8203, |
|
"eval_samples_per_second": 342.515, |
|
"eval_steps_per_second": 21.629, |
|
"step": 2414 |
|
}, |
|
{ |
|
"epoch": 35.21, |
|
"learning_rate": 1.9976793474927056e-05, |
|
"loss": 0.0056, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 0.11449939757585526, |
|
"eval_macro_f1": 0.7435877211427984, |
|
"eval_macro_precision": 0.8803719443456752, |
|
"eval_macro_recall": 0.6671207777218493, |
|
"eval_micro_f1": 0.8435852372583479, |
|
"eval_micro_precision": 0.9036144578313253, |
|
"eval_micro_recall": 0.7910349373764007, |
|
"eval_runtime": 2.8219, |
|
"eval_samples_per_second": 342.324, |
|
"eval_steps_per_second": 21.617, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 0.1153542622923851, |
|
"eval_macro_f1": 0.7450728657264144, |
|
"eval_macro_precision": 0.879277762192774, |
|
"eval_macro_recall": 0.6681699293595025, |
|
"eval_micro_f1": 0.8419570573741639, |
|
"eval_micro_precision": 0.9033232628398792, |
|
"eval_micro_recall": 0.7883981542518128, |
|
"eval_runtime": 2.8191, |
|
"eval_samples_per_second": 342.661, |
|
"eval_steps_per_second": 21.638, |
|
"step": 2698 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 0.11756419390439987, |
|
"eval_macro_f1": 0.744466331732378, |
|
"eval_macro_precision": 0.8796020530253235, |
|
"eval_macro_recall": 0.6677481100215579, |
|
"eval_micro_f1": 0.8419570573741639, |
|
"eval_micro_precision": 0.9033232628398792, |
|
"eval_micro_recall": 0.7883981542518128, |
|
"eval_runtime": 2.8181, |
|
"eval_samples_per_second": 342.785, |
|
"eval_steps_per_second": 21.646, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 0.11846820265054703, |
|
"eval_macro_f1": 0.7434279482403103, |
|
"eval_macro_precision": 0.8813168271357439, |
|
"eval_macro_recall": 0.6651456053129234, |
|
"eval_micro_f1": 0.8427717200140696, |
|
"eval_micro_precision": 0.9034690799396682, |
|
"eval_micro_recall": 0.7897165458141068, |
|
"eval_runtime": 2.8203, |
|
"eval_samples_per_second": 342.52, |
|
"eval_steps_per_second": 21.629, |
|
"step": 2982 |
|
}, |
|
{ |
|
"epoch": 42.25, |
|
"learning_rate": 1.7805402879826288e-05, |
|
"loss": 0.004, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 0.11996057629585266, |
|
"eval_macro_f1": 0.7394753977857482, |
|
"eval_macro_precision": 0.8783969916238008, |
|
"eval_macro_recall": 0.6650662737686901, |
|
"eval_micro_f1": 0.8427717200140696, |
|
"eval_micro_precision": 0.9034690799396682, |
|
"eval_micro_recall": 0.7897165458141068, |
|
"eval_runtime": 2.82, |
|
"eval_samples_per_second": 342.552, |
|
"eval_steps_per_second": 21.631, |
|
"step": 3124 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 0.12046220153570175, |
|
"eval_macro_f1": 0.7423287819151947, |
|
"eval_macro_precision": 0.8810998402784964, |
|
"eval_macro_recall": 0.6639465302563645, |
|
"eval_micro_f1": 0.8432546671363156, |
|
"eval_micro_precision": 0.905446293494705, |
|
"eval_micro_recall": 0.7890573500329597, |
|
"eval_runtime": 2.819, |
|
"eval_samples_per_second": 342.677, |
|
"eval_steps_per_second": 21.639, |
|
"step": 3266 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 0.12299305200576782, |
|
"eval_macro_f1": 0.7584256567760465, |
|
"eval_macro_precision": 0.9421857420436691, |
|
"eval_macro_recall": 0.6745195368122752, |
|
"eval_micro_f1": 0.8419570573741639, |
|
"eval_micro_precision": 0.9033232628398792, |
|
"eval_micro_recall": 0.7883981542518128, |
|
"eval_runtime": 2.8184, |
|
"eval_samples_per_second": 342.744, |
|
"eval_steps_per_second": 21.643, |
|
"step": 3408 |
|
}, |
|
{ |
|
"epoch": 49.3, |
|
"learning_rate": 1.563401228472552e-05, |
|
"loss": 0.0031, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 0.12287621200084686, |
|
"eval_macro_f1": 0.7672541719320296, |
|
"eval_macro_precision": 0.9303028445349943, |
|
"eval_macro_recall": 0.6844658294965831, |
|
"eval_micro_f1": 0.8449122807017543, |
|
"eval_micro_precision": 0.9032258064516129, |
|
"eval_micro_recall": 0.7936717205009888, |
|
"eval_runtime": 2.8187, |
|
"eval_samples_per_second": 342.711, |
|
"eval_steps_per_second": 21.641, |
|
"step": 3550 |
|
} |
|
], |
|
"max_steps": 7100, |
|
"num_train_epochs": 100, |
|
"total_flos": 3.007255197834797e+16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"adam_epsilon": 6.447418463180699e-08, |
|
"learning_rate": 3.0290898801655698e-05, |
|
"per_device_eval_batch_size": 16, |
|
"per_device_train_batch_size": 32, |
|
"seed": 320, |
|
"warmup_steps": 125, |
|
"weight_decay": 4.5126980713116176e-08 |
|
} |
|
} |
|
|