|
{ |
|
"best_metric": 0.6491296227815271, |
|
"best_model_checkpoint": "./CARES/checkpoints/roberta-stratified/run-0/checkpoint-3266", |
|
"epoch": 23.163120567375888, |
|
"global_step": 3266, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 0.3462072014808655, |
|
"eval_macro_f1": 0.0007381889763779527, |
|
"eval_macro_precision": 0.0625, |
|
"eval_macro_recall": 0.0003712871287128713, |
|
"eval_micro_f1": 0.0039447731755424065, |
|
"eval_micro_precision": 1.0, |
|
"eval_micro_recall": 0.001976284584980237, |
|
"eval_runtime": 2.8803, |
|
"eval_samples_per_second": 337.116, |
|
"eval_steps_per_second": 10.763, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 0.26299169659614563, |
|
"eval_macro_f1": 0.03133608815426997, |
|
"eval_macro_precision": 0.051470588235294115, |
|
"eval_macro_recall": 0.022524752475247524, |
|
"eval_micro_f1": 0.20931569867740082, |
|
"eval_micro_precision": 0.8235294117647058, |
|
"eval_micro_recall": 0.11989459815546773, |
|
"eval_runtime": 2.7712, |
|
"eval_samples_per_second": 350.396, |
|
"eval_steps_per_second": 11.187, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_loss": 0.2253277599811554, |
|
"eval_macro_f1": 0.06167745523885891, |
|
"eval_macro_precision": 0.16397973284200829, |
|
"eval_macro_recall": 0.06444494429328504, |
|
"eval_micro_f1": 0.43727272727272726, |
|
"eval_micro_precision": 0.7052785923753666, |
|
"eval_micro_recall": 0.3168642951251647, |
|
"eval_runtime": 2.7754, |
|
"eval_samples_per_second": 349.86, |
|
"eval_steps_per_second": 11.17, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 1.858287403029302e-05, |
|
"loss": 0.3344, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_loss": 0.19568884372711182, |
|
"eval_macro_f1": 0.19533405609363325, |
|
"eval_macro_precision": 0.3105566636324695, |
|
"eval_macro_recall": 0.1838257881936085, |
|
"eval_micro_f1": 0.6221161495624502, |
|
"eval_micro_precision": 0.785140562248996, |
|
"eval_micro_recall": 0.5151515151515151, |
|
"eval_runtime": 2.775, |
|
"eval_samples_per_second": 349.909, |
|
"eval_steps_per_second": 11.171, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"eval_loss": 0.16411657631397247, |
|
"eval_macro_f1": 0.2724936969098707, |
|
"eval_macro_precision": 0.3798299682715689, |
|
"eval_macro_recall": 0.24456296636870606, |
|
"eval_micro_f1": 0.6832491255343957, |
|
"eval_micro_precision": 0.833175355450237, |
|
"eval_micro_recall": 0.5790513833992095, |
|
"eval_runtime": 2.7735, |
|
"eval_samples_per_second": 350.097, |
|
"eval_steps_per_second": 11.177, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"eval_loss": 0.15434952080249786, |
|
"eval_macro_f1": 0.33512731711181853, |
|
"eval_macro_precision": 0.42023284164245034, |
|
"eval_macro_recall": 0.31527094611085327, |
|
"eval_micro_f1": 0.7207672445592034, |
|
"eval_micro_precision": 0.8189438390611903, |
|
"eval_micro_recall": 0.6436100131752306, |
|
"eval_runtime": 2.7765, |
|
"eval_samples_per_second": 349.72, |
|
"eval_steps_per_second": 11.165, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 0.14012500643730164, |
|
"eval_macro_f1": 0.3798067714294707, |
|
"eval_macro_precision": 0.5469114307081249, |
|
"eval_macro_recall": 0.3317304014614234, |
|
"eval_micro_f1": 0.7399624765478424, |
|
"eval_micro_precision": 0.8596338273757629, |
|
"eval_micro_recall": 0.6495388669301713, |
|
"eval_runtime": 2.774, |
|
"eval_samples_per_second": 350.033, |
|
"eval_steps_per_second": 11.175, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 3.0452387239092086e-05, |
|
"loss": 0.1403, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"eval_loss": 0.1425222009420395, |
|
"eval_macro_f1": 0.4279380705267567, |
|
"eval_macro_precision": 0.603624913309443, |
|
"eval_macro_recall": 0.3703454980587147, |
|
"eval_micro_f1": 0.7544954128440367, |
|
"eval_micro_precision": 0.851698425849213, |
|
"eval_micro_recall": 0.6772068511198946, |
|
"eval_runtime": 2.7729, |
|
"eval_samples_per_second": 350.179, |
|
"eval_steps_per_second": 11.18, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"eval_loss": 0.12965930998325348, |
|
"eval_macro_f1": 0.516908938978679, |
|
"eval_macro_precision": 0.6952135806026781, |
|
"eval_macro_recall": 0.4441065897269686, |
|
"eval_micro_f1": 0.7703488372093023, |
|
"eval_micro_precision": 0.8589951377633711, |
|
"eval_micro_recall": 0.6982872200263505, |
|
"eval_runtime": 2.7725, |
|
"eval_samples_per_second": 350.226, |
|
"eval_steps_per_second": 11.181, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"eval_loss": 0.12852537631988525, |
|
"eval_macro_f1": 0.5388984906238257, |
|
"eval_macro_precision": 0.7130990451123878, |
|
"eval_macro_recall": 0.4711434333008039, |
|
"eval_micro_f1": 0.7730547550432276, |
|
"eval_micro_precision": 0.8529411764705882, |
|
"eval_micro_recall": 0.7068511198945981, |
|
"eval_runtime": 2.7737, |
|
"eval_samples_per_second": 350.077, |
|
"eval_steps_per_second": 11.176, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 2.929008238263819e-05, |
|
"loss": 0.0558, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"eval_loss": 0.13507980108261108, |
|
"eval_macro_f1": 0.5653547754610186, |
|
"eval_macro_precision": 0.697025101599949, |
|
"eval_macro_recall": 0.49809191604893205, |
|
"eval_micro_f1": 0.7797808412866738, |
|
"eval_micro_precision": 0.8413424866514111, |
|
"eval_micro_recall": 0.7266139657444005, |
|
"eval_runtime": 2.7736, |
|
"eval_samples_per_second": 350.083, |
|
"eval_steps_per_second": 11.177, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"eval_loss": 0.1355280876159668, |
|
"eval_macro_f1": 0.5794821565833095, |
|
"eval_macro_precision": 0.6818697605893614, |
|
"eval_macro_recall": 0.5236900724860433, |
|
"eval_micro_f1": 0.7849877236057524, |
|
"eval_micro_precision": 0.8394598649662416, |
|
"eval_micro_recall": 0.7371541501976284, |
|
"eval_runtime": 2.7759, |
|
"eval_samples_per_second": 349.801, |
|
"eval_steps_per_second": 11.168, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"eval_loss": 0.13659390807151794, |
|
"eval_macro_f1": 0.5891915194817472, |
|
"eval_macro_precision": 0.69343533514632, |
|
"eval_macro_recall": 0.5302292848265341, |
|
"eval_micro_f1": 0.7858642407277815, |
|
"eval_micro_precision": 0.8380597014925373, |
|
"eval_micro_recall": 0.7397891963109354, |
|
"eval_runtime": 2.7739, |
|
"eval_samples_per_second": 350.043, |
|
"eval_steps_per_second": 11.175, |
|
"step": 1846 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"eval_loss": 0.13488434255123138, |
|
"eval_macro_f1": 0.5900914578255574, |
|
"eval_macro_precision": 0.6881746937300715, |
|
"eval_macro_recall": 0.5327336547593478, |
|
"eval_micro_f1": 0.7906326459279972, |
|
"eval_micro_precision": 0.842144452717796, |
|
"eval_micro_recall": 0.7450592885375494, |
|
"eval_runtime": 2.772, |
|
"eval_samples_per_second": 350.292, |
|
"eval_steps_per_second": 11.183, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 2.8127777526184293e-05, |
|
"loss": 0.0229, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"eval_loss": 0.14160068333148956, |
|
"eval_macro_f1": 0.5920064296505824, |
|
"eval_macro_precision": 0.6848089552512611, |
|
"eval_macro_recall": 0.5382146732964487, |
|
"eval_micro_f1": 0.7904066736183525, |
|
"eval_micro_precision": 0.8366445916114791, |
|
"eval_micro_recall": 0.7490118577075099, |
|
"eval_runtime": 2.7738, |
|
"eval_samples_per_second": 350.061, |
|
"eval_steps_per_second": 11.176, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"eval_loss": 0.14674818515777588, |
|
"eval_macro_f1": 0.5928173166365653, |
|
"eval_macro_precision": 0.6894481117094009, |
|
"eval_macro_recall": 0.5355030217646541, |
|
"eval_micro_f1": 0.7838118298166724, |
|
"eval_micro_precision": 0.8252002913328478, |
|
"eval_micro_recall": 0.7463768115942029, |
|
"eval_runtime": 2.7753, |
|
"eval_samples_per_second": 349.871, |
|
"eval_steps_per_second": 11.17, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"eval_loss": 0.15975715219974518, |
|
"eval_macro_f1": 0.6260279452973108, |
|
"eval_macro_precision": 0.7038398537082291, |
|
"eval_macro_recall": 0.5866152515992036, |
|
"eval_micro_f1": 0.7704974271012006, |
|
"eval_micro_precision": 0.80386542591267, |
|
"eval_micro_recall": 0.7397891963109354, |
|
"eval_runtime": 2.773, |
|
"eval_samples_per_second": 350.168, |
|
"eval_steps_per_second": 11.179, |
|
"step": 2414 |
|
}, |
|
{ |
|
"epoch": 17.73, |
|
"learning_rate": 2.6965472669730396e-05, |
|
"loss": 0.0137, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"eval_loss": 0.15081025660037994, |
|
"eval_macro_f1": 0.6271302596876451, |
|
"eval_macro_precision": 0.7314709578369518, |
|
"eval_macro_recall": 0.5704921787075566, |
|
"eval_micro_f1": 0.7809989521480964, |
|
"eval_micro_precision": 0.8312267657992565, |
|
"eval_micro_recall": 0.7364953886693018, |
|
"eval_runtime": 2.7721, |
|
"eval_samples_per_second": 350.282, |
|
"eval_steps_per_second": 11.183, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"eval_loss": 0.14972703158855438, |
|
"eval_macro_f1": 0.5953077373311189, |
|
"eval_macro_precision": 0.6771131830689183, |
|
"eval_macro_recall": 0.5419002355296083, |
|
"eval_micro_f1": 0.7900552486187846, |
|
"eval_micro_precision": 0.8301886792452831, |
|
"eval_micro_recall": 0.7536231884057971, |
|
"eval_runtime": 2.7725, |
|
"eval_samples_per_second": 350.229, |
|
"eval_steps_per_second": 11.181, |
|
"step": 2698 |
|
}, |
|
{ |
|
"epoch": 20.14, |
|
"eval_loss": 0.16402311623096466, |
|
"eval_macro_f1": 0.5892530505072842, |
|
"eval_macro_precision": 0.6727972959139897, |
|
"eval_macro_recall": 0.5367187180351005, |
|
"eval_micro_f1": 0.779319916724497, |
|
"eval_micro_precision": 0.8233137829912024, |
|
"eval_micro_recall": 0.7397891963109354, |
|
"eval_runtime": 2.7746, |
|
"eval_samples_per_second": 349.963, |
|
"eval_steps_per_second": 11.173, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 21.15, |
|
"eval_loss": 0.16335928440093994, |
|
"eval_macro_f1": 0.6332863501223459, |
|
"eval_macro_precision": 0.7578187629295322, |
|
"eval_macro_recall": 0.5685989504723161, |
|
"eval_micro_f1": 0.7924791086350974, |
|
"eval_micro_precision": 0.8404726735598228, |
|
"eval_micro_recall": 0.7496706192358367, |
|
"eval_runtime": 2.7725, |
|
"eval_samples_per_second": 350.229, |
|
"eval_steps_per_second": 11.181, |
|
"step": 2982 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"learning_rate": 2.58031678132765e-05, |
|
"loss": 0.011, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 22.16, |
|
"eval_loss": 0.16375195980072021, |
|
"eval_macro_f1": 0.6343242676719218, |
|
"eval_macro_precision": 0.7473720060268463, |
|
"eval_macro_recall": 0.5714537925969617, |
|
"eval_micro_f1": 0.7909878682842287, |
|
"eval_micro_precision": 0.8346744696415508, |
|
"eval_micro_recall": 0.7516469038208169, |
|
"eval_runtime": 2.7735, |
|
"eval_samples_per_second": 350.093, |
|
"eval_steps_per_second": 11.177, |
|
"step": 3124 |
|
}, |
|
{ |
|
"epoch": 23.16, |
|
"eval_loss": 0.16090567409992218, |
|
"eval_macro_f1": 0.6491296227815271, |
|
"eval_macro_precision": 0.7487080551943797, |
|
"eval_macro_recall": 0.5927079340096836, |
|
"eval_micro_f1": 0.7961299239806496, |
|
"eval_micro_precision": 0.8372093023255814, |
|
"eval_micro_recall": 0.758893280632411, |
|
"eval_runtime": 2.7735, |
|
"eval_samples_per_second": 350.094, |
|
"eval_steps_per_second": 11.177, |
|
"step": 3266 |
|
} |
|
], |
|
"max_steps": 14100, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.2446273222900736e+16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"adam_epsilon": 6.356433345691159e-10, |
|
"learning_rate": 3.084757089028641e-05, |
|
"per_device_eval_batch_size": 32, |
|
"per_device_train_batch_size": 16, |
|
"seed": 326, |
|
"warmup_steps": 830, |
|
"weight_decay": 0.0009910374448883887 |
|
} |
|
} |
|
|