|
{ |
|
"best_metric": 0.7252073370829516, |
|
"best_model_checkpoint": "./CARES/checkpoints/bert-ba-stratified/run-3/checkpoint-4402", |
|
"epoch": 31.21985815602837, |
|
"global_step": 4402, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 0.2769930064678192, |
|
"eval_macro_f1": 0.04276761517615176, |
|
"eval_macro_precision": 0.03250514933058703, |
|
"eval_macro_recall": 0.0625, |
|
"eval_micro_f1": 0.4057854560064283, |
|
"eval_micro_precision": 0.5200823892893924, |
|
"eval_micro_recall": 0.3326745718050066, |
|
"eval_runtime": 10.4856, |
|
"eval_samples_per_second": 92.603, |
|
"eval_steps_per_second": 2.956, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 0.23320329189300537, |
|
"eval_macro_f1": 0.07268419522929685, |
|
"eval_macro_precision": 0.09945738083657588, |
|
"eval_macro_recall": 0.07568745319910142, |
|
"eval_micro_f1": 0.4403274450667815, |
|
"eval_micro_precision": 0.6363636363636364, |
|
"eval_micro_recall": 0.33662714097496704, |
|
"eval_runtime": 10.4881, |
|
"eval_samples_per_second": 92.581, |
|
"eval_steps_per_second": 2.956, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_loss": 0.18873895704746246, |
|
"eval_macro_f1": 0.18226771045112236, |
|
"eval_macro_precision": 0.20572045142357642, |
|
"eval_macro_recall": 0.16688644032337763, |
|
"eval_micro_f1": 0.6089478044739022, |
|
"eval_micro_precision": 0.8203125, |
|
"eval_micro_recall": 0.4841897233201581, |
|
"eval_runtime": 10.4735, |
|
"eval_samples_per_second": 92.71, |
|
"eval_steps_per_second": 2.96, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 3.0199029012738545e-05, |
|
"loss": 0.2864, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_loss": 0.1688539683818817, |
|
"eval_macro_f1": 0.2604924389345447, |
|
"eval_macro_precision": 0.3576131893465115, |
|
"eval_macro_recall": 0.24005925564568711, |
|
"eval_micro_f1": 0.6585842784513101, |
|
"eval_micro_precision": 0.8103946102021174, |
|
"eval_micro_recall": 0.5546772068511199, |
|
"eval_runtime": 10.5012, |
|
"eval_samples_per_second": 92.465, |
|
"eval_steps_per_second": 2.952, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"eval_loss": 0.14974796772003174, |
|
"eval_macro_f1": 0.31858165699036184, |
|
"eval_macro_precision": 0.3683087696286302, |
|
"eval_macro_recall": 0.2922878974373826, |
|
"eval_micro_f1": 0.7164622816796731, |
|
"eval_micro_precision": 0.8218243819266837, |
|
"eval_micro_recall": 0.6350461133069829, |
|
"eval_runtime": 10.488, |
|
"eval_samples_per_second": 92.582, |
|
"eval_steps_per_second": 2.956, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"eval_loss": 0.1422141045331955, |
|
"eval_macro_f1": 0.45636691704527516, |
|
"eval_macro_precision": 0.6115096433126577, |
|
"eval_macro_recall": 0.4073540760438781, |
|
"eval_micro_f1": 0.7305970149253732, |
|
"eval_micro_precision": 0.842512908777969, |
|
"eval_micro_recall": 0.644927536231884, |
|
"eval_runtime": 10.4831, |
|
"eval_samples_per_second": 92.625, |
|
"eval_steps_per_second": 2.957, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 0.1337544023990631, |
|
"eval_macro_f1": 0.5272952937702646, |
|
"eval_macro_precision": 0.6898645530460084, |
|
"eval_macro_recall": 0.47291207140683184, |
|
"eval_micro_f1": 0.7612208258527827, |
|
"eval_micro_precision": 0.8366219415943172, |
|
"eval_micro_recall": 0.6982872200263505, |
|
"eval_runtime": 10.4851, |
|
"eval_samples_per_second": 92.608, |
|
"eval_steps_per_second": 2.957, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 4.790731259177651e-05, |
|
"loss": 0.1272, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"eval_loss": 0.13911226391792297, |
|
"eval_macro_f1": 0.5414520142513848, |
|
"eval_macro_precision": 0.6021054439096823, |
|
"eval_macro_recall": 0.5152762028502071, |
|
"eval_micro_f1": 0.7606779661016948, |
|
"eval_micro_precision": 0.7835195530726257, |
|
"eval_micro_recall": 0.7391304347826086, |
|
"eval_runtime": 10.487, |
|
"eval_samples_per_second": 92.591, |
|
"eval_steps_per_second": 2.956, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"eval_loss": 0.1353491097688675, |
|
"eval_macro_f1": 0.5516496195059415, |
|
"eval_macro_precision": 0.6105827458765187, |
|
"eval_macro_recall": 0.5327103225574433, |
|
"eval_micro_f1": 0.7625212947189096, |
|
"eval_micro_precision": 0.78969654199012, |
|
"eval_micro_recall": 0.7371541501976284, |
|
"eval_runtime": 10.4862, |
|
"eval_samples_per_second": 92.598, |
|
"eval_steps_per_second": 2.956, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"eval_loss": 0.13818134367465973, |
|
"eval_macro_f1": 0.5629085360191737, |
|
"eval_macro_precision": 0.6601053330799342, |
|
"eval_macro_recall": 0.5158438987675726, |
|
"eval_micro_f1": 0.7794221282593375, |
|
"eval_micro_precision": 0.8378787878787879, |
|
"eval_micro_recall": 0.7285902503293807, |
|
"eval_runtime": 10.4843, |
|
"eval_samples_per_second": 92.615, |
|
"eval_steps_per_second": 2.957, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 4.6078789210411e-05, |
|
"loss": 0.0577, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"eval_loss": 0.145726278424263, |
|
"eval_macro_f1": 0.5903998605572057, |
|
"eval_macro_precision": 0.653652711667118, |
|
"eval_macro_recall": 0.5513401858459299, |
|
"eval_micro_f1": 0.7780429594272077, |
|
"eval_micro_precision": 0.8063604240282686, |
|
"eval_micro_recall": 0.7516469038208169, |
|
"eval_runtime": 10.4837, |
|
"eval_samples_per_second": 92.62, |
|
"eval_steps_per_second": 2.957, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"eval_loss": 0.1388859748840332, |
|
"eval_macro_f1": 0.6030363323278404, |
|
"eval_macro_precision": 0.6755461130761506, |
|
"eval_macro_recall": 0.5610332958067313, |
|
"eval_micro_f1": 0.8001355013550135, |
|
"eval_micro_precision": 0.8235704323570432, |
|
"eval_micro_recall": 0.7779973649538867, |
|
"eval_runtime": 10.4865, |
|
"eval_samples_per_second": 92.595, |
|
"eval_steps_per_second": 2.956, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"eval_loss": 0.1482115238904953, |
|
"eval_macro_f1": 0.6056908525913866, |
|
"eval_macro_precision": 0.6552837427265621, |
|
"eval_macro_recall": 0.572537210182014, |
|
"eval_micro_f1": 0.7822553335590924, |
|
"eval_micro_precision": 0.8048780487804879, |
|
"eval_micro_recall": 0.7608695652173914, |
|
"eval_runtime": 10.5045, |
|
"eval_samples_per_second": 92.437, |
|
"eval_steps_per_second": 2.951, |
|
"step": 1846 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"eval_loss": 0.14726266264915466, |
|
"eval_macro_f1": 0.6072794535097282, |
|
"eval_macro_precision": 0.680123510778154, |
|
"eval_macro_recall": 0.5630252433183344, |
|
"eval_micro_f1": 0.7845188284518829, |
|
"eval_micro_precision": 0.8333333333333334, |
|
"eval_micro_recall": 0.741106719367589, |
|
"eval_runtime": 10.487, |
|
"eval_samples_per_second": 92.591, |
|
"eval_steps_per_second": 2.956, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 4.425026582904548e-05, |
|
"loss": 0.0284, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"eval_loss": 0.14193882048130035, |
|
"eval_macro_f1": 0.6169717669838317, |
|
"eval_macro_precision": 0.6503808357210328, |
|
"eval_macro_recall": 0.5931599384917411, |
|
"eval_micro_f1": 0.7969924812030075, |
|
"eval_micro_precision": 0.828125, |
|
"eval_micro_recall": 0.7681159420289855, |
|
"eval_runtime": 10.4855, |
|
"eval_samples_per_second": 92.604, |
|
"eval_steps_per_second": 2.956, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"eval_loss": 0.15002837777137756, |
|
"eval_macro_f1": 0.6525221299555535, |
|
"eval_macro_precision": 0.8021892379342418, |
|
"eval_macro_recall": 0.5999831390602388, |
|
"eval_micro_f1": 0.8061016949152543, |
|
"eval_micro_precision": 0.8303072625698324, |
|
"eval_micro_recall": 0.7832674571805006, |
|
"eval_runtime": 10.503, |
|
"eval_samples_per_second": 92.45, |
|
"eval_steps_per_second": 2.952, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"eval_loss": 0.14581723511219025, |
|
"eval_macro_f1": 0.6138635616219041, |
|
"eval_macro_precision": 0.6561402184700652, |
|
"eval_macro_recall": 0.5837363386062422, |
|
"eval_micro_f1": 0.8006768189509307, |
|
"eval_micro_precision": 0.8232428670842032, |
|
"eval_micro_recall": 0.7793148880105402, |
|
"eval_runtime": 10.4901, |
|
"eval_samples_per_second": 92.564, |
|
"eval_steps_per_second": 2.955, |
|
"step": 2414 |
|
}, |
|
{ |
|
"epoch": 17.73, |
|
"learning_rate": 4.242174244767996e-05, |
|
"loss": 0.0145, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"eval_loss": 0.16017772257328033, |
|
"eval_macro_f1": 0.6444367141145684, |
|
"eval_macro_precision": 0.7439848776601259, |
|
"eval_macro_recall": 0.5952268137231111, |
|
"eval_micro_f1": 0.7922971114167813, |
|
"eval_micro_precision": 0.8287769784172662, |
|
"eval_micro_recall": 0.758893280632411, |
|
"eval_runtime": 10.4941, |
|
"eval_samples_per_second": 92.528, |
|
"eval_steps_per_second": 2.954, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"eval_loss": 0.16528286039829254, |
|
"eval_macro_f1": 0.6255401892998842, |
|
"eval_macro_precision": 0.7027892098283081, |
|
"eval_macro_recall": 0.5807621048617468, |
|
"eval_micro_f1": 0.799320882852292, |
|
"eval_micro_precision": 0.8248072880168185, |
|
"eval_micro_recall": 0.7753623188405797, |
|
"eval_runtime": 10.5086, |
|
"eval_samples_per_second": 92.4, |
|
"eval_steps_per_second": 2.95, |
|
"step": 2698 |
|
}, |
|
{ |
|
"epoch": 20.14, |
|
"eval_loss": 0.16131597757339478, |
|
"eval_macro_f1": 0.6758630342355485, |
|
"eval_macro_precision": 0.7848399016138751, |
|
"eval_macro_recall": 0.6311385295870753, |
|
"eval_micro_f1": 0.795959595959596, |
|
"eval_micro_precision": 0.8140495867768595, |
|
"eval_micro_recall": 0.7786561264822134, |
|
"eval_runtime": 10.4977, |
|
"eval_samples_per_second": 92.496, |
|
"eval_steps_per_second": 2.953, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 21.15, |
|
"eval_loss": 0.16260398924350739, |
|
"eval_macro_f1": 0.6471248124455184, |
|
"eval_macro_precision": 0.7519504537117815, |
|
"eval_macro_recall": 0.5979276636724924, |
|
"eval_micro_f1": 0.8031604259704569, |
|
"eval_micro_precision": 0.8391959798994975, |
|
"eval_micro_recall": 0.7700922266139657, |
|
"eval_runtime": 10.5031, |
|
"eval_samples_per_second": 92.449, |
|
"eval_steps_per_second": 2.952, |
|
"step": 2982 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"learning_rate": 4.059321906631445e-05, |
|
"loss": 0.0097, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 22.16, |
|
"eval_loss": 0.1720920354127884, |
|
"eval_macro_f1": 0.6197445878457252, |
|
"eval_macro_precision": 0.6777223144129594, |
|
"eval_macro_recall": 0.5809350716774138, |
|
"eval_micro_f1": 0.7995860641600551, |
|
"eval_micro_precision": 0.8392469225199131, |
|
"eval_micro_recall": 0.7635046113306982, |
|
"eval_runtime": 10.5022, |
|
"eval_samples_per_second": 92.457, |
|
"eval_steps_per_second": 2.952, |
|
"step": 3124 |
|
}, |
|
{ |
|
"epoch": 23.16, |
|
"eval_loss": 0.18361401557922363, |
|
"eval_macro_f1": 0.679814364002206, |
|
"eval_macro_precision": 0.7438188782599036, |
|
"eval_macro_recall": 0.6793295713709084, |
|
"eval_micro_f1": 0.7756177924217464, |
|
"eval_micro_precision": 0.7758734344100198, |
|
"eval_micro_recall": 0.7753623188405797, |
|
"eval_runtime": 10.4795, |
|
"eval_samples_per_second": 92.658, |
|
"eval_steps_per_second": 2.958, |
|
"step": 3266 |
|
}, |
|
{ |
|
"epoch": 24.17, |
|
"eval_loss": 0.18593738973140717, |
|
"eval_macro_f1": 0.6768305526976849, |
|
"eval_macro_precision": 0.7623894236010156, |
|
"eval_macro_recall": 0.639542168905185, |
|
"eval_micro_f1": 0.7799253984401492, |
|
"eval_micro_precision": 0.803633822501747, |
|
"eval_micro_recall": 0.7575757575757576, |
|
"eval_runtime": 10.5008, |
|
"eval_samples_per_second": 92.469, |
|
"eval_steps_per_second": 2.952, |
|
"step": 3408 |
|
}, |
|
{ |
|
"epoch": 24.82, |
|
"learning_rate": 3.8764695684948935e-05, |
|
"loss": 0.0076, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 25.18, |
|
"eval_loss": 0.17676377296447754, |
|
"eval_macro_f1": 0.6779522309735053, |
|
"eval_macro_precision": 0.7968781822538058, |
|
"eval_macro_recall": 0.6336237715199059, |
|
"eval_micro_f1": 0.796775277124622, |
|
"eval_micro_precision": 0.8128855380397533, |
|
"eval_micro_recall": 0.7812911725955204, |
|
"eval_runtime": 10.5104, |
|
"eval_samples_per_second": 92.384, |
|
"eval_steps_per_second": 2.949, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 26.18, |
|
"eval_loss": 0.1732201725244522, |
|
"eval_macro_f1": 0.707509469543303, |
|
"eval_macro_precision": 0.8146827025242978, |
|
"eval_macro_recall": 0.6503143056843191, |
|
"eval_micro_f1": 0.7985299031072502, |
|
"eval_micro_precision": 0.8101694915254237, |
|
"eval_micro_recall": 0.7872200263504612, |
|
"eval_runtime": 10.4892, |
|
"eval_samples_per_second": 92.571, |
|
"eval_steps_per_second": 2.955, |
|
"step": 3692 |
|
}, |
|
{ |
|
"epoch": 27.19, |
|
"eval_loss": 0.20233392715454102, |
|
"eval_macro_f1": 0.6513179543559465, |
|
"eval_macro_precision": 0.7916846976726903, |
|
"eval_macro_recall": 0.5806696184169113, |
|
"eval_micro_f1": 0.7844352617079889, |
|
"eval_micro_precision": 0.8217893217893217, |
|
"eval_micro_recall": 0.7503293807641633, |
|
"eval_runtime": 10.5011, |
|
"eval_samples_per_second": 92.466, |
|
"eval_steps_per_second": 2.952, |
|
"step": 3834 |
|
}, |
|
{ |
|
"epoch": 28.2, |
|
"eval_loss": 0.1885799914598465, |
|
"eval_macro_f1": 0.6823207317968834, |
|
"eval_macro_precision": 0.7917566921291868, |
|
"eval_macro_recall": 0.6430312076263187, |
|
"eval_micro_f1": 0.7875717662951706, |
|
"eval_micro_precision": 0.808038808038808, |
|
"eval_micro_recall": 0.7681159420289855, |
|
"eval_runtime": 10.5149, |
|
"eval_samples_per_second": 92.345, |
|
"eval_steps_per_second": 2.948, |
|
"step": 3976 |
|
}, |
|
{ |
|
"epoch": 28.37, |
|
"learning_rate": 3.6936172303583416e-05, |
|
"loss": 0.0066, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 29.21, |
|
"eval_loss": 0.19189482927322388, |
|
"eval_macro_f1": 0.6669099856977689, |
|
"eval_macro_precision": 0.7967630539496974, |
|
"eval_macro_recall": 0.6033413092496192, |
|
"eval_micro_f1": 0.8017894012388163, |
|
"eval_micro_precision": 0.8393371757925072, |
|
"eval_micro_recall": 0.7674571805006588, |
|
"eval_runtime": 10.4969, |
|
"eval_samples_per_second": 92.503, |
|
"eval_steps_per_second": 2.953, |
|
"step": 4118 |
|
}, |
|
{ |
|
"epoch": 30.21, |
|
"eval_loss": 0.18965879082679749, |
|
"eval_macro_f1": 0.6876913982263068, |
|
"eval_macro_precision": 0.8027653366004972, |
|
"eval_macro_recall": 0.6233955552143939, |
|
"eval_micro_f1": 0.7947112038970077, |
|
"eval_micro_precision": 0.8421828908554573, |
|
"eval_micro_recall": 0.7523056653491436, |
|
"eval_runtime": 10.497, |
|
"eval_samples_per_second": 92.503, |
|
"eval_steps_per_second": 2.953, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 31.22, |
|
"eval_loss": 0.1917509138584137, |
|
"eval_macro_f1": 0.7252073370829516, |
|
"eval_macro_precision": 0.7693080004594731, |
|
"eval_macro_recall": 0.703604344156779, |
|
"eval_micro_f1": 0.7954469367258119, |
|
"eval_micro_precision": 0.808713410483322, |
|
"eval_micro_recall": 0.782608695652174, |
|
"eval_runtime": 10.5118, |
|
"eval_samples_per_second": 92.373, |
|
"eval_steps_per_second": 2.949, |
|
"step": 4402 |
|
} |
|
], |
|
"max_steps": 14100, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.6671849861800448e+16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"adam_epsilon": 1.874740778707177e-08, |
|
"learning_rate": 4.862043671050906e-05, |
|
"per_device_eval_batch_size": 32, |
|
"per_device_train_batch_size": 16, |
|
"seed": 322, |
|
"warmup_steps": 805, |
|
"weight_decay": 1.0026204622214607e-07 |
|
} |
|
} |
|
|