{ "best_metric": 0.7252073370829516, "best_model_checkpoint": "./CARES/checkpoints/bert-ba-stratified/run-3/checkpoint-4402", "epoch": 31.21985815602837, "global_step": 4402, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.01, "eval_loss": 0.2769930064678192, "eval_macro_f1": 0.04276761517615176, "eval_macro_precision": 0.03250514933058703, "eval_macro_recall": 0.0625, "eval_micro_f1": 0.4057854560064283, "eval_micro_precision": 0.5200823892893924, "eval_micro_recall": 0.3326745718050066, "eval_runtime": 10.4856, "eval_samples_per_second": 92.603, "eval_steps_per_second": 2.956, "step": 142 }, { "epoch": 2.01, "eval_loss": 0.23320329189300537, "eval_macro_f1": 0.07268419522929685, "eval_macro_precision": 0.09945738083657588, "eval_macro_recall": 0.07568745319910142, "eval_micro_f1": 0.4403274450667815, "eval_micro_precision": 0.6363636363636364, "eval_micro_recall": 0.33662714097496704, "eval_runtime": 10.4881, "eval_samples_per_second": 92.581, "eval_steps_per_second": 2.956, "step": 284 }, { "epoch": 3.02, "eval_loss": 0.18873895704746246, "eval_macro_f1": 0.18226771045112236, "eval_macro_precision": 0.20572045142357642, "eval_macro_recall": 0.16688644032337763, "eval_micro_f1": 0.6089478044739022, "eval_micro_precision": 0.8203125, "eval_micro_recall": 0.4841897233201581, "eval_runtime": 10.4735, "eval_samples_per_second": 92.71, "eval_steps_per_second": 2.96, "step": 426 }, { "epoch": 3.55, "learning_rate": 3.0199029012738545e-05, "loss": 0.2864, "step": 500 }, { "epoch": 4.03, "eval_loss": 0.1688539683818817, "eval_macro_f1": 0.2604924389345447, "eval_macro_precision": 0.3576131893465115, "eval_macro_recall": 0.24005925564568711, "eval_micro_f1": 0.6585842784513101, "eval_micro_precision": 0.8103946102021174, "eval_micro_recall": 0.5546772068511199, "eval_runtime": 10.5012, "eval_samples_per_second": 92.465, "eval_steps_per_second": 2.952, "step": 568 }, { "epoch": 5.04, "eval_loss": 0.14974796772003174, "eval_macro_f1": 0.31858165699036184, "eval_macro_precision": 0.3683087696286302, "eval_macro_recall": 0.2922878974373826, "eval_micro_f1": 0.7164622816796731, "eval_micro_precision": 0.8218243819266837, "eval_micro_recall": 0.6350461133069829, "eval_runtime": 10.488, "eval_samples_per_second": 92.582, "eval_steps_per_second": 2.956, "step": 710 }, { "epoch": 6.04, "eval_loss": 0.1422141045331955, "eval_macro_f1": 0.45636691704527516, "eval_macro_precision": 0.6115096433126577, "eval_macro_recall": 0.4073540760438781, "eval_micro_f1": 0.7305970149253732, "eval_micro_precision": 0.842512908777969, "eval_micro_recall": 0.644927536231884, "eval_runtime": 10.4831, "eval_samples_per_second": 92.625, "eval_steps_per_second": 2.957, "step": 852 }, { "epoch": 7.05, "eval_loss": 0.1337544023990631, "eval_macro_f1": 0.5272952937702646, "eval_macro_precision": 0.6898645530460084, "eval_macro_recall": 0.47291207140683184, "eval_micro_f1": 0.7612208258527827, "eval_micro_precision": 0.8366219415943172, "eval_micro_recall": 0.6982872200263505, "eval_runtime": 10.4851, "eval_samples_per_second": 92.608, "eval_steps_per_second": 2.957, "step": 994 }, { "epoch": 7.09, "learning_rate": 4.790731259177651e-05, "loss": 0.1272, "step": 1000 }, { "epoch": 8.06, "eval_loss": 0.13911226391792297, "eval_macro_f1": 0.5414520142513848, "eval_macro_precision": 0.6021054439096823, "eval_macro_recall": 0.5152762028502071, "eval_micro_f1": 0.7606779661016948, "eval_micro_precision": 0.7835195530726257, "eval_micro_recall": 0.7391304347826086, "eval_runtime": 10.487, "eval_samples_per_second": 92.591, "eval_steps_per_second": 2.956, "step": 1136 }, { "epoch": 9.06, "eval_loss": 0.1353491097688675, "eval_macro_f1": 0.5516496195059415, "eval_macro_precision": 0.6105827458765187, "eval_macro_recall": 0.5327103225574433, "eval_micro_f1": 0.7625212947189096, "eval_micro_precision": 0.78969654199012, "eval_micro_recall": 0.7371541501976284, "eval_runtime": 10.4862, "eval_samples_per_second": 92.598, "eval_steps_per_second": 2.956, "step": 1278 }, { "epoch": 10.07, "eval_loss": 0.13818134367465973, "eval_macro_f1": 0.5629085360191737, "eval_macro_precision": 0.6601053330799342, "eval_macro_recall": 0.5158438987675726, "eval_micro_f1": 0.7794221282593375, "eval_micro_precision": 0.8378787878787879, "eval_micro_recall": 0.7285902503293807, "eval_runtime": 10.4843, "eval_samples_per_second": 92.615, "eval_steps_per_second": 2.957, "step": 1420 }, { "epoch": 10.64, "learning_rate": 4.6078789210411e-05, "loss": 0.0577, "step": 1500 }, { "epoch": 11.08, "eval_loss": 0.145726278424263, "eval_macro_f1": 0.5903998605572057, "eval_macro_precision": 0.653652711667118, "eval_macro_recall": 0.5513401858459299, "eval_micro_f1": 0.7780429594272077, "eval_micro_precision": 0.8063604240282686, "eval_micro_recall": 0.7516469038208169, "eval_runtime": 10.4837, "eval_samples_per_second": 92.62, "eval_steps_per_second": 2.957, "step": 1562 }, { "epoch": 12.09, "eval_loss": 0.1388859748840332, "eval_macro_f1": 0.6030363323278404, "eval_macro_precision": 0.6755461130761506, "eval_macro_recall": 0.5610332958067313, "eval_micro_f1": 0.8001355013550135, "eval_micro_precision": 0.8235704323570432, "eval_micro_recall": 0.7779973649538867, "eval_runtime": 10.4865, "eval_samples_per_second": 92.595, "eval_steps_per_second": 2.956, "step": 1704 }, { "epoch": 13.09, "eval_loss": 0.1482115238904953, "eval_macro_f1": 0.6056908525913866, "eval_macro_precision": 0.6552837427265621, "eval_macro_recall": 0.572537210182014, "eval_micro_f1": 0.7822553335590924, "eval_micro_precision": 0.8048780487804879, "eval_micro_recall": 0.7608695652173914, "eval_runtime": 10.5045, "eval_samples_per_second": 92.437, "eval_steps_per_second": 2.951, "step": 1846 }, { "epoch": 14.1, "eval_loss": 0.14726266264915466, "eval_macro_f1": 0.6072794535097282, "eval_macro_precision": 0.680123510778154, "eval_macro_recall": 0.5630252433183344, "eval_micro_f1": 0.7845188284518829, "eval_micro_precision": 0.8333333333333334, "eval_micro_recall": 0.741106719367589, "eval_runtime": 10.487, "eval_samples_per_second": 92.591, "eval_steps_per_second": 2.956, "step": 1988 }, { "epoch": 14.18, "learning_rate": 4.425026582904548e-05, "loss": 0.0284, "step": 2000 }, { "epoch": 15.11, "eval_loss": 0.14193882048130035, "eval_macro_f1": 0.6169717669838317, "eval_macro_precision": 0.6503808357210328, "eval_macro_recall": 0.5931599384917411, "eval_micro_f1": 0.7969924812030075, "eval_micro_precision": 0.828125, "eval_micro_recall": 0.7681159420289855, "eval_runtime": 10.4855, "eval_samples_per_second": 92.604, "eval_steps_per_second": 2.956, "step": 2130 }, { "epoch": 16.11, "eval_loss": 0.15002837777137756, "eval_macro_f1": 0.6525221299555535, "eval_macro_precision": 0.8021892379342418, "eval_macro_recall": 0.5999831390602388, "eval_micro_f1": 0.8061016949152543, "eval_micro_precision": 0.8303072625698324, "eval_micro_recall": 0.7832674571805006, "eval_runtime": 10.503, "eval_samples_per_second": 92.45, "eval_steps_per_second": 2.952, "step": 2272 }, { "epoch": 17.12, "eval_loss": 0.14581723511219025, "eval_macro_f1": 0.6138635616219041, "eval_macro_precision": 0.6561402184700652, "eval_macro_recall": 0.5837363386062422, "eval_micro_f1": 0.8006768189509307, "eval_micro_precision": 0.8232428670842032, "eval_micro_recall": 0.7793148880105402, "eval_runtime": 10.4901, "eval_samples_per_second": 92.564, "eval_steps_per_second": 2.955, "step": 2414 }, { "epoch": 17.73, "learning_rate": 4.242174244767996e-05, "loss": 0.0145, "step": 2500 }, { "epoch": 18.13, "eval_loss": 0.16017772257328033, "eval_macro_f1": 0.6444367141145684, "eval_macro_precision": 0.7439848776601259, "eval_macro_recall": 0.5952268137231111, "eval_micro_f1": 0.7922971114167813, "eval_micro_precision": 0.8287769784172662, "eval_micro_recall": 0.758893280632411, "eval_runtime": 10.4941, "eval_samples_per_second": 92.528, "eval_steps_per_second": 2.954, "step": 2556 }, { "epoch": 19.13, "eval_loss": 0.16528286039829254, "eval_macro_f1": 0.6255401892998842, "eval_macro_precision": 0.7027892098283081, "eval_macro_recall": 0.5807621048617468, "eval_micro_f1": 0.799320882852292, "eval_micro_precision": 0.8248072880168185, "eval_micro_recall": 0.7753623188405797, "eval_runtime": 10.5086, "eval_samples_per_second": 92.4, "eval_steps_per_second": 2.95, "step": 2698 }, { "epoch": 20.14, "eval_loss": 0.16131597757339478, "eval_macro_f1": 0.6758630342355485, "eval_macro_precision": 0.7848399016138751, "eval_macro_recall": 0.6311385295870753, "eval_micro_f1": 0.795959595959596, "eval_micro_precision": 0.8140495867768595, "eval_micro_recall": 0.7786561264822134, "eval_runtime": 10.4977, "eval_samples_per_second": 92.496, "eval_steps_per_second": 2.953, "step": 2840 }, { "epoch": 21.15, "eval_loss": 0.16260398924350739, "eval_macro_f1": 0.6471248124455184, "eval_macro_precision": 0.7519504537117815, "eval_macro_recall": 0.5979276636724924, "eval_micro_f1": 0.8031604259704569, "eval_micro_precision": 0.8391959798994975, "eval_micro_recall": 0.7700922266139657, "eval_runtime": 10.5031, "eval_samples_per_second": 92.449, "eval_steps_per_second": 2.952, "step": 2982 }, { "epoch": 21.28, "learning_rate": 4.059321906631445e-05, "loss": 0.0097, "step": 3000 }, { "epoch": 22.16, "eval_loss": 0.1720920354127884, "eval_macro_f1": 0.6197445878457252, "eval_macro_precision": 0.6777223144129594, "eval_macro_recall": 0.5809350716774138, "eval_micro_f1": 0.7995860641600551, "eval_micro_precision": 0.8392469225199131, "eval_micro_recall": 0.7635046113306982, "eval_runtime": 10.5022, "eval_samples_per_second": 92.457, "eval_steps_per_second": 2.952, "step": 3124 }, { "epoch": 23.16, "eval_loss": 0.18361401557922363, "eval_macro_f1": 0.679814364002206, "eval_macro_precision": 0.7438188782599036, "eval_macro_recall": 0.6793295713709084, "eval_micro_f1": 0.7756177924217464, "eval_micro_precision": 0.7758734344100198, "eval_micro_recall": 0.7753623188405797, "eval_runtime": 10.4795, "eval_samples_per_second": 92.658, "eval_steps_per_second": 2.958, "step": 3266 }, { "epoch": 24.17, "eval_loss": 0.18593738973140717, "eval_macro_f1": 0.6768305526976849, "eval_macro_precision": 0.7623894236010156, "eval_macro_recall": 0.639542168905185, "eval_micro_f1": 0.7799253984401492, "eval_micro_precision": 0.803633822501747, "eval_micro_recall": 0.7575757575757576, "eval_runtime": 10.5008, "eval_samples_per_second": 92.469, "eval_steps_per_second": 2.952, "step": 3408 }, { "epoch": 24.82, "learning_rate": 3.8764695684948935e-05, "loss": 0.0076, "step": 3500 }, { "epoch": 25.18, "eval_loss": 0.17676377296447754, "eval_macro_f1": 0.6779522309735053, "eval_macro_precision": 0.7968781822538058, "eval_macro_recall": 0.6336237715199059, "eval_micro_f1": 0.796775277124622, "eval_micro_precision": 0.8128855380397533, "eval_micro_recall": 0.7812911725955204, "eval_runtime": 10.5104, "eval_samples_per_second": 92.384, "eval_steps_per_second": 2.949, "step": 3550 }, { "epoch": 26.18, "eval_loss": 0.1732201725244522, "eval_macro_f1": 0.707509469543303, "eval_macro_precision": 0.8146827025242978, "eval_macro_recall": 0.6503143056843191, "eval_micro_f1": 0.7985299031072502, "eval_micro_precision": 0.8101694915254237, "eval_micro_recall": 0.7872200263504612, "eval_runtime": 10.4892, "eval_samples_per_second": 92.571, "eval_steps_per_second": 2.955, "step": 3692 }, { "epoch": 27.19, "eval_loss": 0.20233392715454102, "eval_macro_f1": 0.6513179543559465, "eval_macro_precision": 0.7916846976726903, "eval_macro_recall": 0.5806696184169113, "eval_micro_f1": 0.7844352617079889, "eval_micro_precision": 0.8217893217893217, "eval_micro_recall": 0.7503293807641633, "eval_runtime": 10.5011, "eval_samples_per_second": 92.466, "eval_steps_per_second": 2.952, "step": 3834 }, { "epoch": 28.2, "eval_loss": 0.1885799914598465, "eval_macro_f1": 0.6823207317968834, "eval_macro_precision": 0.7917566921291868, "eval_macro_recall": 0.6430312076263187, "eval_micro_f1": 0.7875717662951706, "eval_micro_precision": 0.808038808038808, "eval_micro_recall": 0.7681159420289855, "eval_runtime": 10.5149, "eval_samples_per_second": 92.345, "eval_steps_per_second": 2.948, "step": 3976 }, { "epoch": 28.37, "learning_rate": 3.6936172303583416e-05, "loss": 0.0066, "step": 4000 }, { "epoch": 29.21, "eval_loss": 0.19189482927322388, "eval_macro_f1": 0.6669099856977689, "eval_macro_precision": 0.7967630539496974, "eval_macro_recall": 0.6033413092496192, "eval_micro_f1": 0.8017894012388163, "eval_micro_precision": 0.8393371757925072, "eval_micro_recall": 0.7674571805006588, "eval_runtime": 10.4969, "eval_samples_per_second": 92.503, "eval_steps_per_second": 2.953, "step": 4118 }, { "epoch": 30.21, "eval_loss": 0.18965879082679749, "eval_macro_f1": 0.6876913982263068, "eval_macro_precision": 0.8027653366004972, "eval_macro_recall": 0.6233955552143939, "eval_micro_f1": 0.7947112038970077, "eval_micro_precision": 0.8421828908554573, "eval_micro_recall": 0.7523056653491436, "eval_runtime": 10.497, "eval_samples_per_second": 92.503, "eval_steps_per_second": 2.953, "step": 4260 }, { "epoch": 31.22, "eval_loss": 0.1917509138584137, "eval_macro_f1": 0.7252073370829516, "eval_macro_precision": 0.7693080004594731, "eval_macro_recall": 0.703604344156779, "eval_micro_f1": 0.7954469367258119, "eval_micro_precision": 0.808713410483322, "eval_micro_recall": 0.782608695652174, "eval_runtime": 10.5118, "eval_samples_per_second": 92.373, "eval_steps_per_second": 2.949, "step": 4402 } ], "max_steps": 14100, "num_train_epochs": 100, "total_flos": 1.6671849861800448e+16, "trial_name": null, "trial_params": { "adam_epsilon": 1.874740778707177e-08, "learning_rate": 4.862043671050906e-05, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 16, "seed": 322, "warmup_steps": 805, "weight_decay": 1.0026204622214607e-07 } }