cares-bert-base / trainer_state.json
chizhik
model weights updated for stratified split
597f06a
raw
history blame
16.6 kB
{
"best_metric": 0.7252073370829516,
"best_model_checkpoint": "./CARES/checkpoints/bert-ba-stratified/run-3/checkpoint-4402",
"epoch": 31.21985815602837,
"global_step": 4402,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.01,
"eval_loss": 0.2769930064678192,
"eval_macro_f1": 0.04276761517615176,
"eval_macro_precision": 0.03250514933058703,
"eval_macro_recall": 0.0625,
"eval_micro_f1": 0.4057854560064283,
"eval_micro_precision": 0.5200823892893924,
"eval_micro_recall": 0.3326745718050066,
"eval_runtime": 10.4856,
"eval_samples_per_second": 92.603,
"eval_steps_per_second": 2.956,
"step": 142
},
{
"epoch": 2.01,
"eval_loss": 0.23320329189300537,
"eval_macro_f1": 0.07268419522929685,
"eval_macro_precision": 0.09945738083657588,
"eval_macro_recall": 0.07568745319910142,
"eval_micro_f1": 0.4403274450667815,
"eval_micro_precision": 0.6363636363636364,
"eval_micro_recall": 0.33662714097496704,
"eval_runtime": 10.4881,
"eval_samples_per_second": 92.581,
"eval_steps_per_second": 2.956,
"step": 284
},
{
"epoch": 3.02,
"eval_loss": 0.18873895704746246,
"eval_macro_f1": 0.18226771045112236,
"eval_macro_precision": 0.20572045142357642,
"eval_macro_recall": 0.16688644032337763,
"eval_micro_f1": 0.6089478044739022,
"eval_micro_precision": 0.8203125,
"eval_micro_recall": 0.4841897233201581,
"eval_runtime": 10.4735,
"eval_samples_per_second": 92.71,
"eval_steps_per_second": 2.96,
"step": 426
},
{
"epoch": 3.55,
"learning_rate": 3.0199029012738545e-05,
"loss": 0.2864,
"step": 500
},
{
"epoch": 4.03,
"eval_loss": 0.1688539683818817,
"eval_macro_f1": 0.2604924389345447,
"eval_macro_precision": 0.3576131893465115,
"eval_macro_recall": 0.24005925564568711,
"eval_micro_f1": 0.6585842784513101,
"eval_micro_precision": 0.8103946102021174,
"eval_micro_recall": 0.5546772068511199,
"eval_runtime": 10.5012,
"eval_samples_per_second": 92.465,
"eval_steps_per_second": 2.952,
"step": 568
},
{
"epoch": 5.04,
"eval_loss": 0.14974796772003174,
"eval_macro_f1": 0.31858165699036184,
"eval_macro_precision": 0.3683087696286302,
"eval_macro_recall": 0.2922878974373826,
"eval_micro_f1": 0.7164622816796731,
"eval_micro_precision": 0.8218243819266837,
"eval_micro_recall": 0.6350461133069829,
"eval_runtime": 10.488,
"eval_samples_per_second": 92.582,
"eval_steps_per_second": 2.956,
"step": 710
},
{
"epoch": 6.04,
"eval_loss": 0.1422141045331955,
"eval_macro_f1": 0.45636691704527516,
"eval_macro_precision": 0.6115096433126577,
"eval_macro_recall": 0.4073540760438781,
"eval_micro_f1": 0.7305970149253732,
"eval_micro_precision": 0.842512908777969,
"eval_micro_recall": 0.644927536231884,
"eval_runtime": 10.4831,
"eval_samples_per_second": 92.625,
"eval_steps_per_second": 2.957,
"step": 852
},
{
"epoch": 7.05,
"eval_loss": 0.1337544023990631,
"eval_macro_f1": 0.5272952937702646,
"eval_macro_precision": 0.6898645530460084,
"eval_macro_recall": 0.47291207140683184,
"eval_micro_f1": 0.7612208258527827,
"eval_micro_precision": 0.8366219415943172,
"eval_micro_recall": 0.6982872200263505,
"eval_runtime": 10.4851,
"eval_samples_per_second": 92.608,
"eval_steps_per_second": 2.957,
"step": 994
},
{
"epoch": 7.09,
"learning_rate": 4.790731259177651e-05,
"loss": 0.1272,
"step": 1000
},
{
"epoch": 8.06,
"eval_loss": 0.13911226391792297,
"eval_macro_f1": 0.5414520142513848,
"eval_macro_precision": 0.6021054439096823,
"eval_macro_recall": 0.5152762028502071,
"eval_micro_f1": 0.7606779661016948,
"eval_micro_precision": 0.7835195530726257,
"eval_micro_recall": 0.7391304347826086,
"eval_runtime": 10.487,
"eval_samples_per_second": 92.591,
"eval_steps_per_second": 2.956,
"step": 1136
},
{
"epoch": 9.06,
"eval_loss": 0.1353491097688675,
"eval_macro_f1": 0.5516496195059415,
"eval_macro_precision": 0.6105827458765187,
"eval_macro_recall": 0.5327103225574433,
"eval_micro_f1": 0.7625212947189096,
"eval_micro_precision": 0.78969654199012,
"eval_micro_recall": 0.7371541501976284,
"eval_runtime": 10.4862,
"eval_samples_per_second": 92.598,
"eval_steps_per_second": 2.956,
"step": 1278
},
{
"epoch": 10.07,
"eval_loss": 0.13818134367465973,
"eval_macro_f1": 0.5629085360191737,
"eval_macro_precision": 0.6601053330799342,
"eval_macro_recall": 0.5158438987675726,
"eval_micro_f1": 0.7794221282593375,
"eval_micro_precision": 0.8378787878787879,
"eval_micro_recall": 0.7285902503293807,
"eval_runtime": 10.4843,
"eval_samples_per_second": 92.615,
"eval_steps_per_second": 2.957,
"step": 1420
},
{
"epoch": 10.64,
"learning_rate": 4.6078789210411e-05,
"loss": 0.0577,
"step": 1500
},
{
"epoch": 11.08,
"eval_loss": 0.145726278424263,
"eval_macro_f1": 0.5903998605572057,
"eval_macro_precision": 0.653652711667118,
"eval_macro_recall": 0.5513401858459299,
"eval_micro_f1": 0.7780429594272077,
"eval_micro_precision": 0.8063604240282686,
"eval_micro_recall": 0.7516469038208169,
"eval_runtime": 10.4837,
"eval_samples_per_second": 92.62,
"eval_steps_per_second": 2.957,
"step": 1562
},
{
"epoch": 12.09,
"eval_loss": 0.1388859748840332,
"eval_macro_f1": 0.6030363323278404,
"eval_macro_precision": 0.6755461130761506,
"eval_macro_recall": 0.5610332958067313,
"eval_micro_f1": 0.8001355013550135,
"eval_micro_precision": 0.8235704323570432,
"eval_micro_recall": 0.7779973649538867,
"eval_runtime": 10.4865,
"eval_samples_per_second": 92.595,
"eval_steps_per_second": 2.956,
"step": 1704
},
{
"epoch": 13.09,
"eval_loss": 0.1482115238904953,
"eval_macro_f1": 0.6056908525913866,
"eval_macro_precision": 0.6552837427265621,
"eval_macro_recall": 0.572537210182014,
"eval_micro_f1": 0.7822553335590924,
"eval_micro_precision": 0.8048780487804879,
"eval_micro_recall": 0.7608695652173914,
"eval_runtime": 10.5045,
"eval_samples_per_second": 92.437,
"eval_steps_per_second": 2.951,
"step": 1846
},
{
"epoch": 14.1,
"eval_loss": 0.14726266264915466,
"eval_macro_f1": 0.6072794535097282,
"eval_macro_precision": 0.680123510778154,
"eval_macro_recall": 0.5630252433183344,
"eval_micro_f1": 0.7845188284518829,
"eval_micro_precision": 0.8333333333333334,
"eval_micro_recall": 0.741106719367589,
"eval_runtime": 10.487,
"eval_samples_per_second": 92.591,
"eval_steps_per_second": 2.956,
"step": 1988
},
{
"epoch": 14.18,
"learning_rate": 4.425026582904548e-05,
"loss": 0.0284,
"step": 2000
},
{
"epoch": 15.11,
"eval_loss": 0.14193882048130035,
"eval_macro_f1": 0.6169717669838317,
"eval_macro_precision": 0.6503808357210328,
"eval_macro_recall": 0.5931599384917411,
"eval_micro_f1": 0.7969924812030075,
"eval_micro_precision": 0.828125,
"eval_micro_recall": 0.7681159420289855,
"eval_runtime": 10.4855,
"eval_samples_per_second": 92.604,
"eval_steps_per_second": 2.956,
"step": 2130
},
{
"epoch": 16.11,
"eval_loss": 0.15002837777137756,
"eval_macro_f1": 0.6525221299555535,
"eval_macro_precision": 0.8021892379342418,
"eval_macro_recall": 0.5999831390602388,
"eval_micro_f1": 0.8061016949152543,
"eval_micro_precision": 0.8303072625698324,
"eval_micro_recall": 0.7832674571805006,
"eval_runtime": 10.503,
"eval_samples_per_second": 92.45,
"eval_steps_per_second": 2.952,
"step": 2272
},
{
"epoch": 17.12,
"eval_loss": 0.14581723511219025,
"eval_macro_f1": 0.6138635616219041,
"eval_macro_precision": 0.6561402184700652,
"eval_macro_recall": 0.5837363386062422,
"eval_micro_f1": 0.8006768189509307,
"eval_micro_precision": 0.8232428670842032,
"eval_micro_recall": 0.7793148880105402,
"eval_runtime": 10.4901,
"eval_samples_per_second": 92.564,
"eval_steps_per_second": 2.955,
"step": 2414
},
{
"epoch": 17.73,
"learning_rate": 4.242174244767996e-05,
"loss": 0.0145,
"step": 2500
},
{
"epoch": 18.13,
"eval_loss": 0.16017772257328033,
"eval_macro_f1": 0.6444367141145684,
"eval_macro_precision": 0.7439848776601259,
"eval_macro_recall": 0.5952268137231111,
"eval_micro_f1": 0.7922971114167813,
"eval_micro_precision": 0.8287769784172662,
"eval_micro_recall": 0.758893280632411,
"eval_runtime": 10.4941,
"eval_samples_per_second": 92.528,
"eval_steps_per_second": 2.954,
"step": 2556
},
{
"epoch": 19.13,
"eval_loss": 0.16528286039829254,
"eval_macro_f1": 0.6255401892998842,
"eval_macro_precision": 0.7027892098283081,
"eval_macro_recall": 0.5807621048617468,
"eval_micro_f1": 0.799320882852292,
"eval_micro_precision": 0.8248072880168185,
"eval_micro_recall": 0.7753623188405797,
"eval_runtime": 10.5086,
"eval_samples_per_second": 92.4,
"eval_steps_per_second": 2.95,
"step": 2698
},
{
"epoch": 20.14,
"eval_loss": 0.16131597757339478,
"eval_macro_f1": 0.6758630342355485,
"eval_macro_precision": 0.7848399016138751,
"eval_macro_recall": 0.6311385295870753,
"eval_micro_f1": 0.795959595959596,
"eval_micro_precision": 0.8140495867768595,
"eval_micro_recall": 0.7786561264822134,
"eval_runtime": 10.4977,
"eval_samples_per_second": 92.496,
"eval_steps_per_second": 2.953,
"step": 2840
},
{
"epoch": 21.15,
"eval_loss": 0.16260398924350739,
"eval_macro_f1": 0.6471248124455184,
"eval_macro_precision": 0.7519504537117815,
"eval_macro_recall": 0.5979276636724924,
"eval_micro_f1": 0.8031604259704569,
"eval_micro_precision": 0.8391959798994975,
"eval_micro_recall": 0.7700922266139657,
"eval_runtime": 10.5031,
"eval_samples_per_second": 92.449,
"eval_steps_per_second": 2.952,
"step": 2982
},
{
"epoch": 21.28,
"learning_rate": 4.059321906631445e-05,
"loss": 0.0097,
"step": 3000
},
{
"epoch": 22.16,
"eval_loss": 0.1720920354127884,
"eval_macro_f1": 0.6197445878457252,
"eval_macro_precision": 0.6777223144129594,
"eval_macro_recall": 0.5809350716774138,
"eval_micro_f1": 0.7995860641600551,
"eval_micro_precision": 0.8392469225199131,
"eval_micro_recall": 0.7635046113306982,
"eval_runtime": 10.5022,
"eval_samples_per_second": 92.457,
"eval_steps_per_second": 2.952,
"step": 3124
},
{
"epoch": 23.16,
"eval_loss": 0.18361401557922363,
"eval_macro_f1": 0.679814364002206,
"eval_macro_precision": 0.7438188782599036,
"eval_macro_recall": 0.6793295713709084,
"eval_micro_f1": 0.7756177924217464,
"eval_micro_precision": 0.7758734344100198,
"eval_micro_recall": 0.7753623188405797,
"eval_runtime": 10.4795,
"eval_samples_per_second": 92.658,
"eval_steps_per_second": 2.958,
"step": 3266
},
{
"epoch": 24.17,
"eval_loss": 0.18593738973140717,
"eval_macro_f1": 0.6768305526976849,
"eval_macro_precision": 0.7623894236010156,
"eval_macro_recall": 0.639542168905185,
"eval_micro_f1": 0.7799253984401492,
"eval_micro_precision": 0.803633822501747,
"eval_micro_recall": 0.7575757575757576,
"eval_runtime": 10.5008,
"eval_samples_per_second": 92.469,
"eval_steps_per_second": 2.952,
"step": 3408
},
{
"epoch": 24.82,
"learning_rate": 3.8764695684948935e-05,
"loss": 0.0076,
"step": 3500
},
{
"epoch": 25.18,
"eval_loss": 0.17676377296447754,
"eval_macro_f1": 0.6779522309735053,
"eval_macro_precision": 0.7968781822538058,
"eval_macro_recall": 0.6336237715199059,
"eval_micro_f1": 0.796775277124622,
"eval_micro_precision": 0.8128855380397533,
"eval_micro_recall": 0.7812911725955204,
"eval_runtime": 10.5104,
"eval_samples_per_second": 92.384,
"eval_steps_per_second": 2.949,
"step": 3550
},
{
"epoch": 26.18,
"eval_loss": 0.1732201725244522,
"eval_macro_f1": 0.707509469543303,
"eval_macro_precision": 0.8146827025242978,
"eval_macro_recall": 0.6503143056843191,
"eval_micro_f1": 0.7985299031072502,
"eval_micro_precision": 0.8101694915254237,
"eval_micro_recall": 0.7872200263504612,
"eval_runtime": 10.4892,
"eval_samples_per_second": 92.571,
"eval_steps_per_second": 2.955,
"step": 3692
},
{
"epoch": 27.19,
"eval_loss": 0.20233392715454102,
"eval_macro_f1": 0.6513179543559465,
"eval_macro_precision": 0.7916846976726903,
"eval_macro_recall": 0.5806696184169113,
"eval_micro_f1": 0.7844352617079889,
"eval_micro_precision": 0.8217893217893217,
"eval_micro_recall": 0.7503293807641633,
"eval_runtime": 10.5011,
"eval_samples_per_second": 92.466,
"eval_steps_per_second": 2.952,
"step": 3834
},
{
"epoch": 28.2,
"eval_loss": 0.1885799914598465,
"eval_macro_f1": 0.6823207317968834,
"eval_macro_precision": 0.7917566921291868,
"eval_macro_recall": 0.6430312076263187,
"eval_micro_f1": 0.7875717662951706,
"eval_micro_precision": 0.808038808038808,
"eval_micro_recall": 0.7681159420289855,
"eval_runtime": 10.5149,
"eval_samples_per_second": 92.345,
"eval_steps_per_second": 2.948,
"step": 3976
},
{
"epoch": 28.37,
"learning_rate": 3.6936172303583416e-05,
"loss": 0.0066,
"step": 4000
},
{
"epoch": 29.21,
"eval_loss": 0.19189482927322388,
"eval_macro_f1": 0.6669099856977689,
"eval_macro_precision": 0.7967630539496974,
"eval_macro_recall": 0.6033413092496192,
"eval_micro_f1": 0.8017894012388163,
"eval_micro_precision": 0.8393371757925072,
"eval_micro_recall": 0.7674571805006588,
"eval_runtime": 10.4969,
"eval_samples_per_second": 92.503,
"eval_steps_per_second": 2.953,
"step": 4118
},
{
"epoch": 30.21,
"eval_loss": 0.18965879082679749,
"eval_macro_f1": 0.6876913982263068,
"eval_macro_precision": 0.8027653366004972,
"eval_macro_recall": 0.6233955552143939,
"eval_micro_f1": 0.7947112038970077,
"eval_micro_precision": 0.8421828908554573,
"eval_micro_recall": 0.7523056653491436,
"eval_runtime": 10.497,
"eval_samples_per_second": 92.503,
"eval_steps_per_second": 2.953,
"step": 4260
},
{
"epoch": 31.22,
"eval_loss": 0.1917509138584137,
"eval_macro_f1": 0.7252073370829516,
"eval_macro_precision": 0.7693080004594731,
"eval_macro_recall": 0.703604344156779,
"eval_micro_f1": 0.7954469367258119,
"eval_micro_precision": 0.808713410483322,
"eval_micro_recall": 0.782608695652174,
"eval_runtime": 10.5118,
"eval_samples_per_second": 92.373,
"eval_steps_per_second": 2.949,
"step": 4402
}
],
"max_steps": 14100,
"num_train_epochs": 100,
"total_flos": 1.6671849861800448e+16,
"trial_name": null,
"trial_params": {
"adam_epsilon": 1.874740778707177e-08,
"learning_rate": 4.862043671050906e-05,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 16,
"seed": 322,
"warmup_steps": 805,
"weight_decay": 1.0026204622214607e-07
}
}