cares-roberta-clinical / trainer_state.json
chizhik
updated model weights
b039ab8
raw
history blame
19.4 kB
{
"best_metric": 0.7414142113821449,
"best_model_checkpoint": "./CARES/checkpoints/roberta/checkpoint-2698",
"epoch": 38.0,
"global_step": 2698,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 0.2509869933128357,
"eval_macro_f1": 0.04342407975460123,
"eval_macro_precision": 0.034867610837438424,
"eval_macro_recall": 0.057545731707317076,
"eval_micro_f1": 0.38635394456289984,
"eval_micro_precision": 0.5578817733990148,
"eval_micro_recall": 0.29549902152641877,
"eval_runtime": 2.7138,
"eval_samples_per_second": 355.964,
"eval_steps_per_second": 11.423,
"step": 71
},
{
"epoch": 2.0,
"eval_loss": 0.21916496753692627,
"eval_macro_f1": 0.10341134166953839,
"eval_macro_precision": 0.1498316489705596,
"eval_macro_recall": 0.08601393938480183,
"eval_micro_f1": 0.42796410014171,
"eval_micro_precision": 0.7756849315068494,
"eval_micro_recall": 0.29549902152641877,
"eval_runtime": 2.7022,
"eval_samples_per_second": 357.485,
"eval_steps_per_second": 11.472,
"step": 142
},
{
"epoch": 3.0,
"eval_loss": 0.1901940405368805,
"eval_macro_f1": 0.17433694331035438,
"eval_macro_precision": 0.1996622297635695,
"eval_macro_recall": 0.1554066444975929,
"eval_micro_f1": 0.5823627287853577,
"eval_micro_precision": 0.8036739380022963,
"eval_micro_recall": 0.45662100456621,
"eval_runtime": 2.7029,
"eval_samples_per_second": 357.398,
"eval_steps_per_second": 11.469,
"step": 213
},
{
"epoch": 4.0,
"eval_loss": 0.16621476411819458,
"eval_macro_f1": 0.20771764577178875,
"eval_macro_precision": 0.3206539932628853,
"eval_macro_recall": 0.18121219739914096,
"eval_micro_f1": 0.6394934705184012,
"eval_micro_precision": 0.8128772635814889,
"eval_micro_recall": 0.5270711024135681,
"eval_runtime": 2.7016,
"eval_samples_per_second": 357.565,
"eval_steps_per_second": 11.475,
"step": 284
},
{
"epoch": 5.0,
"eval_loss": 0.1592407524585724,
"eval_macro_f1": 0.32374988901102003,
"eval_macro_precision": 0.36319303902027156,
"eval_macro_recall": 0.30985669516000114,
"eval_micro_f1": 0.6886094674556215,
"eval_micro_precision": 0.7950469684030743,
"eval_micro_recall": 0.6073059360730594,
"eval_runtime": 2.7015,
"eval_samples_per_second": 357.579,
"eval_steps_per_second": 11.475,
"step": 355
},
{
"epoch": 6.0,
"eval_loss": 0.1491735577583313,
"eval_macro_f1": 0.3533190399166411,
"eval_macro_precision": 0.4078813390257998,
"eval_macro_recall": 0.33542234175118296,
"eval_micro_f1": 0.7270094134685011,
"eval_micro_precision": 0.8169243287225386,
"eval_micro_recall": 0.6549249836921069,
"eval_runtime": 2.7029,
"eval_samples_per_second": 357.392,
"eval_steps_per_second": 11.469,
"step": 426
},
{
"epoch": 7.0,
"eval_loss": 0.14026637375354767,
"eval_macro_f1": 0.47073084873229765,
"eval_macro_precision": 0.6837800649883113,
"eval_macro_recall": 0.423964742702268,
"eval_micro_f1": 0.75768156424581,
"eval_micro_precision": 0.8151765589782118,
"eval_micro_recall": 0.7077625570776256,
"eval_runtime": 2.7322,
"eval_samples_per_second": 353.558,
"eval_steps_per_second": 11.346,
"step": 497
},
{
"epoch": 7.04,
"learning_rate": 4.647887323943662e-05,
"loss": 0.1794,
"step": 500
},
{
"epoch": 8.0,
"eval_loss": 0.13919810950756073,
"eval_macro_f1": 0.4762066395538287,
"eval_macro_precision": 0.6922175380127314,
"eval_macro_recall": 0.4248427568334097,
"eval_micro_f1": 0.7554770318021201,
"eval_micro_precision": 0.8242097147262915,
"eval_micro_recall": 0.6973255055446836,
"eval_runtime": 2.7464,
"eval_samples_per_second": 351.729,
"eval_steps_per_second": 11.287,
"step": 568
},
{
"epoch": 9.0,
"eval_loss": 0.14247503876686096,
"eval_macro_f1": 0.49866416753622766,
"eval_macro_precision": 0.6531542097723891,
"eval_macro_recall": 0.4611636373916834,
"eval_micro_f1": 0.7498267498267498,
"eval_micro_precision": 0.7997043606799704,
"eval_micro_recall": 0.7058056099151989,
"eval_runtime": 2.7469,
"eval_samples_per_second": 351.665,
"eval_steps_per_second": 11.285,
"step": 639
},
{
"epoch": 10.0,
"eval_loss": 0.14413639903068542,
"eval_macro_f1": 0.515652512172511,
"eval_macro_precision": 0.5714508837953802,
"eval_macro_recall": 0.494438079407737,
"eval_micro_f1": 0.7669675693747909,
"eval_micro_precision": 0.7866941015089163,
"eval_micro_recall": 0.7482061317677756,
"eval_runtime": 2.752,
"eval_samples_per_second": 351.019,
"eval_steps_per_second": 11.265,
"step": 710
},
{
"epoch": 11.0,
"eval_loss": 0.13881583511829376,
"eval_macro_f1": 0.5815495436036973,
"eval_macro_precision": 0.6306553976911301,
"eval_macro_recall": 0.5606746536385887,
"eval_micro_f1": 0.7825223435948361,
"eval_micro_precision": 0.7943548387096774,
"eval_micro_recall": 0.7710371819960861,
"eval_runtime": 2.7353,
"eval_samples_per_second": 353.165,
"eval_steps_per_second": 11.333,
"step": 781
},
{
"epoch": 12.0,
"eval_loss": 0.14889590442180634,
"eval_macro_f1": 0.582949261540157,
"eval_macro_precision": 0.6154355859721446,
"eval_macro_recall": 0.5660348822272816,
"eval_micro_f1": 0.7689724647414371,
"eval_micro_precision": 0.7923875432525952,
"eval_micro_recall": 0.7469015003261579,
"eval_runtime": 2.7291,
"eval_samples_per_second": 353.967,
"eval_steps_per_second": 11.359,
"step": 852
},
{
"epoch": 13.0,
"eval_loss": 0.14177829027175903,
"eval_macro_f1": 0.6003936149639709,
"eval_macro_precision": 0.6178459654131079,
"eval_macro_recall": 0.5986040907728625,
"eval_micro_f1": 0.7824089268132589,
"eval_micro_precision": 0.7873183619550859,
"eval_micro_recall": 0.7775603392041748,
"eval_runtime": 2.7236,
"eval_samples_per_second": 354.674,
"eval_steps_per_second": 11.382,
"step": 923
},
{
"epoch": 14.0,
"eval_loss": 0.13778340816497803,
"eval_macro_f1": 0.6071892145561097,
"eval_macro_precision": 0.6420473960894854,
"eval_macro_recall": 0.5902771317099681,
"eval_micro_f1": 0.7962716378162449,
"eval_micro_precision": 0.8130523453433038,
"eval_micro_recall": 0.7801696020874103,
"eval_runtime": 4.6013,
"eval_samples_per_second": 209.942,
"eval_steps_per_second": 6.737,
"step": 994
},
{
"epoch": 14.08,
"learning_rate": 4.295774647887324e-05,
"loss": 0.0459,
"step": 1000
},
{
"epoch": 15.0,
"eval_loss": 0.14022594690322876,
"eval_macro_f1": 0.6215256196989195,
"eval_macro_precision": 0.7025580891227308,
"eval_macro_recall": 0.5957635875240769,
"eval_micro_f1": 0.7986776859504131,
"eval_micro_precision": 0.8096514745308311,
"eval_micro_recall": 0.7879973907371167,
"eval_runtime": 2.7033,
"eval_samples_per_second": 357.348,
"eval_steps_per_second": 11.468,
"step": 1065
},
{
"epoch": 16.0,
"eval_loss": 0.1448926478624344,
"eval_macro_f1": 0.608747774411539,
"eval_macro_precision": 0.621954970496014,
"eval_macro_recall": 0.6062377476848362,
"eval_micro_f1": 0.79816813869807,
"eval_micro_precision": 0.800524934383202,
"eval_micro_recall": 0.7958251793868232,
"eval_runtime": 2.7015,
"eval_samples_per_second": 357.574,
"eval_steps_per_second": 11.475,
"step": 1136
},
{
"epoch": 17.0,
"eval_loss": 0.1469811201095581,
"eval_macro_f1": 0.6153866789938162,
"eval_macro_precision": 0.650591338815516,
"eval_macro_recall": 0.5950841704161889,
"eval_micro_f1": 0.7996005326231691,
"eval_micro_precision": 0.8164513936097892,
"eval_micro_recall": 0.7834311806914547,
"eval_runtime": 2.701,
"eval_samples_per_second": 357.647,
"eval_steps_per_second": 11.477,
"step": 1207
},
{
"epoch": 18.0,
"eval_loss": 0.15327712893486023,
"eval_macro_f1": 0.6316443999429009,
"eval_macro_precision": 0.6512229785159827,
"eval_macro_recall": 0.6349187975925275,
"eval_micro_f1": 0.7879777850375694,
"eval_micro_precision": 0.7892670157068062,
"eval_micro_recall": 0.786692759295499,
"eval_runtime": 5.7579,
"eval_samples_per_second": 167.769,
"eval_steps_per_second": 5.384,
"step": 1278
},
{
"epoch": 19.0,
"eval_loss": 0.15278153121471405,
"eval_macro_f1": 0.6310003042032536,
"eval_macro_precision": 0.6839818740829384,
"eval_macro_recall": 0.6255359551717035,
"eval_micro_f1": 0.7923659098387628,
"eval_micro_precision": 0.799468791500664,
"eval_micro_recall": 0.7853881278538812,
"eval_runtime": 2.7,
"eval_samples_per_second": 357.774,
"eval_steps_per_second": 11.481,
"step": 1349
},
{
"epoch": 20.0,
"eval_loss": 0.15630246698856354,
"eval_macro_f1": 0.632349332888396,
"eval_macro_precision": 0.6992914076324832,
"eval_macro_recall": 0.6115304182846466,
"eval_micro_f1": 0.7856437273625287,
"eval_micro_precision": 0.793218085106383,
"eval_micro_recall": 0.7782126549249837,
"eval_runtime": 2.7011,
"eval_samples_per_second": 357.638,
"eval_steps_per_second": 11.477,
"step": 1420
},
{
"epoch": 21.0,
"eval_loss": 0.15509752929210663,
"eval_macro_f1": 0.6092726479340159,
"eval_macro_precision": 0.6680318673287149,
"eval_macro_recall": 0.5717336891200013,
"eval_micro_f1": 0.7991967871485944,
"eval_micro_precision": 0.8206185567010309,
"eval_micro_recall": 0.7788649706457925,
"eval_runtime": 2.6986,
"eval_samples_per_second": 357.967,
"eval_steps_per_second": 11.488,
"step": 1491
},
{
"epoch": 21.13,
"learning_rate": 3.943661971830986e-05,
"loss": 0.0169,
"step": 1500
},
{
"epoch": 22.0,
"eval_loss": 0.15633134543895721,
"eval_macro_f1": 0.6559542184820522,
"eval_macro_precision": 0.6920252568569637,
"eval_macro_recall": 0.6423031165953985,
"eval_micro_f1": 0.8009213557091149,
"eval_micro_precision": 0.8081009296148738,
"eval_micro_recall": 0.7938682322243966,
"eval_runtime": 2.7117,
"eval_samples_per_second": 356.232,
"eval_steps_per_second": 11.432,
"step": 1562
},
{
"epoch": 23.0,
"eval_loss": 0.16708678007125854,
"eval_macro_f1": 0.6341964700327825,
"eval_macro_precision": 0.6730301960786685,
"eval_macro_recall": 0.6361571334823197,
"eval_micro_f1": 0.7845195145949493,
"eval_micro_precision": 0.7889182058047494,
"eval_micro_recall": 0.7801696020874103,
"eval_runtime": 2.7072,
"eval_samples_per_second": 356.825,
"eval_steps_per_second": 11.451,
"step": 1633
},
{
"epoch": 24.0,
"eval_loss": 0.15924513339996338,
"eval_macro_f1": 0.6567583708222104,
"eval_macro_precision": 0.6932259526572662,
"eval_macro_recall": 0.6481414988629403,
"eval_micro_f1": 0.8018252933507171,
"eval_micro_precision": 0.8013029315960912,
"eval_micro_recall": 0.8023483365949119,
"eval_runtime": 2.7129,
"eval_samples_per_second": 356.081,
"eval_steps_per_second": 11.427,
"step": 1704
},
{
"epoch": 25.0,
"eval_loss": 0.15955589711666107,
"eval_macro_f1": 0.6499053948607745,
"eval_macro_precision": 0.7063535942579273,
"eval_macro_recall": 0.622573757353082,
"eval_micro_f1": 0.8029100529100529,
"eval_micro_precision": 0.8142186452045607,
"eval_micro_recall": 0.79191128506197,
"eval_runtime": 2.7378,
"eval_samples_per_second": 352.84,
"eval_steps_per_second": 11.323,
"step": 1775
},
{
"epoch": 26.0,
"eval_loss": 0.1635105311870575,
"eval_macro_f1": 0.6483066019452002,
"eval_macro_precision": 0.6824077806874724,
"eval_macro_recall": 0.6413983597695092,
"eval_micro_f1": 0.7965879265091863,
"eval_micro_precision": 0.8013201320132013,
"eval_micro_recall": 0.79191128506197,
"eval_runtime": 2.7006,
"eval_samples_per_second": 357.694,
"eval_steps_per_second": 11.479,
"step": 1846
},
{
"epoch": 27.0,
"eval_loss": 0.16744764149188995,
"eval_macro_f1": 0.6758901394131414,
"eval_macro_precision": 0.7249401901181491,
"eval_macro_recall": 0.6772836080088236,
"eval_micro_f1": 0.789776357827476,
"eval_micro_precision": 0.7739511584220413,
"eval_micro_recall": 0.8062622309197651,
"eval_runtime": 2.7031,
"eval_samples_per_second": 357.373,
"eval_steps_per_second": 11.468,
"step": 1917
},
{
"epoch": 28.0,
"eval_loss": 0.17063026130199432,
"eval_macro_f1": 0.6882911714431834,
"eval_macro_precision": 0.7320265820262954,
"eval_macro_recall": 0.6788510173157892,
"eval_micro_f1": 0.7945659377070908,
"eval_micro_precision": 0.8074074074074075,
"eval_micro_recall": 0.7821265492498369,
"eval_runtime": 2.7065,
"eval_samples_per_second": 356.912,
"eval_steps_per_second": 11.454,
"step": 1988
},
{
"epoch": 28.17,
"learning_rate": 3.5915492957746486e-05,
"loss": 0.0088,
"step": 2000
},
{
"epoch": 29.0,
"eval_loss": 0.17283257842063904,
"eval_macro_f1": 0.6771985368510574,
"eval_macro_precision": 0.7332109505236407,
"eval_macro_recall": 0.663425919860526,
"eval_micro_f1": 0.7905294311081881,
"eval_micro_precision": 0.7970822281167109,
"eval_micro_recall": 0.7840834964122635,
"eval_runtime": 2.7052,
"eval_samples_per_second": 357.094,
"eval_steps_per_second": 11.46,
"step": 2059
},
{
"epoch": 30.0,
"eval_loss": 0.18444736301898956,
"eval_macro_f1": 0.6746367426025414,
"eval_macro_precision": 0.7201523247516791,
"eval_macro_recall": 0.6728013388910854,
"eval_micro_f1": 0.7788242936018187,
"eval_micro_precision": 0.7755498059508409,
"eval_micro_recall": 0.7821265492498369,
"eval_runtime": 2.704,
"eval_samples_per_second": 357.254,
"eval_steps_per_second": 11.465,
"step": 2130
},
{
"epoch": 31.0,
"eval_loss": 0.1695714294910431,
"eval_macro_f1": 0.6879346286499437,
"eval_macro_precision": 0.7516673286655658,
"eval_macro_recall": 0.6669919763081706,
"eval_micro_f1": 0.802747791952895,
"eval_micro_precision": 0.8051181102362205,
"eval_micro_recall": 0.8003913894324853,
"eval_runtime": 2.7049,
"eval_samples_per_second": 357.124,
"eval_steps_per_second": 11.46,
"step": 2201
},
{
"epoch": 32.0,
"eval_loss": 0.16994765400886536,
"eval_macro_f1": 0.7053100252741045,
"eval_macro_precision": 0.805278845304122,
"eval_macro_recall": 0.6824655471979887,
"eval_micro_f1": 0.8006472491909384,
"eval_micro_precision": 0.7944765574823378,
"eval_micro_recall": 0.806914546640574,
"eval_runtime": 2.7034,
"eval_samples_per_second": 357.325,
"eval_steps_per_second": 11.467,
"step": 2272
},
{
"epoch": 33.0,
"eval_loss": 0.17551660537719727,
"eval_macro_f1": 0.7235647899419184,
"eval_macro_precision": 0.7984745552617538,
"eval_macro_recall": 0.6960462941676209,
"eval_micro_f1": 0.8010352636687157,
"eval_micro_precision": 0.7946084724005135,
"eval_micro_recall": 0.8075668623613829,
"eval_runtime": 2.7047,
"eval_samples_per_second": 357.151,
"eval_steps_per_second": 11.461,
"step": 2343
},
{
"epoch": 34.0,
"eval_loss": 0.17244330048561096,
"eval_macro_f1": 0.6948832290211537,
"eval_macro_precision": 0.7553961335643664,
"eval_macro_recall": 0.6730022583492952,
"eval_micro_f1": 0.8063989552726085,
"eval_micro_precision": 0.8071895424836601,
"eval_micro_recall": 0.8056099151989563,
"eval_runtime": 2.7035,
"eval_samples_per_second": 357.318,
"eval_steps_per_second": 11.467,
"step": 2414
},
{
"epoch": 35.0,
"eval_loss": 0.17627869546413422,
"eval_macro_f1": 0.7275662557839568,
"eval_macro_precision": 0.7750438946995515,
"eval_macro_recall": 0.717891607952716,
"eval_micro_f1": 0.8019261637239165,
"eval_micro_precision": 0.7895069532237674,
"eval_micro_recall": 0.8147423352902805,
"eval_runtime": 2.7043,
"eval_samples_per_second": 357.211,
"eval_steps_per_second": 11.463,
"step": 2485
},
{
"epoch": 35.21,
"learning_rate": 3.23943661971831e-05,
"loss": 0.0051,
"step": 2500
},
{
"epoch": 36.0,
"eval_loss": 0.1767842173576355,
"eval_macro_f1": 0.6877210668749649,
"eval_macro_precision": 0.7524115221848844,
"eval_macro_recall": 0.6652074916473143,
"eval_micro_f1": 0.7996077149395227,
"eval_micro_precision": 0.8014416775884666,
"eval_micro_recall": 0.7977821265492498,
"eval_runtime": 2.7004,
"eval_samples_per_second": 357.724,
"eval_steps_per_second": 11.48,
"step": 2556
},
{
"epoch": 37.0,
"eval_loss": 0.17860282957553864,
"eval_macro_f1": 0.7341275703601238,
"eval_macro_precision": 0.8208336935763849,
"eval_macro_recall": 0.6963532745066767,
"eval_micro_f1": 0.8019512195121952,
"eval_micro_precision": 0.7996108949416343,
"eval_micro_recall": 0.8043052837573386,
"eval_runtime": 2.7018,
"eval_samples_per_second": 357.539,
"eval_steps_per_second": 11.474,
"step": 2627
},
{
"epoch": 38.0,
"eval_loss": 0.18057870864868164,
"eval_macro_f1": 0.7414142113821449,
"eval_macro_precision": 0.8229012131281167,
"eval_macro_recall": 0.7070558641306933,
"eval_micro_f1": 0.8049575994781474,
"eval_micro_precision": 0.8049575994781474,
"eval_micro_recall": 0.8049575994781474,
"eval_runtime": 2.703,
"eval_samples_per_second": 357.385,
"eval_steps_per_second": 11.469,
"step": 2698
}
],
"max_steps": 7100,
"num_train_epochs": 100,
"total_flos": 2.2147253694876576e+16,
"trial_name": null,
"trial_params": null
}