|
{ |
|
"best_metric": 0.8249799337347952, |
|
"best_model_checkpoint": "./CARES/checkpoints/bert-ba-stratified/run-9/checkpoint-3976", |
|
"epoch": 56.0, |
|
"global_step": 3976, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.24943208694458008, |
|
"eval_macro_f1": 0.08911588063155029, |
|
"eval_macro_precision": 0.1020147123407993, |
|
"eval_macro_recall": 0.08017933766090879, |
|
"eval_micro_f1": 0.27627302275189597, |
|
"eval_micro_precision": 0.7750759878419453, |
|
"eval_micro_recall": 0.16809492419248517, |
|
"eval_runtime": 2.9775, |
|
"eval_samples_per_second": 324.431, |
|
"eval_steps_per_second": 20.487, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.1698431819677353, |
|
"eval_macro_f1": 0.30238618603356054, |
|
"eval_macro_precision": 0.4320649543604988, |
|
"eval_macro_recall": 0.2644492382273683, |
|
"eval_micro_f1": 0.6521739130434782, |
|
"eval_micro_precision": 0.8376421923474664, |
|
"eval_micro_recall": 0.5339485827290705, |
|
"eval_runtime": 2.979, |
|
"eval_samples_per_second": 324.273, |
|
"eval_steps_per_second": 20.477, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.14047418534755707, |
|
"eval_macro_f1": 0.40223830727079396, |
|
"eval_macro_precision": 0.4784721691006365, |
|
"eval_macro_recall": 0.3687052580973402, |
|
"eval_micro_f1": 0.739880059970015, |
|
"eval_micro_precision": 0.8575152041702867, |
|
"eval_micro_recall": 0.6506262359920897, |
|
"eval_runtime": 2.9805, |
|
"eval_samples_per_second": 324.11, |
|
"eval_steps_per_second": 20.467, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 3.279287453609026e-05, |
|
"loss": 0.2244, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.1187577173113823, |
|
"eval_macro_f1": 0.5347249220382584, |
|
"eval_macro_precision": 0.7327975628305986, |
|
"eval_macro_recall": 0.4754068941604508, |
|
"eval_micro_f1": 0.7880299251870324, |
|
"eval_micro_precision": 0.8573643410852713, |
|
"eval_micro_recall": 0.7290705339485827, |
|
"eval_runtime": 2.9805, |
|
"eval_samples_per_second": 324.108, |
|
"eval_steps_per_second": 20.466, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.11074026674032211, |
|
"eval_macro_f1": 0.5991127961103198, |
|
"eval_macro_precision": 0.6991665233729463, |
|
"eval_macro_recall": 0.5596673950826421, |
|
"eval_micro_f1": 0.8128196385952949, |
|
"eval_micro_precision": 0.8418079096045198, |
|
"eval_micro_recall": 0.7857613711272248, |
|
"eval_runtime": 2.9796, |
|
"eval_samples_per_second": 324.206, |
|
"eval_steps_per_second": 20.473, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.10078531503677368, |
|
"eval_macro_f1": 0.6568773778388772, |
|
"eval_macro_precision": 0.700413763066982, |
|
"eval_macro_recall": 0.6283428000904666, |
|
"eval_micro_f1": 0.8347529812606473, |
|
"eval_micro_precision": 0.8638928067700987, |
|
"eval_micro_recall": 0.8075148319050758, |
|
"eval_runtime": 2.979, |
|
"eval_samples_per_second": 324.274, |
|
"eval_steps_per_second": 20.477, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.10221733897924423, |
|
"eval_macro_f1": 0.6758575377881516, |
|
"eval_macro_precision": 0.838910780569426, |
|
"eval_macro_recall": 0.6238243641780066, |
|
"eval_micro_f1": 0.8337912087912088, |
|
"eval_micro_precision": 0.8702508960573476, |
|
"eval_micro_recall": 0.8002636783124588, |
|
"eval_runtime": 2.9795, |
|
"eval_samples_per_second": 324.21, |
|
"eval_steps_per_second": 20.473, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 3.0308565859113728e-05, |
|
"loss": 0.0513, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.10313227772712708, |
|
"eval_macro_f1": 0.7298003500123689, |
|
"eval_macro_precision": 0.8118751777216693, |
|
"eval_macro_recall": 0.6944187826598622, |
|
"eval_micro_f1": 0.8354006034193765, |
|
"eval_micro_precision": 0.849931787175989, |
|
"eval_micro_recall": 0.8213579433091628, |
|
"eval_runtime": 2.9811, |
|
"eval_samples_per_second": 324.042, |
|
"eval_steps_per_second": 20.462, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.10032625496387482, |
|
"eval_macro_f1": 0.7681297243773157, |
|
"eval_macro_precision": 0.8958394795684446, |
|
"eval_macro_recall": 0.7124194145895655, |
|
"eval_micro_f1": 0.8443093549476527, |
|
"eval_micro_precision": 0.8656509695290858, |
|
"eval_micro_recall": 0.8239947264337508, |
|
"eval_runtime": 2.9813, |
|
"eval_samples_per_second": 324.018, |
|
"eval_steps_per_second": 20.461, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.1044757142663002, |
|
"eval_macro_f1": 0.7829375079467304, |
|
"eval_macro_precision": 0.8858384241353942, |
|
"eval_macro_recall": 0.7367403001149204, |
|
"eval_micro_f1": 0.8420698924731183, |
|
"eval_micro_precision": 0.8588074023303632, |
|
"eval_micro_recall": 0.8259723137771918, |
|
"eval_runtime": 2.9861, |
|
"eval_samples_per_second": 323.499, |
|
"eval_steps_per_second": 20.428, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 21.13, |
|
"learning_rate": 2.7824257182137193e-05, |
|
"loss": 0.0183, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.1039622500538826, |
|
"eval_macro_f1": 0.7902798824417182, |
|
"eval_macro_precision": 0.89276275853935, |
|
"eval_macro_recall": 0.7403232660636272, |
|
"eval_micro_f1": 0.8479512360311547, |
|
"eval_micro_precision": 0.871866295264624, |
|
"eval_micro_recall": 0.8253131179960448, |
|
"eval_runtime": 2.977, |
|
"eval_samples_per_second": 324.483, |
|
"eval_steps_per_second": 20.49, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.10449391603469849, |
|
"eval_macro_f1": 0.7973957256263516, |
|
"eval_macro_precision": 0.8897797609904068, |
|
"eval_macro_recall": 0.7482788788828134, |
|
"eval_micro_f1": 0.8530297957817209, |
|
"eval_micro_precision": 0.8666666666666667, |
|
"eval_micro_recall": 0.8398154251812788, |
|
"eval_runtime": 2.982, |
|
"eval_samples_per_second": 323.947, |
|
"eval_steps_per_second": 20.456, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 0.10751193016767502, |
|
"eval_macro_f1": 0.815099576727704, |
|
"eval_macro_precision": 0.8596170620799346, |
|
"eval_macro_recall": 0.7893940744657464, |
|
"eval_micro_f1": 0.8517287234042553, |
|
"eval_micro_precision": 0.8591549295774648, |
|
"eval_micro_recall": 0.8444297956493079, |
|
"eval_runtime": 2.9874, |
|
"eval_samples_per_second": 323.362, |
|
"eval_steps_per_second": 20.419, |
|
"step": 1846 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 0.10826986283063889, |
|
"eval_macro_f1": 0.8122978188941863, |
|
"eval_macro_precision": 0.8818206343623207, |
|
"eval_macro_recall": 0.7768327155632533, |
|
"eval_micro_f1": 0.8507362784471219, |
|
"eval_micro_precision": 0.8640380693405847, |
|
"eval_micro_recall": 0.8378378378378378, |
|
"eval_runtime": 2.9805, |
|
"eval_samples_per_second": 324.106, |
|
"eval_steps_per_second": 20.466, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 28.17, |
|
"learning_rate": 2.5339948505160657e-05, |
|
"loss": 0.0094, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.11172767728567123, |
|
"eval_macro_f1": 0.7952069303877679, |
|
"eval_macro_precision": 0.8834731091800803, |
|
"eval_macro_recall": 0.7589521997225694, |
|
"eval_micro_f1": 0.8510210913960494, |
|
"eval_micro_precision": 0.8646258503401361, |
|
"eval_micro_recall": 0.8378378378378378, |
|
"eval_runtime": 2.9811, |
|
"eval_samples_per_second": 324.039, |
|
"eval_steps_per_second": 20.462, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 0.12214264273643494, |
|
"eval_macro_f1": 0.784963786280585, |
|
"eval_macro_precision": 0.8399655989881138, |
|
"eval_macro_recall": 0.7539663033116887, |
|
"eval_micro_f1": 0.8395881766854865, |
|
"eval_micro_precision": 0.8460508701472557, |
|
"eval_micro_recall": 0.8332234673698088, |
|
"eval_runtime": 2.9802, |
|
"eval_samples_per_second": 324.138, |
|
"eval_steps_per_second": 20.468, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 0.10992709547281265, |
|
"eval_macro_f1": 0.818123937895116, |
|
"eval_macro_precision": 0.846307748666965, |
|
"eval_macro_recall": 0.7959472696549352, |
|
"eval_micro_f1": 0.8540829986613119, |
|
"eval_micro_precision": 0.86743711760707, |
|
"eval_micro_recall": 0.8411338167435728, |
|
"eval_runtime": 2.9817, |
|
"eval_samples_per_second": 323.976, |
|
"eval_steps_per_second": 20.458, |
|
"step": 2414 |
|
}, |
|
{ |
|
"epoch": 35.21, |
|
"learning_rate": 2.2855639828184125e-05, |
|
"loss": 0.0088, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 0.11282340437173843, |
|
"eval_macro_f1": 0.810588172484549, |
|
"eval_macro_precision": 0.8881718400115531, |
|
"eval_macro_recall": 0.7660828954423657, |
|
"eval_micro_f1": 0.8533783783783784, |
|
"eval_micro_precision": 0.8752598752598753, |
|
"eval_micro_recall": 0.8325642715886619, |
|
"eval_runtime": 2.9823, |
|
"eval_samples_per_second": 323.913, |
|
"eval_steps_per_second": 20.454, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 0.11364943534135818, |
|
"eval_macro_f1": 0.8086979072156089, |
|
"eval_macro_precision": 0.874192444614366, |
|
"eval_macro_recall": 0.7687841160082525, |
|
"eval_micro_f1": 0.855510752688172, |
|
"eval_micro_precision": 0.8725154215215901, |
|
"eval_micro_recall": 0.8391562294001318, |
|
"eval_runtime": 2.9856, |
|
"eval_samples_per_second": 323.552, |
|
"eval_steps_per_second": 20.431, |
|
"step": 2698 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 0.11377756297588348, |
|
"eval_macro_f1": 0.8074550758048753, |
|
"eval_macro_precision": 0.8601506214626871, |
|
"eval_macro_recall": 0.7772364003410493, |
|
"eval_micro_f1": 0.8550483172275909, |
|
"eval_micro_precision": 0.8645552560646901, |
|
"eval_micro_recall": 0.8457481872116018, |
|
"eval_runtime": 2.987, |
|
"eval_samples_per_second": 323.399, |
|
"eval_steps_per_second": 20.422, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 0.11456111818552017, |
|
"eval_macro_f1": 0.8197602680311404, |
|
"eval_macro_precision": 0.8652772327919502, |
|
"eval_macro_recall": 0.7923572373451824, |
|
"eval_micro_f1": 0.8584748584748586, |
|
"eval_micro_precision": 0.8674293405114402, |
|
"eval_micro_recall": 0.8497033618984838, |
|
"eval_runtime": 2.9793, |
|
"eval_samples_per_second": 324.237, |
|
"eval_steps_per_second": 20.475, |
|
"step": 2982 |
|
}, |
|
{ |
|
"epoch": 42.25, |
|
"learning_rate": 2.0371331151207586e-05, |
|
"loss": 0.0049, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 0.11773423105478287, |
|
"eval_macro_f1": 0.8086306686692242, |
|
"eval_macro_precision": 0.8642281309223148, |
|
"eval_macro_recall": 0.7764573931394076, |
|
"eval_micro_f1": 0.8553291012362179, |
|
"eval_micro_precision": 0.8672086720867209, |
|
"eval_micro_recall": 0.8437705998681608, |
|
"eval_runtime": 2.9813, |
|
"eval_samples_per_second": 324.023, |
|
"eval_steps_per_second": 20.461, |
|
"step": 3124 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 0.11780666559934616, |
|
"eval_macro_f1": 0.8101599105843645, |
|
"eval_macro_precision": 0.8691080726361069, |
|
"eval_macro_recall": 0.7762766074908475, |
|
"eval_micro_f1": 0.8565723793677205, |
|
"eval_micro_precision": 0.8649193548387096, |
|
"eval_micro_recall": 0.8483849703361899, |
|
"eval_runtime": 2.9804, |
|
"eval_samples_per_second": 324.123, |
|
"eval_steps_per_second": 20.467, |
|
"step": 3266 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 0.11932362616062164, |
|
"eval_macro_f1": 0.8165194442314286, |
|
"eval_macro_precision": 0.8648115942931565, |
|
"eval_macro_recall": 0.7864788866212832, |
|
"eval_micro_f1": 0.8535201868535202, |
|
"eval_micro_precision": 0.8641891891891892, |
|
"eval_micro_recall": 0.8431114040870138, |
|
"eval_runtime": 2.985, |
|
"eval_samples_per_second": 323.616, |
|
"eval_steps_per_second": 20.435, |
|
"step": 3408 |
|
}, |
|
{ |
|
"epoch": 49.3, |
|
"learning_rate": 1.7887022474231054e-05, |
|
"loss": 0.0034, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 0.12221735715866089, |
|
"eval_macro_f1": 0.8141089592504215, |
|
"eval_macro_precision": 0.8615361853719665, |
|
"eval_macro_recall": 0.7844581807191786, |
|
"eval_micro_f1": 0.8523714094856378, |
|
"eval_micro_precision": 0.8639133378469871, |
|
"eval_micro_recall": 0.8411338167435728, |
|
"eval_runtime": 2.9803, |
|
"eval_samples_per_second": 324.125, |
|
"eval_steps_per_second": 20.468, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_loss": 0.12201466411352158, |
|
"eval_macro_f1": 0.8215839258353351, |
|
"eval_macro_precision": 0.8618774470352207, |
|
"eval_macro_recall": 0.795741448912531, |
|
"eval_micro_f1": 0.8534223706176962, |
|
"eval_micro_precision": 0.86468200270636, |
|
"eval_micro_recall": 0.8424522083058669, |
|
"eval_runtime": 2.9796, |
|
"eval_samples_per_second": 324.208, |
|
"eval_steps_per_second": 20.473, |
|
"step": 3692 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_loss": 0.12538054585456848, |
|
"eval_macro_f1": 0.8134875725433206, |
|
"eval_macro_precision": 0.8710944940417797, |
|
"eval_macro_recall": 0.7763605326378977, |
|
"eval_micro_f1": 0.85425782564793, |
|
"eval_micro_precision": 0.8727647867950481, |
|
"eval_micro_recall": 0.8365194462755439, |
|
"eval_runtime": 2.9841, |
|
"eval_samples_per_second": 323.717, |
|
"eval_steps_per_second": 20.442, |
|
"step": 3834 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_loss": 0.12492760270833969, |
|
"eval_macro_f1": 0.8249799337347952, |
|
"eval_macro_precision": 0.8672879358583392, |
|
"eval_macro_recall": 0.7974535529440212, |
|
"eval_micro_f1": 0.8574290484140235, |
|
"eval_micro_precision": 0.8687415426251691, |
|
"eval_micro_recall": 0.8464073829927489, |
|
"eval_runtime": 2.9821, |
|
"eval_samples_per_second": 323.938, |
|
"eval_steps_per_second": 20.456, |
|
"step": 3976 |
|
} |
|
], |
|
"max_steps": 7100, |
|
"num_train_epochs": 100, |
|
"total_flos": 3.752304368656253e+16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"adam_epsilon": 2.4799103776060603e-09, |
|
"learning_rate": 3.4268553890214325e-05, |
|
"per_device_eval_batch_size": 16, |
|
"per_device_train_batch_size": 32, |
|
"seed": 326, |
|
"warmup_steps": 203, |
|
"weight_decay": 2.8436289860950645e-08 |
|
} |
|
} |
|
|