|
{ |
|
"best_metric": 0.6252569868800233, |
|
"best_model_checkpoint": "./CARES/checkpoints/bio-ber-stratified/run-1/checkpoint-3408", |
|
"epoch": 24.170212765957448, |
|
"global_step": 3408, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 0.2541882395744324, |
|
"eval_macro_f1": 0.026901004304160685, |
|
"eval_macro_precision": 0.048828125, |
|
"eval_macro_recall": 0.018564356435643563, |
|
"eval_micro_f1": 0.17543859649122806, |
|
"eval_micro_precision": 0.78125, |
|
"eval_micro_recall": 0.09881422924901186, |
|
"eval_runtime": 9.8993, |
|
"eval_samples_per_second": 98.088, |
|
"eval_steps_per_second": 3.132, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 0.22154481709003448, |
|
"eval_macro_f1": 0.10623663256202004, |
|
"eval_macro_precision": 0.18484581306311026, |
|
"eval_macro_recall": 0.09432864376066943, |
|
"eval_micro_f1": 0.41890639481000924, |
|
"eval_micro_precision": 0.70625, |
|
"eval_micro_recall": 0.2977602108036891, |
|
"eval_runtime": 9.9434, |
|
"eval_samples_per_second": 97.653, |
|
"eval_steps_per_second": 3.118, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_loss": 0.18873167037963867, |
|
"eval_macro_f1": 0.20450440261358283, |
|
"eval_macro_precision": 0.3100231872943203, |
|
"eval_macro_recall": 0.1738961275122085, |
|
"eval_micro_f1": 0.568944099378882, |
|
"eval_micro_precision": 0.7658862876254181, |
|
"eval_micro_recall": 0.4525691699604743, |
|
"eval_runtime": 9.9071, |
|
"eval_samples_per_second": 98.011, |
|
"eval_steps_per_second": 3.129, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 4.1013980584550545e-05, |
|
"loss": 0.2716, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_loss": 0.17541566491127014, |
|
"eval_macro_f1": 0.2919005682162893, |
|
"eval_macro_precision": 0.3812561939105582, |
|
"eval_macro_recall": 0.25249891158193927, |
|
"eval_micro_f1": 0.6301369863013698, |
|
"eval_micro_precision": 0.7762777242044359, |
|
"eval_micro_recall": 0.5303030303030303, |
|
"eval_runtime": 9.944, |
|
"eval_samples_per_second": 97.647, |
|
"eval_steps_per_second": 3.117, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"eval_loss": 0.1692640334367752, |
|
"eval_macro_f1": 0.4143233067428529, |
|
"eval_macro_precision": 0.5073151086851961, |
|
"eval_macro_recall": 0.3678263076835473, |
|
"eval_micro_f1": 0.6415811478525276, |
|
"eval_micro_precision": 0.7583108715184187, |
|
"eval_micro_recall": 0.5559947299077734, |
|
"eval_runtime": 9.9093, |
|
"eval_samples_per_second": 97.989, |
|
"eval_steps_per_second": 3.128, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"eval_loss": 0.1651107370853424, |
|
"eval_macro_f1": 0.4093940822021235, |
|
"eval_macro_precision": 0.5308122229847152, |
|
"eval_macro_recall": 0.358915994433547, |
|
"eval_micro_f1": 0.6615737203972498, |
|
"eval_micro_precision": 0.7872727272727272, |
|
"eval_micro_recall": 0.5704874835309618, |
|
"eval_runtime": 9.8986, |
|
"eval_samples_per_second": 98.095, |
|
"eval_steps_per_second": 3.132, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 0.1729801893234253, |
|
"eval_macro_f1": 0.42556225648133283, |
|
"eval_macro_precision": 0.58718674661248, |
|
"eval_macro_recall": 0.3723531112715551, |
|
"eval_micro_f1": 0.6549062844542448, |
|
"eval_micro_precision": 0.7406483790523691, |
|
"eval_micro_recall": 0.5869565217391305, |
|
"eval_runtime": 9.912, |
|
"eval_samples_per_second": 97.962, |
|
"eval_steps_per_second": 3.128, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 3.950611365129501e-05, |
|
"loss": 0.1249, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"eval_loss": 0.17469000816345215, |
|
"eval_macro_f1": 0.4273735214628892, |
|
"eval_macro_precision": 0.6238596091113585, |
|
"eval_macro_recall": 0.375472703111614, |
|
"eval_micro_f1": 0.6517101875689592, |
|
"eval_micro_precision": 0.7377185678601166, |
|
"eval_micro_recall": 0.5836627140974967, |
|
"eval_runtime": 9.8904, |
|
"eval_samples_per_second": 98.176, |
|
"eval_steps_per_second": 3.134, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"eval_loss": 0.17877863347530365, |
|
"eval_macro_f1": 0.475201225203798, |
|
"eval_macro_precision": 0.5739651831374122, |
|
"eval_macro_recall": 0.4350214360169413, |
|
"eval_micro_f1": 0.6738227146814404, |
|
"eval_micro_precision": 0.7102189781021898, |
|
"eval_micro_recall": 0.6409749670619236, |
|
"eval_runtime": 9.9151, |
|
"eval_samples_per_second": 97.932, |
|
"eval_steps_per_second": 3.127, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"eval_loss": 0.17759235203266144, |
|
"eval_macro_f1": 0.47628022362120415, |
|
"eval_macro_precision": 0.6058635009097308, |
|
"eval_macro_recall": 0.42583522254336503, |
|
"eval_micro_f1": 0.6828591256072173, |
|
"eval_micro_precision": 0.7214076246334311, |
|
"eval_micro_recall": 0.6482213438735178, |
|
"eval_runtime": 9.9061, |
|
"eval_samples_per_second": 98.021, |
|
"eval_steps_per_second": 3.129, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 3.7998246718039476e-05, |
|
"loss": 0.0762, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"eval_loss": 0.1776473969221115, |
|
"eval_macro_f1": 0.5089905336706273, |
|
"eval_macro_precision": 0.591647351441912, |
|
"eval_macro_recall": 0.47047423582847886, |
|
"eval_micro_f1": 0.688728024819028, |
|
"eval_micro_precision": 0.7223427331887202, |
|
"eval_micro_recall": 0.658102766798419, |
|
"eval_runtime": 9.9428, |
|
"eval_samples_per_second": 97.658, |
|
"eval_steps_per_second": 3.118, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"eval_loss": 0.18189238011837006, |
|
"eval_macro_f1": 0.48004944819202805, |
|
"eval_macro_precision": 0.6458220963071541, |
|
"eval_macro_recall": 0.41943684599646036, |
|
"eval_micro_f1": 0.6883162725026473, |
|
"eval_micro_precision": 0.7414448669201521, |
|
"eval_micro_recall": 0.642292490118577, |
|
"eval_runtime": 9.9058, |
|
"eval_samples_per_second": 98.024, |
|
"eval_steps_per_second": 3.129, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"eval_loss": 0.19002576172351837, |
|
"eval_macro_f1": 0.5199688644954614, |
|
"eval_macro_precision": 0.5810572518525183, |
|
"eval_macro_recall": 0.49026284720966035, |
|
"eval_micro_f1": 0.6896090878717006, |
|
"eval_micro_precision": 0.6996610169491525, |
|
"eval_micro_recall": 0.6798418972332015, |
|
"eval_runtime": 9.9018, |
|
"eval_samples_per_second": 98.063, |
|
"eval_steps_per_second": 3.131, |
|
"step": 1846 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"eval_loss": 0.19194385409355164, |
|
"eval_macro_f1": 0.520691157744967, |
|
"eval_macro_precision": 0.5966466597800141, |
|
"eval_macro_recall": 0.47615158217927345, |
|
"eval_micro_f1": 0.6891228070175438, |
|
"eval_micro_precision": 0.7372372372372372, |
|
"eval_micro_recall": 0.6469038208168643, |
|
"eval_runtime": 9.9153, |
|
"eval_samples_per_second": 97.93, |
|
"eval_steps_per_second": 3.126, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 3.649037978478394e-05, |
|
"loss": 0.0449, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"eval_loss": 0.192080557346344, |
|
"eval_macro_f1": 0.5605505080291033, |
|
"eval_macro_precision": 0.6623700312301602, |
|
"eval_macro_recall": 0.5087278128204601, |
|
"eval_micro_f1": 0.6957736639888229, |
|
"eval_micro_precision": 0.7405204460966542, |
|
"eval_micro_recall": 0.6561264822134387, |
|
"eval_runtime": 9.9139, |
|
"eval_samples_per_second": 97.943, |
|
"eval_steps_per_second": 3.127, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"eval_loss": 0.1960616409778595, |
|
"eval_macro_f1": 0.53306733167955, |
|
"eval_macro_precision": 0.5965001445588101, |
|
"eval_macro_recall": 0.49021955336676715, |
|
"eval_micro_f1": 0.698961937716263, |
|
"eval_micro_precision": 0.7361516034985423, |
|
"eval_micro_recall": 0.6653491436100132, |
|
"eval_runtime": 9.942, |
|
"eval_samples_per_second": 97.667, |
|
"eval_steps_per_second": 3.118, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"eval_loss": 0.20587308704853058, |
|
"eval_macro_f1": 0.5199165971603507, |
|
"eval_macro_precision": 0.5554999099415838, |
|
"eval_macro_recall": 0.501101816922742, |
|
"eval_micro_f1": 0.6840148698884758, |
|
"eval_micro_precision": 0.7022900763358778, |
|
"eval_micro_recall": 0.6666666666666666, |
|
"eval_runtime": 9.9026, |
|
"eval_samples_per_second": 98.055, |
|
"eval_steps_per_second": 3.131, |
|
"step": 2414 |
|
}, |
|
{ |
|
"epoch": 17.73, |
|
"learning_rate": 3.498251285152841e-05, |
|
"loss": 0.0256, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"eval_loss": 0.20113714039325714, |
|
"eval_macro_f1": 0.529618760993627, |
|
"eval_macro_precision": 0.6032995030406657, |
|
"eval_macro_recall": 0.4871012589073676, |
|
"eval_micro_f1": 0.6982167352537724, |
|
"eval_micro_precision": 0.7281831187410587, |
|
"eval_micro_recall": 0.6706192358366272, |
|
"eval_runtime": 9.899, |
|
"eval_samples_per_second": 98.091, |
|
"eval_steps_per_second": 3.132, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"eval_loss": 0.20477713644504547, |
|
"eval_macro_f1": 0.5176514067210329, |
|
"eval_macro_precision": 0.5857117797598407, |
|
"eval_macro_recall": 0.47641342794341435, |
|
"eval_micro_f1": 0.6941015089163237, |
|
"eval_micro_precision": 0.7238912732474965, |
|
"eval_micro_recall": 0.6666666666666666, |
|
"eval_runtime": 9.9038, |
|
"eval_samples_per_second": 98.043, |
|
"eval_steps_per_second": 3.13, |
|
"step": 2698 |
|
}, |
|
{ |
|
"epoch": 20.14, |
|
"eval_loss": 0.2074529379606247, |
|
"eval_macro_f1": 0.5704867306049771, |
|
"eval_macro_precision": 0.6916588415347732, |
|
"eval_macro_recall": 0.5189916858370269, |
|
"eval_micro_f1": 0.7134187457855697, |
|
"eval_micro_precision": 0.7306629834254144, |
|
"eval_micro_recall": 0.696969696969697, |
|
"eval_runtime": 9.8989, |
|
"eval_samples_per_second": 98.092, |
|
"eval_steps_per_second": 3.132, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 21.15, |
|
"eval_loss": 0.21805770695209503, |
|
"eval_macro_f1": 0.5306833888134577, |
|
"eval_macro_precision": 0.6107555764603518, |
|
"eval_macro_recall": 0.4823641731674541, |
|
"eval_micro_f1": 0.709366391184573, |
|
"eval_micro_precision": 0.7431457431457431, |
|
"eval_micro_recall": 0.6785243741765481, |
|
"eval_runtime": 9.95, |
|
"eval_samples_per_second": 97.588, |
|
"eval_steps_per_second": 3.116, |
|
"step": 2982 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"learning_rate": 3.347464591827287e-05, |
|
"loss": 0.015, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 22.16, |
|
"eval_loss": 0.21425750851631165, |
|
"eval_macro_f1": 0.5900896049733363, |
|
"eval_macro_precision": 0.7164448328913706, |
|
"eval_macro_recall": 0.5339604602685475, |
|
"eval_micro_f1": 0.7065292096219932, |
|
"eval_micro_precision": 0.7385057471264368, |
|
"eval_micro_recall": 0.6772068511198946, |
|
"eval_runtime": 9.9181, |
|
"eval_samples_per_second": 97.901, |
|
"eval_steps_per_second": 3.126, |
|
"step": 3124 |
|
}, |
|
{ |
|
"epoch": 23.16, |
|
"eval_loss": 0.21841417253017426, |
|
"eval_macro_f1": 0.5731327712532719, |
|
"eval_macro_precision": 0.6766904212874656, |
|
"eval_macro_recall": 0.5216860842370524, |
|
"eval_micro_f1": 0.7131730443616662, |
|
"eval_micro_precision": 0.7337979094076655, |
|
"eval_micro_recall": 0.6936758893280632, |
|
"eval_runtime": 9.8943, |
|
"eval_samples_per_second": 98.137, |
|
"eval_steps_per_second": 3.133, |
|
"step": 3266 |
|
}, |
|
{ |
|
"epoch": 24.17, |
|
"eval_loss": 0.22420497238636017, |
|
"eval_macro_f1": 0.6252569868800233, |
|
"eval_macro_precision": 0.7327575817423139, |
|
"eval_macro_recall": 0.5742398722065618, |
|
"eval_micro_f1": 0.7081471295978001, |
|
"eval_micro_precision": 0.7404744787922358, |
|
"eval_micro_recall": 0.6785243741765481, |
|
"eval_runtime": 9.961, |
|
"eval_samples_per_second": 97.48, |
|
"eval_steps_per_second": 3.112, |
|
"step": 3408 |
|
} |
|
], |
|
"max_steps": 14100, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.1986190383959552e+16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"adam_epsilon": 1.241521755885265e-07, |
|
"learning_rate": 4.181013432530947e-05, |
|
"per_device_eval_batch_size": 32, |
|
"per_device_train_batch_size": 16, |
|
"seed": 321, |
|
"warmup_steps": 236, |
|
"weight_decay": 3.190500833235664e-11 |
|
} |
|
} |
|
|