|
{ |
|
"best_metric": 0.7370102490601179, |
|
"best_model_checkpoint": "./CARES/checkpoints/roberta-stratified/run-4/checkpoint-5112", |
|
"epoch": 36.255319148936174, |
|
"global_step": 5112, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 0.3023780882358551, |
|
"eval_macro_f1": 0.04276761517615176, |
|
"eval_macro_precision": 0.03250514933058703, |
|
"eval_macro_recall": 0.0625, |
|
"eval_micro_f1": 0.4057854560064283, |
|
"eval_micro_precision": 0.5200823892893924, |
|
"eval_micro_recall": 0.3326745718050066, |
|
"eval_runtime": 2.6706, |
|
"eval_samples_per_second": 363.595, |
|
"eval_steps_per_second": 22.842, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 0.2355797439813614, |
|
"eval_macro_f1": 0.0478700416730977, |
|
"eval_macro_precision": 0.10847007722007722, |
|
"eval_macro_recall": 0.04781835896355593, |
|
"eval_micro_f1": 0.3766552231486022, |
|
"eval_micro_precision": 0.7370441458733206, |
|
"eval_micro_recall": 0.25296442687747034, |
|
"eval_runtime": 2.6743, |
|
"eval_samples_per_second": 363.08, |
|
"eval_steps_per_second": 22.809, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_loss": 0.20839156210422516, |
|
"eval_macro_f1": 0.1270678623977507, |
|
"eval_macro_precision": 0.15146020789583886, |
|
"eval_macro_recall": 0.11056835837347367, |
|
"eval_micro_f1": 0.5080789946140036, |
|
"eval_micro_precision": 0.7971830985915493, |
|
"eval_micro_recall": 0.37285902503293805, |
|
"eval_runtime": 2.6737, |
|
"eval_samples_per_second": 363.172, |
|
"eval_steps_per_second": 22.815, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 4.760888771678367e-05, |
|
"loss": 0.3067, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_loss": 0.17458897829055786, |
|
"eval_macro_f1": 0.2134043623340923, |
|
"eval_macro_precision": 0.2555480902740095, |
|
"eval_macro_recall": 0.1956241201896679, |
|
"eval_micro_f1": 0.652869972666927, |
|
"eval_micro_precision": 0.8015340364333653, |
|
"eval_micro_recall": 0.5507246376811594, |
|
"eval_runtime": 2.6714, |
|
"eval_samples_per_second": 363.484, |
|
"eval_steps_per_second": 22.835, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"eval_loss": 0.1594998985528946, |
|
"eval_macro_f1": 0.322947699403185, |
|
"eval_macro_precision": 0.33082406427783345, |
|
"eval_macro_recall": 0.31592054350156284, |
|
"eval_micro_f1": 0.7086383601756956, |
|
"eval_micro_precision": 0.7973640856672158, |
|
"eval_micro_recall": 0.6376811594202898, |
|
"eval_runtime": 2.6774, |
|
"eval_samples_per_second": 362.663, |
|
"eval_steps_per_second": 22.783, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"eval_loss": 0.14328011870384216, |
|
"eval_macro_f1": 0.35552541662372633, |
|
"eval_macro_precision": 0.4154000509380286, |
|
"eval_macro_recall": 0.3384352474665031, |
|
"eval_micro_f1": 0.730332967435053, |
|
"eval_micro_precision": 0.8213991769547325, |
|
"eval_micro_recall": 0.6574440052700923, |
|
"eval_runtime": 2.6759, |
|
"eval_samples_per_second": 362.867, |
|
"eval_steps_per_second": 22.796, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 0.1372288167476654, |
|
"eval_macro_f1": 0.35769698646328396, |
|
"eval_macro_precision": 0.45729587282684003, |
|
"eval_macro_recall": 0.340956446807801, |
|
"eval_micro_f1": 0.747014115092291, |
|
"eval_micro_precision": 0.8289156626506025, |
|
"eval_micro_recall": 0.6798418972332015, |
|
"eval_runtime": 2.6665, |
|
"eval_samples_per_second": 364.154, |
|
"eval_steps_per_second": 22.877, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 4.585856096249015e-05, |
|
"loss": 0.1366, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"eval_loss": 0.13587501645088196, |
|
"eval_macro_f1": 0.4519999262177494, |
|
"eval_macro_precision": 0.6383482484756009, |
|
"eval_macro_recall": 0.4120606473790193, |
|
"eval_micro_f1": 0.7476635514018692, |
|
"eval_micro_precision": 0.8227848101265823, |
|
"eval_micro_recall": 0.6851119894598156, |
|
"eval_runtime": 2.6678, |
|
"eval_samples_per_second": 363.976, |
|
"eval_steps_per_second": 22.866, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"eval_loss": 0.1254434585571289, |
|
"eval_macro_f1": 0.5752781473187552, |
|
"eval_macro_precision": 0.6682540412600851, |
|
"eval_macro_recall": 0.5248832453258128, |
|
"eval_micro_f1": 0.7869884575026234, |
|
"eval_micro_precision": 0.8389261744966443, |
|
"eval_micro_recall": 0.741106719367589, |
|
"eval_runtime": 2.6676, |
|
"eval_samples_per_second": 364.002, |
|
"eval_steps_per_second": 22.867, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"eval_loss": 0.12716087698936462, |
|
"eval_macro_f1": 0.5783160913404322, |
|
"eval_macro_precision": 0.6669120855288475, |
|
"eval_macro_recall": 0.5333612306839322, |
|
"eval_micro_f1": 0.7954701441317777, |
|
"eval_micro_precision": 0.8302292263610315, |
|
"eval_micro_recall": 0.7635046113306982, |
|
"eval_runtime": 2.6683, |
|
"eval_samples_per_second": 363.896, |
|
"eval_steps_per_second": 22.861, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 4.410823420819664e-05, |
|
"loss": 0.0661, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"eval_loss": 0.12210354208946228, |
|
"eval_macro_f1": 0.6129525814973475, |
|
"eval_macro_precision": 0.694661979564102, |
|
"eval_macro_recall": 0.5604968051311103, |
|
"eval_micro_f1": 0.8090971743625087, |
|
"eval_micro_precision": 0.8482658959537572, |
|
"eval_micro_recall": 0.7733860342555995, |
|
"eval_runtime": 2.6691, |
|
"eval_samples_per_second": 363.79, |
|
"eval_steps_per_second": 22.854, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"eval_loss": 0.13834641873836517, |
|
"eval_macro_f1": 0.59964421079272, |
|
"eval_macro_precision": 0.6259545070217613, |
|
"eval_macro_recall": 0.5849130461175929, |
|
"eval_micro_f1": 0.7874705287975748, |
|
"eval_micro_precision": 0.8056512749827704, |
|
"eval_micro_recall": 0.7700922266139657, |
|
"eval_runtime": 2.6674, |
|
"eval_samples_per_second": 364.02, |
|
"eval_steps_per_second": 22.868, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"eval_loss": 0.13302326202392578, |
|
"eval_macro_f1": 0.6249414362192053, |
|
"eval_macro_precision": 0.6603169616331872, |
|
"eval_macro_recall": 0.6008160113233191, |
|
"eval_micro_f1": 0.8105579685933846, |
|
"eval_micro_precision": 0.8223728813559322, |
|
"eval_micro_recall": 0.7990777338603425, |
|
"eval_runtime": 2.6684, |
|
"eval_samples_per_second": 363.887, |
|
"eval_steps_per_second": 22.86, |
|
"step": 1846 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"eval_loss": 0.13799144327640533, |
|
"eval_macro_f1": 0.6256821918613437, |
|
"eval_macro_precision": 0.6740205274811021, |
|
"eval_macro_recall": 0.5938987995613589, |
|
"eval_micro_f1": 0.8119891008174386, |
|
"eval_micro_precision": 0.840620592383639, |
|
"eval_micro_recall": 0.7852437417654808, |
|
"eval_runtime": 2.6691, |
|
"eval_samples_per_second": 363.79, |
|
"eval_steps_per_second": 22.854, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 4.235790745390312e-05, |
|
"loss": 0.0324, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"eval_loss": 0.13957080245018005, |
|
"eval_macro_f1": 0.6541379860188454, |
|
"eval_macro_precision": 0.7002594602789083, |
|
"eval_macro_recall": 0.6359217043250158, |
|
"eval_micro_f1": 0.803843605036448, |
|
"eval_micro_precision": 0.8086666666666666, |
|
"eval_micro_recall": 0.7990777338603425, |
|
"eval_runtime": 2.6709, |
|
"eval_samples_per_second": 363.55, |
|
"eval_steps_per_second": 22.839, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"eval_loss": 0.13600043952465057, |
|
"eval_macro_f1": 0.6528569810495737, |
|
"eval_macro_precision": 0.7119806310239326, |
|
"eval_macro_recall": 0.6199612374678921, |
|
"eval_micro_f1": 0.8169491525423729, |
|
"eval_micro_precision": 0.8414804469273743, |
|
"eval_micro_recall": 0.7938076416337286, |
|
"eval_runtime": 2.6686, |
|
"eval_samples_per_second": 363.864, |
|
"eval_steps_per_second": 22.859, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"eval_loss": 0.1411595642566681, |
|
"eval_macro_f1": 0.6780053870985077, |
|
"eval_macro_precision": 0.7825949490722317, |
|
"eval_macro_recall": 0.6371295063080809, |
|
"eval_micro_f1": 0.8134328358208954, |
|
"eval_micro_precision": 0.8384615384615385, |
|
"eval_micro_recall": 0.7898550724637681, |
|
"eval_runtime": 2.6685, |
|
"eval_samples_per_second": 363.87, |
|
"eval_steps_per_second": 22.859, |
|
"step": 2414 |
|
}, |
|
{ |
|
"epoch": 17.73, |
|
"learning_rate": 4.06075806996096e-05, |
|
"loss": 0.0173, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"eval_loss": 0.14683738350868225, |
|
"eval_macro_f1": 0.6538188838769178, |
|
"eval_macro_precision": 0.7058131112592007, |
|
"eval_macro_recall": 0.628120629850237, |
|
"eval_micro_f1": 0.8044280442804428, |
|
"eval_micro_precision": 0.8195488721804511, |
|
"eval_micro_recall": 0.7898550724637681, |
|
"eval_runtime": 2.6704, |
|
"eval_samples_per_second": 363.611, |
|
"eval_steps_per_second": 22.843, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"eval_loss": 0.14477181434631348, |
|
"eval_macro_f1": 0.7213223353389469, |
|
"eval_macro_precision": 0.7931053666626622, |
|
"eval_macro_recall": 0.6989155005450692, |
|
"eval_micro_f1": 0.8122731771692511, |
|
"eval_micro_precision": 0.813615333773959, |
|
"eval_micro_recall": 0.810935441370224, |
|
"eval_runtime": 2.6682, |
|
"eval_samples_per_second": 363.916, |
|
"eval_steps_per_second": 22.862, |
|
"step": 2698 |
|
}, |
|
{ |
|
"epoch": 20.14, |
|
"eval_loss": 0.1553182750940323, |
|
"eval_macro_f1": 0.6767777822180807, |
|
"eval_macro_precision": 0.7296284296772766, |
|
"eval_macro_recall": 0.6640188299255232, |
|
"eval_micro_f1": 0.8082867477803354, |
|
"eval_micro_precision": 0.8069599474720945, |
|
"eval_micro_recall": 0.8096179183135704, |
|
"eval_runtime": 2.6706, |
|
"eval_samples_per_second": 363.587, |
|
"eval_steps_per_second": 22.841, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 21.15, |
|
"eval_loss": 0.14831620454788208, |
|
"eval_macro_f1": 0.6951923518777028, |
|
"eval_macro_precision": 0.8479068478364982, |
|
"eval_macro_recall": 0.6493756779822191, |
|
"eval_micro_f1": 0.8177506775067751, |
|
"eval_micro_precision": 0.8417015341701534, |
|
"eval_micro_recall": 0.7951251646903821, |
|
"eval_runtime": 2.6679, |
|
"eval_samples_per_second": 363.953, |
|
"eval_steps_per_second": 22.864, |
|
"step": 2982 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"learning_rate": 3.885725394531609e-05, |
|
"loss": 0.0121, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 22.16, |
|
"eval_loss": 0.1529681533575058, |
|
"eval_macro_f1": 0.7215214471733791, |
|
"eval_macro_precision": 0.840496134606828, |
|
"eval_macro_recall": 0.6777226713039917, |
|
"eval_micro_f1": 0.8099395567494962, |
|
"eval_micro_precision": 0.826027397260274, |
|
"eval_micro_recall": 0.7944664031620553, |
|
"eval_runtime": 2.6724, |
|
"eval_samples_per_second": 363.343, |
|
"eval_steps_per_second": 22.826, |
|
"step": 3124 |
|
}, |
|
{ |
|
"epoch": 23.16, |
|
"eval_loss": 0.15208803117275238, |
|
"eval_macro_f1": 0.7282532116551124, |
|
"eval_macro_precision": 0.8178655579947314, |
|
"eval_macro_recall": 0.702450635375965, |
|
"eval_micro_f1": 0.8099009900990098, |
|
"eval_micro_precision": 0.8115079365079365, |
|
"eval_micro_recall": 0.808300395256917, |
|
"eval_runtime": 2.6735, |
|
"eval_samples_per_second": 363.196, |
|
"eval_steps_per_second": 22.817, |
|
"step": 3266 |
|
}, |
|
{ |
|
"epoch": 24.17, |
|
"eval_loss": 0.17097046971321106, |
|
"eval_macro_f1": 0.6781929633024913, |
|
"eval_macro_precision": 0.7664743620916477, |
|
"eval_macro_recall": 0.647287649907105, |
|
"eval_micro_f1": 0.8018836192398252, |
|
"eval_micro_precision": 0.8192439862542955, |
|
"eval_micro_recall": 0.7852437417654808, |
|
"eval_runtime": 2.6679, |
|
"eval_samples_per_second": 363.956, |
|
"eval_steps_per_second": 22.864, |
|
"step": 3408 |
|
}, |
|
{ |
|
"epoch": 24.82, |
|
"learning_rate": 3.710692719102257e-05, |
|
"loss": 0.0086, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 25.18, |
|
"eval_loss": 0.16351111233234406, |
|
"eval_macro_f1": 0.7009939961760294, |
|
"eval_macro_precision": 0.8049793687947511, |
|
"eval_macro_recall": 0.6631263784729529, |
|
"eval_micro_f1": 0.8080672268907563, |
|
"eval_micro_precision": 0.8249828414550446, |
|
"eval_micro_recall": 0.7918313570487484, |
|
"eval_runtime": 2.6758, |
|
"eval_samples_per_second": 362.882, |
|
"eval_steps_per_second": 22.797, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 26.18, |
|
"eval_loss": 0.1703951209783554, |
|
"eval_macro_f1": 0.728858993061112, |
|
"eval_macro_precision": 0.8293166501604144, |
|
"eval_macro_recall": 0.6848344575219967, |
|
"eval_micro_f1": 0.8101945003353456, |
|
"eval_micro_precision": 0.825136612021858, |
|
"eval_micro_recall": 0.7957839262187089, |
|
"eval_runtime": 2.6853, |
|
"eval_samples_per_second": 361.599, |
|
"eval_steps_per_second": 22.716, |
|
"step": 3692 |
|
}, |
|
{ |
|
"epoch": 27.19, |
|
"eval_loss": 0.1729104220867157, |
|
"eval_macro_f1": 0.7246800474910258, |
|
"eval_macro_precision": 0.8088443156400115, |
|
"eval_macro_recall": 0.6938957592472167, |
|
"eval_micro_f1": 0.8112827400940228, |
|
"eval_micro_precision": 0.8273972602739726, |
|
"eval_micro_recall": 0.7957839262187089, |
|
"eval_runtime": 2.6704, |
|
"eval_samples_per_second": 363.619, |
|
"eval_steps_per_second": 22.843, |
|
"step": 3834 |
|
}, |
|
{ |
|
"epoch": 28.2, |
|
"eval_loss": 0.17421075701713562, |
|
"eval_macro_f1": 0.721641920467399, |
|
"eval_macro_precision": 0.7953793533738551, |
|
"eval_macro_recall": 0.6939586213926427, |
|
"eval_micro_f1": 0.8118745830553702, |
|
"eval_micro_precision": 0.8222972972972973, |
|
"eval_micro_recall": 0.8017127799736495, |
|
"eval_runtime": 2.6674, |
|
"eval_samples_per_second": 364.019, |
|
"eval_steps_per_second": 22.868, |
|
"step": 3976 |
|
}, |
|
{ |
|
"epoch": 28.37, |
|
"learning_rate": 3.535660043672905e-05, |
|
"loss": 0.0054, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 29.21, |
|
"eval_loss": 0.17936377227306366, |
|
"eval_macro_f1": 0.7165402537125084, |
|
"eval_macro_precision": 0.7314012003015316, |
|
"eval_macro_recall": 0.7193045979731636, |
|
"eval_micro_f1": 0.8045826513911619, |
|
"eval_micro_precision": 0.7996096291476903, |
|
"eval_micro_recall": 0.8096179183135704, |
|
"eval_runtime": 2.669, |
|
"eval_samples_per_second": 363.809, |
|
"eval_steps_per_second": 22.855, |
|
"step": 4118 |
|
}, |
|
{ |
|
"epoch": 30.21, |
|
"eval_loss": 0.184128999710083, |
|
"eval_macro_f1": 0.7249069877656021, |
|
"eval_macro_precision": 0.7920679958383108, |
|
"eval_macro_recall": 0.6986794530201526, |
|
"eval_micro_f1": 0.8046822742474916, |
|
"eval_micro_precision": 0.8172554347826086, |
|
"eval_micro_recall": 0.7924901185770751, |
|
"eval_runtime": 2.6699, |
|
"eval_samples_per_second": 363.687, |
|
"eval_steps_per_second": 22.847, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 31.22, |
|
"eval_loss": 0.18270088732242584, |
|
"eval_macro_f1": 0.7341637485973148, |
|
"eval_macro_precision": 0.8378115033399074, |
|
"eval_macro_recall": 0.6892808840460984, |
|
"eval_micro_f1": 0.8134680134680136, |
|
"eval_micro_precision": 0.8319559228650137, |
|
"eval_micro_recall": 0.7957839262187089, |
|
"eval_runtime": 2.6693, |
|
"eval_samples_per_second": 363.77, |
|
"eval_steps_per_second": 22.853, |
|
"step": 4402 |
|
}, |
|
{ |
|
"epoch": 31.91, |
|
"learning_rate": 3.3606273682435536e-05, |
|
"loss": 0.0035, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 32.23, |
|
"eval_loss": 0.18414482474327087, |
|
"eval_macro_f1": 0.7314425663595913, |
|
"eval_macro_precision": 0.8099477622958757, |
|
"eval_macro_recall": 0.6986944147838622, |
|
"eval_micro_f1": 0.8134003350083752, |
|
"eval_micro_precision": 0.8275391956373551, |
|
"eval_micro_recall": 0.7997364953886693, |
|
"eval_runtime": 2.6686, |
|
"eval_samples_per_second": 363.857, |
|
"eval_steps_per_second": 22.858, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 33.23, |
|
"eval_loss": 0.18519891798496246, |
|
"eval_macro_f1": 0.7352936873025266, |
|
"eval_macro_precision": 0.8265688293622399, |
|
"eval_macro_recall": 0.6958315897622773, |
|
"eval_micro_f1": 0.8212722988892629, |
|
"eval_micro_precision": 0.8396421197522368, |
|
"eval_micro_recall": 0.8036890645586298, |
|
"eval_runtime": 2.6687, |
|
"eval_samples_per_second": 363.853, |
|
"eval_steps_per_second": 22.858, |
|
"step": 4686 |
|
}, |
|
{ |
|
"epoch": 34.24, |
|
"eval_loss": 0.1865757256746292, |
|
"eval_macro_f1": 0.7207303759640109, |
|
"eval_macro_precision": 0.7610227795725935, |
|
"eval_macro_recall": 0.6996289765073358, |
|
"eval_micro_f1": 0.8130245048674052, |
|
"eval_micro_precision": 0.8288843258042436, |
|
"eval_micro_recall": 0.7977602108036891, |
|
"eval_runtime": 2.6701, |
|
"eval_samples_per_second": 363.659, |
|
"eval_steps_per_second": 22.846, |
|
"step": 4828 |
|
}, |
|
{ |
|
"epoch": 35.25, |
|
"eval_loss": 0.19158615171909332, |
|
"eval_macro_f1": 0.7231833700345036, |
|
"eval_macro_precision": 0.759857266859788, |
|
"eval_macro_recall": 0.7038379676451936, |
|
"eval_micro_f1": 0.8071928071928073, |
|
"eval_micro_precision": 0.8161616161616162, |
|
"eval_micro_recall": 0.7984189723320159, |
|
"eval_runtime": 2.6696, |
|
"eval_samples_per_second": 363.724, |
|
"eval_steps_per_second": 22.85, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 35.46, |
|
"learning_rate": 3.185594692814201e-05, |
|
"loss": 0.0025, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 36.26, |
|
"eval_loss": 0.18590226769447327, |
|
"eval_macro_f1": 0.7370102490601179, |
|
"eval_macro_precision": 0.8141501549264045, |
|
"eval_macro_recall": 0.7143003391573518, |
|
"eval_micro_f1": 0.8030253206182177, |
|
"eval_micro_precision": 0.8017071569271176, |
|
"eval_micro_recall": 0.8043478260869565, |
|
"eval_runtime": 2.6693, |
|
"eval_samples_per_second": 363.761, |
|
"eval_steps_per_second": 22.852, |
|
"step": 5112 |
|
} |
|
], |
|
"max_steps": 14100, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.9741043739581184e+16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"adam_epsilon": 2.7636948844125687e-08, |
|
"learning_rate": 4.7699904708006934e-05, |
|
"per_device_eval_batch_size": 16, |
|
"per_device_train_batch_size": 16, |
|
"seed": 320, |
|
"warmup_steps": 474, |
|
"weight_decay": 0.08343382340090989 |
|
} |
|
} |
|
|