|
{
  "best_metric": 0.6745562130177515,
  "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/MY DATA/ViViT_WLASL_100_SR_4_ep200_p20/checkpoint-4686",
  "epoch": 45.00499305555556,
  "eval_steps": 500,
  "global_step": 8291,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002777777777777778,
      "grad_norm": 46.614158630371094,
      "learning_rate": 1.3472222222222222e-06,
      "loss": 18.9788,
      "step": 100
    },
    {
      "epoch": 0.005,
      "eval_accuracy": 0.026627218934911243,
      "eval_f1": 0.008186647196111367,
      "eval_loss": 4.690920829772949,
      "eval_precision": 0.005517409748672709,
      "eval_recall": 0.026627218934911243,
      "eval_runtime": 19.4352,
      "eval_samples_per_second": 17.391,
      "eval_steps_per_second": 8.696,
      "step": 180
    },
    {
      "epoch": 1.000548611111111,
      "grad_norm": 44.873966217041016,
      "learning_rate": 2.7361111111111114e-06,
      "loss": 18.8244,
      "step": 200
    },
    {
      "epoch": 1.0033263888888888,
      "grad_norm": 43.23536682128906,
      "learning_rate": 4.125e-06,
      "loss": 18.6803,
      "step": 300
    },
    {
      "epoch": 1.0049930555555555,
      "eval_accuracy": 0.026627218934911243,
      "eval_f1": 0.0057761083277597,
      "eval_loss": 4.617973327636719,
      "eval_precision": 0.003442859602301134,
      "eval_recall": 0.026627218934911243,
      "eval_runtime": 18.7273,
      "eval_samples_per_second": 18.049,
      "eval_steps_per_second": 9.024,
      "step": 360
    },
    {
      "epoch": 2.001097222222222,
      "grad_norm": 43.87873458862305,
      "learning_rate": 5.513888888888889e-06,
      "loss": 18.3151,
      "step": 400
    },
    {
      "epoch": 2.003875,
      "grad_norm": 45.60523986816406,
      "learning_rate": 6.902777777777777e-06,
      "loss": 18.0341,
      "step": 500
    },
    {
      "epoch": 2.004986111111111,
      "eval_accuracy": 0.047337278106508875,
      "eval_f1": 0.023768276430998322,
      "eval_loss": 4.518121242523193,
      "eval_precision": 0.020485887948289476,
      "eval_recall": 0.047337278106508875,
      "eval_runtime": 19.5951,
      "eval_samples_per_second": 17.249,
      "eval_steps_per_second": 8.625,
      "step": 540
    },
    {
      "epoch": 3.0016458333333333,
      "grad_norm": 49.1752815246582,
      "learning_rate": 8.291666666666667e-06,
      "loss": 17.4359,
      "step": 600
    },
    {
      "epoch": 3.004423611111111,
      "grad_norm": 51.076786041259766,
      "learning_rate": 9.680555555555556e-06,
      "loss": 17.0617,
      "step": 700
    },
    {
      "epoch": 3.0050069444444443,
      "eval_accuracy": 0.07396449704142012,
      "eval_f1": 0.047821065111354005,
      "eval_loss": 4.337636470794678,
      "eval_precision": 0.054133340554329064,
      "eval_recall": 0.07396449704142012,
      "eval_runtime": 18.9343,
      "eval_samples_per_second": 17.851,
      "eval_steps_per_second": 8.926,
      "step": 721
    },
    {
      "epoch": 4.002194444444444,
      "grad_norm": 46.15924835205078,
      "learning_rate": 1.1069444444444445e-05,
      "loss": 16.0988,
      "step": 800
    },
    {
      "epoch": 4.004972222222222,
      "grad_norm": 51.79167938232422,
      "learning_rate": 1.2458333333333334e-05,
      "loss": 15.6302,
      "step": 900
    },
    {
      "epoch": 4.005,
      "eval_accuracy": 0.11834319526627218,
      "eval_f1": 0.081513142682628,
      "eval_loss": 4.070884704589844,
      "eval_precision": 0.07720141700404858,
      "eval_recall": 0.11834319526627218,
      "eval_runtime": 20.1431,
      "eval_samples_per_second": 16.78,
      "eval_steps_per_second": 8.39,
      "step": 901
    },
    {
      "epoch": 5.002743055555555,
      "grad_norm": 55.07365798950195,
      "learning_rate": 1.3847222222222223e-05,
      "loss": 14.0851,
      "step": 1000
    },
    {
      "epoch": 5.0049930555555555,
      "eval_accuracy": 0.1863905325443787,
      "eval_f1": 0.14494279207764105,
      "eval_loss": 3.755218029022217,
      "eval_precision": 0.1535636372174834,
      "eval_recall": 0.1863905325443787,
      "eval_runtime": 19.7392,
      "eval_samples_per_second": 17.123,
      "eval_steps_per_second": 8.562,
      "step": 1081
    },
    {
      "epoch": 6.0005138888888885,
      "grad_norm": 49.54998779296875,
      "learning_rate": 1.5236111111111112e-05,
      "loss": 13.3452,
      "step": 1100
    },
    {
      "epoch": 6.003291666666667,
      "grad_norm": 54.62944793701172,
      "learning_rate": 1.6625e-05,
      "loss": 11.953,
      "step": 1200
    },
    {
      "epoch": 6.004986111111111,
      "eval_accuracy": 0.26627218934911245,
      "eval_f1": 0.2220989336141275,
      "eval_loss": 3.406508684158325,
      "eval_precision": 0.26431351743742965,
      "eval_recall": 0.26627218934911245,
      "eval_runtime": 19.5936,
      "eval_samples_per_second": 17.251,
      "eval_steps_per_second": 8.625,
      "step": 1261
    },
    {
      "epoch": 7.0010625,
      "grad_norm": 56.319576263427734,
      "learning_rate": 1.8013888888888888e-05,
      "loss": 11.0917,
      "step": 1300
    },
    {
      "epoch": 7.003840277777778,
      "grad_norm": 54.84210205078125,
      "learning_rate": 1.940277777777778e-05,
      "loss": 9.8223,
      "step": 1400
    },
    {
      "epoch": 7.005006944444444,
      "eval_accuracy": 0.33727810650887574,
      "eval_f1": 0.29438573252416295,
      "eval_loss": 3.103813886642456,
      "eval_precision": 0.3283793047846302,
      "eval_recall": 0.33727810650887574,
      "eval_runtime": 20.1573,
      "eval_samples_per_second": 16.768,
      "eval_steps_per_second": 8.384,
      "step": 1442
    },
    {
      "epoch": 8.001611111111112,
      "grad_norm": 51.06685256958008,
      "learning_rate": 2.0791666666666666e-05,
      "loss": 8.6714,
      "step": 1500
    },
    {
      "epoch": 8.004388888888888,
      "grad_norm": 50.67875289916992,
      "learning_rate": 2.2180555555555557e-05,
      "loss": 7.8126,
      "step": 1600
    },
    {
      "epoch": 8.005,
      "eval_accuracy": 0.41420118343195267,
      "eval_f1": 0.3676157668761219,
      "eval_loss": 2.7834277153015137,
      "eval_precision": 0.42014839107599283,
      "eval_recall": 0.41420118343195267,
      "eval_runtime": 19.7317,
      "eval_samples_per_second": 17.13,
      "eval_steps_per_second": 8.565,
      "step": 1622
    },
    {
      "epoch": 9.002159722222222,
      "grad_norm": 49.01296615600586,
      "learning_rate": 2.3569444444444447e-05,
      "loss": 6.4863,
      "step": 1700
    },
    {
      "epoch": 9.0049375,
      "grad_norm": 47.182640075683594,
      "learning_rate": 2.4958333333333335e-05,
      "loss": 6.0952,
      "step": 1800
    },
    {
      "epoch": 9.004993055555556,
      "eval_accuracy": 0.47633136094674555,
      "eval_f1": 0.4368087369566659,
      "eval_loss": 2.512887477874756,
      "eval_precision": 0.4949564324564325,
      "eval_recall": 0.47633136094674555,
      "eval_runtime": 20.1068,
      "eval_samples_per_second": 16.81,
      "eval_steps_per_second": 8.405,
      "step": 1802
    },
    {
      "epoch": 10.002708333333333,
      "grad_norm": 49.08199691772461,
      "learning_rate": 2.6347222222222222e-05,
      "loss": 4.3155,
      "step": 1900
    },
    {
      "epoch": 10.00498611111111,
      "eval_accuracy": 0.5059171597633136,
      "eval_f1": 0.48941789637055916,
      "eval_loss": 2.2756905555725098,
      "eval_precision": 0.5593944517021441,
      "eval_recall": 0.5059171597633136,
      "eval_runtime": 18.9167,
      "eval_samples_per_second": 17.868,
      "eval_steps_per_second": 8.934,
      "step": 1982
    },
    {
      "epoch": 11.000479166666667,
      "grad_norm": 37.75128936767578,
      "learning_rate": 2.7736111111111113e-05,
      "loss": 4.3448,
      "step": 2000
    },
    {
      "epoch": 11.003256944444445,
      "grad_norm": 36.987205505371094,
      "learning_rate": 2.9111111111111112e-05,
      "loss": 3.0214,
      "step": 2100
    },
    {
      "epoch": 11.005006944444444,
      "eval_accuracy": 0.5473372781065089,
      "eval_f1": 0.5177564874310436,
      "eval_loss": 2.0459656715393066,
      "eval_precision": 0.563966482235713,
      "eval_recall": 0.5473372781065089,
      "eval_runtime": 19.7866,
      "eval_samples_per_second": 17.082,
      "eval_steps_per_second": 8.541,
      "step": 2163
    },
    {
      "epoch": 12.001027777777777,
      "grad_norm": 30.315874099731445,
      "learning_rate": 3.05e-05,
      "loss": 2.7849,
      "step": 2200
    },
    {
      "epoch": 12.003805555555555,
      "grad_norm": 27.424291610717773,
      "learning_rate": 3.188888888888889e-05,
      "loss": 2.0687,
      "step": 2300
    },
    {
      "epoch": 12.005,
      "eval_accuracy": 0.591715976331361,
      "eval_f1": 0.5730650184792196,
      "eval_loss": 1.8803057670593262,
      "eval_precision": 0.6183537616229923,
      "eval_recall": 0.591715976331361,
      "eval_runtime": 19.466,
      "eval_samples_per_second": 17.364,
      "eval_steps_per_second": 8.682,
      "step": 2343
    },
    {
      "epoch": 13.00157638888889,
      "grad_norm": 19.18665313720703,
      "learning_rate": 3.327777777777778e-05,
      "loss": 1.5252,
      "step": 2400
    },
    {
      "epoch": 13.004354166666667,
      "grad_norm": 31.665971755981445,
      "learning_rate": 3.466666666666667e-05,
      "loss": 1.3523,
      "step": 2500
    },
    {
      "epoch": 13.004993055555556,
      "eval_accuracy": 0.591715976331361,
      "eval_f1": 0.5601332592456852,
      "eval_loss": 1.7261461019515991,
      "eval_precision": 0.6065053536207382,
      "eval_recall": 0.591715976331361,
      "eval_runtime": 19.273,
      "eval_samples_per_second": 17.537,
      "eval_steps_per_second": 8.769,
      "step": 2523
    },
    {
      "epoch": 14.002125,
      "grad_norm": 10.98436450958252,
      "learning_rate": 3.605555555555556e-05,
      "loss": 0.9208,
      "step": 2600
    },
    {
      "epoch": 14.004902777777778,
      "grad_norm": 45.65860366821289,
      "learning_rate": 3.7444444444444446e-05,
      "loss": 0.7828,
      "step": 2700
    },
    {
      "epoch": 14.00498611111111,
      "eval_accuracy": 0.6035502958579881,
      "eval_f1": 0.5923542762300159,
      "eval_loss": 1.6275168657302856,
      "eval_precision": 0.6644219028834413,
      "eval_recall": 0.6035502958579881,
      "eval_runtime": 19.479,
      "eval_samples_per_second": 17.352,
      "eval_steps_per_second": 8.676,
      "step": 2703
    },
    {
      "epoch": 15.002673611111112,
      "grad_norm": 24.026081085205078,
      "learning_rate": 3.883333333333333e-05,
      "loss": 0.4222,
      "step": 2800
    },
    {
      "epoch": 15.005006944444444,
      "eval_accuracy": 0.6420118343195266,
      "eval_f1": 0.6244853437161129,
      "eval_loss": 1.5283950567245483,
      "eval_precision": 0.67558435154589,
      "eval_recall": 0.6420118343195266,
      "eval_runtime": 19.8172,
      "eval_samples_per_second": 17.056,
      "eval_steps_per_second": 8.528,
      "step": 2884
    },
    {
      "epoch": 16.000444444444444,
      "grad_norm": 11.13332748413086,
      "learning_rate": 4.022222222222222e-05,
      "loss": 0.4752,
      "step": 2900
    },
    {
      "epoch": 16.003222222222224,
      "grad_norm": 8.170760154724121,
      "learning_rate": 4.1611111111111114e-05,
      "loss": 0.3113,
      "step": 3000
    },
    {
      "epoch": 16.005,
      "eval_accuracy": 0.6272189349112426,
      "eval_f1": 0.6092443552502724,
      "eval_loss": 1.5459295511245728,
      "eval_precision": 0.6664412510566357,
      "eval_recall": 0.6272189349112426,
      "eval_runtime": 19.569,
      "eval_samples_per_second": 17.272,
      "eval_steps_per_second": 8.636,
      "step": 3064
    },
    {
      "epoch": 17.000993055555554,
      "grad_norm": 1.5062611103057861,
      "learning_rate": 4.3e-05,
      "loss": 0.2905,
      "step": 3100
    },
    {
      "epoch": 17.003770833333334,
      "grad_norm": 1.255529761314392,
      "learning_rate": 4.438888888888889e-05,
      "loss": 0.2021,
      "step": 3200
    },
    {
      "epoch": 17.004993055555556,
      "eval_accuracy": 0.665680473372781,
      "eval_f1": 0.6484432398633582,
      "eval_loss": 1.4440850019454956,
      "eval_precision": 0.6991018596787828,
      "eval_recall": 0.665680473372781,
      "eval_runtime": 20.6892,
      "eval_samples_per_second": 16.337,
      "eval_steps_per_second": 8.168,
      "step": 3244
    },
    {
      "epoch": 18.001541666666668,
      "grad_norm": 2.2327980995178223,
      "learning_rate": 4.577777777777778e-05,
      "loss": 0.1544,
      "step": 3300
    },
    {
      "epoch": 18.004319444444445,
      "grad_norm": 1.6391757726669312,
      "learning_rate": 4.716666666666667e-05,
      "loss": 0.1698,
      "step": 3400
    },
    {
      "epoch": 18.004986111111112,
      "eval_accuracy": 0.6124260355029586,
      "eval_f1": 0.5942196856398039,
      "eval_loss": 1.5340176820755005,
      "eval_precision": 0.6510575962499039,
      "eval_recall": 0.6124260355029586,
      "eval_runtime": 19.0949,
      "eval_samples_per_second": 17.701,
      "eval_steps_per_second": 8.851,
      "step": 3424
    },
    {
      "epoch": 19.00209027777778,
      "grad_norm": 7.581464767456055,
      "learning_rate": 4.855555555555556e-05,
      "loss": 0.0791,
      "step": 3500
    },
    {
      "epoch": 19.004868055555555,
      "grad_norm": 0.5042413473129272,
      "learning_rate": 4.994444444444445e-05,
      "loss": 0.1199,
      "step": 3600
    },
    {
      "epoch": 19.005006944444446,
      "eval_accuracy": 0.650887573964497,
      "eval_f1": 0.6288250211327134,
      "eval_loss": 1.3934611082077026,
      "eval_precision": 0.6745703014933784,
      "eval_recall": 0.650887573964497,
      "eval_runtime": 18.5254,
      "eval_samples_per_second": 18.245,
      "eval_steps_per_second": 9.123,
      "step": 3605
    },
    {
      "epoch": 20.00263888888889,
      "grad_norm": 0.40117892622947693,
      "learning_rate": 4.9851851851851855e-05,
      "loss": 0.0244,
      "step": 3700
    },
    {
      "epoch": 20.005,
      "eval_accuracy": 0.6686390532544378,
      "eval_f1": 0.6573669919823766,
      "eval_loss": 1.478171706199646,
      "eval_precision": 0.7130459284305437,
      "eval_recall": 0.6686390532544378,
      "eval_runtime": 19.5864,
      "eval_samples_per_second": 17.257,
      "eval_steps_per_second": 8.628,
      "step": 3785
    },
    {
      "epoch": 21.000409722222223,
      "grad_norm": 0.6385509967803955,
      "learning_rate": 4.969753086419753e-05,
      "loss": 0.1644,
      "step": 3800
    },
    {
      "epoch": 21.0031875,
      "grad_norm": 0.36917442083358765,
      "learning_rate": 4.954320987654321e-05,
      "loss": 0.0407,
      "step": 3900
    },
    {
      "epoch": 21.004993055555556,
      "eval_accuracy": 0.6686390532544378,
      "eval_f1": 0.6557107422492038,
      "eval_loss": 1.388999581336975,
      "eval_precision": 0.7149478726401803,
      "eval_recall": 0.6686390532544378,
      "eval_runtime": 19.0631,
      "eval_samples_per_second": 17.731,
      "eval_steps_per_second": 8.865,
      "step": 3965
    },
    {
      "epoch": 22.000958333333333,
      "grad_norm": 0.5856227874755859,
      "learning_rate": 4.938888888888889e-05,
      "loss": 0.1314,
      "step": 4000
    },
    {
      "epoch": 22.00373611111111,
      "grad_norm": 0.7390286326408386,
      "learning_rate": 4.923611111111112e-05,
      "loss": 0.0719,
      "step": 4100
    },
    {
      "epoch": 22.004986111111112,
      "eval_accuracy": 0.6597633136094675,
      "eval_f1": 0.6477249246480016,
      "eval_loss": 1.4896520376205444,
      "eval_precision": 0.7189349112426036,
      "eval_recall": 0.6597633136094675,
      "eval_runtime": 19.4131,
      "eval_samples_per_second": 17.411,
      "eval_steps_per_second": 8.705,
      "step": 4145
    },
    {
      "epoch": 23.001506944444444,
      "grad_norm": 0.09171107411384583,
      "learning_rate": 4.9081790123456794e-05,
      "loss": 0.1134,
      "step": 4200
    },
    {
      "epoch": 23.004284722222224,
      "grad_norm": 27.191944122314453,
      "learning_rate": 4.892746913580247e-05,
      "loss": 0.1163,
      "step": 4300
    },
    {
      "epoch": 23.005006944444446,
      "eval_accuracy": 0.6715976331360947,
      "eval_f1": 0.6638741600280061,
      "eval_loss": 1.3919016122817993,
      "eval_precision": 0.7218300929839392,
      "eval_recall": 0.6715976331360947,
      "eval_runtime": 18.0114,
      "eval_samples_per_second": 18.766,
      "eval_steps_per_second": 9.383,
      "step": 4326
    },
    {
      "epoch": 24.002055555555554,
      "grad_norm": 0.14431528747081757,
      "learning_rate": 4.877314814814815e-05,
      "loss": 0.061,
      "step": 4400
    },
    {
      "epoch": 24.004833333333334,
      "grad_norm": 33.57377624511719,
      "learning_rate": 4.861882716049383e-05,
      "loss": 0.1167,
      "step": 4500
    },
    {
      "epoch": 24.005,
      "eval_accuracy": 0.6538461538461539,
      "eval_f1": 0.6379572761229566,
      "eval_loss": 1.569033145904541,
      "eval_precision": 0.7189384333615103,
      "eval_recall": 0.6538461538461539,
      "eval_runtime": 18.5212,
      "eval_samples_per_second": 18.249,
      "eval_steps_per_second": 9.125,
      "step": 4506
    },
    {
      "epoch": 25.002604166666668,
      "grad_norm": 0.03929486870765686,
      "learning_rate": 4.8464506172839505e-05,
      "loss": 0.0366,
      "step": 4600
    },
    {
      "epoch": 25.004993055555556,
      "eval_accuracy": 0.6745562130177515,
      "eval_f1": 0.6541387606476363,
      "eval_loss": 1.5032190084457397,
      "eval_precision": 0.6978867286559595,
      "eval_recall": 0.6745562130177515,
      "eval_runtime": 18.3411,
      "eval_samples_per_second": 18.429,
      "eval_steps_per_second": 9.214,
      "step": 4686
    },
    {
      "epoch": 26.000375,
      "grad_norm": 0.10611555725336075,
      "learning_rate": 4.831018518518518e-05,
      "loss": 0.1361,
      "step": 4700
    },
    {
      "epoch": 26.00315277777778,
      "grad_norm": 1.335172176361084,
      "learning_rate": 4.815586419753087e-05,
      "loss": 0.1065,
      "step": 4800
    },
    {
      "epoch": 26.004986111111112,
      "eval_accuracy": 0.6390532544378699,
      "eval_f1": 0.6134555615324846,
      "eval_loss": 1.4892995357513428,
      "eval_precision": 0.6475309946463793,
      "eval_recall": 0.6390532544378699,
      "eval_runtime": 19.1889,
      "eval_samples_per_second": 17.614,
      "eval_steps_per_second": 8.807,
      "step": 4866
    },
    {
      "epoch": 27.000923611111112,
      "grad_norm": 0.25120797753334045,
      "learning_rate": 4.8001543209876545e-05,
      "loss": 0.1148,
      "step": 4900
    },
    {
      "epoch": 27.00370138888889,
      "grad_norm": 0.08281093835830688,
      "learning_rate": 4.784722222222223e-05,
      "loss": 0.0454,
      "step": 5000
    },
    {
      "epoch": 27.005006944444446,
      "eval_accuracy": 0.6242603550295858,
      "eval_f1": 0.6022126952600326,
      "eval_loss": 1.5013433694839478,
      "eval_precision": 0.6600591715976332,
      "eval_recall": 0.6242603550295858,
      "eval_runtime": 20.5193,
      "eval_samples_per_second": 16.472,
      "eval_steps_per_second": 8.236,
      "step": 5047
    },
    {
      "epoch": 28.001472222222223,
      "grad_norm": 0.05560300499200821,
      "learning_rate": 4.769290123456791e-05,
      "loss": 0.0973,
      "step": 5100
    },
    {
      "epoch": 28.00425,
      "grad_norm": 1.4039995670318604,
      "learning_rate": 4.7538580246913585e-05,
      "loss": 0.0844,
      "step": 5200
    },
    {
      "epoch": 28.005,
      "eval_accuracy": 0.6597633136094675,
      "eval_f1": 0.6388020099558561,
      "eval_loss": 1.5608867406845093,
      "eval_precision": 0.6974077204846435,
      "eval_recall": 0.6597633136094675,
      "eval_runtime": 20.1446,
      "eval_samples_per_second": 16.779,
      "eval_steps_per_second": 8.389,
      "step": 5227
    },
    {
      "epoch": 29.002020833333333,
      "grad_norm": 0.056026242673397064,
      "learning_rate": 4.738425925925926e-05,
      "loss": 0.0921,
      "step": 5300
    },
    {
      "epoch": 29.00479861111111,
      "grad_norm": 0.39889276027679443,
      "learning_rate": 4.722993827160494e-05,
      "loss": 0.109,
      "step": 5400
    },
    {
      "epoch": 29.004993055555556,
      "eval_accuracy": 0.665680473372781,
      "eval_f1": 0.6506595311329039,
      "eval_loss": 1.4839813709259033,
      "eval_precision": 0.7151002843310535,
      "eval_recall": 0.665680473372781,
      "eval_runtime": 18.7473,
      "eval_samples_per_second": 18.029,
      "eval_steps_per_second": 9.015,
      "step": 5407
    },
    {
      "epoch": 30.002569444444443,
      "grad_norm": 0.05277135968208313,
      "learning_rate": 4.707561728395062e-05,
      "loss": 0.1508,
      "step": 5500
    },
    {
      "epoch": 30.004986111111112,
      "eval_accuracy": 0.6035502958579881,
      "eval_f1": 0.590259690999336,
      "eval_loss": 1.801746129989624,
      "eval_precision": 0.6784164553395322,
      "eval_recall": 0.6035502958579881,
      "eval_runtime": 17.6073,
      "eval_samples_per_second": 19.197,
      "eval_steps_per_second": 9.598,
      "step": 5587
    },
    {
      "epoch": 31.000340277777777,
      "grad_norm": 0.6189529895782471,
      "learning_rate": 4.6921296296296296e-05,
      "loss": 0.0796,
      "step": 5600
    },
    {
      "epoch": 31.003118055555557,
      "grad_norm": 0.06006496772170067,
      "learning_rate": 4.676697530864197e-05,
      "loss": 0.1114,
      "step": 5700
    },
    {
      "epoch": 31.005006944444446,
      "eval_accuracy": 0.6390532544378699,
      "eval_f1": 0.613423755731448,
      "eval_loss": 1.6675891876220703,
      "eval_precision": 0.6721329952099183,
      "eval_recall": 0.6390532544378699,
      "eval_runtime": 17.8113,
      "eval_samples_per_second": 18.977,
      "eval_steps_per_second": 9.488,
      "step": 5768
    },
    {
      "epoch": 32.00088888888889,
      "grad_norm": 0.04244798794388771,
      "learning_rate": 4.661265432098766e-05,
      "loss": 0.1236,
      "step": 5800
    },
    {
      "epoch": 32.00366666666667,
      "grad_norm": 0.24667857587337494,
      "learning_rate": 4.6458333333333335e-05,
      "loss": 0.0931,
      "step": 5900
    },
    {
      "epoch": 32.005,
      "eval_accuracy": 0.6745562130177515,
      "eval_f1": 0.6520330097253173,
      "eval_loss": 1.534504771232605,
      "eval_precision": 0.7082065370526909,
      "eval_recall": 0.6745562130177515,
      "eval_runtime": 16.9203,
      "eval_samples_per_second": 19.976,
      "eval_steps_per_second": 9.988,
      "step": 5948
    },
    {
      "epoch": 33.0014375,
      "grad_norm": 0.04040142521262169,
      "learning_rate": 4.630401234567901e-05,
      "loss": 0.0767,
      "step": 6000
    },
    {
      "epoch": 33.004215277777774,
      "grad_norm": 0.18193276226520538,
      "learning_rate": 4.614969135802469e-05,
      "loss": 0.0619,
      "step": 6100
    },
    {
      "epoch": 33.00499305555556,
      "eval_accuracy": 0.6301775147928994,
      "eval_f1": 0.6008137161983316,
      "eval_loss": 1.746155858039856,
      "eval_precision": 0.6424168779938011,
      "eval_recall": 0.6301775147928994,
      "eval_runtime": 18.9428,
      "eval_samples_per_second": 17.843,
      "eval_steps_per_second": 8.922,
      "step": 6128
    },
    {
      "epoch": 34.00198611111111,
      "grad_norm": 0.009634270332753658,
      "learning_rate": 4.599691358024691e-05,
      "loss": 0.1314,
      "step": 6200
    },
    {
      "epoch": 34.00476388888889,
      "grad_norm": 0.21367499232292175,
      "learning_rate": 4.584259259259259e-05,
      "loss": 0.2698,
      "step": 6300
    },
    {
      "epoch": 34.00498611111111,
      "eval_accuracy": 0.6331360946745562,
      "eval_f1": 0.6126230606999838,
      "eval_loss": 1.7031519412994385,
      "eval_precision": 0.6710693153000845,
      "eval_recall": 0.6331360946745562,
      "eval_runtime": 19.9792,
      "eval_samples_per_second": 16.918,
      "eval_steps_per_second": 8.459,
      "step": 6308
    },
    {
      "epoch": 35.00253472222222,
      "grad_norm": 0.03326309099793434,
      "learning_rate": 4.5688271604938275e-05,
      "loss": 0.1108,
      "step": 6400
    },
    {
      "epoch": 35.005006944444446,
      "eval_accuracy": 0.6538461538461539,
      "eval_f1": 0.62648739111679,
      "eval_loss": 1.7694625854492188,
      "eval_precision": 0.6784205193228862,
      "eval_recall": 0.6538461538461539,
      "eval_runtime": 18.3538,
      "eval_samples_per_second": 18.416,
      "eval_steps_per_second": 9.208,
      "step": 6489
    },
    {
      "epoch": 36.000305555555556,
      "grad_norm": 104.1079330444336,
      "learning_rate": 4.553395061728395e-05,
      "loss": 0.1261,
      "step": 6500
    },
    {
      "epoch": 36.003083333333336,
      "grad_norm": 0.1133200004696846,
      "learning_rate": 4.537962962962963e-05,
      "loss": 0.1006,
      "step": 6600
    },
    {
      "epoch": 36.005,
      "eval_accuracy": 0.5828402366863905,
      "eval_f1": 0.5661115757104835,
      "eval_loss": 2.018780469894409,
      "eval_precision": 0.6289293300387975,
      "eval_recall": 0.5828402366863905,
      "eval_runtime": 17.6731,
      "eval_samples_per_second": 19.125,
      "eval_steps_per_second": 9.563,
      "step": 6669
    },
    {
      "epoch": 37.00085416666667,
      "grad_norm": 0.024959033355116844,
      "learning_rate": 4.5225308641975314e-05,
      "loss": 0.1183,
      "step": 6700
    },
    {
      "epoch": 37.00363194444444,
      "grad_norm": 0.09667906910181046,
      "learning_rate": 4.507098765432099e-05,
      "loss": 0.0823,
      "step": 6800
    },
    {
      "epoch": 37.00499305555556,
      "eval_accuracy": 0.6568047337278107,
      "eval_f1": 0.6425439452066671,
      "eval_loss": 1.6487033367156982,
      "eval_precision": 0.6874260355029587,
      "eval_recall": 0.6568047337278107,
      "eval_runtime": 20.3974,
      "eval_samples_per_second": 16.571,
      "eval_steps_per_second": 8.285,
      "step": 6849
    },
    {
      "epoch": 38.00140277777778,
      "grad_norm": 0.025008585304021835,
      "learning_rate": 4.491666666666667e-05,
      "loss": 0.1114,
      "step": 6900
    },
    {
      "epoch": 38.00418055555556,
      "grad_norm": 3.3624982833862305,
      "learning_rate": 4.476234567901235e-05,
      "loss": 0.0632,
      "step": 7000
    },
    {
      "epoch": 38.00498611111111,
      "eval_accuracy": 0.636094674556213,
      "eval_f1": 0.6253254280582216,
      "eval_loss": 1.8013869524002075,
      "eval_precision": 0.691735347985348,
      "eval_recall": 0.636094674556213,
      "eval_runtime": 18.4919,
      "eval_samples_per_second": 18.278,
      "eval_steps_per_second": 9.139,
      "step": 7029
    },
    {
      "epoch": 39.00195138888889,
      "grad_norm": 0.028111210092902184,
      "learning_rate": 4.4608024691358025e-05,
      "loss": 0.0691,
      "step": 7100
    },
    {
      "epoch": 39.004729166666664,
      "grad_norm": 0.022145342081785202,
      "learning_rate": 4.445370370370371e-05,
      "loss": 0.1162,
      "step": 7200
    },
    {
      "epoch": 39.005006944444446,
      "eval_accuracy": 0.6449704142011834,
      "eval_f1": 0.6195841797912803,
      "eval_loss": 1.67414128780365,
      "eval_precision": 0.6671691556306941,
      "eval_recall": 0.6449704142011834,
      "eval_runtime": 19.1991,
      "eval_samples_per_second": 17.605,
      "eval_steps_per_second": 8.802,
      "step": 7210
    },
    {
      "epoch": 40.0025,
      "grad_norm": 0.012291846796870232,
      "learning_rate": 4.429938271604939e-05,
      "loss": 0.0846,
      "step": 7300
    },
    {
      "epoch": 40.005,
      "eval_accuracy": 0.636094674556213,
      "eval_f1": 0.6205015070399685,
      "eval_loss": 1.8031796216964722,
      "eval_precision": 0.6948467692900892,
      "eval_recall": 0.636094674556213,
      "eval_runtime": 19.3607,
      "eval_samples_per_second": 17.458,
      "eval_steps_per_second": 8.729,
      "step": 7390
    },
    {
      "epoch": 41.00027083333333,
      "grad_norm": 0.45003169775009155,
      "learning_rate": 4.4145061728395065e-05,
      "loss": 0.146,
      "step": 7400
    },
    {
      "epoch": 41.00304861111111,
      "grad_norm": 4.705352306365967,
      "learning_rate": 4.399074074074074e-05,
      "loss": 0.1528,
      "step": 7500
    },
    {
      "epoch": 41.00499305555556,
      "eval_accuracy": 0.6331360946745562,
      "eval_f1": 0.6101778563317025,
      "eval_loss": 1.8375493288040161,
      "eval_precision": 0.673196675119752,
      "eval_recall": 0.6331360946745562,
      "eval_runtime": 17.7659,
      "eval_samples_per_second": 19.025,
      "eval_steps_per_second": 9.513,
      "step": 7570
    },
    {
      "epoch": 42.000819444444446,
      "grad_norm": 94.2012939453125,
      "learning_rate": 4.383641975308642e-05,
      "loss": 0.1548,
      "step": 7600
    },
    {
      "epoch": 42.003597222222226,
      "grad_norm": 0.008578118868172169,
      "learning_rate": 4.36820987654321e-05,
      "loss": 0.0695,
      "step": 7700
    },
    {
      "epoch": 42.00498611111111,
      "eval_accuracy": 0.6568047337278107,
      "eval_f1": 0.6417349744272821,
      "eval_loss": 1.672219157218933,
      "eval_precision": 0.7030008453085377,
      "eval_recall": 0.6568047337278107,
      "eval_runtime": 18.0993,
      "eval_samples_per_second": 18.675,
      "eval_steps_per_second": 9.337,
      "step": 7750
    },
    {
      "epoch": 43.00136805555555,
      "grad_norm": 0.005858391989022493,
      "learning_rate": 4.3527777777777776e-05,
      "loss": 0.0513,
      "step": 7800
    },
    {
      "epoch": 43.00414583333333,
      "grad_norm": 0.24502906203269958,
      "learning_rate": 4.3375000000000004e-05,
      "loss": 0.1516,
      "step": 7900
    },
    {
      "epoch": 43.005006944444446,
      "eval_accuracy": 0.6715976331360947,
      "eval_f1": 0.6494567398413551,
      "eval_loss": 1.7810900211334229,
      "eval_precision": 0.7009087066779375,
      "eval_recall": 0.6715976331360947,
      "eval_runtime": 18.6526,
      "eval_samples_per_second": 18.121,
      "eval_steps_per_second": 9.06,
      "step": 7931
    },
    {
      "epoch": 44.001916666666666,
      "grad_norm": 0.023847075179219246,
      "learning_rate": 4.322067901234568e-05,
      "loss": 0.1172,
      "step": 8000
    },
    {
      "epoch": 44.004694444444446,
      "grad_norm": 0.1472051441669464,
      "learning_rate": 4.306635802469136e-05,
      "loss": 0.1565,
      "step": 8100
    },
    {
      "epoch": 44.005,
      "eval_accuracy": 0.6538461538461539,
      "eval_f1": 0.6346678469459534,
      "eval_loss": 1.8077248334884644,
      "eval_precision": 0.6883840518455903,
      "eval_recall": 0.6538461538461539,
      "eval_runtime": 18.0276,
      "eval_samples_per_second": 18.749,
      "eval_steps_per_second": 9.375,
      "step": 8111
    },
    {
      "epoch": 45.00246527777778,
      "grad_norm": 0.013580802828073502,
      "learning_rate": 4.291203703703704e-05,
      "loss": 0.0728,
      "step": 8200
    },
    {
      "epoch": 45.00499305555556,
      "eval_accuracy": 0.6272189349112426,
      "eval_f1": 0.604730419153496,
      "eval_loss": 1.7950366735458374,
      "eval_precision": 0.6755196480048551,
      "eval_recall": 0.6272189349112426,
      "eval_runtime": 18.0454,
      "eval_samples_per_second": 18.731,
      "eval_steps_per_second": 9.365,
      "step": 8291
    },
    {
      "epoch": 45.00499305555556,
      "step": 8291,
      "total_flos": 1.700902027656954e+20,
      "train_loss": 3.3378389804185824,
      "train_runtime": 8591.5525,
      "train_samples_per_second": 33.521,
      "train_steps_per_second": 4.19
    }
  ],
  "logging_steps": 100,
  "max_steps": 36000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 20,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 20
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.700902027656954e+20,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|