{ "best_metric": 0.6745562130177515, "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/MY DATA/ViViT_WLASL_100_SR_4_ep200_p20/checkpoint-4686", "epoch": 45.00499305555556, "eval_steps": 500, "global_step": 8291, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002777777777777778, "grad_norm": 46.614158630371094, "learning_rate": 1.3472222222222222e-06, "loss": 18.9788, "step": 100 }, { "epoch": 0.005, "eval_accuracy": 0.026627218934911243, "eval_f1": 0.008186647196111367, "eval_loss": 4.690920829772949, "eval_precision": 0.005517409748672709, "eval_recall": 0.026627218934911243, "eval_runtime": 19.4352, "eval_samples_per_second": 17.391, "eval_steps_per_second": 8.696, "step": 180 }, { "epoch": 1.000548611111111, "grad_norm": 44.873966217041016, "learning_rate": 2.7361111111111114e-06, "loss": 18.8244, "step": 200 }, { "epoch": 1.0033263888888888, "grad_norm": 43.23536682128906, "learning_rate": 4.125e-06, "loss": 18.6803, "step": 300 }, { "epoch": 1.0049930555555555, "eval_accuracy": 0.026627218934911243, "eval_f1": 0.0057761083277597, "eval_loss": 4.617973327636719, "eval_precision": 0.003442859602301134, "eval_recall": 0.026627218934911243, "eval_runtime": 18.7273, "eval_samples_per_second": 18.049, "eval_steps_per_second": 9.024, "step": 360 }, { "epoch": 2.001097222222222, "grad_norm": 43.87873458862305, "learning_rate": 5.513888888888889e-06, "loss": 18.3151, "step": 400 }, { "epoch": 2.003875, "grad_norm": 45.60523986816406, "learning_rate": 6.902777777777777e-06, "loss": 18.0341, "step": 500 }, { "epoch": 2.004986111111111, "eval_accuracy": 0.047337278106508875, "eval_f1": 0.023768276430998322, "eval_loss": 4.518121242523193, "eval_precision": 0.020485887948289476, "eval_recall": 0.047337278106508875, "eval_runtime": 19.5951, "eval_samples_per_second": 17.249, "eval_steps_per_second": 8.625, "step": 540 }, { "epoch": 3.0016458333333333, "grad_norm": 49.1752815246582, "learning_rate": 8.291666666666667e-06, "loss": 17.4359, "step": 600 }, { "epoch": 3.004423611111111, "grad_norm": 51.076786041259766, "learning_rate": 9.680555555555556e-06, "loss": 17.0617, "step": 700 }, { "epoch": 3.0050069444444443, "eval_accuracy": 0.07396449704142012, "eval_f1": 0.047821065111354005, "eval_loss": 4.337636470794678, "eval_precision": 0.054133340554329064, "eval_recall": 0.07396449704142012, "eval_runtime": 18.9343, "eval_samples_per_second": 17.851, "eval_steps_per_second": 8.926, "step": 721 }, { "epoch": 4.002194444444444, "grad_norm": 46.15924835205078, "learning_rate": 1.1069444444444445e-05, "loss": 16.0988, "step": 800 }, { "epoch": 4.004972222222222, "grad_norm": 51.79167938232422, "learning_rate": 1.2458333333333334e-05, "loss": 15.6302, "step": 900 }, { "epoch": 4.005, "eval_accuracy": 0.11834319526627218, "eval_f1": 0.081513142682628, "eval_loss": 4.070884704589844, "eval_precision": 0.07720141700404858, "eval_recall": 0.11834319526627218, "eval_runtime": 20.1431, "eval_samples_per_second": 16.78, "eval_steps_per_second": 8.39, "step": 901 }, { "epoch": 5.002743055555555, "grad_norm": 55.07365798950195, "learning_rate": 1.3847222222222223e-05, "loss": 14.0851, "step": 1000 }, { "epoch": 5.0049930555555555, "eval_accuracy": 0.1863905325443787, "eval_f1": 0.14494279207764105, "eval_loss": 3.755218029022217, "eval_precision": 0.1535636372174834, "eval_recall": 0.1863905325443787, "eval_runtime": 19.7392, "eval_samples_per_second": 17.123, "eval_steps_per_second": 8.562, "step": 1081 }, { "epoch": 6.0005138888888885, "grad_norm": 49.54998779296875, "learning_rate": 1.5236111111111112e-05, "loss": 13.3452, "step": 1100 }, { "epoch": 6.003291666666667, "grad_norm": 54.62944793701172, "learning_rate": 1.6625e-05, "loss": 11.953, "step": 1200 }, { "epoch": 6.004986111111111, "eval_accuracy": 0.26627218934911245, "eval_f1": 0.2220989336141275, "eval_loss": 3.406508684158325, "eval_precision": 0.26431351743742965, "eval_recall": 0.26627218934911245, "eval_runtime": 19.5936, "eval_samples_per_second": 17.251, "eval_steps_per_second": 8.625, "step": 1261 }, { "epoch": 7.0010625, "grad_norm": 56.319576263427734, "learning_rate": 1.8013888888888888e-05, "loss": 11.0917, "step": 1300 }, { "epoch": 7.003840277777778, "grad_norm": 54.84210205078125, "learning_rate": 1.940277777777778e-05, "loss": 9.8223, "step": 1400 }, { "epoch": 7.005006944444444, "eval_accuracy": 0.33727810650887574, "eval_f1": 0.29438573252416295, "eval_loss": 3.103813886642456, "eval_precision": 0.3283793047846302, "eval_recall": 0.33727810650887574, "eval_runtime": 20.1573, "eval_samples_per_second": 16.768, "eval_steps_per_second": 8.384, "step": 1442 }, { "epoch": 8.001611111111112, "grad_norm": 51.06685256958008, "learning_rate": 2.0791666666666666e-05, "loss": 8.6714, "step": 1500 }, { "epoch": 8.004388888888888, "grad_norm": 50.67875289916992, "learning_rate": 2.2180555555555557e-05, "loss": 7.8126, "step": 1600 }, { "epoch": 8.005, "eval_accuracy": 0.41420118343195267, "eval_f1": 0.3676157668761219, "eval_loss": 2.7834277153015137, "eval_precision": 0.42014839107599283, "eval_recall": 0.41420118343195267, "eval_runtime": 19.7317, "eval_samples_per_second": 17.13, "eval_steps_per_second": 8.565, "step": 1622 }, { "epoch": 9.002159722222222, "grad_norm": 49.01296615600586, "learning_rate": 2.3569444444444447e-05, "loss": 6.4863, "step": 1700 }, { "epoch": 9.0049375, "grad_norm": 47.182640075683594, "learning_rate": 2.4958333333333335e-05, "loss": 6.0952, "step": 1800 }, { "epoch": 9.004993055555556, "eval_accuracy": 0.47633136094674555, "eval_f1": 0.4368087369566659, "eval_loss": 2.512887477874756, "eval_precision": 0.4949564324564325, "eval_recall": 0.47633136094674555, "eval_runtime": 20.1068, "eval_samples_per_second": 16.81, "eval_steps_per_second": 8.405, "step": 1802 }, { "epoch": 10.002708333333333, "grad_norm": 49.08199691772461, "learning_rate": 2.6347222222222222e-05, "loss": 4.3155, "step": 1900 }, { "epoch": 10.00498611111111, "eval_accuracy": 0.5059171597633136, "eval_f1": 0.48941789637055916, "eval_loss": 2.2756905555725098, "eval_precision": 0.5593944517021441, "eval_recall": 0.5059171597633136, "eval_runtime": 18.9167, "eval_samples_per_second": 17.868, "eval_steps_per_second": 8.934, "step": 1982 }, { "epoch": 11.000479166666667, "grad_norm": 37.75128936767578, "learning_rate": 2.7736111111111113e-05, "loss": 4.3448, "step": 2000 }, { "epoch": 11.003256944444445, "grad_norm": 36.987205505371094, "learning_rate": 2.9111111111111112e-05, "loss": 3.0214, "step": 2100 }, { "epoch": 11.005006944444444, "eval_accuracy": 0.5473372781065089, "eval_f1": 0.5177564874310436, "eval_loss": 2.0459656715393066, "eval_precision": 0.563966482235713, "eval_recall": 0.5473372781065089, "eval_runtime": 19.7866, "eval_samples_per_second": 17.082, "eval_steps_per_second": 8.541, "step": 2163 }, { "epoch": 12.001027777777777, "grad_norm": 30.315874099731445, "learning_rate": 3.05e-05, "loss": 2.7849, "step": 2200 }, { "epoch": 12.003805555555555, "grad_norm": 27.424291610717773, "learning_rate": 3.188888888888889e-05, "loss": 2.0687, "step": 2300 }, { "epoch": 12.005, "eval_accuracy": 0.591715976331361, "eval_f1": 0.5730650184792196, "eval_loss": 1.8803057670593262, "eval_precision": 0.6183537616229923, "eval_recall": 0.591715976331361, "eval_runtime": 19.466, "eval_samples_per_second": 17.364, "eval_steps_per_second": 8.682, "step": 2343 }, { "epoch": 13.00157638888889, "grad_norm": 19.18665313720703, "learning_rate": 3.327777777777778e-05, "loss": 1.5252, "step": 2400 }, { "epoch": 13.004354166666667, "grad_norm": 31.665971755981445, "learning_rate": 3.466666666666667e-05, "loss": 1.3523, "step": 2500 }, { "epoch": 13.004993055555556, "eval_accuracy": 0.591715976331361, "eval_f1": 0.5601332592456852, "eval_loss": 1.7261461019515991, "eval_precision": 0.6065053536207382, "eval_recall": 0.591715976331361, "eval_runtime": 19.273, "eval_samples_per_second": 17.537, "eval_steps_per_second": 8.769, "step": 2523 }, { "epoch": 14.002125, "grad_norm": 10.98436450958252, "learning_rate": 3.605555555555556e-05, "loss": 0.9208, "step": 2600 }, { "epoch": 14.004902777777778, "grad_norm": 45.65860366821289, "learning_rate": 3.7444444444444446e-05, "loss": 0.7828, "step": 2700 }, { "epoch": 14.00498611111111, "eval_accuracy": 0.6035502958579881, "eval_f1": 0.5923542762300159, "eval_loss": 1.6275168657302856, "eval_precision": 0.6644219028834413, "eval_recall": 0.6035502958579881, "eval_runtime": 19.479, "eval_samples_per_second": 17.352, "eval_steps_per_second": 8.676, "step": 2703 }, { "epoch": 15.002673611111112, "grad_norm": 24.026081085205078, "learning_rate": 3.883333333333333e-05, "loss": 0.4222, "step": 2800 }, { "epoch": 15.005006944444444, "eval_accuracy": 0.6420118343195266, "eval_f1": 0.6244853437161129, "eval_loss": 1.5283950567245483, "eval_precision": 0.67558435154589, "eval_recall": 0.6420118343195266, "eval_runtime": 19.8172, "eval_samples_per_second": 17.056, "eval_steps_per_second": 8.528, "step": 2884 }, { "epoch": 16.000444444444444, "grad_norm": 11.13332748413086, "learning_rate": 4.022222222222222e-05, "loss": 0.4752, "step": 2900 }, { "epoch": 16.003222222222224, "grad_norm": 8.170760154724121, "learning_rate": 4.1611111111111114e-05, "loss": 0.3113, "step": 3000 }, { "epoch": 16.005, "eval_accuracy": 0.6272189349112426, "eval_f1": 0.6092443552502724, "eval_loss": 1.5459295511245728, "eval_precision": 0.6664412510566357, "eval_recall": 0.6272189349112426, "eval_runtime": 19.569, "eval_samples_per_second": 17.272, "eval_steps_per_second": 8.636, "step": 3064 }, { "epoch": 17.000993055555554, "grad_norm": 1.5062611103057861, "learning_rate": 4.3e-05, "loss": 0.2905, "step": 3100 }, { "epoch": 17.003770833333334, "grad_norm": 1.255529761314392, "learning_rate": 4.438888888888889e-05, "loss": 0.2021, "step": 3200 }, { "epoch": 17.004993055555556, "eval_accuracy": 0.665680473372781, "eval_f1": 0.6484432398633582, "eval_loss": 1.4440850019454956, "eval_precision": 0.6991018596787828, "eval_recall": 0.665680473372781, "eval_runtime": 20.6892, "eval_samples_per_second": 16.337, "eval_steps_per_second": 8.168, "step": 3244 }, { "epoch": 18.001541666666668, "grad_norm": 2.2327980995178223, "learning_rate": 4.577777777777778e-05, "loss": 0.1544, "step": 3300 }, { "epoch": 18.004319444444445, "grad_norm": 1.6391757726669312, "learning_rate": 4.716666666666667e-05, "loss": 0.1698, "step": 3400 }, { "epoch": 18.004986111111112, "eval_accuracy": 0.6124260355029586, "eval_f1": 0.5942196856398039, "eval_loss": 1.5340176820755005, "eval_precision": 0.6510575962499039, "eval_recall": 0.6124260355029586, "eval_runtime": 19.0949, "eval_samples_per_second": 17.701, "eval_steps_per_second": 8.851, "step": 3424 }, { "epoch": 19.00209027777778, "grad_norm": 7.581464767456055, "learning_rate": 4.855555555555556e-05, "loss": 0.0791, "step": 3500 }, { "epoch": 19.004868055555555, "grad_norm": 0.5042413473129272, "learning_rate": 4.994444444444445e-05, "loss": 0.1199, "step": 3600 }, { "epoch": 19.005006944444446, "eval_accuracy": 0.650887573964497, "eval_f1": 0.6288250211327134, "eval_loss": 1.3934611082077026, "eval_precision": 0.6745703014933784, "eval_recall": 0.650887573964497, "eval_runtime": 18.5254, "eval_samples_per_second": 18.245, "eval_steps_per_second": 9.123, "step": 3605 }, { "epoch": 20.00263888888889, "grad_norm": 0.40117892622947693, "learning_rate": 4.9851851851851855e-05, "loss": 0.0244, "step": 3700 }, { "epoch": 20.005, "eval_accuracy": 0.6686390532544378, "eval_f1": 0.6573669919823766, "eval_loss": 1.478171706199646, "eval_precision": 0.7130459284305437, "eval_recall": 0.6686390532544378, "eval_runtime": 19.5864, "eval_samples_per_second": 17.257, "eval_steps_per_second": 8.628, "step": 3785 }, { "epoch": 21.000409722222223, "grad_norm": 0.6385509967803955, "learning_rate": 4.969753086419753e-05, "loss": 0.1644, "step": 3800 }, { "epoch": 21.0031875, "grad_norm": 0.36917442083358765, "learning_rate": 4.954320987654321e-05, "loss": 0.0407, "step": 3900 }, { "epoch": 21.004993055555556, "eval_accuracy": 0.6686390532544378, "eval_f1": 0.6557107422492038, "eval_loss": 1.388999581336975, "eval_precision": 0.7149478726401803, "eval_recall": 0.6686390532544378, "eval_runtime": 19.0631, "eval_samples_per_second": 17.731, "eval_steps_per_second": 8.865, "step": 3965 }, { "epoch": 22.000958333333333, "grad_norm": 0.5856227874755859, "learning_rate": 4.938888888888889e-05, "loss": 0.1314, "step": 4000 }, { "epoch": 22.00373611111111, "grad_norm": 0.7390286326408386, "learning_rate": 4.923611111111112e-05, "loss": 0.0719, "step": 4100 }, { "epoch": 22.004986111111112, "eval_accuracy": 0.6597633136094675, "eval_f1": 0.6477249246480016, "eval_loss": 1.4896520376205444, "eval_precision": 0.7189349112426036, "eval_recall": 0.6597633136094675, "eval_runtime": 19.4131, "eval_samples_per_second": 17.411, "eval_steps_per_second": 8.705, "step": 4145 }, { "epoch": 23.001506944444444, "grad_norm": 0.09171107411384583, "learning_rate": 4.9081790123456794e-05, "loss": 0.1134, "step": 4200 }, { "epoch": 23.004284722222224, "grad_norm": 27.191944122314453, "learning_rate": 4.892746913580247e-05, "loss": 0.1163, "step": 4300 }, { "epoch": 23.005006944444446, "eval_accuracy": 0.6715976331360947, "eval_f1": 0.6638741600280061, "eval_loss": 1.3919016122817993, "eval_precision": 0.7218300929839392, "eval_recall": 0.6715976331360947, "eval_runtime": 18.0114, "eval_samples_per_second": 18.766, "eval_steps_per_second": 9.383, "step": 4326 }, { "epoch": 24.002055555555554, "grad_norm": 0.14431528747081757, "learning_rate": 4.877314814814815e-05, "loss": 0.061, "step": 4400 }, { "epoch": 24.004833333333334, "grad_norm": 33.57377624511719, "learning_rate": 4.861882716049383e-05, "loss": 0.1167, "step": 4500 }, { "epoch": 24.005, "eval_accuracy": 0.6538461538461539, "eval_f1": 0.6379572761229566, "eval_loss": 1.569033145904541, "eval_precision": 0.7189384333615103, "eval_recall": 0.6538461538461539, "eval_runtime": 18.5212, "eval_samples_per_second": 18.249, "eval_steps_per_second": 9.125, "step": 4506 }, { "epoch": 25.002604166666668, "grad_norm": 0.03929486870765686, "learning_rate": 4.8464506172839505e-05, "loss": 0.0366, "step": 4600 }, { "epoch": 25.004993055555556, "eval_accuracy": 0.6745562130177515, "eval_f1": 0.6541387606476363, "eval_loss": 1.5032190084457397, "eval_precision": 0.6978867286559595, "eval_recall": 0.6745562130177515, "eval_runtime": 18.3411, "eval_samples_per_second": 18.429, "eval_steps_per_second": 9.214, "step": 4686 }, { "epoch": 26.000375, "grad_norm": 0.10611555725336075, "learning_rate": 4.831018518518518e-05, "loss": 0.1361, "step": 4700 }, { "epoch": 26.00315277777778, "grad_norm": 1.335172176361084, "learning_rate": 4.815586419753087e-05, "loss": 0.1065, "step": 4800 }, { "epoch": 26.004986111111112, "eval_accuracy": 0.6390532544378699, "eval_f1": 0.6134555615324846, "eval_loss": 1.4892995357513428, "eval_precision": 0.6475309946463793, "eval_recall": 0.6390532544378699, "eval_runtime": 19.1889, "eval_samples_per_second": 17.614, "eval_steps_per_second": 8.807, "step": 4866 }, { "epoch": 27.000923611111112, "grad_norm": 0.25120797753334045, "learning_rate": 4.8001543209876545e-05, "loss": 0.1148, "step": 4900 }, { "epoch": 27.00370138888889, "grad_norm": 0.08281093835830688, "learning_rate": 4.784722222222223e-05, "loss": 0.0454, "step": 5000 }, { "epoch": 27.005006944444446, "eval_accuracy": 0.6242603550295858, "eval_f1": 0.6022126952600326, "eval_loss": 1.5013433694839478, "eval_precision": 0.6600591715976332, "eval_recall": 0.6242603550295858, "eval_runtime": 20.5193, "eval_samples_per_second": 16.472, "eval_steps_per_second": 8.236, "step": 5047 }, { "epoch": 28.001472222222223, "grad_norm": 0.05560300499200821, "learning_rate": 4.769290123456791e-05, "loss": 0.0973, "step": 5100 }, { "epoch": 28.00425, "grad_norm": 1.4039995670318604, "learning_rate": 4.7538580246913585e-05, "loss": 0.0844, "step": 5200 }, { "epoch": 28.005, "eval_accuracy": 0.6597633136094675, "eval_f1": 0.6388020099558561, "eval_loss": 1.5608867406845093, "eval_precision": 0.6974077204846435, "eval_recall": 0.6597633136094675, "eval_runtime": 20.1446, "eval_samples_per_second": 16.779, "eval_steps_per_second": 8.389, "step": 5227 }, { "epoch": 29.002020833333333, "grad_norm": 0.056026242673397064, "learning_rate": 4.738425925925926e-05, "loss": 0.0921, "step": 5300 }, { "epoch": 29.00479861111111, "grad_norm": 0.39889276027679443, "learning_rate": 4.722993827160494e-05, "loss": 0.109, "step": 5400 }, { "epoch": 29.004993055555556, "eval_accuracy": 0.665680473372781, "eval_f1": 0.6506595311329039, "eval_loss": 1.4839813709259033, "eval_precision": 0.7151002843310535, "eval_recall": 0.665680473372781, "eval_runtime": 18.7473, "eval_samples_per_second": 18.029, "eval_steps_per_second": 9.015, "step": 5407 }, { "epoch": 30.002569444444443, "grad_norm": 0.05277135968208313, "learning_rate": 4.707561728395062e-05, "loss": 0.1508, "step": 5500 }, { "epoch": 30.004986111111112, "eval_accuracy": 0.6035502958579881, "eval_f1": 0.590259690999336, "eval_loss": 1.801746129989624, "eval_precision": 0.6784164553395322, "eval_recall": 0.6035502958579881, "eval_runtime": 17.6073, "eval_samples_per_second": 19.197, "eval_steps_per_second": 9.598, "step": 5587 }, { "epoch": 31.000340277777777, "grad_norm": 0.6189529895782471, "learning_rate": 4.6921296296296296e-05, "loss": 0.0796, "step": 5600 }, { "epoch": 31.003118055555557, "grad_norm": 0.06006496772170067, "learning_rate": 4.676697530864197e-05, "loss": 0.1114, "step": 5700 }, { "epoch": 31.005006944444446, "eval_accuracy": 0.6390532544378699, "eval_f1": 0.613423755731448, "eval_loss": 1.6675891876220703, "eval_precision": 0.6721329952099183, "eval_recall": 0.6390532544378699, "eval_runtime": 17.8113, "eval_samples_per_second": 18.977, "eval_steps_per_second": 9.488, "step": 5768 }, { "epoch": 32.00088888888889, "grad_norm": 0.04244798794388771, "learning_rate": 4.661265432098766e-05, "loss": 0.1236, "step": 5800 }, { "epoch": 32.00366666666667, "grad_norm": 0.24667857587337494, "learning_rate": 4.6458333333333335e-05, "loss": 0.0931, "step": 5900 }, { "epoch": 32.005, "eval_accuracy": 0.6745562130177515, "eval_f1": 0.6520330097253173, "eval_loss": 1.534504771232605, "eval_precision": 0.7082065370526909, "eval_recall": 0.6745562130177515, "eval_runtime": 16.9203, "eval_samples_per_second": 19.976, "eval_steps_per_second": 9.988, "step": 5948 }, { "epoch": 33.0014375, "grad_norm": 0.04040142521262169, "learning_rate": 4.630401234567901e-05, "loss": 0.0767, "step": 6000 }, { "epoch": 33.004215277777774, "grad_norm": 0.18193276226520538, "learning_rate": 4.614969135802469e-05, "loss": 0.0619, "step": 6100 }, { "epoch": 33.00499305555556, "eval_accuracy": 0.6301775147928994, "eval_f1": 0.6008137161983316, "eval_loss": 1.746155858039856, "eval_precision": 0.6424168779938011, "eval_recall": 0.6301775147928994, "eval_runtime": 18.9428, "eval_samples_per_second": 17.843, "eval_steps_per_second": 8.922, "step": 6128 }, { "epoch": 34.00198611111111, "grad_norm": 0.009634270332753658, "learning_rate": 4.599691358024691e-05, "loss": 0.1314, "step": 6200 }, { "epoch": 34.00476388888889, "grad_norm": 0.21367499232292175, "learning_rate": 4.584259259259259e-05, "loss": 0.2698, "step": 6300 }, { "epoch": 34.00498611111111, "eval_accuracy": 0.6331360946745562, "eval_f1": 0.6126230606999838, "eval_loss": 1.7031519412994385, "eval_precision": 0.6710693153000845, "eval_recall": 0.6331360946745562, "eval_runtime": 19.9792, "eval_samples_per_second": 16.918, "eval_steps_per_second": 8.459, "step": 6308 }, { "epoch": 35.00253472222222, "grad_norm": 0.03326309099793434, "learning_rate": 4.5688271604938275e-05, "loss": 0.1108, "step": 6400 }, { "epoch": 35.005006944444446, "eval_accuracy": 0.6538461538461539, "eval_f1": 0.62648739111679, "eval_loss": 1.7694625854492188, "eval_precision": 0.6784205193228862, "eval_recall": 0.6538461538461539, "eval_runtime": 18.3538, "eval_samples_per_second": 18.416, "eval_steps_per_second": 9.208, "step": 6489 }, { "epoch": 36.000305555555556, "grad_norm": 104.1079330444336, "learning_rate": 4.553395061728395e-05, "loss": 0.1261, "step": 6500 }, { "epoch": 36.003083333333336, "grad_norm": 0.1133200004696846, "learning_rate": 4.537962962962963e-05, "loss": 0.1006, "step": 6600 }, { "epoch": 36.005, "eval_accuracy": 0.5828402366863905, "eval_f1": 0.5661115757104835, "eval_loss": 2.018780469894409, "eval_precision": 0.6289293300387975, "eval_recall": 0.5828402366863905, "eval_runtime": 17.6731, "eval_samples_per_second": 19.125, "eval_steps_per_second": 9.563, "step": 6669 }, { "epoch": 37.00085416666667, "grad_norm": 0.024959033355116844, "learning_rate": 4.5225308641975314e-05, "loss": 0.1183, "step": 6700 }, { "epoch": 37.00363194444444, "grad_norm": 0.09667906910181046, "learning_rate": 4.507098765432099e-05, "loss": 0.0823, "step": 6800 }, { "epoch": 37.00499305555556, "eval_accuracy": 0.6568047337278107, "eval_f1": 0.6425439452066671, "eval_loss": 1.6487033367156982, "eval_precision": 0.6874260355029587, "eval_recall": 0.6568047337278107, "eval_runtime": 20.3974, "eval_samples_per_second": 16.571, "eval_steps_per_second": 8.285, "step": 6849 }, { "epoch": 38.00140277777778, "grad_norm": 0.025008585304021835, "learning_rate": 4.491666666666667e-05, "loss": 0.1114, "step": 6900 }, { "epoch": 38.00418055555556, "grad_norm": 3.3624982833862305, "learning_rate": 4.476234567901235e-05, "loss": 0.0632, "step": 7000 }, { "epoch": 38.00498611111111, "eval_accuracy": 0.636094674556213, "eval_f1": 0.6253254280582216, "eval_loss": 1.8013869524002075, "eval_precision": 0.691735347985348, "eval_recall": 0.636094674556213, "eval_runtime": 18.4919, "eval_samples_per_second": 18.278, "eval_steps_per_second": 9.139, "step": 7029 }, { "epoch": 39.00195138888889, "grad_norm": 0.028111210092902184, "learning_rate": 4.4608024691358025e-05, "loss": 0.0691, "step": 7100 }, { "epoch": 39.004729166666664, "grad_norm": 0.022145342081785202, "learning_rate": 4.445370370370371e-05, "loss": 0.1162, "step": 7200 }, { "epoch": 39.005006944444446, "eval_accuracy": 0.6449704142011834, "eval_f1": 0.6195841797912803, "eval_loss": 1.67414128780365, "eval_precision": 0.6671691556306941, "eval_recall": 0.6449704142011834, "eval_runtime": 19.1991, "eval_samples_per_second": 17.605, "eval_steps_per_second": 8.802, "step": 7210 }, { "epoch": 40.0025, "grad_norm": 0.012291846796870232, "learning_rate": 4.429938271604939e-05, "loss": 0.0846, "step": 7300 }, { "epoch": 40.005, "eval_accuracy": 0.636094674556213, "eval_f1": 0.6205015070399685, "eval_loss": 1.8031796216964722, "eval_precision": 0.6948467692900892, "eval_recall": 0.636094674556213, "eval_runtime": 19.3607, "eval_samples_per_second": 17.458, "eval_steps_per_second": 8.729, "step": 7390 }, { "epoch": 41.00027083333333, "grad_norm": 0.45003169775009155, "learning_rate": 4.4145061728395065e-05, "loss": 0.146, "step": 7400 }, { "epoch": 41.00304861111111, "grad_norm": 4.705352306365967, "learning_rate": 4.399074074074074e-05, "loss": 0.1528, "step": 7500 }, { "epoch": 41.00499305555556, "eval_accuracy": 0.6331360946745562, "eval_f1": 0.6101778563317025, "eval_loss": 1.8375493288040161, "eval_precision": 0.673196675119752, "eval_recall": 0.6331360946745562, "eval_runtime": 17.7659, "eval_samples_per_second": 19.025, "eval_steps_per_second": 9.513, "step": 7570 }, { "epoch": 42.000819444444446, "grad_norm": 94.2012939453125, "learning_rate": 4.383641975308642e-05, "loss": 0.1548, "step": 7600 }, { "epoch": 42.003597222222226, "grad_norm": 0.008578118868172169, "learning_rate": 4.36820987654321e-05, "loss": 0.0695, "step": 7700 }, { "epoch": 42.00498611111111, "eval_accuracy": 0.6568047337278107, "eval_f1": 0.6417349744272821, "eval_loss": 1.672219157218933, "eval_precision": 0.7030008453085377, "eval_recall": 0.6568047337278107, "eval_runtime": 18.0993, "eval_samples_per_second": 18.675, "eval_steps_per_second": 9.337, "step": 7750 }, { "epoch": 43.00136805555555, "grad_norm": 0.005858391989022493, "learning_rate": 4.3527777777777776e-05, "loss": 0.0513, "step": 7800 }, { "epoch": 43.00414583333333, "grad_norm": 0.24502906203269958, "learning_rate": 4.3375000000000004e-05, "loss": 0.1516, "step": 7900 }, { "epoch": 43.005006944444446, "eval_accuracy": 0.6715976331360947, "eval_f1": 0.6494567398413551, "eval_loss": 1.7810900211334229, "eval_precision": 0.7009087066779375, "eval_recall": 0.6715976331360947, "eval_runtime": 18.6526, "eval_samples_per_second": 18.121, "eval_steps_per_second": 9.06, "step": 7931 }, { "epoch": 44.001916666666666, "grad_norm": 0.023847075179219246, "learning_rate": 4.322067901234568e-05, "loss": 0.1172, "step": 8000 }, { "epoch": 44.004694444444446, "grad_norm": 0.1472051441669464, "learning_rate": 4.306635802469136e-05, "loss": 0.1565, "step": 8100 }, { "epoch": 44.005, "eval_accuracy": 0.6538461538461539, "eval_f1": 0.6346678469459534, "eval_loss": 1.8077248334884644, "eval_precision": 0.6883840518455903, "eval_recall": 0.6538461538461539, "eval_runtime": 18.0276, "eval_samples_per_second": 18.749, "eval_steps_per_second": 9.375, "step": 8111 }, { "epoch": 45.00246527777778, "grad_norm": 0.013580802828073502, "learning_rate": 4.291203703703704e-05, "loss": 0.0728, "step": 8200 }, { "epoch": 45.00499305555556, "eval_accuracy": 0.6272189349112426, "eval_f1": 0.604730419153496, "eval_loss": 1.7950366735458374, "eval_precision": 0.6755196480048551, "eval_recall": 0.6272189349112426, "eval_runtime": 18.0454, "eval_samples_per_second": 18.731, "eval_steps_per_second": 9.365, "step": 8291 }, { "epoch": 45.00499305555556, "step": 8291, "total_flos": 1.700902027656954e+20, "train_loss": 3.3378389804185824, "train_runtime": 8591.5525, "train_samples_per_second": 33.521, "train_steps_per_second": 4.19 } ], "logging_steps": 100, "max_steps": 36000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 20, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 20 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.700902027656954e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }