{ "best_metric": 0.8015357353809806, "best_model_checkpoint": "/tmp/logs/binary_classification_model_v3.1.5_Junction_folder10/checkpoint-4735", "epoch": 450.95238095238096, "eval_steps": 500, "global_step": 4735, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09523809523809523, "grad_norm": Infinity, "learning_rate": 0.0, "loss": 1.4012, "step": 1 }, { "epoch": 0.19047619047619047, "grad_norm": 105.49879455566406, "learning_rate": 1.8740629685157423e-10, "loss": 1.463, "step": 2 }, { "epoch": 0.2857142857142857, "grad_norm": 104.57809448242188, "learning_rate": 3.7481259370314846e-10, "loss": 1.3533, "step": 3 }, { "epoch": 0.38095238095238093, "grad_norm": 96.78276062011719, "learning_rate": 5.622188905547227e-10, "loss": 1.3083, "step": 4 }, { "epoch": 0.47619047619047616, "grad_norm": 86.87081146240234, "learning_rate": 7.496251874062969e-10, "loss": 1.3851, "step": 5 }, { "epoch": 0.5714285714285714, "grad_norm": 99.54174041748047, "learning_rate": 9.37031484257871e-10, "loss": 1.4052, "step": 6 }, { "epoch": 0.6666666666666666, "grad_norm": 105.97377014160156, "learning_rate": 1.1244377811094454e-09, "loss": 1.3674, "step": 7 }, { "epoch": 0.7619047619047619, "grad_norm": 117.7193374633789, "learning_rate": 1.3118440779610195e-09, "loss": 1.4592, "step": 8 }, { "epoch": 0.8571428571428571, "grad_norm": 110.17356872558594, "learning_rate": 1.4992503748125938e-09, "loss": 1.4495, "step": 9 }, { "epoch": 0.9523809523809523, "grad_norm": 109.03305053710938, "learning_rate": 1.6866566716641682e-09, "loss": 1.4223, "step": 10 }, { "epoch": 0.9523809523809523, "eval_accuracy": 0.6852753840389659, "eval_f1": 0.7584818861414606, "eval_loss": 0.6929942965507507, "eval_precision": 0.6820062047569804, "eval_recall": 0.8542746113989638, "eval_roc_auc": 0.7219533678756477, "eval_runtime": 1.202, "eval_samples_per_second": 2220.439, "eval_steps_per_second": 4.992, "step": 10 }, { "epoch": 1.0476190476190477, "grad_norm": 80.2335433959961, "learning_rate": 1.874062968515742e-09, "loss": 1.3215, "step": 11 }, { "epoch": 1.1428571428571428, "grad_norm": 115.97867584228516, "learning_rate": 2.0614692653673164e-09, "loss": 1.4214, "step": 12 }, { "epoch": 1.2380952380952381, "grad_norm": 105.73731231689453, "learning_rate": 2.2488755622188908e-09, "loss": 1.4118, "step": 13 }, { "epoch": 1.3333333333333333, "grad_norm": 103.12907409667969, "learning_rate": 2.436281859070465e-09, "loss": 1.3847, "step": 14 }, { "epoch": 1.4285714285714286, "grad_norm": 112.44313049316406, "learning_rate": 2.623688155922039e-09, "loss": 1.4623, "step": 15 }, { "epoch": 1.5238095238095237, "grad_norm": 103.6406478881836, "learning_rate": 2.8110944527736133e-09, "loss": 1.4486, "step": 16 }, { "epoch": 1.619047619047619, "grad_norm": 90.27802276611328, "learning_rate": 2.9985007496251877e-09, "loss": 1.404, "step": 17 }, { "epoch": 1.7142857142857144, "grad_norm": 96.4856185913086, "learning_rate": 3.1859070464767616e-09, "loss": 1.4192, "step": 18 }, { "epoch": 1.8095238095238095, "grad_norm": 101.81619262695312, "learning_rate": 3.3733133433283363e-09, "loss": 1.3883, "step": 19 }, { "epoch": 1.9047619047619047, "grad_norm": 113.48494720458984, "learning_rate": 3.5607196401799103e-09, "loss": 1.4136, "step": 20 }, { "epoch": 2.0, "grad_norm": 98.515625, "learning_rate": 3.748125937031484e-09, "loss": 1.422, "step": 21 }, { "epoch": 2.0, "eval_accuracy": 0.6852753840389659, "eval_f1": 0.7583429228998849, "eval_loss": 0.6922231316566467, "eval_precision": 0.6821946169772257, "eval_recall": 0.8536269430051814, "eval_roc_auc": 0.7219496257915946, "eval_runtime": 1.2942, "eval_samples_per_second": 2062.333, "eval_steps_per_second": 4.636, "step": 21 }, { "epoch": 2.0952380952380953, "grad_norm": 102.9639663696289, "learning_rate": 3.9355322338830585e-09, "loss": 1.3666, "step": 22 }, { "epoch": 2.1904761904761907, "grad_norm": 106.0006332397461, "learning_rate": 4.122938530734633e-09, "loss": 1.3874, "step": 23 }, { "epoch": 2.2857142857142856, "grad_norm": 97.28984832763672, "learning_rate": 4.310344827586206e-09, "loss": 1.3638, "step": 24 }, { "epoch": 2.380952380952381, "grad_norm": 106.2695083618164, "learning_rate": 4.4977511244377815e-09, "loss": 1.4204, "step": 25 }, { "epoch": 2.4761904761904763, "grad_norm": 112.76600646972656, "learning_rate": 4.685157421289355e-09, "loss": 1.3955, "step": 26 }, { "epoch": 2.571428571428571, "grad_norm": 96.83776092529297, "learning_rate": 4.87256371814093e-09, "loss": 1.3999, "step": 27 }, { "epoch": 2.6666666666666665, "grad_norm": 87.85649108886719, "learning_rate": 5.059970014992504e-09, "loss": 1.3537, "step": 28 }, { "epoch": 2.761904761904762, "grad_norm": 102.71916961669922, "learning_rate": 5.247376311844078e-09, "loss": 1.4219, "step": 29 }, { "epoch": 2.857142857142857, "grad_norm": 99.2935791015625, "learning_rate": 5.434782608695652e-09, "loss": 1.3916, "step": 30 }, { "epoch": 2.9523809523809526, "grad_norm": 95.25791931152344, "learning_rate": 5.622188905547227e-09, "loss": 1.4288, "step": 31 }, { "epoch": 2.9523809523809526, "eval_accuracy": 0.6845260397152492, "eval_f1": 0.7574884792626728, "eval_loss": 0.6911234855651855, "eval_precision": 0.6820539419087137, "eval_recall": 0.8516839378238342, "eval_roc_auc": 0.7219237190558434, "eval_runtime": 1.1237, "eval_samples_per_second": 2375.214, "eval_steps_per_second": 5.34, "step": 31 }, { "epoch": 3.0476190476190474, "grad_norm": 104.1244125366211, "learning_rate": 5.8095952023988e-09, "loss": 1.4041, "step": 32 }, { "epoch": 3.142857142857143, "grad_norm": 101.19844055175781, "learning_rate": 5.997001499250375e-09, "loss": 1.3404, "step": 33 }, { "epoch": 3.238095238095238, "grad_norm": 87.8860855102539, "learning_rate": 6.184407796101949e-09, "loss": 1.4263, "step": 34 }, { "epoch": 3.3333333333333335, "grad_norm": 96.34718322753906, "learning_rate": 6.371814092953523e-09, "loss": 1.3771, "step": 35 }, { "epoch": 3.4285714285714284, "grad_norm": 98.07717895507812, "learning_rate": 6.5592203898050975e-09, "loss": 1.4294, "step": 36 }, { "epoch": 3.5238095238095237, "grad_norm": 98.57717895507812, "learning_rate": 6.746626686656673e-09, "loss": 1.3477, "step": 37 }, { "epoch": 3.619047619047619, "grad_norm": 106.59417724609375, "learning_rate": 6.934032983508246e-09, "loss": 1.4278, "step": 38 }, { "epoch": 3.7142857142857144, "grad_norm": 101.46930694580078, "learning_rate": 7.1214392803598205e-09, "loss": 1.3988, "step": 39 }, { "epoch": 3.8095238095238093, "grad_norm": 115.23248291015625, "learning_rate": 7.308845577211394e-09, "loss": 1.4424, "step": 40 }, { "epoch": 3.9047619047619047, "grad_norm": 100.2016830444336, "learning_rate": 7.496251874062968e-09, "loss": 1.3932, "step": 41 }, { "epoch": 4.0, "grad_norm": 110.41011810302734, "learning_rate": 7.683658170914544e-09, "loss": 1.4236, "step": 42 }, { "epoch": 4.0, "eval_accuracy": 0.6856500562008243, "eval_f1": 0.7580040380732622, "eval_loss": 0.6892919540405273, "eval_precision": 0.6833073322932918, "eval_recall": 0.8510362694300518, "eval_roc_auc": 0.7218949337938974, "eval_runtime": 1.1259, "eval_samples_per_second": 2370.508, "eval_steps_per_second": 5.329, "step": 42 }, { "epoch": 4.095238095238095, "grad_norm": 86.44599914550781, "learning_rate": 7.871064467766117e-09, "loss": 1.3405, "step": 43 }, { "epoch": 4.190476190476191, "grad_norm": 89.20963287353516, "learning_rate": 8.058470764617691e-09, "loss": 1.4478, "step": 44 }, { "epoch": 4.285714285714286, "grad_norm": 107.15955352783203, "learning_rate": 8.245877061469266e-09, "loss": 1.3755, "step": 45 }, { "epoch": 4.380952380952381, "grad_norm": 102.78776550292969, "learning_rate": 8.43328335832084e-09, "loss": 1.4022, "step": 46 }, { "epoch": 4.476190476190476, "grad_norm": 120.45516204833984, "learning_rate": 8.620689655172413e-09, "loss": 1.4396, "step": 47 }, { "epoch": 4.571428571428571, "grad_norm": 92.11113739013672, "learning_rate": 8.808095952023989e-09, "loss": 1.3656, "step": 48 }, { "epoch": 4.666666666666667, "grad_norm": 100.20201873779297, "learning_rate": 8.995502248875563e-09, "loss": 1.4059, "step": 49 }, { "epoch": 4.761904761904762, "grad_norm": 88.53689575195312, "learning_rate": 9.182908545727137e-09, "loss": 1.3268, "step": 50 }, { "epoch": 4.857142857142857, "grad_norm": 106.94924926757812, "learning_rate": 9.37031484257871e-09, "loss": 1.4176, "step": 51 }, { "epoch": 4.9523809523809526, "grad_norm": 91.2048110961914, "learning_rate": 9.557721139430284e-09, "loss": 1.4304, "step": 52 }, { "epoch": 4.9523809523809526, "eval_accuracy": 0.6882727613338329, "eval_f1": 0.7591198610306891, "eval_loss": 0.6873071789741516, "eval_precision": 0.6863874345549739, "eval_recall": 0.8490932642487047, "eval_roc_auc": 0.7219168105929764, "eval_runtime": 1.2348, "eval_samples_per_second": 2161.52, "eval_steps_per_second": 4.859, "step": 52 }, { "epoch": 5.0476190476190474, "grad_norm": 113.53949737548828, "learning_rate": 9.74512743628186e-09, "loss": 1.4397, "step": 53 }, { "epoch": 5.142857142857143, "grad_norm": 109.27808380126953, "learning_rate": 9.932533733133435e-09, "loss": 1.4053, "step": 54 }, { "epoch": 5.238095238095238, "grad_norm": 90.2195816040039, "learning_rate": 1.0119940029985007e-08, "loss": 1.3786, "step": 55 }, { "epoch": 5.333333333333333, "grad_norm": 91.2532730102539, "learning_rate": 1.0307346326836582e-08, "loss": 1.3868, "step": 56 }, { "epoch": 5.428571428571429, "grad_norm": 109.18313598632812, "learning_rate": 1.0494752623688156e-08, "loss": 1.3365, "step": 57 }, { "epoch": 5.523809523809524, "grad_norm": 74.70710754394531, "learning_rate": 1.0682158920539732e-08, "loss": 1.4311, "step": 58 }, { "epoch": 5.619047619047619, "grad_norm": 105.76243591308594, "learning_rate": 1.0869565217391305e-08, "loss": 1.3777, "step": 59 }, { "epoch": 5.714285714285714, "grad_norm": 87.60913848876953, "learning_rate": 1.1056971514242879e-08, "loss": 1.405, "step": 60 }, { "epoch": 5.809523809523809, "grad_norm": 92.69194030761719, "learning_rate": 1.1244377811094453e-08, "loss": 1.4627, "step": 61 }, { "epoch": 5.904761904761905, "grad_norm": 90.4942855834961, "learning_rate": 1.1431784107946028e-08, "loss": 1.3523, "step": 62 }, { "epoch": 6.0, "grad_norm": 87.5588150024414, "learning_rate": 1.16191904047976e-08, "loss": 1.3618, "step": 63 }, { "epoch": 6.0, "eval_accuracy": 0.6863994005245411, "eval_f1": 0.7566152951439372, "eval_loss": 0.6846460700035095, "eval_precision": 0.6865435356200528, "eval_recall": 0.8426165803108808, "eval_roc_auc": 0.7218773747841105, "eval_runtime": 1.222, "eval_samples_per_second": 2184.196, "eval_steps_per_second": 4.91, "step": 63 }, { "epoch": 6.095238095238095, "grad_norm": 96.4197998046875, "learning_rate": 1.1806596701649176e-08, "loss": 1.4462, "step": 64 }, { "epoch": 6.190476190476191, "grad_norm": 96.55722045898438, "learning_rate": 1.199400299850075e-08, "loss": 1.45, "step": 65 }, { "epoch": 6.285714285714286, "grad_norm": 84.94829559326172, "learning_rate": 1.2181409295352325e-08, "loss": 1.3282, "step": 66 }, { "epoch": 6.380952380952381, "grad_norm": 85.26909637451172, "learning_rate": 1.2368815592203898e-08, "loss": 1.3238, "step": 67 }, { "epoch": 6.476190476190476, "grad_norm": 101.26187896728516, "learning_rate": 1.2556221889055474e-08, "loss": 1.4357, "step": 68 }, { "epoch": 6.571428571428571, "grad_norm": 78.13963317871094, "learning_rate": 1.2743628185907046e-08, "loss": 1.316, "step": 69 }, { "epoch": 6.666666666666667, "grad_norm": 97.32000732421875, "learning_rate": 1.2931034482758622e-08, "loss": 1.4427, "step": 70 }, { "epoch": 6.761904761904762, "grad_norm": 93.46250915527344, "learning_rate": 1.3118440779610195e-08, "loss": 1.3841, "step": 71 }, { "epoch": 6.857142857142857, "grad_norm": 107.21956634521484, "learning_rate": 1.330584707646177e-08, "loss": 1.4041, "step": 72 }, { "epoch": 6.9523809523809526, "grad_norm": 81.90492248535156, "learning_rate": 1.3493253373313345e-08, "loss": 1.4, "step": 73 }, { "epoch": 6.9523809523809526, "eval_accuracy": 0.6856500562008243, "eval_f1": 0.755036496350365, "eval_loss": 0.6818766593933105, "eval_precision": 0.6874003189792663, "eval_recall": 0.8374352331606217, "eval_roc_auc": 0.7218707541738629, "eval_runtime": 1.2794, "eval_samples_per_second": 2086.087, "eval_steps_per_second": 4.69, "step": 73 }, { "epoch": 7.0476190476190474, "grad_norm": 87.02517700195312, "learning_rate": 1.3680659670164918e-08, "loss": 1.274, "step": 74 }, { "epoch": 7.142857142857143, "grad_norm": 89.8641586303711, "learning_rate": 1.3868065967016492e-08, "loss": 1.3081, "step": 75 }, { "epoch": 7.238095238095238, "grad_norm": 90.02095031738281, "learning_rate": 1.4055472263868065e-08, "loss": 1.4267, "step": 76 }, { "epoch": 7.333333333333333, "grad_norm": 86.56961059570312, "learning_rate": 1.4242878560719641e-08, "loss": 1.3516, "step": 77 }, { "epoch": 7.428571428571429, "grad_norm": 93.65435028076172, "learning_rate": 1.4430284857571215e-08, "loss": 1.393, "step": 78 }, { "epoch": 7.523809523809524, "grad_norm": 75.84454345703125, "learning_rate": 1.4617691154422788e-08, "loss": 1.3685, "step": 79 }, { "epoch": 7.619047619047619, "grad_norm": 92.14884948730469, "learning_rate": 1.4805097451274364e-08, "loss": 1.4322, "step": 80 }, { "epoch": 7.714285714285714, "grad_norm": 96.71854400634766, "learning_rate": 1.4992503748125937e-08, "loss": 1.4496, "step": 81 }, { "epoch": 7.809523809523809, "grad_norm": 76.76438903808594, "learning_rate": 1.517991004497751e-08, "loss": 1.3499, "step": 82 }, { "epoch": 7.904761904761905, "grad_norm": 85.24507904052734, "learning_rate": 1.536731634182909e-08, "loss": 1.3642, "step": 83 }, { "epoch": 8.0, "grad_norm": 94.62620544433594, "learning_rate": 1.555472263868066e-08, "loss": 1.37, "step": 84 }, { "epoch": 8.0, "eval_accuracy": 0.6860247283626827, "eval_f1": 0.7541079812206573, "eval_loss": 0.6784666776657104, "eval_precision": 0.6893776824034334, "eval_recall": 0.8322538860103627, "eval_roc_auc": 0.721849453080023, "eval_runtime": 1.1703, "eval_samples_per_second": 2280.546, "eval_steps_per_second": 5.127, "step": 84 }, { "epoch": 8.095238095238095, "grad_norm": 90.98119354248047, "learning_rate": 1.5742128935532234e-08, "loss": 1.3537, "step": 85 }, { "epoch": 8.19047619047619, "grad_norm": 83.05977630615234, "learning_rate": 1.592953523238381e-08, "loss": 1.3945, "step": 86 }, { "epoch": 8.285714285714286, "grad_norm": 77.79987335205078, "learning_rate": 1.6116941529235383e-08, "loss": 1.3033, "step": 87 }, { "epoch": 8.380952380952381, "grad_norm": 83.56439971923828, "learning_rate": 1.6304347826086957e-08, "loss": 1.4001, "step": 88 }, { "epoch": 8.476190476190476, "grad_norm": 65.35790252685547, "learning_rate": 1.649175412293853e-08, "loss": 1.3183, "step": 89 }, { "epoch": 8.571428571428571, "grad_norm": 72.92051696777344, "learning_rate": 1.6679160419790106e-08, "loss": 1.2747, "step": 90 }, { "epoch": 8.666666666666666, "grad_norm": 92.48654174804688, "learning_rate": 1.686656671664168e-08, "loss": 1.3942, "step": 91 }, { "epoch": 8.761904761904763, "grad_norm": 92.03863525390625, "learning_rate": 1.7053973013493254e-08, "loss": 1.3977, "step": 92 }, { "epoch": 8.857142857142858, "grad_norm": 78.154296875, "learning_rate": 1.7241379310344825e-08, "loss": 1.4182, "step": 93 }, { "epoch": 8.952380952380953, "grad_norm": 77.85521697998047, "learning_rate": 1.7428785607196403e-08, "loss": 1.339, "step": 94 }, { "epoch": 8.952380952380953, "eval_accuracy": 0.6871487448482577, "eval_f1": 0.7541948778333823, "eval_loss": 0.675155758857727, "eval_precision": 0.6913113869400972, "eval_recall": 0.8296632124352331, "eval_roc_auc": 0.7217829591249281, "eval_runtime": 1.1052, "eval_samples_per_second": 2414.859, "eval_steps_per_second": 5.429, "step": 94 }, { "epoch": 9.047619047619047, "grad_norm": 75.20770263671875, "learning_rate": 1.7616191904047977e-08, "loss": 1.3267, "step": 95 }, { "epoch": 9.142857142857142, "grad_norm": 74.02423858642578, "learning_rate": 1.7803598200899552e-08, "loss": 1.4283, "step": 96 }, { "epoch": 9.238095238095237, "grad_norm": 71.2074966430664, "learning_rate": 1.7991004497751126e-08, "loss": 1.3037, "step": 97 }, { "epoch": 9.333333333333334, "grad_norm": 70.90864562988281, "learning_rate": 1.8178410794602697e-08, "loss": 1.3745, "step": 98 }, { "epoch": 9.428571428571429, "grad_norm": 93.27525329589844, "learning_rate": 1.8365817091454275e-08, "loss": 1.4259, "step": 99 }, { "epoch": 9.523809523809524, "grad_norm": 80.44207763671875, "learning_rate": 1.855322338830585e-08, "loss": 1.3235, "step": 100 }, { "epoch": 9.619047619047619, "grad_norm": 69.08979034423828, "learning_rate": 1.874062968515742e-08, "loss": 1.3567, "step": 101 }, { "epoch": 9.714285714285714, "grad_norm": 89.39695739746094, "learning_rate": 1.8928035982008998e-08, "loss": 1.4014, "step": 102 }, { "epoch": 9.80952380952381, "grad_norm": 91.82769012451172, "learning_rate": 1.911544227886057e-08, "loss": 1.4411, "step": 103 }, { "epoch": 9.904761904761905, "grad_norm": 78.9308090209961, "learning_rate": 1.9302848575712146e-08, "loss": 1.3307, "step": 104 }, { "epoch": 10.0, "grad_norm": 65.37604522705078, "learning_rate": 1.949025487256372e-08, "loss": 1.3799, "step": 105 }, { "epoch": 10.0, "eval_accuracy": 0.6875234170101161, "eval_f1": 0.7534003548196333, "eval_loss": 0.6712872982025146, "eval_precision": 0.6931447225244831, "eval_recall": 0.8251295336787565, "eval_roc_auc": 0.7217037996545768, "eval_runtime": 1.1823, "eval_samples_per_second": 2257.479, "eval_steps_per_second": 5.075, "step": 105 }, { "epoch": 10.095238095238095, "grad_norm": 77.4139175415039, "learning_rate": 1.9677661169415292e-08, "loss": 1.3822, "step": 106 }, { "epoch": 10.19047619047619, "grad_norm": 75.99652862548828, "learning_rate": 1.986506746626687e-08, "loss": 1.3308, "step": 107 }, { "epoch": 10.285714285714286, "grad_norm": 73.82905578613281, "learning_rate": 2.005247376311844e-08, "loss": 1.3461, "step": 108 }, { "epoch": 10.380952380952381, "grad_norm": 69.6850814819336, "learning_rate": 2.0239880059970015e-08, "loss": 1.2927, "step": 109 }, { "epoch": 10.476190476190476, "grad_norm": 83.00745391845703, "learning_rate": 2.0427286356821592e-08, "loss": 1.395, "step": 110 }, { "epoch": 10.571428571428571, "grad_norm": 76.98029327392578, "learning_rate": 2.0614692653673163e-08, "loss": 1.3835, "step": 111 }, { "epoch": 10.666666666666666, "grad_norm": 65.298828125, "learning_rate": 2.0802098950524738e-08, "loss": 1.3835, "step": 112 }, { "epoch": 10.761904761904763, "grad_norm": 65.25972747802734, "learning_rate": 2.0989505247376312e-08, "loss": 1.3321, "step": 113 }, { "epoch": 10.857142857142858, "grad_norm": 71.95514678955078, "learning_rate": 2.1176911544227886e-08, "loss": 1.3456, "step": 114 }, { "epoch": 10.952380952380953, "grad_norm": 71.81147003173828, "learning_rate": 2.1364317841079464e-08, "loss": 1.4254, "step": 115 }, { "epoch": 10.952380952380953, "eval_accuracy": 0.6878980891719745, "eval_f1": 0.7528923168199347, "eval_loss": 0.6676313281059265, "eval_precision": 0.6945812807881774, "eval_recall": 0.8218911917098446, "eval_roc_auc": 0.721703223949338, "eval_runtime": 1.459, "eval_samples_per_second": 1829.393, "eval_steps_per_second": 4.113, "step": 115 }, { "epoch": 11.047619047619047, "grad_norm": 56.503204345703125, "learning_rate": 2.1551724137931035e-08, "loss": 1.3297, "step": 116 }, { "epoch": 11.142857142857142, "grad_norm": 58.1290397644043, "learning_rate": 2.173913043478261e-08, "loss": 1.3334, "step": 117 }, { "epoch": 11.238095238095237, "grad_norm": 74.19014739990234, "learning_rate": 2.1926536731634184e-08, "loss": 1.3858, "step": 118 }, { "epoch": 11.333333333333334, "grad_norm": 74.8871841430664, "learning_rate": 2.2113943028485758e-08, "loss": 1.3663, "step": 119 }, { "epoch": 11.428571428571429, "grad_norm": 78.6004867553711, "learning_rate": 2.2301349325337332e-08, "loss": 1.3541, "step": 120 }, { "epoch": 11.523809523809524, "grad_norm": 61.97282791137695, "learning_rate": 2.2488755622188907e-08, "loss": 1.3786, "step": 121 }, { "epoch": 11.619047619047619, "grad_norm": 66.15354919433594, "learning_rate": 2.267616191904048e-08, "loss": 1.3387, "step": 122 }, { "epoch": 11.714285714285714, "grad_norm": 66.2543716430664, "learning_rate": 2.2863568215892055e-08, "loss": 1.3319, "step": 123 }, { "epoch": 11.80952380952381, "grad_norm": 68.32968139648438, "learning_rate": 2.305097451274363e-08, "loss": 1.3416, "step": 124 }, { "epoch": 11.904761904761905, "grad_norm": 50.14171600341797, "learning_rate": 2.32383808095952e-08, "loss": 1.2865, "step": 125 }, { "epoch": 12.0, "grad_norm": 60.39354705810547, "learning_rate": 2.342578710644678e-08, "loss": 1.3506, "step": 126 }, { "epoch": 12.0, "eval_accuracy": 0.6912701386286999, "eval_f1": 0.75344105326152, "eval_loss": 0.6636635065078735, "eval_precision": 0.7002224694104561, "eval_recall": 0.8154145077720207, "eval_roc_auc": 0.7216375935521013, "eval_runtime": 1.251, "eval_samples_per_second": 2133.499, "eval_steps_per_second": 4.796, "step": 126 }, { "epoch": 12.095238095238095, "grad_norm": 68.8990249633789, "learning_rate": 2.3613193403298353e-08, "loss": 1.3287, "step": 127 }, { "epoch": 12.19047619047619, "grad_norm": 55.19594192504883, "learning_rate": 2.3800599700149927e-08, "loss": 1.3361, "step": 128 }, { "epoch": 12.285714285714286, "grad_norm": 61.52813720703125, "learning_rate": 2.39880059970015e-08, "loss": 1.3434, "step": 129 }, { "epoch": 12.380952380952381, "grad_norm": 61.1476936340332, "learning_rate": 2.4175412293853072e-08, "loss": 1.2941, "step": 130 }, { "epoch": 12.476190476190476, "grad_norm": 45.287376403808594, "learning_rate": 2.436281859070465e-08, "loss": 1.3373, "step": 131 }, { "epoch": 12.571428571428571, "grad_norm": 67.01655578613281, "learning_rate": 2.4550224887556224e-08, "loss": 1.4024, "step": 132 }, { "epoch": 12.666666666666666, "grad_norm": 51.15004348754883, "learning_rate": 2.4737631184407795e-08, "loss": 1.386, "step": 133 }, { "epoch": 12.761904761904763, "grad_norm": 54.940879821777344, "learning_rate": 2.492503748125937e-08, "loss": 1.3333, "step": 134 }, { "epoch": 12.857142857142858, "grad_norm": 50.37574768066406, "learning_rate": 2.5112443778110947e-08, "loss": 1.3527, "step": 135 }, { "epoch": 12.952380952380953, "grad_norm": 46.545135498046875, "learning_rate": 2.529985007496252e-08, "loss": 1.2964, "step": 136 }, { "epoch": 12.952380952380953, "eval_accuracy": 0.689396777819408, "eval_f1": 0.7503763926528154, "eval_loss": 0.6601486802101135, "eval_precision": 0.7011817670230726, "eval_recall": 0.8069948186528497, "eval_roc_auc": 0.7214781232009211, "eval_runtime": 1.2976, "eval_samples_per_second": 2056.814, "eval_steps_per_second": 4.624, "step": 136 }, { "epoch": 13.047619047619047, "grad_norm": 63.312862396240234, "learning_rate": 2.5487256371814093e-08, "loss": 1.3792, "step": 137 }, { "epoch": 13.142857142857142, "grad_norm": 77.56578826904297, "learning_rate": 2.567466266866567e-08, "loss": 1.4125, "step": 138 }, { "epoch": 13.238095238095237, "grad_norm": 33.94204330444336, "learning_rate": 2.5862068965517245e-08, "loss": 1.3047, "step": 139 }, { "epoch": 13.333333333333334, "grad_norm": 58.11079025268555, "learning_rate": 2.6049475262368816e-08, "loss": 1.24, "step": 140 }, { "epoch": 13.428571428571429, "grad_norm": 47.870269775390625, "learning_rate": 2.623688155922039e-08, "loss": 1.3368, "step": 141 }, { "epoch": 13.523809523809524, "grad_norm": 35.01408767700195, "learning_rate": 2.6424287856071968e-08, "loss": 1.3086, "step": 142 }, { "epoch": 13.619047619047619, "grad_norm": 54.038848876953125, "learning_rate": 2.661169415292354e-08, "loss": 1.3006, "step": 143 }, { "epoch": 13.714285714285714, "grad_norm": 49.3018684387207, "learning_rate": 2.6799100449775113e-08, "loss": 1.316, "step": 144 }, { "epoch": 13.80952380952381, "grad_norm": 51.7872428894043, "learning_rate": 2.698650674662669e-08, "loss": 1.3577, "step": 145 }, { "epoch": 13.904761904761905, "grad_norm": 45.14494705200195, "learning_rate": 2.7173913043478262e-08, "loss": 1.3951, "step": 146 }, { "epoch": 14.0, "grad_norm": 39.679683685302734, "learning_rate": 2.7361319340329836e-08, "loss": 1.318, "step": 147 }, { "epoch": 14.0, "eval_accuracy": 0.6908954664668415, "eval_f1": 0.7493163172288059, "eval_loss": 0.656489908695221, "eval_precision": 0.7057813394390383, "eval_recall": 0.7985751295336787, "eval_roc_auc": 0.721395221646517, "eval_runtime": 1.1331, "eval_samples_per_second": 2355.495, "eval_steps_per_second": 5.295, "step": 147 }, { "epoch": 14.095238095238095, "grad_norm": 49.89531707763672, "learning_rate": 2.7548725637181414e-08, "loss": 1.3619, "step": 148 }, { "epoch": 14.19047619047619, "grad_norm": 41.59170150756836, "learning_rate": 2.7736131934032985e-08, "loss": 1.2874, "step": 149 }, { "epoch": 14.285714285714286, "grad_norm": 48.24529266357422, "learning_rate": 2.792353823088456e-08, "loss": 1.3604, "step": 150 }, { "epoch": 14.380952380952381, "grad_norm": 38.974098205566406, "learning_rate": 2.811094452773613e-08, "loss": 1.3147, "step": 151 }, { "epoch": 14.476190476190476, "grad_norm": 44.127227783203125, "learning_rate": 2.8298350824587708e-08, "loss": 1.3018, "step": 152 }, { "epoch": 14.571428571428571, "grad_norm": 27.71172332763672, "learning_rate": 2.8485757121439282e-08, "loss": 1.3879, "step": 153 }, { "epoch": 14.666666666666666, "grad_norm": 40.47721862792969, "learning_rate": 2.8673163418290853e-08, "loss": 1.3635, "step": 154 }, { "epoch": 14.761904761904763, "grad_norm": 33.5676155090332, "learning_rate": 2.886056971514243e-08, "loss": 1.2736, "step": 155 }, { "epoch": 14.857142857142858, "grad_norm": 39.84068298339844, "learning_rate": 2.9047976011994005e-08, "loss": 1.3315, "step": 156 }, { "epoch": 14.952380952380953, "grad_norm": 42.16352844238281, "learning_rate": 2.9235382308845576e-08, "loss": 1.3555, "step": 157 }, { "epoch": 14.952380952380953, "eval_accuracy": 0.692394155114275, "eval_f1": 0.7485451761102604, "eval_loss": 0.6533787846565247, "eval_precision": 0.7100522951772226, "eval_recall": 0.7914507772020726, "eval_roc_auc": 0.7213140472078295, "eval_runtime": 1.1034, "eval_samples_per_second": 2418.914, "eval_steps_per_second": 5.438, "step": 157 }, { "epoch": 15.047619047619047, "grad_norm": 39.9268798828125, "learning_rate": 2.942278860569715e-08, "loss": 1.3337, "step": 158 }, { "epoch": 15.142857142857142, "grad_norm": 55.53904724121094, "learning_rate": 2.9610194902548728e-08, "loss": 1.3204, "step": 159 }, { "epoch": 15.238095238095237, "grad_norm": 25.50678825378418, "learning_rate": 2.9797601199400302e-08, "loss": 1.2897, "step": 160 }, { "epoch": 15.333333333333334, "grad_norm": 35.85453414916992, "learning_rate": 2.9985007496251873e-08, "loss": 1.3681, "step": 161 }, { "epoch": 15.428571428571429, "grad_norm": 37.41948699951172, "learning_rate": 3.0172413793103454e-08, "loss": 1.3464, "step": 162 }, { "epoch": 15.523809523809524, "grad_norm": 37.18633270263672, "learning_rate": 3.035982008995502e-08, "loss": 1.3106, "step": 163 }, { "epoch": 15.619047619047619, "grad_norm": 21.66717529296875, "learning_rate": 3.0547226386806596e-08, "loss": 1.3386, "step": 164 }, { "epoch": 15.714285714285714, "grad_norm": 24.518701553344727, "learning_rate": 3.073463268365818e-08, "loss": 1.3262, "step": 165 }, { "epoch": 15.80952380952381, "grad_norm": 23.171676635742188, "learning_rate": 3.0922038980509745e-08, "loss": 1.2522, "step": 166 }, { "epoch": 15.904761904761905, "grad_norm": 22.128908157348633, "learning_rate": 3.110944527736132e-08, "loss": 1.3117, "step": 167 }, { "epoch": 16.0, "grad_norm": 30.98495864868164, "learning_rate": 3.1296851574212894e-08, "loss": 1.2886, "step": 168 }, { "epoch": 16.0, "eval_accuracy": 0.6875234170101161, "eval_f1": 0.7413151364764268, "eval_loss": 0.6505224704742432, "eval_precision": 0.7113095238095238, "eval_recall": 0.7739637305699482, "eval_roc_auc": 0.7212000575705239, "eval_runtime": 1.2094, "eval_samples_per_second": 2206.913, "eval_steps_per_second": 4.961, "step": 168 }, { "epoch": 16.095238095238095, "grad_norm": 21.845766067504883, "learning_rate": 3.148425787106447e-08, "loss": 1.2719, "step": 169 }, { "epoch": 16.19047619047619, "grad_norm": 14.632295608520508, "learning_rate": 3.167166416791604e-08, "loss": 1.3311, "step": 170 }, { "epoch": 16.285714285714285, "grad_norm": 16.54092788696289, "learning_rate": 3.185907046476762e-08, "loss": 1.3079, "step": 171 }, { "epoch": 16.38095238095238, "grad_norm": 28.98506736755371, "learning_rate": 3.204647676161919e-08, "loss": 1.2828, "step": 172 }, { "epoch": 16.476190476190474, "grad_norm": 22.530902862548828, "learning_rate": 3.2233883058470765e-08, "loss": 1.3206, "step": 173 }, { "epoch": 16.571428571428573, "grad_norm": 39.143653869628906, "learning_rate": 3.242128935532234e-08, "loss": 1.2731, "step": 174 }, { "epoch": 16.666666666666668, "grad_norm": 42.83742141723633, "learning_rate": 3.2608695652173914e-08, "loss": 1.3593, "step": 175 }, { "epoch": 16.761904761904763, "grad_norm": 24.04018211364746, "learning_rate": 3.279610194902549e-08, "loss": 1.3516, "step": 176 }, { "epoch": 16.857142857142858, "grad_norm": 27.83500862121582, "learning_rate": 3.298350824587706e-08, "loss": 1.3182, "step": 177 }, { "epoch": 16.952380952380953, "grad_norm": 16.24953842163086, "learning_rate": 3.317091454272864e-08, "loss": 1.2646, "step": 178 }, { "epoch": 16.952380952380953, "eval_accuracy": 0.6860247283626827, "eval_f1": 0.7377972465581978, "eval_loss": 0.6483314633369446, "eval_precision": 0.7136803874092009, "eval_recall": 0.7636010362694301, "eval_roc_auc": 0.7211528497409326, "eval_runtime": 1.2241, "eval_samples_per_second": 2180.389, "eval_steps_per_second": 4.902, "step": 178 }, { "epoch": 17.047619047619047, "grad_norm": 18.79022789001465, "learning_rate": 3.335832083958021e-08, "loss": 1.2849, "step": 179 }, { "epoch": 17.142857142857142, "grad_norm": 18.4663143157959, "learning_rate": 3.3545727136431786e-08, "loss": 1.3546, "step": 180 }, { "epoch": 17.238095238095237, "grad_norm": 22.815502166748047, "learning_rate": 3.373313343328336e-08, "loss": 1.3085, "step": 181 }, { "epoch": 17.333333333333332, "grad_norm": 23.185617446899414, "learning_rate": 3.3920539730134934e-08, "loss": 1.3128, "step": 182 }, { "epoch": 17.428571428571427, "grad_norm": 22.799545288085938, "learning_rate": 3.410794602698651e-08, "loss": 1.3307, "step": 183 }, { "epoch": 17.523809523809526, "grad_norm": 13.100482940673828, "learning_rate": 3.429535232383808e-08, "loss": 1.2815, "step": 184 }, { "epoch": 17.61904761904762, "grad_norm": 14.181185722351074, "learning_rate": 3.448275862068965e-08, "loss": 1.3237, "step": 185 }, { "epoch": 17.714285714285715, "grad_norm": 25.63428497314453, "learning_rate": 3.467016491754123e-08, "loss": 1.375, "step": 186 }, { "epoch": 17.80952380952381, "grad_norm": 14.835464477539062, "learning_rate": 3.4857571214392806e-08, "loss": 1.3288, "step": 187 }, { "epoch": 17.904761904761905, "grad_norm": 20.395618438720703, "learning_rate": 3.504497751124438e-08, "loss": 1.3057, "step": 188 }, { "epoch": 18.0, "grad_norm": 11.902831077575684, "learning_rate": 3.5232383808095955e-08, "loss": 1.2873, "step": 189 }, { "epoch": 18.0, "eval_accuracy": 0.6849007118771075, "eval_f1": 0.7347839798170924, "eval_loss": 0.6463755965232849, "eval_precision": 0.7160417947141979, "eval_recall": 0.7545336787564767, "eval_roc_auc": 0.7211459412780656, "eval_runtime": 1.2122, "eval_samples_per_second": 2201.763, "eval_steps_per_second": 4.95, "step": 189 }, { "epoch": 18.095238095238095, "grad_norm": 19.942028045654297, "learning_rate": 3.541979010494753e-08, "loss": 1.3181, "step": 190 }, { "epoch": 18.19047619047619, "grad_norm": 11.15404224395752, "learning_rate": 3.5607196401799103e-08, "loss": 1.187, "step": 191 }, { "epoch": 18.285714285714285, "grad_norm": 13.81049919128418, "learning_rate": 3.579460269865068e-08, "loss": 1.3189, "step": 192 }, { "epoch": 18.38095238095238, "grad_norm": 12.814773559570312, "learning_rate": 3.598200899550225e-08, "loss": 1.3253, "step": 193 }, { "epoch": 18.476190476190474, "grad_norm": 17.163558959960938, "learning_rate": 3.6169415292353826e-08, "loss": 1.3064, "step": 194 }, { "epoch": 18.571428571428573, "grad_norm": 16.707319259643555, "learning_rate": 3.6356821589205394e-08, "loss": 1.3173, "step": 195 }, { "epoch": 18.666666666666668, "grad_norm": 16.393266677856445, "learning_rate": 3.6544227886056975e-08, "loss": 1.3359, "step": 196 }, { "epoch": 18.761904761904763, "grad_norm": 11.493505477905273, "learning_rate": 3.673163418290855e-08, "loss": 1.3048, "step": 197 }, { "epoch": 18.857142857142858, "grad_norm": 11.934215545654297, "learning_rate": 3.691904047976012e-08, "loss": 1.2677, "step": 198 }, { "epoch": 18.952380952380953, "grad_norm": 15.357635498046875, "learning_rate": 3.71064467766117e-08, "loss": 1.2959, "step": 199 }, { "epoch": 18.952380952380953, "eval_accuracy": 0.6830273510678156, "eval_f1": 0.7321089297023432, "eval_loss": 0.6447131037712097, "eval_precision": 0.7162329615861215, "eval_recall": 0.7487046632124352, "eval_roc_auc": 0.7214608520437537, "eval_runtime": 1.1709, "eval_samples_per_second": 2279.454, "eval_steps_per_second": 5.124, "step": 199 }, { "epoch": 19.047619047619047, "grad_norm": 14.120625495910645, "learning_rate": 3.729385307346327e-08, "loss": 1.3118, "step": 200 }, { "epoch": 19.142857142857142, "grad_norm": 17.531734466552734, "learning_rate": 3.748125937031484e-08, "loss": 1.3112, "step": 201 }, { "epoch": 19.238095238095237, "grad_norm": 16.632896423339844, "learning_rate": 3.766866566716642e-08, "loss": 1.3308, "step": 202 }, { "epoch": 19.333333333333332, "grad_norm": 12.964826583862305, "learning_rate": 3.7856071964017995e-08, "loss": 1.2656, "step": 203 }, { "epoch": 19.428571428571427, "grad_norm": 12.439407348632812, "learning_rate": 3.804347826086956e-08, "loss": 1.2727, "step": 204 }, { "epoch": 19.523809523809526, "grad_norm": 25.103260040283203, "learning_rate": 3.823088455772114e-08, "loss": 1.3474, "step": 205 }, { "epoch": 19.61904761904762, "grad_norm": 18.016218185424805, "learning_rate": 3.841829085457272e-08, "loss": 1.3193, "step": 206 }, { "epoch": 19.714285714285715, "grad_norm": 13.59919261932373, "learning_rate": 3.860569715142429e-08, "loss": 1.2927, "step": 207 }, { "epoch": 19.80952380952381, "grad_norm": 15.137652397155762, "learning_rate": 3.879310344827586e-08, "loss": 1.3174, "step": 208 }, { "epoch": 19.904761904761905, "grad_norm": 21.939130783081055, "learning_rate": 3.898050974512744e-08, "loss": 1.2291, "step": 209 }, { "epoch": 20.0, "grad_norm": 14.990110397338867, "learning_rate": 3.9167916041979016e-08, "loss": 1.3391, "step": 210 }, { "epoch": 20.0, "eval_accuracy": 0.6830273510678156, "eval_f1": 0.7315989847715736, "eval_loss": 0.6429556608200073, "eval_precision": 0.7170398009950248, "eval_recall": 0.7467616580310881, "eval_roc_auc": 0.7219881980426022, "eval_runtime": 1.1317, "eval_samples_per_second": 2358.443, "eval_steps_per_second": 5.302, "step": 210 }, { "epoch": 20.095238095238095, "grad_norm": 13.944443702697754, "learning_rate": 3.9355322338830584e-08, "loss": 1.348, "step": 211 }, { "epoch": 20.19047619047619, "grad_norm": 13.567949295043945, "learning_rate": 3.954272863568216e-08, "loss": 1.3795, "step": 212 }, { "epoch": 20.285714285714285, "grad_norm": 14.929570198059082, "learning_rate": 3.973013493253374e-08, "loss": 1.2863, "step": 213 }, { "epoch": 20.38095238095238, "grad_norm": 11.928326606750488, "learning_rate": 3.9917541229385307e-08, "loss": 1.2453, "step": 214 }, { "epoch": 20.476190476190474, "grad_norm": 16.773330688476562, "learning_rate": 4.010494752623688e-08, "loss": 1.3193, "step": 215 }, { "epoch": 20.571428571428573, "grad_norm": 16.72808265686035, "learning_rate": 4.029235382308846e-08, "loss": 1.3068, "step": 216 }, { "epoch": 20.666666666666668, "grad_norm": 13.820122718811035, "learning_rate": 4.047976011994003e-08, "loss": 1.2867, "step": 217 }, { "epoch": 20.761904761904763, "grad_norm": 11.65041732788086, "learning_rate": 4.0667166416791604e-08, "loss": 1.3137, "step": 218 }, { "epoch": 20.857142857142858, "grad_norm": 11.561596870422363, "learning_rate": 4.0854572713643185e-08, "loss": 1.2622, "step": 219 }, { "epoch": 20.952380952380953, "grad_norm": 11.974090576171875, "learning_rate": 4.104197901049475e-08, "loss": 1.2637, "step": 220 }, { "epoch": 20.952380952380953, "eval_accuracy": 0.6837766953915324, "eval_f1": 0.7325728770595691, "eval_loss": 0.6413893103599548, "eval_precision": 0.71712158808933, "eval_recall": 0.7487046632124352, "eval_roc_auc": 0.7224211283822682, "eval_runtime": 1.1887, "eval_samples_per_second": 2245.39, "eval_steps_per_second": 5.048, "step": 220 }, { "epoch": 21.047619047619047, "grad_norm": 15.292080879211426, "learning_rate": 4.122938530734633e-08, "loss": 1.3156, "step": 221 }, { "epoch": 21.142857142857142, "grad_norm": 15.198674201965332, "learning_rate": 4.14167916041979e-08, "loss": 1.329, "step": 222 }, { "epoch": 21.238095238095237, "grad_norm": 18.44618034362793, "learning_rate": 4.1604197901049476e-08, "loss": 1.254, "step": 223 }, { "epoch": 21.333333333333332, "grad_norm": 25.041452407836914, "learning_rate": 4.179160419790105e-08, "loss": 1.2522, "step": 224 }, { "epoch": 21.428571428571427, "grad_norm": 16.231889724731445, "learning_rate": 4.1979010494752624e-08, "loss": 1.2916, "step": 225 }, { "epoch": 21.523809523809526, "grad_norm": 13.011600494384766, "learning_rate": 4.2166416791604205e-08, "loss": 1.3383, "step": 226 }, { "epoch": 21.61904761904762, "grad_norm": 13.868030548095703, "learning_rate": 4.235382308845577e-08, "loss": 1.3488, "step": 227 }, { "epoch": 21.714285714285715, "grad_norm": 21.20703887939453, "learning_rate": 4.254122938530735e-08, "loss": 1.2101, "step": 228 }, { "epoch": 21.80952380952381, "grad_norm": 15.180831909179688, "learning_rate": 4.272863568215893e-08, "loss": 1.3172, "step": 229 }, { "epoch": 21.904761904761905, "grad_norm": 14.952122688293457, "learning_rate": 4.2916041979010496e-08, "loss": 1.3397, "step": 230 }, { "epoch": 22.0, "grad_norm": 15.182557106018066, "learning_rate": 4.310344827586207e-08, "loss": 1.3171, "step": 231 }, { "epoch": 22.0, "eval_accuracy": 0.6830273510678156, "eval_f1": 0.7319391634980988, "eval_loss": 0.6397904753684998, "eval_precision": 0.716501240694789, "eval_recall": 0.7480569948186528, "eval_roc_auc": 0.722874496257916, "eval_runtime": 1.4672, "eval_samples_per_second": 1819.081, "eval_steps_per_second": 4.089, "step": 231 }, { "epoch": 22.095238095238095, "grad_norm": 19.74833106994629, "learning_rate": 4.3290854572713645e-08, "loss": 1.3591, "step": 232 }, { "epoch": 22.19047619047619, "grad_norm": 12.066325187683105, "learning_rate": 4.347826086956522e-08, "loss": 1.3181, "step": 233 }, { "epoch": 22.285714285714285, "grad_norm": 10.577549934387207, "learning_rate": 4.366566716641679e-08, "loss": 1.2408, "step": 234 }, { "epoch": 22.38095238095238, "grad_norm": 17.58034896850586, "learning_rate": 4.385307346326837e-08, "loss": 1.3027, "step": 235 }, { "epoch": 22.476190476190474, "grad_norm": 12.278691291809082, "learning_rate": 4.404047976011994e-08, "loss": 1.2905, "step": 236 }, { "epoch": 22.571428571428573, "grad_norm": 13.021527290344238, "learning_rate": 4.4227886056971516e-08, "loss": 1.2831, "step": 237 }, { "epoch": 22.666666666666668, "grad_norm": 13.841450691223145, "learning_rate": 4.441529235382309e-08, "loss": 1.3171, "step": 238 }, { "epoch": 22.761904761904763, "grad_norm": 11.652981758117676, "learning_rate": 4.4602698650674665e-08, "loss": 1.3003, "step": 239 }, { "epoch": 22.857142857142858, "grad_norm": 15.65576171875, "learning_rate": 4.479010494752624e-08, "loss": 1.3055, "step": 240 }, { "epoch": 22.952380952380953, "grad_norm": 11.00838565826416, "learning_rate": 4.4977511244377814e-08, "loss": 1.2486, "step": 241 }, { "epoch": 22.952380952380953, "eval_accuracy": 0.6826526789059573, "eval_f1": 0.7313669521091024, "eval_loss": 0.6382837295532227, "eval_precision": 0.7165941578620261, "eval_recall": 0.7467616580310881, "eval_roc_auc": 0.7232576280944156, "eval_runtime": 1.2133, "eval_samples_per_second": 2199.758, "eval_steps_per_second": 4.945, "step": 241 }, { "epoch": 23.047619047619047, "grad_norm": 13.720239639282227, "learning_rate": 4.516491754122939e-08, "loss": 1.2912, "step": 242 }, { "epoch": 23.142857142857142, "grad_norm": 14.70983600616455, "learning_rate": 4.535232383808096e-08, "loss": 1.2933, "step": 243 }, { "epoch": 23.238095238095237, "grad_norm": 14.259865760803223, "learning_rate": 4.5539730134932537e-08, "loss": 1.3368, "step": 244 }, { "epoch": 23.333333333333332, "grad_norm": 11.778265953063965, "learning_rate": 4.572713643178411e-08, "loss": 1.2697, "step": 245 }, { "epoch": 23.428571428571427, "grad_norm": 12.168619155883789, "learning_rate": 4.5914542728635685e-08, "loss": 1.3096, "step": 246 }, { "epoch": 23.523809523809526, "grad_norm": 15.17798137664795, "learning_rate": 4.610194902548726e-08, "loss": 1.2814, "step": 247 }, { "epoch": 23.61904761904762, "grad_norm": 13.276535034179688, "learning_rate": 4.6289355322338834e-08, "loss": 1.2687, "step": 248 }, { "epoch": 23.714285714285715, "grad_norm": 16.98518180847168, "learning_rate": 4.64767616191904e-08, "loss": 1.2899, "step": 249 }, { "epoch": 23.80952380952381, "grad_norm": 11.48328971862793, "learning_rate": 4.666416791604198e-08, "loss": 1.2549, "step": 250 }, { "epoch": 23.904761904761905, "grad_norm": 11.340226173400879, "learning_rate": 4.685157421289356e-08, "loss": 1.2531, "step": 251 }, { "epoch": 24.0, "grad_norm": 10.424729347229004, "learning_rate": 4.703898050974513e-08, "loss": 1.3059, "step": 252 }, { "epoch": 24.0, "eval_accuracy": 0.683402023229674, "eval_f1": 0.7323408299018055, "eval_loss": 0.63660728931427, "eval_precision": 0.7166769993800371, "eval_recall": 0.7487046632124352, "eval_roc_auc": 0.7237892918825561, "eval_runtime": 1.3641, "eval_samples_per_second": 1956.631, "eval_steps_per_second": 4.399, "step": 252 }, { "epoch": 24.095238095238095, "grad_norm": 13.842662811279297, "learning_rate": 4.7226386806596706e-08, "loss": 1.2593, "step": 253 }, { "epoch": 24.19047619047619, "grad_norm": 10.153083801269531, "learning_rate": 4.741379310344828e-08, "loss": 1.2947, "step": 254 }, { "epoch": 24.285714285714285, "grad_norm": 13.279069900512695, "learning_rate": 4.7601199400299854e-08, "loss": 1.3419, "step": 255 }, { "epoch": 24.38095238095238, "grad_norm": 10.15274715423584, "learning_rate": 4.778860569715143e-08, "loss": 1.2321, "step": 256 }, { "epoch": 24.476190476190474, "grad_norm": 11.35965347290039, "learning_rate": 4.7976011994003e-08, "loss": 1.3073, "step": 257 }, { "epoch": 24.571428571428573, "grad_norm": 14.660529136657715, "learning_rate": 4.816341829085458e-08, "loss": 1.332, "step": 258 }, { "epoch": 24.666666666666668, "grad_norm": 14.38634204864502, "learning_rate": 4.8350824587706145e-08, "loss": 1.3127, "step": 259 }, { "epoch": 24.761904761904763, "grad_norm": 11.082036972045898, "learning_rate": 4.8538230884557726e-08, "loss": 1.2769, "step": 260 }, { "epoch": 24.857142857142858, "grad_norm": 13.869253158569336, "learning_rate": 4.87256371814093e-08, "loss": 1.3329, "step": 261 }, { "epoch": 24.952380952380953, "grad_norm": 10.266282081604004, "learning_rate": 4.8913043478260875e-08, "loss": 1.2312, "step": 262 }, { "epoch": 24.952380952380953, "eval_accuracy": 0.6845260397152492, "eval_f1": 0.7333755541481951, "eval_loss": 0.6350855827331543, "eval_precision": 0.7174721189591078, "eval_recall": 0.75, "eval_roc_auc": 0.7242173287276914, "eval_runtime": 1.2504, "eval_samples_per_second": 2134.503, "eval_steps_per_second": 4.798, "step": 262 }, { "epoch": 25.047619047619047, "grad_norm": 13.72497844696045, "learning_rate": 4.910044977511245e-08, "loss": 1.3405, "step": 263 }, { "epoch": 25.142857142857142, "grad_norm": 12.01894760131836, "learning_rate": 4.928785607196402e-08, "loss": 1.3145, "step": 264 }, { "epoch": 25.238095238095237, "grad_norm": 10.575872421264648, "learning_rate": 4.947526236881559e-08, "loss": 1.2693, "step": 265 }, { "epoch": 25.333333333333332, "grad_norm": 18.743549346923828, "learning_rate": 4.9662668665667165e-08, "loss": 1.2669, "step": 266 }, { "epoch": 25.428571428571427, "grad_norm": 8.173626899719238, "learning_rate": 4.985007496251874e-08, "loss": 1.2132, "step": 267 }, { "epoch": 25.523809523809526, "grad_norm": 10.303065299987793, "learning_rate": 5.003748125937032e-08, "loss": 1.2327, "step": 268 }, { "epoch": 25.61904761904762, "grad_norm": 11.798093795776367, "learning_rate": 5.0224887556221895e-08, "loss": 1.2299, "step": 269 }, { "epoch": 25.714285714285715, "grad_norm": 10.679269790649414, "learning_rate": 5.041229385307347e-08, "loss": 1.3056, "step": 270 }, { "epoch": 25.80952380952381, "grad_norm": 11.035752296447754, "learning_rate": 5.059970014992504e-08, "loss": 1.2988, "step": 271 }, { "epoch": 25.904761904761905, "grad_norm": 9.624054908752441, "learning_rate": 5.078710644677661e-08, "loss": 1.2798, "step": 272 }, { "epoch": 26.0, "grad_norm": 11.053035736083984, "learning_rate": 5.0974512743628186e-08, "loss": 1.2709, "step": 273 }, { "epoch": 26.0, "eval_accuracy": 0.6860247283626827, "eval_f1": 0.7346421785940469, "eval_loss": 0.6334132552146912, "eval_precision": 0.7187112763320942, "eval_recall": 0.7512953367875648, "eval_roc_auc": 0.7246937248128957, "eval_runtime": 1.1191, "eval_samples_per_second": 2384.911, "eval_steps_per_second": 5.361, "step": 273 }, { "epoch": 26.095238095238095, "grad_norm": 11.396424293518066, "learning_rate": 5.116191904047976e-08, "loss": 1.268, "step": 274 }, { "epoch": 26.19047619047619, "grad_norm": 14.449176788330078, "learning_rate": 5.134932533733134e-08, "loss": 1.3123, "step": 275 }, { "epoch": 26.285714285714285, "grad_norm": 13.00364875793457, "learning_rate": 5.1536731634182915e-08, "loss": 1.2875, "step": 276 }, { "epoch": 26.38095238095238, "grad_norm": 9.805450439453125, "learning_rate": 5.172413793103449e-08, "loss": 1.2843, "step": 277 }, { "epoch": 26.476190476190474, "grad_norm": 11.89764404296875, "learning_rate": 5.191154422788606e-08, "loss": 1.3268, "step": 278 }, { "epoch": 26.571428571428573, "grad_norm": 10.767585754394531, "learning_rate": 5.209895052473763e-08, "loss": 1.298, "step": 279 }, { "epoch": 26.666666666666668, "grad_norm": 13.058414459228516, "learning_rate": 5.2286356821589206e-08, "loss": 1.2781, "step": 280 }, { "epoch": 26.761904761904763, "grad_norm": 17.320100784301758, "learning_rate": 5.247376311844078e-08, "loss": 1.2988, "step": 281 }, { "epoch": 26.857142857142858, "grad_norm": 17.9411563873291, "learning_rate": 5.266116941529236e-08, "loss": 1.287, "step": 282 }, { "epoch": 26.952380952380953, "grad_norm": 10.667381286621094, "learning_rate": 5.2848575712143936e-08, "loss": 1.2671, "step": 283 }, { "epoch": 26.952380952380953, "eval_accuracy": 0.6867740726863995, "eval_f1": 0.7351077313054499, "eval_loss": 0.6319016218185425, "eval_precision": 0.7196029776674938, "eval_recall": 0.7512953367875648, "eval_roc_auc": 0.7251082325849165, "eval_runtime": 1.1627, "eval_samples_per_second": 2295.462, "eval_steps_per_second": 5.16, "step": 283 }, { "epoch": 27.047619047619047, "grad_norm": 12.966008186340332, "learning_rate": 5.30359820089955e-08, "loss": 1.3341, "step": 284 }, { "epoch": 27.142857142857142, "grad_norm": 12.528854370117188, "learning_rate": 5.322338830584708e-08, "loss": 1.2418, "step": 285 }, { "epoch": 27.238095238095237, "grad_norm": 13.622454643249512, "learning_rate": 5.341079460269865e-08, "loss": 1.3488, "step": 286 }, { "epoch": 27.333333333333332, "grad_norm": 16.258296966552734, "learning_rate": 5.3598200899550226e-08, "loss": 1.235, "step": 287 }, { "epoch": 27.428571428571427, "grad_norm": 9.838101387023926, "learning_rate": 5.3785607196401794e-08, "loss": 1.2687, "step": 288 }, { "epoch": 27.523809523809526, "grad_norm": 9.929924964904785, "learning_rate": 5.397301349325338e-08, "loss": 1.2249, "step": 289 }, { "epoch": 27.61904761904762, "grad_norm": 9.11926555633545, "learning_rate": 5.416041979010495e-08, "loss": 1.3112, "step": 290 }, { "epoch": 27.714285714285715, "grad_norm": 12.670866012573242, "learning_rate": 5.4347826086956524e-08, "loss": 1.3038, "step": 291 }, { "epoch": 27.80952380952381, "grad_norm": 16.6510066986084, "learning_rate": 5.45352323838081e-08, "loss": 1.3411, "step": 292 }, { "epoch": 27.904761904761905, "grad_norm": 10.850798606872559, "learning_rate": 5.472263868065967e-08, "loss": 1.3158, "step": 293 }, { "epoch": 28.0, "grad_norm": 14.082104682922363, "learning_rate": 5.491004497751124e-08, "loss": 1.321, "step": 294 }, { "epoch": 28.0, "eval_accuracy": 0.6878980891719745, "eval_f1": 0.7368088467614534, "eval_loss": 0.6302856802940369, "eval_precision": 0.7193090684762492, "eval_recall": 0.7551813471502591, "eval_roc_auc": 0.7255808865860679, "eval_runtime": 1.1704, "eval_samples_per_second": 2280.384, "eval_steps_per_second": 5.126, "step": 294 }, { "epoch": 28.095238095238095, "grad_norm": 12.710125923156738, "learning_rate": 5.509745127436283e-08, "loss": 1.3059, "step": 295 }, { "epoch": 28.19047619047619, "grad_norm": 12.972898483276367, "learning_rate": 5.52848575712144e-08, "loss": 1.2712, "step": 296 }, { "epoch": 28.285714285714285, "grad_norm": 10.665593147277832, "learning_rate": 5.547226386806597e-08, "loss": 1.2697, "step": 297 }, { "epoch": 28.38095238095238, "grad_norm": 10.437527656555176, "learning_rate": 5.5659670164917544e-08, "loss": 1.2156, "step": 298 }, { "epoch": 28.476190476190474, "grad_norm": 12.75148868560791, "learning_rate": 5.584707646176912e-08, "loss": 1.2533, "step": 299 }, { "epoch": 28.571428571428573, "grad_norm": 16.342567443847656, "learning_rate": 5.603448275862069e-08, "loss": 1.3416, "step": 300 }, { "epoch": 28.666666666666668, "grad_norm": 14.161049842834473, "learning_rate": 5.622188905547226e-08, "loss": 1.2625, "step": 301 }, { "epoch": 28.761904761904763, "grad_norm": 12.947792053222656, "learning_rate": 5.640929535232385e-08, "loss": 1.2499, "step": 302 }, { "epoch": 28.857142857142858, "grad_norm": 12.01698112487793, "learning_rate": 5.6596701649175416e-08, "loss": 1.3089, "step": 303 }, { "epoch": 28.952380952380953, "grad_norm": 9.873224258422852, "learning_rate": 5.678410794602699e-08, "loss": 1.2842, "step": 304 }, { "epoch": 28.952380952380953, "eval_accuracy": 0.6897714499812664, "eval_f1": 0.7391304347826086, "eval_loss": 0.6289286017417908, "eval_precision": 0.7196319018404908, "eval_recall": 0.7597150259067358, "eval_roc_auc": 0.7259801381692574, "eval_runtime": 1.2466, "eval_samples_per_second": 2141.024, "eval_steps_per_second": 4.813, "step": 304 }, { "epoch": 29.047619047619047, "grad_norm": 10.019207954406738, "learning_rate": 5.6971514242878564e-08, "loss": 1.2933, "step": 305 }, { "epoch": 29.142857142857142, "grad_norm": 12.953960418701172, "learning_rate": 5.715892053973014e-08, "loss": 1.2779, "step": 306 }, { "epoch": 29.238095238095237, "grad_norm": 11.685044288635254, "learning_rate": 5.7346326836581706e-08, "loss": 1.2449, "step": 307 }, { "epoch": 29.333333333333332, "grad_norm": 15.267889022827148, "learning_rate": 5.753373313343328e-08, "loss": 1.2968, "step": 308 }, { "epoch": 29.428571428571427, "grad_norm": 8.864400863647461, "learning_rate": 5.772113943028486e-08, "loss": 1.3027, "step": 309 }, { "epoch": 29.523809523809526, "grad_norm": 27.022933959960938, "learning_rate": 5.7908545727136436e-08, "loss": 1.3559, "step": 310 }, { "epoch": 29.61904761904762, "grad_norm": 12.17434024810791, "learning_rate": 5.809595202398801e-08, "loss": 1.3358, "step": 311 }, { "epoch": 29.714285714285715, "grad_norm": 12.910569190979004, "learning_rate": 5.8283358320839585e-08, "loss": 1.2667, "step": 312 }, { "epoch": 29.80952380952381, "grad_norm": 9.485709190368652, "learning_rate": 5.847076461769115e-08, "loss": 1.2922, "step": 313 }, { "epoch": 29.904761904761905, "grad_norm": 10.216976165771484, "learning_rate": 5.8658170914542727e-08, "loss": 1.2439, "step": 314 }, { "epoch": 30.0, "grad_norm": 13.940195083618164, "learning_rate": 5.88455772113943e-08, "loss": 1.2695, "step": 315 }, { "epoch": 30.0, "eval_accuracy": 0.6916448107905583, "eval_f1": 0.7411135577225543, "eval_loss": 0.6274736523628235, "eval_precision": 0.7204892966360856, "eval_recall": 0.7629533678756477, "eval_roc_auc": 0.7264208405296488, "eval_runtime": 1.324, "eval_samples_per_second": 2015.884, "eval_steps_per_second": 4.532, "step": 315 }, { "epoch": 30.095238095238095, "grad_norm": 12.867043495178223, "learning_rate": 5.903298350824588e-08, "loss": 1.2534, "step": 316 }, { "epoch": 30.19047619047619, "grad_norm": 10.215372085571289, "learning_rate": 5.9220389805097456e-08, "loss": 1.2551, "step": 317 }, { "epoch": 30.285714285714285, "grad_norm": 12.636347770690918, "learning_rate": 5.940779610194903e-08, "loss": 1.3279, "step": 318 }, { "epoch": 30.38095238095238, "grad_norm": 15.71722412109375, "learning_rate": 5.9595202398800605e-08, "loss": 1.2522, "step": 319 }, { "epoch": 30.476190476190474, "grad_norm": 13.175235748291016, "learning_rate": 5.978260869565218e-08, "loss": 1.2719, "step": 320 }, { "epoch": 30.571428571428573, "grad_norm": 12.036287307739258, "learning_rate": 5.997001499250375e-08, "loss": 1.2943, "step": 321 }, { "epoch": 30.666666666666668, "grad_norm": 10.251227378845215, "learning_rate": 6.015742128935533e-08, "loss": 1.2898, "step": 322 }, { "epoch": 30.761904761904763, "grad_norm": 10.132257461547852, "learning_rate": 6.034482758620691e-08, "loss": 1.2605, "step": 323 }, { "epoch": 30.857142857142858, "grad_norm": 13.103528022766113, "learning_rate": 6.053223388305848e-08, "loss": 1.2523, "step": 324 }, { "epoch": 30.952380952380953, "grad_norm": 10.725114822387695, "learning_rate": 6.071964017991004e-08, "loss": 1.2916, "step": 325 }, { "epoch": 30.952380952380953, "eval_accuracy": 0.6905207943049831, "eval_f1": 0.740251572327044, "eval_loss": 0.6261727809906006, "eval_precision": 0.719437652811736, "eval_recall": 0.7623056994818653, "eval_roc_auc": 0.7266635002878526, "eval_runtime": 1.1207, "eval_samples_per_second": 2381.643, "eval_steps_per_second": 5.354, "step": 325 }, { "epoch": 31.047619047619047, "grad_norm": 16.890737533569336, "learning_rate": 6.090704647676163e-08, "loss": 1.1974, "step": 326 }, { "epoch": 31.142857142857142, "grad_norm": 10.4693603515625, "learning_rate": 6.109445277361319e-08, "loss": 1.2511, "step": 327 }, { "epoch": 31.238095238095237, "grad_norm": 11.922187805175781, "learning_rate": 6.128185907046476e-08, "loss": 1.2439, "step": 328 }, { "epoch": 31.333333333333332, "grad_norm": 9.826422691345215, "learning_rate": 6.146926536731635e-08, "loss": 1.2901, "step": 329 }, { "epoch": 31.428571428571427, "grad_norm": 10.615519523620605, "learning_rate": 6.165667166416792e-08, "loss": 1.2171, "step": 330 }, { "epoch": 31.523809523809526, "grad_norm": 10.314847946166992, "learning_rate": 6.184407796101949e-08, "loss": 1.3078, "step": 331 }, { "epoch": 31.61904761904762, "grad_norm": 9.442974090576172, "learning_rate": 6.203148425787107e-08, "loss": 1.2235, "step": 332 }, { "epoch": 31.714285714285715, "grad_norm": 13.98375129699707, "learning_rate": 6.221889055472264e-08, "loss": 1.2872, "step": 333 }, { "epoch": 31.80952380952381, "grad_norm": 9.29346752166748, "learning_rate": 6.240629685157421e-08, "loss": 1.2985, "step": 334 }, { "epoch": 31.904761904761905, "grad_norm": 12.216511726379395, "learning_rate": 6.259370314842579e-08, "loss": 1.3001, "step": 335 }, { "epoch": 32.0, "grad_norm": 11.124615669250488, "learning_rate": 6.278110944527737e-08, "loss": 1.2794, "step": 336 }, { "epoch": 32.0, "eval_accuracy": 0.6920194829524167, "eval_f1": 0.7411838790931989, "eval_loss": 0.6247643828392029, "eval_precision": 0.7212009803921569, "eval_recall": 0.7623056994818653, "eval_roc_auc": 0.727045192861255, "eval_runtime": 1.1903, "eval_samples_per_second": 2242.322, "eval_steps_per_second": 5.041, "step": 336 }, { "epoch": 32.095238095238095, "grad_norm": 17.693710327148438, "learning_rate": 6.296851574212894e-08, "loss": 1.2293, "step": 337 }, { "epoch": 32.19047619047619, "grad_norm": 13.685829162597656, "learning_rate": 6.315592203898052e-08, "loss": 1.2249, "step": 338 }, { "epoch": 32.285714285714285, "grad_norm": 12.157681465148926, "learning_rate": 6.334332833583208e-08, "loss": 1.3495, "step": 339 }, { "epoch": 32.38095238095238, "grad_norm": 19.257387161254883, "learning_rate": 6.353073463268365e-08, "loss": 1.2029, "step": 340 }, { "epoch": 32.476190476190474, "grad_norm": 14.897787094116211, "learning_rate": 6.371814092953523e-08, "loss": 1.2478, "step": 341 }, { "epoch": 32.57142857142857, "grad_norm": 8.744516372680664, "learning_rate": 6.39055472263868e-08, "loss": 1.2386, "step": 342 }, { "epoch": 32.666666666666664, "grad_norm": 11.148335456848145, "learning_rate": 6.409295352323838e-08, "loss": 1.2786, "step": 343 }, { "epoch": 32.76190476190476, "grad_norm": 10.281587600708008, "learning_rate": 6.428035982008996e-08, "loss": 1.3163, "step": 344 }, { "epoch": 32.857142857142854, "grad_norm": 10.477128982543945, "learning_rate": 6.446776611694153e-08, "loss": 1.2902, "step": 345 }, { "epoch": 32.95238095238095, "grad_norm": 7.760868072509766, "learning_rate": 6.465517241379311e-08, "loss": 1.2286, "step": 346 }, { "epoch": 32.95238095238095, "eval_accuracy": 0.692394155114275, "eval_f1": 0.7414173228346457, "eval_loss": 0.6235555410385132, "eval_precision": 0.7216431637032495, "eval_recall": 0.7623056994818653, "eval_roc_auc": 0.7272826712723086, "eval_runtime": 1.4945, "eval_samples_per_second": 1785.937, "eval_steps_per_second": 4.015, "step": 346 }, { "epoch": 33.04761904761905, "grad_norm": 10.051183700561523, "learning_rate": 6.484257871064468e-08, "loss": 1.285, "step": 347 }, { "epoch": 33.142857142857146, "grad_norm": 8.26945972442627, "learning_rate": 6.502998500749625e-08, "loss": 1.2618, "step": 348 }, { "epoch": 33.23809523809524, "grad_norm": 10.043110847473145, "learning_rate": 6.521739130434783e-08, "loss": 1.3071, "step": 349 }, { "epoch": 33.333333333333336, "grad_norm": 8.486072540283203, "learning_rate": 6.540479760119941e-08, "loss": 1.3003, "step": 350 }, { "epoch": 33.42857142857143, "grad_norm": 9.369478225708008, "learning_rate": 6.559220389805098e-08, "loss": 1.2488, "step": 351 }, { "epoch": 33.523809523809526, "grad_norm": 19.558025360107422, "learning_rate": 6.577961019490256e-08, "loss": 1.2199, "step": 352 }, { "epoch": 33.61904761904762, "grad_norm": 9.55553150177002, "learning_rate": 6.596701649175413e-08, "loss": 1.2679, "step": 353 }, { "epoch": 33.714285714285715, "grad_norm": 12.629579544067383, "learning_rate": 6.615442278860569e-08, "loss": 1.2395, "step": 354 }, { "epoch": 33.80952380952381, "grad_norm": 14.873830795288086, "learning_rate": 6.634182908545727e-08, "loss": 1.293, "step": 355 }, { "epoch": 33.904761904761905, "grad_norm": 9.722416877746582, "learning_rate": 6.652923538230886e-08, "loss": 1.2764, "step": 356 }, { "epoch": 34.0, "grad_norm": 9.184341430664062, "learning_rate": 6.671664167916042e-08, "loss": 1.2784, "step": 357 }, { "epoch": 34.0, "eval_accuracy": 0.6931434994379918, "eval_f1": 0.7422096317280453, "eval_loss": 0.6222566366195679, "eval_precision": 0.7219840783833436, "eval_recall": 0.7636010362694301, "eval_roc_auc": 0.7276188831318364, "eval_runtime": 1.1766, "eval_samples_per_second": 2268.491, "eval_steps_per_second": 5.1, "step": 357 }, { "epoch": 34.095238095238095, "grad_norm": 17.952123641967773, "learning_rate": 6.6904047976012e-08, "loss": 1.2041, "step": 358 }, { "epoch": 34.19047619047619, "grad_norm": 9.577611923217773, "learning_rate": 6.709145427286357e-08, "loss": 1.2329, "step": 359 }, { "epoch": 34.285714285714285, "grad_norm": 10.438279151916504, "learning_rate": 6.727886056971514e-08, "loss": 1.2664, "step": 360 }, { "epoch": 34.38095238095238, "grad_norm": 11.91615104675293, "learning_rate": 6.746626686656672e-08, "loss": 1.2901, "step": 361 }, { "epoch": 34.476190476190474, "grad_norm": 11.50903034210205, "learning_rate": 6.765367316341829e-08, "loss": 1.3196, "step": 362 }, { "epoch": 34.57142857142857, "grad_norm": 21.127635955810547, "learning_rate": 6.784107946026987e-08, "loss": 1.246, "step": 363 }, { "epoch": 34.666666666666664, "grad_norm": 8.733270645141602, "learning_rate": 6.802848575712145e-08, "loss": 1.2559, "step": 364 }, { "epoch": 34.76190476190476, "grad_norm": 12.190812110900879, "learning_rate": 6.821589205397302e-08, "loss": 1.2671, "step": 365 }, { "epoch": 34.857142857142854, "grad_norm": 9.869928359985352, "learning_rate": 6.840329835082459e-08, "loss": 1.2123, "step": 366 }, { "epoch": 34.95238095238095, "grad_norm": 14.302867889404297, "learning_rate": 6.859070464767617e-08, "loss": 1.2832, "step": 367 }, { "epoch": 34.95238095238095, "eval_accuracy": 0.6942675159235668, "eval_f1": 0.7442006269592476, "eval_loss": 0.6210344433784485, "eval_precision": 0.721142162818955, "eval_recall": 0.7687823834196891, "eval_roc_auc": 0.7280549798503166, "eval_runtime": 1.3197, "eval_samples_per_second": 2022.409, "eval_steps_per_second": 4.546, "step": 367 }, { "epoch": 35.04761904761905, "grad_norm": 10.143964767456055, "learning_rate": 6.877811094452773e-08, "loss": 1.2314, "step": 368 }, { "epoch": 35.142857142857146, "grad_norm": 13.52642822265625, "learning_rate": 6.89655172413793e-08, "loss": 1.3133, "step": 369 }, { "epoch": 35.23809523809524, "grad_norm": 8.791267395019531, "learning_rate": 6.91529235382309e-08, "loss": 1.2008, "step": 370 }, { "epoch": 35.333333333333336, "grad_norm": 8.982850074768066, "learning_rate": 6.934032983508246e-08, "loss": 1.221, "step": 371 }, { "epoch": 35.42857142857143, "grad_norm": 6.191827774047852, "learning_rate": 6.952773613193403e-08, "loss": 1.2471, "step": 372 }, { "epoch": 35.523809523809526, "grad_norm": 8.668267250061035, "learning_rate": 6.971514242878561e-08, "loss": 1.2409, "step": 373 }, { "epoch": 35.61904761904762, "grad_norm": 11.444587707519531, "learning_rate": 6.990254872563718e-08, "loss": 1.2306, "step": 374 }, { "epoch": 35.714285714285715, "grad_norm": 8.564613342285156, "learning_rate": 7.008995502248876e-08, "loss": 1.2479, "step": 375 }, { "epoch": 35.80952380952381, "grad_norm": 17.54589080810547, "learning_rate": 7.027736131934034e-08, "loss": 1.2814, "step": 376 }, { "epoch": 35.904761904761905, "grad_norm": 10.60114574432373, "learning_rate": 7.046476761619191e-08, "loss": 1.3122, "step": 377 }, { "epoch": 36.0, "grad_norm": 13.735464096069336, "learning_rate": 7.065217391304348e-08, "loss": 1.257, "step": 378 }, { "epoch": 36.0, "eval_accuracy": 0.6946421880854252, "eval_f1": 0.7445941711062363, "eval_loss": 0.619743287563324, "eval_precision": 0.7213114754098361, "eval_recall": 0.7694300518134715, "eval_roc_auc": 0.7284668969487623, "eval_runtime": 1.1237, "eval_samples_per_second": 2375.245, "eval_steps_per_second": 5.34, "step": 378 }, { "epoch": 36.095238095238095, "grad_norm": 11.545016288757324, "learning_rate": 7.083958020989506e-08, "loss": 1.2738, "step": 379 }, { "epoch": 36.19047619047619, "grad_norm": 11.023893356323242, "learning_rate": 7.102698650674663e-08, "loss": 1.2481, "step": 380 }, { "epoch": 36.285714285714285, "grad_norm": 10.620939254760742, "learning_rate": 7.121439280359821e-08, "loss": 1.1878, "step": 381 }, { "epoch": 36.38095238095238, "grad_norm": 9.518024444580078, "learning_rate": 7.140179910044977e-08, "loss": 1.2586, "step": 382 }, { "epoch": 36.476190476190474, "grad_norm": 9.962079048156738, "learning_rate": 7.158920539730136e-08, "loss": 1.3146, "step": 383 }, { "epoch": 36.57142857142857, "grad_norm": 7.642823219299316, "learning_rate": 7.177661169415294e-08, "loss": 1.2584, "step": 384 }, { "epoch": 36.666666666666664, "grad_norm": 6.657567977905273, "learning_rate": 7.19640179910045e-08, "loss": 1.2405, "step": 385 }, { "epoch": 36.76190476190476, "grad_norm": 8.174885749816895, "learning_rate": 7.215142428785607e-08, "loss": 1.2213, "step": 386 }, { "epoch": 36.857142857142854, "grad_norm": 9.345281600952148, "learning_rate": 7.233883058470765e-08, "loss": 1.2349, "step": 387 }, { "epoch": 36.95238095238095, "grad_norm": 10.576764106750488, "learning_rate": 7.252623688155922e-08, "loss": 1.2673, "step": 388 }, { "epoch": 36.95238095238095, "eval_accuracy": 0.695391532409142, "eval_f1": 0.7453805198872534, "eval_loss": 0.6185930371284485, "eval_precision": 0.7216494845360825, "eval_recall": 0.7707253886010362, "eval_roc_auc": 0.7288293033966609, "eval_runtime": 1.1526, "eval_samples_per_second": 2315.706, "eval_steps_per_second": 5.206, "step": 388 }, { "epoch": 37.04761904761905, "grad_norm": 13.148157119750977, "learning_rate": 7.271364317841079e-08, "loss": 1.3245, "step": 389 }, { "epoch": 37.142857142857146, "grad_norm": 16.42124366760254, "learning_rate": 7.290104947526238e-08, "loss": 1.248, "step": 390 }, { "epoch": 37.23809523809524, "grad_norm": 10.700825691223145, "learning_rate": 7.308845577211395e-08, "loss": 1.278, "step": 391 }, { "epoch": 37.333333333333336, "grad_norm": 6.934027671813965, "learning_rate": 7.327586206896552e-08, "loss": 1.1932, "step": 392 }, { "epoch": 37.42857142857143, "grad_norm": 7.132205009460449, "learning_rate": 7.34632683658171e-08, "loss": 1.1896, "step": 393 }, { "epoch": 37.523809523809526, "grad_norm": 9.692222595214844, "learning_rate": 7.365067466266867e-08, "loss": 1.2578, "step": 394 }, { "epoch": 37.61904761904762, "grad_norm": 8.961304664611816, "learning_rate": 7.383808095952023e-08, "loss": 1.269, "step": 395 }, { "epoch": 37.714285714285715, "grad_norm": 17.51494598388672, "learning_rate": 7.402548725637182e-08, "loss": 1.2132, "step": 396 }, { "epoch": 37.80952380952381, "grad_norm": 10.411418914794922, "learning_rate": 7.42128935532234e-08, "loss": 1.2914, "step": 397 }, { "epoch": 37.904761904761905, "grad_norm": 32.3235969543457, "learning_rate": 7.440029985007496e-08, "loss": 1.2852, "step": 398 }, { "epoch": 38.0, "grad_norm": 11.350420951843262, "learning_rate": 7.458770614692654e-08, "loss": 1.2991, "step": 399 }, { "epoch": 38.0, "eval_accuracy": 0.6927688272761334, "eval_f1": 0.74375, "eval_loss": 0.6172738671302795, "eval_precision": 0.7185990338164251, "eval_recall": 0.7707253886010362, "eval_roc_auc": 0.7293100172711571, "eval_runtime": 1.1692, "eval_samples_per_second": 2282.832, "eval_steps_per_second": 5.132, "step": 399 }, { "epoch": 38.095238095238095, "grad_norm": 10.146343231201172, "learning_rate": 7.477511244377811e-08, "loss": 1.2156, "step": 400 }, { "epoch": 38.19047619047619, "grad_norm": 9.178386688232422, "learning_rate": 7.496251874062968e-08, "loss": 1.3147, "step": 401 }, { "epoch": 38.285714285714285, "grad_norm": 16.39377212524414, "learning_rate": 7.514992503748126e-08, "loss": 1.2434, "step": 402 }, { "epoch": 38.38095238095238, "grad_norm": 8.165606498718262, "learning_rate": 7.533733133433284e-08, "loss": 1.241, "step": 403 }, { "epoch": 38.476190476190474, "grad_norm": 7.81613826751709, "learning_rate": 7.552473763118441e-08, "loss": 1.2205, "step": 404 }, { "epoch": 38.57142857142857, "grad_norm": 12.5527982711792, "learning_rate": 7.571214392803599e-08, "loss": 1.2375, "step": 405 }, { "epoch": 38.666666666666664, "grad_norm": 13.193964004516602, "learning_rate": 7.589955022488756e-08, "loss": 1.3149, "step": 406 }, { "epoch": 38.76190476190476, "grad_norm": 11.886463165283203, "learning_rate": 7.608695652173913e-08, "loss": 1.2915, "step": 407 }, { "epoch": 38.857142857142854, "grad_norm": 8.637429237365723, "learning_rate": 7.627436281859071e-08, "loss": 1.2104, "step": 408 }, { "epoch": 38.95238095238095, "grad_norm": 8.065494537353516, "learning_rate": 7.646176911544228e-08, "loss": 1.2651, "step": 409 }, { "epoch": 38.95238095238095, "eval_accuracy": 0.6935181715998502, "eval_f1": 0.7445346658338539, "eval_loss": 0.6162141561508179, "eval_precision": 0.7189384800965019, "eval_recall": 0.772020725388601, "eval_roc_auc": 0.7296609096142775, "eval_runtime": 1.2038, "eval_samples_per_second": 2217.099, "eval_steps_per_second": 4.984, "step": 409 }, { "epoch": 39.04761904761905, "grad_norm": 10.647790908813477, "learning_rate": 7.664917541229386e-08, "loss": 1.1924, "step": 410 }, { "epoch": 39.142857142857146, "grad_norm": 8.407620429992676, "learning_rate": 7.683658170914544e-08, "loss": 1.2834, "step": 411 }, { "epoch": 39.23809523809524, "grad_norm": 10.034721374511719, "learning_rate": 7.7023988005997e-08, "loss": 1.2479, "step": 412 }, { "epoch": 39.333333333333336, "grad_norm": 8.923266410827637, "learning_rate": 7.721139430284859e-08, "loss": 1.2341, "step": 413 }, { "epoch": 39.42857142857143, "grad_norm": 8.994647979736328, "learning_rate": 7.739880059970015e-08, "loss": 1.2758, "step": 414 }, { "epoch": 39.523809523809526, "grad_norm": 7.205422401428223, "learning_rate": 7.758620689655172e-08, "loss": 1.2421, "step": 415 }, { "epoch": 39.61904761904762, "grad_norm": 7.557537078857422, "learning_rate": 7.77736131934033e-08, "loss": 1.2648, "step": 416 }, { "epoch": 39.714285714285715, "grad_norm": 13.540074348449707, "learning_rate": 7.796101949025488e-08, "loss": 1.2454, "step": 417 }, { "epoch": 39.80952380952381, "grad_norm": 7.045814037322998, "learning_rate": 7.814842578710645e-08, "loss": 1.2481, "step": 418 }, { "epoch": 39.904761904761905, "grad_norm": 12.278300285339355, "learning_rate": 7.833583208395803e-08, "loss": 1.2519, "step": 419 }, { "epoch": 40.0, "grad_norm": 8.270506858825684, "learning_rate": 7.85232383808096e-08, "loss": 1.2424, "step": 420 }, { "epoch": 40.0, "eval_accuracy": 0.6938928437617085, "eval_f1": 0.7447672602311778, "eval_loss": 0.6150386929512024, "eval_precision": 0.7193723596861799, "eval_recall": 0.772020725388601, "eval_roc_auc": 0.7299176741508346, "eval_runtime": 1.376, "eval_samples_per_second": 1939.651, "eval_steps_per_second": 4.36, "step": 420 }, { "epoch": 40.095238095238095, "grad_norm": 9.389835357666016, "learning_rate": 7.871064467766117e-08, "loss": 1.2773, "step": 421 }, { "epoch": 40.19047619047619, "grad_norm": 17.68068504333496, "learning_rate": 7.889805097451275e-08, "loss": 1.2939, "step": 422 }, { "epoch": 40.285714285714285, "grad_norm": 10.948050498962402, "learning_rate": 7.908545727136432e-08, "loss": 1.247, "step": 423 }, { "epoch": 40.38095238095238, "grad_norm": 10.204144477844238, "learning_rate": 7.92728635682159e-08, "loss": 1.2583, "step": 424 }, { "epoch": 40.476190476190474, "grad_norm": 8.29542350769043, "learning_rate": 7.946026986506748e-08, "loss": 1.2417, "step": 425 }, { "epoch": 40.57142857142857, "grad_norm": 10.966816902160645, "learning_rate": 7.964767616191905e-08, "loss": 1.2749, "step": 426 }, { "epoch": 40.666666666666664, "grad_norm": 7.698588848114014, "learning_rate": 7.983508245877061e-08, "loss": 1.2242, "step": 427 }, { "epoch": 40.76190476190476, "grad_norm": 13.610939979553223, "learning_rate": 8.00224887556222e-08, "loss": 1.1539, "step": 428 }, { "epoch": 40.857142857142854, "grad_norm": 9.892770767211914, "learning_rate": 8.020989505247376e-08, "loss": 1.2967, "step": 429 }, { "epoch": 40.95238095238095, "grad_norm": 8.661589622497559, "learning_rate": 8.039730134932534e-08, "loss": 1.2472, "step": 430 }, { "epoch": 40.95238095238095, "eval_accuracy": 0.692394155114275, "eval_f1": 0.7431967469502658, "eval_loss": 0.6140232682228088, "eval_precision": 0.7186932849364791, "eval_recall": 0.7694300518134715, "eval_roc_auc": 0.730028209556707, "eval_runtime": 1.1592, "eval_samples_per_second": 2302.479, "eval_steps_per_second": 5.176, "step": 430 }, { "epoch": 41.04761904761905, "grad_norm": 9.2203950881958, "learning_rate": 8.058470764617692e-08, "loss": 1.2648, "step": 431 }, { "epoch": 41.142857142857146, "grad_norm": 6.482606887817383, "learning_rate": 8.077211394302849e-08, "loss": 1.2506, "step": 432 }, { "epoch": 41.23809523809524, "grad_norm": 8.282089233398438, "learning_rate": 8.095952023988006e-08, "loss": 1.2826, "step": 433 }, { "epoch": 41.333333333333336, "grad_norm": 8.352246284484863, "learning_rate": 8.114692653673164e-08, "loss": 1.2123, "step": 434 }, { "epoch": 41.42857142857143, "grad_norm": 9.01622486114502, "learning_rate": 8.133433283358321e-08, "loss": 1.284, "step": 435 }, { "epoch": 41.523809523809526, "grad_norm": 14.874005317687988, "learning_rate": 8.152173913043478e-08, "loss": 1.2559, "step": 436 }, { "epoch": 41.61904761904762, "grad_norm": 9.86267375946045, "learning_rate": 8.170914542728637e-08, "loss": 1.234, "step": 437 }, { "epoch": 41.714285714285715, "grad_norm": 15.416117668151855, "learning_rate": 8.189655172413794e-08, "loss": 1.21, "step": 438 }, { "epoch": 41.80952380952381, "grad_norm": 7.8476881980896, "learning_rate": 8.20839580209895e-08, "loss": 1.2181, "step": 439 }, { "epoch": 41.904761904761905, "grad_norm": 7.718698024749756, "learning_rate": 8.227136431784109e-08, "loss": 1.2211, "step": 440 }, { "epoch": 42.0, "grad_norm": 8.605914115905762, "learning_rate": 8.245877061469265e-08, "loss": 1.2357, "step": 441 }, { "epoch": 42.0, "eval_accuracy": 0.6908954664668415, "eval_f1": 0.7414603572547791, "eval_loss": 0.6128706932067871, "eval_precision": 0.7182756527018822, "eval_recall": 0.7661917098445595, "eval_roc_auc": 0.7303862982153138, "eval_runtime": 1.1383, "eval_samples_per_second": 2344.629, "eval_steps_per_second": 5.271, "step": 441 }, { "epoch": 42.095238095238095, "grad_norm": 15.596901893615723, "learning_rate": 8.264617691154423e-08, "loss": 1.2439, "step": 442 }, { "epoch": 42.19047619047619, "grad_norm": 10.043680191040039, "learning_rate": 8.28335832083958e-08, "loss": 1.28, "step": 443 }, { "epoch": 42.285714285714285, "grad_norm": 6.956470012664795, "learning_rate": 8.302098950524738e-08, "loss": 1.1946, "step": 444 }, { "epoch": 42.38095238095238, "grad_norm": 8.87226676940918, "learning_rate": 8.320839580209895e-08, "loss": 1.2674, "step": 445 }, { "epoch": 42.476190476190474, "grad_norm": 8.562736511230469, "learning_rate": 8.339580209895053e-08, "loss": 1.2248, "step": 446 }, { "epoch": 42.57142857142857, "grad_norm": 12.94089126586914, "learning_rate": 8.35832083958021e-08, "loss": 1.2146, "step": 447 }, { "epoch": 42.666666666666664, "grad_norm": 5.351092338562012, "learning_rate": 8.377061469265368e-08, "loss": 1.1685, "step": 448 }, { "epoch": 42.76190476190476, "grad_norm": 12.503556251525879, "learning_rate": 8.395802098950525e-08, "loss": 1.1986, "step": 449 }, { "epoch": 42.857142857142854, "grad_norm": 13.914941787719727, "learning_rate": 8.414542728635682e-08, "loss": 1.2451, "step": 450 }, { "epoch": 42.95238095238095, "grad_norm": 7.735352993011475, "learning_rate": 8.433283358320841e-08, "loss": 1.2592, "step": 451 }, { "epoch": 42.95238095238095, "eval_accuracy": 0.6920194829524167, "eval_f1": 0.7440846824408468, "eval_loss": 0.6118565201759338, "eval_precision": 0.7164268585131894, "eval_recall": 0.7739637305699482, "eval_roc_auc": 0.7308465745538284, "eval_runtime": 1.1892, "eval_samples_per_second": 2244.425, "eval_steps_per_second": 5.046, "step": 451 }, { "epoch": 43.04761904761905, "grad_norm": 13.332247734069824, "learning_rate": 8.452023988005998e-08, "loss": 1.3321, "step": 452 }, { "epoch": 43.142857142857146, "grad_norm": 6.532809257507324, "learning_rate": 8.470764617691155e-08, "loss": 1.2256, "step": 453 }, { "epoch": 43.23809523809524, "grad_norm": 6.154987812042236, "learning_rate": 8.489505247376313e-08, "loss": 1.208, "step": 454 }, { "epoch": 43.333333333333336, "grad_norm": 6.660593032836914, "learning_rate": 8.50824587706147e-08, "loss": 1.2417, "step": 455 }, { "epoch": 43.42857142857143, "grad_norm": 10.340599060058594, "learning_rate": 8.526986506746626e-08, "loss": 1.2063, "step": 456 }, { "epoch": 43.523809523809526, "grad_norm": 24.725839614868164, "learning_rate": 8.545727136431786e-08, "loss": 1.2705, "step": 457 }, { "epoch": 43.61904761904762, "grad_norm": 10.683469772338867, "learning_rate": 8.564467766116942e-08, "loss": 1.2525, "step": 458 }, { "epoch": 43.714285714285715, "grad_norm": 9.80042839050293, "learning_rate": 8.583208395802099e-08, "loss": 1.2321, "step": 459 }, { "epoch": 43.80952380952381, "grad_norm": 9.710074424743652, "learning_rate": 8.601949025487257e-08, "loss": 1.2513, "step": 460 }, { "epoch": 43.904761904761905, "grad_norm": 8.437572479248047, "learning_rate": 8.620689655172414e-08, "loss": 1.198, "step": 461 }, { "epoch": 44.0, "grad_norm": 16.720048904418945, "learning_rate": 8.639430284857571e-08, "loss": 1.1771, "step": 462 }, { "epoch": 44.0, "eval_accuracy": 0.6905207943049831, "eval_f1": 0.74235807860262, "eval_loss": 0.6107428669929504, "eval_precision": 0.7160048134777377, "eval_recall": 0.7707253886010362, "eval_roc_auc": 0.7310915371329879, "eval_runtime": 1.523, "eval_samples_per_second": 1752.463, "eval_steps_per_second": 3.94, "step": 462 }, { "epoch": 44.095238095238095, "grad_norm": 11.607662200927734, "learning_rate": 8.658170914542729e-08, "loss": 1.2554, "step": 463 }, { "epoch": 44.19047619047619, "grad_norm": 10.140999794006348, "learning_rate": 8.676911544227887e-08, "loss": 1.2278, "step": 464 }, { "epoch": 44.285714285714285, "grad_norm": 8.476537704467773, "learning_rate": 8.695652173913044e-08, "loss": 1.2808, "step": 465 }, { "epoch": 44.38095238095238, "grad_norm": 6.853167533874512, "learning_rate": 8.714392803598202e-08, "loss": 1.2115, "step": 466 }, { "epoch": 44.476190476190474, "grad_norm": 7.388545513153076, "learning_rate": 8.733133433283359e-08, "loss": 1.2131, "step": 467 }, { "epoch": 44.57142857142857, "grad_norm": 12.221652030944824, "learning_rate": 8.751874062968515e-08, "loss": 1.2563, "step": 468 }, { "epoch": 44.666666666666664, "grad_norm": 8.581650733947754, "learning_rate": 8.770614692653674e-08, "loss": 1.2217, "step": 469 }, { "epoch": 44.76190476190476, "grad_norm": 10.659028053283691, "learning_rate": 8.78935532233883e-08, "loss": 1.2165, "step": 470 }, { "epoch": 44.857142857142854, "grad_norm": 7.4702935218811035, "learning_rate": 8.808095952023988e-08, "loss": 1.2186, "step": 471 }, { "epoch": 44.95238095238095, "grad_norm": 8.66907024383545, "learning_rate": 8.826836581709146e-08, "loss": 1.2805, "step": 472 }, { "epoch": 44.95238095238095, "eval_accuracy": 0.6897714499812664, "eval_f1": 0.74125, "eval_loss": 0.6098491549491882, "eval_precision": 0.716183574879227, "eval_recall": 0.7681347150259067, "eval_roc_auc": 0.7313713298791019, "eval_runtime": 1.2302, "eval_samples_per_second": 2169.652, "eval_steps_per_second": 4.877, "step": 472 }, { "epoch": 45.04761904761905, "grad_norm": 8.637537002563477, "learning_rate": 8.845577211394303e-08, "loss": 1.2485, "step": 473 }, { "epoch": 45.142857142857146, "grad_norm": 7.344876289367676, "learning_rate": 8.86431784107946e-08, "loss": 1.2742, "step": 474 }, { "epoch": 45.23809523809524, "grad_norm": 12.491889953613281, "learning_rate": 8.883058470764618e-08, "loss": 1.2503, "step": 475 }, { "epoch": 45.333333333333336, "grad_norm": 8.886862754821777, "learning_rate": 8.901799100449775e-08, "loss": 1.2116, "step": 476 }, { "epoch": 45.42857142857143, "grad_norm": 11.657227516174316, "learning_rate": 8.920539730134933e-08, "loss": 1.2066, "step": 477 }, { "epoch": 45.523809523809526, "grad_norm": 6.646770000457764, "learning_rate": 8.939280359820091e-08, "loss": 1.178, "step": 478 }, { "epoch": 45.61904761904762, "grad_norm": 12.152413368225098, "learning_rate": 8.958020989505248e-08, "loss": 1.2413, "step": 479 }, { "epoch": 45.714285714285715, "grad_norm": 10.730239868164062, "learning_rate": 8.976761619190406e-08, "loss": 1.2969, "step": 480 }, { "epoch": 45.80952380952381, "grad_norm": 8.96199893951416, "learning_rate": 8.995502248875563e-08, "loss": 1.1655, "step": 481 }, { "epoch": 45.904761904761905, "grad_norm": 11.491959571838379, "learning_rate": 9.01424287856072e-08, "loss": 1.2569, "step": 482 }, { "epoch": 46.0, "grad_norm": 9.993547439575195, "learning_rate": 9.032983508245878e-08, "loss": 1.231, "step": 483 }, { "epoch": 46.0, "eval_accuracy": 0.6912701386286999, "eval_f1": 0.7429819089207735, "eval_loss": 0.6088653802871704, "eval_precision": 0.7166064981949458, "eval_recall": 0.7713730569948186, "eval_roc_auc": 0.7315898100172712, "eval_runtime": 1.2193, "eval_samples_per_second": 2188.898, "eval_steps_per_second": 4.921, "step": 483 }, { "epoch": 46.095238095238095, "grad_norm": 12.399568557739258, "learning_rate": 9.051724137931036e-08, "loss": 1.2516, "step": 484 }, { "epoch": 46.19047619047619, "grad_norm": 5.617837429046631, "learning_rate": 9.070464767616192e-08, "loss": 1.2265, "step": 485 }, { "epoch": 46.285714285714285, "grad_norm": 10.155060768127441, "learning_rate": 9.08920539730135e-08, "loss": 1.2064, "step": 486 }, { "epoch": 46.38095238095238, "grad_norm": 7.8809661865234375, "learning_rate": 9.107946026986507e-08, "loss": 1.2029, "step": 487 }, { "epoch": 46.476190476190474, "grad_norm": 7.232656478881836, "learning_rate": 9.126686656671664e-08, "loss": 1.2328, "step": 488 }, { "epoch": 46.57142857142857, "grad_norm": 6.484198093414307, "learning_rate": 9.145427286356822e-08, "loss": 1.2244, "step": 489 }, { "epoch": 46.666666666666664, "grad_norm": 9.187188148498535, "learning_rate": 9.164167916041979e-08, "loss": 1.2345, "step": 490 }, { "epoch": 46.76190476190476, "grad_norm": 6.534236431121826, "learning_rate": 9.182908545727137e-08, "loss": 1.2412, "step": 491 }, { "epoch": 46.857142857142854, "grad_norm": 5.195504188537598, "learning_rate": 9.201649175412295e-08, "loss": 1.2278, "step": 492 }, { "epoch": 46.95238095238095, "grad_norm": 7.281925201416016, "learning_rate": 9.220389805097452e-08, "loss": 1.2285, "step": 493 }, { "epoch": 46.95238095238095, "eval_accuracy": 0.6916448107905583, "eval_f1": 0.7444892890406706, "eval_loss": 0.6079878807067871, "eval_precision": 0.7149672033392963, "eval_recall": 0.7765544041450777, "eval_roc_auc": 0.7319453080023028, "eval_runtime": 1.1504, "eval_samples_per_second": 2319.983, "eval_steps_per_second": 5.215, "step": 493 }, { "epoch": 47.04761904761905, "grad_norm": 14.828407287597656, "learning_rate": 9.239130434782609e-08, "loss": 1.2821, "step": 494 }, { "epoch": 47.142857142857146, "grad_norm": 6.8681182861328125, "learning_rate": 9.257871064467767e-08, "loss": 1.2301, "step": 495 }, { "epoch": 47.23809523809524, "grad_norm": 8.041788101196289, "learning_rate": 9.276611694152924e-08, "loss": 1.1844, "step": 496 }, { "epoch": 47.333333333333336, "grad_norm": 12.36368179321289, "learning_rate": 9.29535232383808e-08, "loss": 1.2044, "step": 497 }, { "epoch": 47.42857142857143, "grad_norm": 12.137744903564453, "learning_rate": 9.31409295352324e-08, "loss": 1.2119, "step": 498 }, { "epoch": 47.523809523809526, "grad_norm": 13.399957656860352, "learning_rate": 9.332833583208397e-08, "loss": 1.2211, "step": 499 }, { "epoch": 47.61904761904762, "grad_norm": 9.091277122497559, "learning_rate": 9.351574212893553e-08, "loss": 1.193, "step": 500 }, { "epoch": 47.714285714285715, "grad_norm": 10.781776428222656, "learning_rate": 9.370314842578711e-08, "loss": 1.301, "step": 501 }, { "epoch": 47.80952380952381, "grad_norm": 7.946430683135986, "learning_rate": 9.389055472263868e-08, "loss": 1.2459, "step": 502 }, { "epoch": 47.904761904761905, "grad_norm": 17.722209930419922, "learning_rate": 9.407796101949026e-08, "loss": 1.2716, "step": 503 }, { "epoch": 48.0, "grad_norm": 10.651185989379883, "learning_rate": 9.426536731634183e-08, "loss": 1.1701, "step": 504 }, { "epoch": 48.0, "eval_accuracy": 0.6920194829524167, "eval_f1": 0.7447204968944099, "eval_loss": 0.6071056723594666, "eval_precision": 0.7153937947494033, "eval_recall": 0.7765544041450777, "eval_roc_auc": 0.7321105354058722, "eval_runtime": 1.1292, "eval_samples_per_second": 2363.553, "eval_steps_per_second": 5.313, "step": 504 }, { "epoch": 48.095238095238095, "grad_norm": 8.36655044555664, "learning_rate": 9.445277361319341e-08, "loss": 1.2032, "step": 505 }, { "epoch": 48.19047619047619, "grad_norm": 6.818087577819824, "learning_rate": 9.464017991004498e-08, "loss": 1.2746, "step": 506 }, { "epoch": 48.285714285714285, "grad_norm": 7.613657474517822, "learning_rate": 9.482758620689656e-08, "loss": 1.2157, "step": 507 }, { "epoch": 48.38095238095238, "grad_norm": 5.677556991577148, "learning_rate": 9.501499250374813e-08, "loss": 1.1771, "step": 508 }, { "epoch": 48.476190476190474, "grad_norm": 7.941476345062256, "learning_rate": 9.520239880059971e-08, "loss": 1.2557, "step": 509 }, { "epoch": 48.57142857142857, "grad_norm": 6.0007805824279785, "learning_rate": 9.538980509745128e-08, "loss": 1.2346, "step": 510 }, { "epoch": 48.666666666666664, "grad_norm": 8.160831451416016, "learning_rate": 9.557721139430286e-08, "loss": 1.2416, "step": 511 }, { "epoch": 48.76190476190476, "grad_norm": 11.385088920593262, "learning_rate": 9.576461769115442e-08, "loss": 1.182, "step": 512 }, { "epoch": 48.857142857142854, "grad_norm": 10.08471965789795, "learning_rate": 9.5952023988006e-08, "loss": 1.1992, "step": 513 }, { "epoch": 48.95238095238095, "grad_norm": 10.728684425354004, "learning_rate": 9.613943028485757e-08, "loss": 1.2379, "step": 514 }, { "epoch": 48.95238095238095, "eval_accuracy": 0.6890221056575496, "eval_f1": 0.7409488139825219, "eval_loss": 0.6063689589500427, "eval_precision": 0.7150602409638555, "eval_recall": 0.7687823834196891, "eval_roc_auc": 0.7321614853195165, "eval_runtime": 1.2051, "eval_samples_per_second": 2214.838, "eval_steps_per_second": 4.979, "step": 514 }, { "epoch": 49.04761904761905, "grad_norm": 11.368154525756836, "learning_rate": 9.632683658170915e-08, "loss": 1.2628, "step": 515 }, { "epoch": 49.142857142857146, "grad_norm": 6.578487873077393, "learning_rate": 9.651424287856072e-08, "loss": 1.2107, "step": 516 }, { "epoch": 49.23809523809524, "grad_norm": 7.267766952514648, "learning_rate": 9.670164917541229e-08, "loss": 1.1805, "step": 517 }, { "epoch": 49.333333333333336, "grad_norm": 6.106053829193115, "learning_rate": 9.688905547226388e-08, "loss": 1.2076, "step": 518 }, { "epoch": 49.42857142857143, "grad_norm": 10.81891918182373, "learning_rate": 9.707646176911545e-08, "loss": 1.244, "step": 519 }, { "epoch": 49.523809523809526, "grad_norm": 7.16576623916626, "learning_rate": 9.726386806596702e-08, "loss": 1.2448, "step": 520 }, { "epoch": 49.61904761904762, "grad_norm": 7.20371675491333, "learning_rate": 9.74512743628186e-08, "loss": 1.2267, "step": 521 }, { "epoch": 49.714285714285715, "grad_norm": 7.5791730880737305, "learning_rate": 9.763868065967017e-08, "loss": 1.1871, "step": 522 }, { "epoch": 49.80952380952381, "grad_norm": 6.973799705505371, "learning_rate": 9.782608695652175e-08, "loss": 1.2649, "step": 523 }, { "epoch": 49.904761904761905, "grad_norm": 7.447197914123535, "learning_rate": 9.801349325337332e-08, "loss": 1.2246, "step": 524 }, { "epoch": 50.0, "grad_norm": 7.1552734375, "learning_rate": 9.82008995502249e-08, "loss": 1.2511, "step": 525 }, { "epoch": 50.0, "eval_accuracy": 0.6875234170101161, "eval_f1": 0.7408328154133001, "eval_loss": 0.6054435968399048, "eval_precision": 0.7120669056152927, "eval_recall": 0.772020725388601, "eval_roc_auc": 0.7324881980426021, "eval_runtime": 1.2114, "eval_samples_per_second": 2203.31, "eval_steps_per_second": 4.953, "step": 525 }, { "epoch": 50.095238095238095, "grad_norm": 9.0575590133667, "learning_rate": 9.838830584707645e-08, "loss": 1.2519, "step": 526 }, { "epoch": 50.19047619047619, "grad_norm": 5.1338324546813965, "learning_rate": 9.857571214392805e-08, "loss": 1.2371, "step": 527 }, { "epoch": 50.285714285714285, "grad_norm": 13.615644454956055, "learning_rate": 9.876311844077963e-08, "loss": 1.2103, "step": 528 }, { "epoch": 50.38095238095238, "grad_norm": 9.063286781311035, "learning_rate": 9.895052473763118e-08, "loss": 1.2197, "step": 529 }, { "epoch": 50.476190476190474, "grad_norm": 7.897054195404053, "learning_rate": 9.913793103448278e-08, "loss": 1.2472, "step": 530 }, { "epoch": 50.57142857142857, "grad_norm": 8.853591918945312, "learning_rate": 9.932533733133433e-08, "loss": 1.2366, "step": 531 }, { "epoch": 50.666666666666664, "grad_norm": 5.471190929412842, "learning_rate": 9.951274362818591e-08, "loss": 1.2298, "step": 532 }, { "epoch": 50.76190476190476, "grad_norm": 7.534485816955566, "learning_rate": 9.970014992503748e-08, "loss": 1.1715, "step": 533 }, { "epoch": 50.857142857142854, "grad_norm": 4.532857894897461, "learning_rate": 9.988755622188906e-08, "loss": 1.1834, "step": 534 }, { "epoch": 50.95238095238095, "grad_norm": 6.546600341796875, "learning_rate": 1.0007496251874064e-07, "loss": 1.2442, "step": 535 }, { "epoch": 50.95238095238095, "eval_accuracy": 0.6916448107905583, "eval_f1": 0.7462226333641689, "eval_loss": 0.6045239567756653, "eval_precision": 0.7121836374337845, "eval_recall": 0.783678756476684, "eval_roc_auc": 0.7328635578583765, "eval_runtime": 1.2782, "eval_samples_per_second": 2088.101, "eval_steps_per_second": 4.694, "step": 535 }, { "epoch": 51.04761904761905, "grad_norm": 5.521002292633057, "learning_rate": 1.0026236881559221e-07, "loss": 1.2303, "step": 536 }, { "epoch": 51.142857142857146, "grad_norm": 6.953253746032715, "learning_rate": 1.0044977511244379e-07, "loss": 1.2548, "step": 537 }, { "epoch": 51.23809523809524, "grad_norm": 10.340543746948242, "learning_rate": 1.0063718140929536e-07, "loss": 1.2135, "step": 538 }, { "epoch": 51.333333333333336, "grad_norm": 11.009821891784668, "learning_rate": 1.0082458770614694e-07, "loss": 1.2175, "step": 539 }, { "epoch": 51.42857142857143, "grad_norm": 7.70527458190918, "learning_rate": 1.0101199400299849e-07, "loss": 1.2264, "step": 540 }, { "epoch": 51.523809523809526, "grad_norm": 5.6265363693237305, "learning_rate": 1.0119940029985007e-07, "loss": 1.212, "step": 541 }, { "epoch": 51.61904761904762, "grad_norm": 9.259636878967285, "learning_rate": 1.0138680659670167e-07, "loss": 1.1998, "step": 542 }, { "epoch": 51.714285714285715, "grad_norm": 7.9342851638793945, "learning_rate": 1.0157421289355322e-07, "loss": 1.1955, "step": 543 }, { "epoch": 51.80952380952381, "grad_norm": 5.805824279785156, "learning_rate": 1.017616191904048e-07, "loss": 1.207, "step": 544 }, { "epoch": 51.904761904761905, "grad_norm": 6.57551908493042, "learning_rate": 1.0194902548725637e-07, "loss": 1.2073, "step": 545 }, { "epoch": 52.0, "grad_norm": 7.8102216720581055, "learning_rate": 1.0213643178410795e-07, "loss": 1.2194, "step": 546 }, { "epoch": 52.0, "eval_accuracy": 0.6908954664668415, "eval_f1": 0.7451343836886005, "eval_loss": 0.6035574078559875, "eval_precision": 0.7123449497932663, "eval_recall": 0.7810880829015544, "eval_roc_auc": 0.7331142774899252, "eval_runtime": 1.1522, "eval_samples_per_second": 2316.385, "eval_steps_per_second": 5.207, "step": 546 }, { "epoch": 52.095238095238095, "grad_norm": 7.845072269439697, "learning_rate": 1.0232383808095952e-07, "loss": 1.1608, "step": 547 }, { "epoch": 52.19047619047619, "grad_norm": 5.869110107421875, "learning_rate": 1.025112443778111e-07, "loss": 1.216, "step": 548 }, { "epoch": 52.285714285714285, "grad_norm": 8.363801002502441, "learning_rate": 1.0269865067466268e-07, "loss": 1.2384, "step": 549 }, { "epoch": 52.38095238095238, "grad_norm": 16.105985641479492, "learning_rate": 1.0288605697151425e-07, "loss": 1.2684, "step": 550 }, { "epoch": 52.476190476190474, "grad_norm": 5.838757038116455, "learning_rate": 1.0307346326836583e-07, "loss": 1.2206, "step": 551 }, { "epoch": 52.57142857142857, "grad_norm": 16.695104598999023, "learning_rate": 1.0326086956521738e-07, "loss": 1.2561, "step": 552 }, { "epoch": 52.666666666666664, "grad_norm": 5.210092544555664, "learning_rate": 1.0344827586206898e-07, "loss": 1.2346, "step": 553 }, { "epoch": 52.76190476190476, "grad_norm": 5.63034200668335, "learning_rate": 1.0363568215892053e-07, "loss": 1.1727, "step": 554 }, { "epoch": 52.857142857142854, "grad_norm": 7.307580947875977, "learning_rate": 1.0382308845577211e-07, "loss": 1.2514, "step": 555 }, { "epoch": 52.95238095238095, "grad_norm": 7.287981986999512, "learning_rate": 1.040104947526237e-07, "loss": 1.1089, "step": 556 }, { "epoch": 52.95238095238095, "eval_accuracy": 0.6916448107905583, "eval_f1": 0.7459092312442112, "eval_loss": 0.602644145488739, "eval_precision": 0.7126843657817109, "eval_recall": 0.7823834196891192, "eval_roc_auc": 0.7335103626943006, "eval_runtime": 1.1449, "eval_samples_per_second": 2331.155, "eval_steps_per_second": 5.241, "step": 556 }, { "epoch": 53.04761904761905, "grad_norm": 11.443868637084961, "learning_rate": 1.0419790104947526e-07, "loss": 1.1587, "step": 557 }, { "epoch": 53.142857142857146, "grad_norm": 7.264131546020508, "learning_rate": 1.0438530734632684e-07, "loss": 1.2418, "step": 558 }, { "epoch": 53.23809523809524, "grad_norm": 4.907642841339111, "learning_rate": 1.0457271364317841e-07, "loss": 1.1863, "step": 559 }, { "epoch": 53.333333333333336, "grad_norm": 8.334844589233398, "learning_rate": 1.0476011994002999e-07, "loss": 1.2441, "step": 560 }, { "epoch": 53.42857142857143, "grad_norm": 9.130343437194824, "learning_rate": 1.0494752623688156e-07, "loss": 1.2188, "step": 561 }, { "epoch": 53.523809523809526, "grad_norm": 6.453733921051025, "learning_rate": 1.0513493253373314e-07, "loss": 1.1792, "step": 562 }, { "epoch": 53.61904761904762, "grad_norm": 10.290508270263672, "learning_rate": 1.0532233883058472e-07, "loss": 1.2191, "step": 563 }, { "epoch": 53.714285714285715, "grad_norm": 10.130738258361816, "learning_rate": 1.0550974512743628e-07, "loss": 1.2772, "step": 564 }, { "epoch": 53.80952380952381, "grad_norm": 6.250458240509033, "learning_rate": 1.0569715142428787e-07, "loss": 1.1638, "step": 565 }, { "epoch": 53.904761904761905, "grad_norm": 8.7243013381958, "learning_rate": 1.0588455772113943e-07, "loss": 1.1861, "step": 566 }, { "epoch": 54.0, "grad_norm": 14.219977378845215, "learning_rate": 1.06071964017991e-07, "loss": 1.2299, "step": 567 }, { "epoch": 54.0, "eval_accuracy": 0.6901461221431248, "eval_f1": 0.7443585780525502, "eval_loss": 0.6018810272216797, "eval_precision": 0.7120047309284447, "eval_recall": 0.7797927461139896, "eval_roc_auc": 0.7337489925158318, "eval_runtime": 1.1922, "eval_samples_per_second": 2238.788, "eval_steps_per_second": 5.033, "step": 567 }, { "epoch": 54.095238095238095, "grad_norm": 6.662168025970459, "learning_rate": 1.0625937031484257e-07, "loss": 1.1866, "step": 568 }, { "epoch": 54.19047619047619, "grad_norm": 13.390618324279785, "learning_rate": 1.0644677661169416e-07, "loss": 1.2342, "step": 569 }, { "epoch": 54.285714285714285, "grad_norm": 6.478188991546631, "learning_rate": 1.0663418290854574e-07, "loss": 1.1909, "step": 570 }, { "epoch": 54.38095238095238, "grad_norm": 9.548168182373047, "learning_rate": 1.068215892053973e-07, "loss": 1.1936, "step": 571 }, { "epoch": 54.476190476190474, "grad_norm": 5.8785295486450195, "learning_rate": 1.0700899550224888e-07, "loss": 1.2344, "step": 572 }, { "epoch": 54.57142857142857, "grad_norm": 6.237848281860352, "learning_rate": 1.0719640179910045e-07, "loss": 1.2835, "step": 573 }, { "epoch": 54.666666666666664, "grad_norm": 3.976562261581421, "learning_rate": 1.0738380809595203e-07, "loss": 1.1899, "step": 574 }, { "epoch": 54.76190476190476, "grad_norm": 12.389945983886719, "learning_rate": 1.0757121439280359e-07, "loss": 1.1579, "step": 575 }, { "epoch": 54.857142857142854, "grad_norm": 5.776051998138428, "learning_rate": 1.0775862068965518e-07, "loss": 1.2387, "step": 576 }, { "epoch": 54.95238095238095, "grad_norm": 9.520394325256348, "learning_rate": 1.0794602698650676e-07, "loss": 1.2015, "step": 577 }, { "epoch": 54.95238095238095, "eval_accuracy": 0.6905207943049831, "eval_f1": 0.7441140024783147, "eval_loss": 0.6013320684432983, "eval_precision": 0.7131828978622328, "eval_recall": 0.7778497409326425, "eval_roc_auc": 0.7338149107656879, "eval_runtime": 1.1497, "eval_samples_per_second": 2321.471, "eval_steps_per_second": 5.219, "step": 577 }, { "epoch": 55.04761904761905, "grad_norm": 11.354265213012695, "learning_rate": 1.0813343328335832e-07, "loss": 1.1656, "step": 578 }, { "epoch": 55.142857142857146, "grad_norm": 6.79288911819458, "learning_rate": 1.083208395802099e-07, "loss": 1.2565, "step": 579 }, { "epoch": 55.23809523809524, "grad_norm": 6.954402923583984, "learning_rate": 1.0850824587706147e-07, "loss": 1.2943, "step": 580 }, { "epoch": 55.333333333333336, "grad_norm": 7.525900363922119, "learning_rate": 1.0869565217391305e-07, "loss": 1.1596, "step": 581 }, { "epoch": 55.42857142857143, "grad_norm": 5.8448309898376465, "learning_rate": 1.0888305847076463e-07, "loss": 1.1933, "step": 582 }, { "epoch": 55.523809523809526, "grad_norm": 6.475961208343506, "learning_rate": 1.090704647676162e-07, "loss": 1.2261, "step": 583 }, { "epoch": 55.61904761904762, "grad_norm": 7.44426155090332, "learning_rate": 1.0925787106446778e-07, "loss": 1.1633, "step": 584 }, { "epoch": 55.714285714285715, "grad_norm": 10.208169937133789, "learning_rate": 1.0944527736131934e-07, "loss": 1.1981, "step": 585 }, { "epoch": 55.80952380952381, "grad_norm": 8.34847354888916, "learning_rate": 1.0963268365817093e-07, "loss": 1.1768, "step": 586 }, { "epoch": 55.904761904761905, "grad_norm": 5.280081748962402, "learning_rate": 1.0982008995502248e-07, "loss": 1.189, "step": 587 }, { "epoch": 56.0, "grad_norm": 8.047446250915527, "learning_rate": 1.1000749625187407e-07, "loss": 1.2522, "step": 588 }, { "epoch": 56.0, "eval_accuracy": 0.692394155114275, "eval_f1": 0.7474623192863734, "eval_loss": 0.6006296873092651, "eval_precision": 0.7117750439367311, "eval_recall": 0.7869170984455959, "eval_roc_auc": 0.7342130109383994, "eval_runtime": 1.2179, "eval_samples_per_second": 2191.539, "eval_steps_per_second": 4.927, "step": 588 }, { "epoch": 56.095238095238095, "grad_norm": 5.879335403442383, "learning_rate": 1.1019490254872566e-07, "loss": 1.2014, "step": 589 }, { "epoch": 56.19047619047619, "grad_norm": 6.829493045806885, "learning_rate": 1.1038230884557721e-07, "loss": 1.1867, "step": 590 }, { "epoch": 56.285714285714285, "grad_norm": 6.574869155883789, "learning_rate": 1.105697151424288e-07, "loss": 1.1913, "step": 591 }, { "epoch": 56.38095238095238, "grad_norm": 7.857141971588135, "learning_rate": 1.1075712143928036e-07, "loss": 1.2192, "step": 592 }, { "epoch": 56.476190476190474, "grad_norm": 6.09318733215332, "learning_rate": 1.1094452773613194e-07, "loss": 1.2501, "step": 593 }, { "epoch": 56.57142857142857, "grad_norm": 7.295796871185303, "learning_rate": 1.1113193403298351e-07, "loss": 1.2479, "step": 594 }, { "epoch": 56.666666666666664, "grad_norm": 7.448188781738281, "learning_rate": 1.1131934032983509e-07, "loss": 1.1998, "step": 595 }, { "epoch": 56.76190476190476, "grad_norm": 5.921523094177246, "learning_rate": 1.1150674662668667e-07, "loss": 1.2029, "step": 596 }, { "epoch": 56.857142857142854, "grad_norm": 7.603562355041504, "learning_rate": 1.1169415292353824e-07, "loss": 1.2234, "step": 597 }, { "epoch": 56.95238095238095, "grad_norm": 8.197514533996582, "learning_rate": 1.1188155922038982e-07, "loss": 1.1689, "step": 598 }, { "epoch": 56.95238095238095, "eval_accuracy": 0.6931434994379918, "eval_f1": 0.7483870967741936, "eval_loss": 0.5999482870101929, "eval_precision": 0.711864406779661, "eval_recall": 0.788860103626943, "eval_roc_auc": 0.7343552101324122, "eval_runtime": 1.2535, "eval_samples_per_second": 2129.228, "eval_steps_per_second": 4.787, "step": 598 }, { "epoch": 57.04761904761905, "grad_norm": 5.4424147605896, "learning_rate": 1.1206896551724139e-07, "loss": 1.2533, "step": 599 }, { "epoch": 57.142857142857146, "grad_norm": 6.09983491897583, "learning_rate": 1.1225637181409297e-07, "loss": 1.2196, "step": 600 }, { "epoch": 57.23809523809524, "grad_norm": 10.144179344177246, "learning_rate": 1.1244377811094452e-07, "loss": 1.1214, "step": 601 }, { "epoch": 57.333333333333336, "grad_norm": 8.143068313598633, "learning_rate": 1.126311844077961e-07, "loss": 1.2736, "step": 602 }, { "epoch": 57.42857142857143, "grad_norm": 4.57938289642334, "learning_rate": 1.128185907046477e-07, "loss": 1.1829, "step": 603 }, { "epoch": 57.523809523809526, "grad_norm": 8.159676551818848, "learning_rate": 1.1300599700149925e-07, "loss": 1.2055, "step": 604 }, { "epoch": 57.61904761904762, "grad_norm": 5.003109455108643, "learning_rate": 1.1319340329835083e-07, "loss": 1.1579, "step": 605 }, { "epoch": 57.714285714285715, "grad_norm": 5.263125419616699, "learning_rate": 1.133808095952024e-07, "loss": 1.18, "step": 606 }, { "epoch": 57.80952380952381, "grad_norm": 5.333617687225342, "learning_rate": 1.1356821589205398e-07, "loss": 1.234, "step": 607 }, { "epoch": 57.904761904761905, "grad_norm": 7.104213237762451, "learning_rate": 1.1375562218890555e-07, "loss": 1.2407, "step": 608 }, { "epoch": 58.0, "grad_norm": 5.938223361968994, "learning_rate": 1.1394302848575713e-07, "loss": 1.2129, "step": 609 }, { "epoch": 58.0, "eval_accuracy": 0.6920194829524167, "eval_f1": 0.7472324723247232, "eval_loss": 0.5991713404655457, "eval_precision": 0.7113583138173302, "eval_recall": 0.7869170984455959, "eval_roc_auc": 0.7346292458261371, "eval_runtime": 1.1169, "eval_samples_per_second": 2389.62, "eval_steps_per_second": 5.372, "step": 609 }, { "epoch": 58.095238095238095, "grad_norm": 5.953458309173584, "learning_rate": 1.1413043478260871e-07, "loss": 1.231, "step": 610 }, { "epoch": 58.19047619047619, "grad_norm": 9.69005012512207, "learning_rate": 1.1431784107946028e-07, "loss": 1.1305, "step": 611 }, { "epoch": 58.285714285714285, "grad_norm": 3.9009530544281006, "learning_rate": 1.1450524737631186e-07, "loss": 1.1851, "step": 612 }, { "epoch": 58.38095238095238, "grad_norm": 8.352299690246582, "learning_rate": 1.1469265367316341e-07, "loss": 1.2033, "step": 613 }, { "epoch": 58.476190476190474, "grad_norm": 7.871908664703369, "learning_rate": 1.1488005997001501e-07, "loss": 1.2383, "step": 614 }, { "epoch": 58.57142857142857, "grad_norm": 5.937413692474365, "learning_rate": 1.1506746626686656e-07, "loss": 1.2332, "step": 615 }, { "epoch": 58.666666666666664, "grad_norm": 7.5808820724487305, "learning_rate": 1.1525487256371814e-07, "loss": 1.2052, "step": 616 }, { "epoch": 58.76190476190476, "grad_norm": 10.876705169677734, "learning_rate": 1.1544227886056972e-07, "loss": 1.2613, "step": 617 }, { "epoch": 58.857142857142854, "grad_norm": 6.522879123687744, "learning_rate": 1.1562968515742129e-07, "loss": 1.2303, "step": 618 }, { "epoch": 58.95238095238095, "grad_norm": 4.543294906616211, "learning_rate": 1.1581709145427287e-07, "loss": 1.1908, "step": 619 }, { "epoch": 58.95238095238095, "eval_accuracy": 0.692394155114275, "eval_f1": 0.7477726574500768, "eval_loss": 0.5984930396080017, "eval_precision": 0.7112799532437172, "eval_recall": 0.7882124352331606, "eval_roc_auc": 0.7348506044905009, "eval_runtime": 1.1584, "eval_samples_per_second": 2304.056, "eval_steps_per_second": 5.18, "step": 619 }, { "epoch": 59.04761904761905, "grad_norm": 6.806978702545166, "learning_rate": 1.1600449775112444e-07, "loss": 1.2054, "step": 620 }, { "epoch": 59.142857142857146, "grad_norm": 4.517679214477539, "learning_rate": 1.1619190404797602e-07, "loss": 1.2155, "step": 621 }, { "epoch": 59.23809523809524, "grad_norm": 8.569692611694336, "learning_rate": 1.1637931034482758e-07, "loss": 1.2062, "step": 622 }, { "epoch": 59.333333333333336, "grad_norm": 7.021420478820801, "learning_rate": 1.1656671664167917e-07, "loss": 1.1733, "step": 623 }, { "epoch": 59.42857142857143, "grad_norm": 6.8625288009643555, "learning_rate": 1.1675412293853075e-07, "loss": 1.2997, "step": 624 }, { "epoch": 59.523809523809526, "grad_norm": 6.276980876922607, "learning_rate": 1.169415292353823e-07, "loss": 1.1977, "step": 625 }, { "epoch": 59.61904761904762, "grad_norm": 4.541833400726318, "learning_rate": 1.171289355322339e-07, "loss": 1.1695, "step": 626 }, { "epoch": 59.714285714285715, "grad_norm": 6.162714004516602, "learning_rate": 1.1731634182908545e-07, "loss": 1.2158, "step": 627 }, { "epoch": 59.80952380952381, "grad_norm": 9.742534637451172, "learning_rate": 1.1750374812593703e-07, "loss": 1.1878, "step": 628 }, { "epoch": 59.904761904761905, "grad_norm": 11.55949878692627, "learning_rate": 1.176911544227886e-07, "loss": 1.2577, "step": 629 }, { "epoch": 60.0, "grad_norm": 7.8720502853393555, "learning_rate": 1.1787856071964018e-07, "loss": 1.1847, "step": 630 }, { "epoch": 60.0, "eval_accuracy": 0.6916448107905583, "eval_f1": 0.7474685486345505, "eval_loss": 0.5978415012359619, "eval_precision": 0.710204081632653, "eval_recall": 0.788860103626943, "eval_roc_auc": 0.7350463442717328, "eval_runtime": 1.1763, "eval_samples_per_second": 2269.063, "eval_steps_per_second": 5.101, "step": 630 }, { "epoch": 60.095238095238095, "grad_norm": 10.057273864746094, "learning_rate": 1.1806596701649176e-07, "loss": 1.241, "step": 631 }, { "epoch": 60.19047619047619, "grad_norm": 5.35072660446167, "learning_rate": 1.1825337331334333e-07, "loss": 1.1841, "step": 632 }, { "epoch": 60.285714285714285, "grad_norm": 8.153692245483398, "learning_rate": 1.1844077961019491e-07, "loss": 1.2294, "step": 633 }, { "epoch": 60.38095238095238, "grad_norm": 4.509757995605469, "learning_rate": 1.1862818590704648e-07, "loss": 1.2288, "step": 634 }, { "epoch": 60.476190476190474, "grad_norm": 9.07571029663086, "learning_rate": 1.1881559220389806e-07, "loss": 1.2248, "step": 635 }, { "epoch": 60.57142857142857, "grad_norm": 8.7220458984375, "learning_rate": 1.1900299850074964e-07, "loss": 1.2432, "step": 636 }, { "epoch": 60.666666666666664, "grad_norm": 8.08939266204834, "learning_rate": 1.1919040479760121e-07, "loss": 1.1833, "step": 637 }, { "epoch": 60.76190476190476, "grad_norm": 4.108371257781982, "learning_rate": 1.1937781109445278e-07, "loss": 1.19, "step": 638 }, { "epoch": 60.857142857142854, "grad_norm": 11.294686317443848, "learning_rate": 1.1956521739130436e-07, "loss": 1.1802, "step": 639 }, { "epoch": 60.95238095238095, "grad_norm": 3.7819859981536865, "learning_rate": 1.1975262368815594e-07, "loss": 1.1992, "step": 640 }, { "epoch": 60.95238095238095, "eval_accuracy": 0.6927688272761334, "eval_f1": 0.7495418448381185, "eval_loss": 0.5971837043762207, "eval_precision": 0.7092485549132947, "eval_recall": 0.7946891191709845, "eval_roc_auc": 0.7353765112262521, "eval_runtime": 1.2087, "eval_samples_per_second": 2208.082, "eval_steps_per_second": 4.964, "step": 640 }, { "epoch": 61.04761904761905, "grad_norm": 5.13773775100708, "learning_rate": 1.199400299850075e-07, "loss": 1.2065, "step": 641 }, { "epoch": 61.142857142857146, "grad_norm": 5.876140594482422, "learning_rate": 1.2012743628185907e-07, "loss": 1.2267, "step": 642 }, { "epoch": 61.23809523809524, "grad_norm": 7.470527172088623, "learning_rate": 1.2031484257871066e-07, "loss": 1.2262, "step": 643 }, { "epoch": 61.333333333333336, "grad_norm": 5.522604942321777, "learning_rate": 1.205022488755622e-07, "loss": 1.1942, "step": 644 }, { "epoch": 61.42857142857143, "grad_norm": 4.152938365936279, "learning_rate": 1.2068965517241382e-07, "loss": 1.2191, "step": 645 }, { "epoch": 61.523809523809526, "grad_norm": 8.6640043258667, "learning_rate": 1.2087706146926537e-07, "loss": 1.2091, "step": 646 }, { "epoch": 61.61904761904762, "grad_norm": 7.228600978851318, "learning_rate": 1.2106446776611695e-07, "loss": 1.2186, "step": 647 }, { "epoch": 61.714285714285715, "grad_norm": 8.163618087768555, "learning_rate": 1.212518740629685e-07, "loss": 1.1775, "step": 648 }, { "epoch": 61.80952380952381, "grad_norm": 7.274651050567627, "learning_rate": 1.214392803598201e-07, "loss": 1.17, "step": 649 }, { "epoch": 61.904761904761905, "grad_norm": 4.588978290557861, "learning_rate": 1.2162668665667167e-07, "loss": 1.1833, "step": 650 }, { "epoch": 62.0, "grad_norm": 7.658914566040039, "learning_rate": 1.2181409295352325e-07, "loss": 1.1698, "step": 651 }, { "epoch": 62.0, "eval_accuracy": 0.6920194829524167, "eval_f1": 0.7480073574494175, "eval_loss": 0.59659343957901, "eval_precision": 0.710128055878929, "eval_recall": 0.7901554404145078, "eval_roc_auc": 0.7355400115141049, "eval_runtime": 1.2505, "eval_samples_per_second": 2134.361, "eval_steps_per_second": 4.798, "step": 651 }, { "epoch": 62.095238095238095, "grad_norm": 6.7331743240356445, "learning_rate": 1.2200149925037483e-07, "loss": 1.1716, "step": 652 }, { "epoch": 62.19047619047619, "grad_norm": 6.890786647796631, "learning_rate": 1.2218890554722639e-07, "loss": 1.2546, "step": 653 }, { "epoch": 62.285714285714285, "grad_norm": 6.733562469482422, "learning_rate": 1.2237631184407797e-07, "loss": 1.2633, "step": 654 }, { "epoch": 62.38095238095238, "grad_norm": 4.396595001220703, "learning_rate": 1.2256371814092952e-07, "loss": 1.1832, "step": 655 }, { "epoch": 62.476190476190474, "grad_norm": 5.2550764083862305, "learning_rate": 1.227511244377811e-07, "loss": 1.2088, "step": 656 }, { "epoch": 62.57142857142857, "grad_norm": 8.098922729492188, "learning_rate": 1.229385307346327e-07, "loss": 1.1956, "step": 657 }, { "epoch": 62.666666666666664, "grad_norm": 8.616472244262695, "learning_rate": 1.2312593703148426e-07, "loss": 1.1821, "step": 658 }, { "epoch": 62.76190476190476, "grad_norm": 3.985522508621216, "learning_rate": 1.2331334332833585e-07, "loss": 1.1895, "step": 659 }, { "epoch": 62.857142857142854, "grad_norm": 8.401481628417969, "learning_rate": 1.235007496251874e-07, "loss": 1.2246, "step": 660 }, { "epoch": 62.95238095238095, "grad_norm": 7.483948230743408, "learning_rate": 1.2368815592203898e-07, "loss": 1.1683, "step": 661 }, { "epoch": 62.95238095238095, "eval_accuracy": 0.692394155114275, "eval_f1": 0.7499238501370697, "eval_loss": 0.5961089134216309, "eval_precision": 0.7078780908568143, "eval_recall": 0.7972797927461139, "eval_roc_auc": 0.7358785261945884, "eval_runtime": 1.1192, "eval_samples_per_second": 2384.799, "eval_steps_per_second": 5.361, "step": 661 }, { "epoch": 63.04761904761905, "grad_norm": 7.974298477172852, "learning_rate": 1.2387556221889056e-07, "loss": 1.2418, "step": 662 }, { "epoch": 63.142857142857146, "grad_norm": 4.714601516723633, "learning_rate": 1.2406296851574214e-07, "loss": 1.1899, "step": 663 }, { "epoch": 63.23809523809524, "grad_norm": 8.871092796325684, "learning_rate": 1.2425037481259372e-07, "loss": 1.1732, "step": 664 }, { "epoch": 63.333333333333336, "grad_norm": 6.921295642852783, "learning_rate": 1.2443778110944528e-07, "loss": 1.1797, "step": 665 }, { "epoch": 63.42857142857143, "grad_norm": 6.838317394256592, "learning_rate": 1.2462518740629686e-07, "loss": 1.2006, "step": 666 }, { "epoch": 63.523809523809526, "grad_norm": 9.975998878479004, "learning_rate": 1.2481259370314841e-07, "loss": 1.1928, "step": 667 }, { "epoch": 63.61904761904762, "grad_norm": 4.443400859832764, "learning_rate": 1.2500000000000002e-07, "loss": 1.1637, "step": 668 }, { "epoch": 63.714285714285715, "grad_norm": 6.422091960906982, "learning_rate": 1.2518740629685158e-07, "loss": 1.2192, "step": 669 }, { "epoch": 63.80952380952381, "grad_norm": 8.040130615234375, "learning_rate": 1.2537481259370316e-07, "loss": 1.2418, "step": 670 }, { "epoch": 63.904761904761905, "grad_norm": 4.976798057556152, "learning_rate": 1.2556221889055474e-07, "loss": 1.1433, "step": 671 }, { "epoch": 64.0, "grad_norm": 5.444014549255371, "learning_rate": 1.257496251874063e-07, "loss": 1.2515, "step": 672 }, { "epoch": 64.0, "eval_accuracy": 0.6931434994379918, "eval_f1": 0.7518933656467737, "eval_loss": 0.5956265330314636, "eval_precision": 0.7063175867956745, "eval_recall": 0.8037564766839378, "eval_roc_auc": 0.7361352907311456, "eval_runtime": 1.1898, "eval_samples_per_second": 2243.242, "eval_steps_per_second": 5.043, "step": 672 }, { "epoch": 64.0952380952381, "grad_norm": 7.590255260467529, "learning_rate": 1.2593703148425787e-07, "loss": 1.2581, "step": 673 }, { "epoch": 64.19047619047619, "grad_norm": 4.764690399169922, "learning_rate": 1.2612443778110945e-07, "loss": 1.2219, "step": 674 }, { "epoch": 64.28571428571429, "grad_norm": 4.292867660522461, "learning_rate": 1.2631184407796103e-07, "loss": 1.1789, "step": 675 }, { "epoch": 64.38095238095238, "grad_norm": 8.631563186645508, "learning_rate": 1.264992503748126e-07, "loss": 1.2549, "step": 676 }, { "epoch": 64.47619047619048, "grad_norm": 5.184398174285889, "learning_rate": 1.2668665667166417e-07, "loss": 1.2146, "step": 677 }, { "epoch": 64.57142857142857, "grad_norm": 9.575491905212402, "learning_rate": 1.2687406296851575e-07, "loss": 1.1306, "step": 678 }, { "epoch": 64.66666666666667, "grad_norm": 4.342453479766846, "learning_rate": 1.270614692653673e-07, "loss": 1.182, "step": 679 }, { "epoch": 64.76190476190476, "grad_norm": 4.187139987945557, "learning_rate": 1.272488755622189e-07, "loss": 1.1806, "step": 680 }, { "epoch": 64.85714285714286, "grad_norm": 7.976365089416504, "learning_rate": 1.2743628185907047e-07, "loss": 1.1707, "step": 681 }, { "epoch": 64.95238095238095, "grad_norm": 4.439985275268555, "learning_rate": 1.2762368815592205e-07, "loss": 1.1925, "step": 682 }, { "epoch": 64.95238095238095, "eval_accuracy": 0.6931434994379918, "eval_f1": 0.7515923566878981, "eval_loss": 0.5950964689254761, "eval_precision": 0.7067883628066173, "eval_recall": 0.802461139896373, "eval_roc_auc": 0.736240932642487, "eval_runtime": 1.1681, "eval_samples_per_second": 2284.886, "eval_steps_per_second": 5.136, "step": 682 }, { "epoch": 65.04761904761905, "grad_norm": 13.692363739013672, "learning_rate": 1.278110944527736e-07, "loss": 1.2215, "step": 683 }, { "epoch": 65.14285714285714, "grad_norm": 12.23557186126709, "learning_rate": 1.2799850074962518e-07, "loss": 1.2143, "step": 684 }, { "epoch": 65.23809523809524, "grad_norm": 5.822898864746094, "learning_rate": 1.2818590704647676e-07, "loss": 1.2465, "step": 685 }, { "epoch": 65.33333333333333, "grad_norm": 8.577857971191406, "learning_rate": 1.2837331334332835e-07, "loss": 1.1758, "step": 686 }, { "epoch": 65.42857142857143, "grad_norm": 5.592923641204834, "learning_rate": 1.2856071964017993e-07, "loss": 1.229, "step": 687 }, { "epoch": 65.52380952380952, "grad_norm": 8.428314208984375, "learning_rate": 1.2874812593703148e-07, "loss": 1.1457, "step": 688 }, { "epoch": 65.61904761904762, "grad_norm": 4.13578462600708, "learning_rate": 1.2893553223388306e-07, "loss": 1.221, "step": 689 }, { "epoch": 65.71428571428571, "grad_norm": 10.429786682128906, "learning_rate": 1.2912293853073464e-07, "loss": 1.2281, "step": 690 }, { "epoch": 65.80952380952381, "grad_norm": 6.562197685241699, "learning_rate": 1.2931034482758622e-07, "loss": 1.2364, "step": 691 }, { "epoch": 65.9047619047619, "grad_norm": 5.115170001983643, "learning_rate": 1.294977511244378e-07, "loss": 1.1427, "step": 692 }, { "epoch": 66.0, "grad_norm": 5.716797351837158, "learning_rate": 1.2968515742128936e-07, "loss": 1.1304, "step": 693 }, { "epoch": 66.0, "eval_accuracy": 0.6920194829524167, "eval_f1": 0.7510599636583889, "eval_loss": 0.5946346521377563, "eval_precision": 0.7053469852104665, "eval_recall": 0.8031088082901554, "eval_roc_auc": 0.7363713298791019, "eval_runtime": 1.4654, "eval_samples_per_second": 1821.307, "eval_steps_per_second": 4.094, "step": 693 }, { "epoch": 66.0952380952381, "grad_norm": 4.564248085021973, "learning_rate": 1.2987256371814094e-07, "loss": 1.1607, "step": 694 }, { "epoch": 66.19047619047619, "grad_norm": 5.520058631896973, "learning_rate": 1.300599700149925e-07, "loss": 1.1834, "step": 695 }, { "epoch": 66.28571428571429, "grad_norm": 7.891077041625977, "learning_rate": 1.3024737631184408e-07, "loss": 1.241, "step": 696 }, { "epoch": 66.38095238095238, "grad_norm": 3.926841974258423, "learning_rate": 1.3043478260869566e-07, "loss": 1.1777, "step": 697 }, { "epoch": 66.47619047619048, "grad_norm": 7.784186840057373, "learning_rate": 1.3062218890554724e-07, "loss": 1.2522, "step": 698 }, { "epoch": 66.57142857142857, "grad_norm": 4.014712810516357, "learning_rate": 1.3080959520239882e-07, "loss": 1.1957, "step": 699 }, { "epoch": 66.66666666666667, "grad_norm": 3.500624656677246, "learning_rate": 1.3099700149925037e-07, "loss": 1.1759, "step": 700 }, { "epoch": 66.76190476190476, "grad_norm": 4.814046859741211, "learning_rate": 1.3118440779610195e-07, "loss": 1.199, "step": 701 }, { "epoch": 66.85714285714286, "grad_norm": 7.344492435455322, "learning_rate": 1.313718140929535e-07, "loss": 1.2273, "step": 702 }, { "epoch": 66.95238095238095, "grad_norm": 3.8098535537719727, "learning_rate": 1.3155922038980512e-07, "loss": 1.1647, "step": 703 }, { "epoch": 66.95238095238095, "eval_accuracy": 0.6938928437617085, "eval_f1": 0.7529482915028727, "eval_loss": 0.5941770672798157, "eval_precision": 0.706182643221781, "eval_recall": 0.8063471502590673, "eval_roc_auc": 0.736562176165803, "eval_runtime": 1.2309, "eval_samples_per_second": 2168.262, "eval_steps_per_second": 4.874, "step": 703 }, { "epoch": 67.04761904761905, "grad_norm": 6.632507801055908, "learning_rate": 1.317466266866567e-07, "loss": 1.1796, "step": 704 }, { "epoch": 67.14285714285714, "grad_norm": 4.132903575897217, "learning_rate": 1.3193403298350825e-07, "loss": 1.1704, "step": 705 }, { "epoch": 67.23809523809524, "grad_norm": 6.37913179397583, "learning_rate": 1.3212143928035983e-07, "loss": 1.208, "step": 706 }, { "epoch": 67.33333333333333, "grad_norm": 7.594150543212891, "learning_rate": 1.3230884557721139e-07, "loss": 1.159, "step": 707 }, { "epoch": 67.42857142857143, "grad_norm": 6.472071170806885, "learning_rate": 1.3249625187406297e-07, "loss": 1.1981, "step": 708 }, { "epoch": 67.52380952380952, "grad_norm": 8.726730346679688, "learning_rate": 1.3268365817091455e-07, "loss": 1.23, "step": 709 }, { "epoch": 67.61904761904762, "grad_norm": 6.169523239135742, "learning_rate": 1.3287106446776613e-07, "loss": 1.2393, "step": 710 }, { "epoch": 67.71428571428571, "grad_norm": 7.1220879554748535, "learning_rate": 1.330584707646177e-07, "loss": 1.2379, "step": 711 }, { "epoch": 67.80952380952381, "grad_norm": 4.605555534362793, "learning_rate": 1.3324587706146927e-07, "loss": 1.1859, "step": 712 }, { "epoch": 67.9047619047619, "grad_norm": 3.750711679458618, "learning_rate": 1.3343328335832085e-07, "loss": 1.1814, "step": 713 }, { "epoch": 68.0, "grad_norm": 9.115017890930176, "learning_rate": 1.3362068965517243e-07, "loss": 1.1628, "step": 714 }, { "epoch": 68.0, "eval_accuracy": 0.6927688272761334, "eval_f1": 0.7512135922330098, "eval_loss": 0.5937598943710327, "eval_precision": 0.70662100456621, "eval_recall": 0.8018134715025906, "eval_roc_auc": 0.736656303972366, "eval_runtime": 1.2369, "eval_samples_per_second": 2157.854, "eval_steps_per_second": 4.851, "step": 714 }, { "epoch": 68.0952380952381, "grad_norm": 6.957290172576904, "learning_rate": 1.33808095952024e-07, "loss": 1.1832, "step": 715 }, { "epoch": 68.19047619047619, "grad_norm": 6.766869068145752, "learning_rate": 1.3399550224887556e-07, "loss": 1.2354, "step": 716 }, { "epoch": 68.28571428571429, "grad_norm": 4.803560733795166, "learning_rate": 1.3418290854572714e-07, "loss": 1.1683, "step": 717 }, { "epoch": 68.38095238095238, "grad_norm": 6.3927693367004395, "learning_rate": 1.3437031484257872e-07, "loss": 1.2238, "step": 718 }, { "epoch": 68.47619047619048, "grad_norm": 4.72044563293457, "learning_rate": 1.3455772113943028e-07, "loss": 1.1838, "step": 719 }, { "epoch": 68.57142857142857, "grad_norm": 3.594869375228882, "learning_rate": 1.3474512743628186e-07, "loss": 1.2327, "step": 720 }, { "epoch": 68.66666666666667, "grad_norm": 8.74575138092041, "learning_rate": 1.3493253373313344e-07, "loss": 1.1613, "step": 721 }, { "epoch": 68.76190476190476, "grad_norm": 4.941747665405273, "learning_rate": 1.3511994002998502e-07, "loss": 1.208, "step": 722 }, { "epoch": 68.85714285714286, "grad_norm": 4.247956275939941, "learning_rate": 1.3530734632683658e-07, "loss": 1.2211, "step": 723 }, { "epoch": 68.95238095238095, "grad_norm": 13.995353698730469, "learning_rate": 1.3549475262368816e-07, "loss": 1.1874, "step": 724 }, { "epoch": 68.95238095238095, "eval_accuracy": 0.6931434994379918, "eval_f1": 0.7514415781487102, "eval_loss": 0.5934665203094482, "eval_precision": 0.7070245573957739, "eval_recall": 0.8018134715025906, "eval_roc_auc": 0.7367440990213011, "eval_runtime": 1.1208, "eval_samples_per_second": 2381.392, "eval_steps_per_second": 5.353, "step": 724 }, { "epoch": 69.04761904761905, "grad_norm": 5.009316444396973, "learning_rate": 1.3568215892053974e-07, "loss": 1.2182, "step": 725 }, { "epoch": 69.14285714285714, "grad_norm": 9.797475814819336, "learning_rate": 1.3586956521739132e-07, "loss": 1.1913, "step": 726 }, { "epoch": 69.23809523809524, "grad_norm": 3.616032600402832, "learning_rate": 1.360569715142429e-07, "loss": 1.1511, "step": 727 }, { "epoch": 69.33333333333333, "grad_norm": 7.884753704071045, "learning_rate": 1.3624437781109445e-07, "loss": 1.1546, "step": 728 }, { "epoch": 69.42857142857143, "grad_norm": 6.181896686553955, "learning_rate": 1.3643178410794604e-07, "loss": 1.2139, "step": 729 }, { "epoch": 69.52380952380952, "grad_norm": 5.938560485839844, "learning_rate": 1.366191904047976e-07, "loss": 1.1997, "step": 730 }, { "epoch": 69.61904761904762, "grad_norm": 7.971124172210693, "learning_rate": 1.3680659670164917e-07, "loss": 1.2043, "step": 731 }, { "epoch": 69.71428571428571, "grad_norm": 5.756801128387451, "learning_rate": 1.3699400299850075e-07, "loss": 1.2044, "step": 732 }, { "epoch": 69.80952380952381, "grad_norm": 4.534045696258545, "learning_rate": 1.3718140929535233e-07, "loss": 1.1576, "step": 733 }, { "epoch": 69.9047619047619, "grad_norm": 4.456146240234375, "learning_rate": 1.3736881559220391e-07, "loss": 1.2251, "step": 734 }, { "epoch": 70.0, "grad_norm": 4.985422134399414, "learning_rate": 1.3755622188905547e-07, "loss": 1.1776, "step": 735 }, { "epoch": 70.0, "eval_accuracy": 0.6935181715998502, "eval_f1": 0.752720677146312, "eval_loss": 0.5932142734527588, "eval_precision": 0.70578231292517, "eval_recall": 0.8063471502590673, "eval_roc_auc": 0.7369044329303398, "eval_runtime": 1.1959, "eval_samples_per_second": 2231.765, "eval_steps_per_second": 5.017, "step": 735 }, { "epoch": 70.0952380952381, "grad_norm": 4.807669162750244, "learning_rate": 1.3774362818590705e-07, "loss": 1.2149, "step": 736 }, { "epoch": 70.19047619047619, "grad_norm": 5.5707502365112305, "learning_rate": 1.379310344827586e-07, "loss": 1.1785, "step": 737 }, { "epoch": 70.28571428571429, "grad_norm": 4.975805759429932, "learning_rate": 1.381184407796102e-07, "loss": 1.1759, "step": 738 }, { "epoch": 70.38095238095238, "grad_norm": 9.631810188293457, "learning_rate": 1.383058470764618e-07, "loss": 1.1972, "step": 739 }, { "epoch": 70.47619047619048, "grad_norm": 7.148436546325684, "learning_rate": 1.3849325337331335e-07, "loss": 1.1757, "step": 740 }, { "epoch": 70.57142857142857, "grad_norm": 4.956615924835205, "learning_rate": 1.3868065967016493e-07, "loss": 1.2066, "step": 741 }, { "epoch": 70.66666666666667, "grad_norm": 3.861035108566284, "learning_rate": 1.3886806596701648e-07, "loss": 1.1936, "step": 742 }, { "epoch": 70.76190476190476, "grad_norm": 8.290705680847168, "learning_rate": 1.3905547226386806e-07, "loss": 1.1542, "step": 743 }, { "epoch": 70.85714285714286, "grad_norm": 4.794528484344482, "learning_rate": 1.3924287856071967e-07, "loss": 1.1657, "step": 744 }, { "epoch": 70.95238095238095, "grad_norm": 10.255331039428711, "learning_rate": 1.3943028485757122e-07, "loss": 1.291, "step": 745 }, { "epoch": 70.95238095238095, "eval_accuracy": 0.6946421880854252, "eval_f1": 0.7538508003624282, "eval_loss": 0.5930056571960449, "eval_precision": 0.7062818336162988, "eval_recall": 0.8082901554404145, "eval_roc_auc": 0.7369087507196316, "eval_runtime": 1.2213, "eval_samples_per_second": 2185.328, "eval_steps_per_second": 4.913, "step": 745 }, { "epoch": 71.04761904761905, "grad_norm": 6.958563804626465, "learning_rate": 1.396176911544228e-07, "loss": 1.228, "step": 746 }, { "epoch": 71.14285714285714, "grad_norm": 7.86414098739624, "learning_rate": 1.3980509745127436e-07, "loss": 1.1859, "step": 747 }, { "epoch": 71.23809523809524, "grad_norm": 4.688377380371094, "learning_rate": 1.3999250374812594e-07, "loss": 1.1815, "step": 748 }, { "epoch": 71.33333333333333, "grad_norm": 6.568027496337891, "learning_rate": 1.4017991004497752e-07, "loss": 1.1951, "step": 749 }, { "epoch": 71.42857142857143, "grad_norm": 9.667662620544434, "learning_rate": 1.403673163418291e-07, "loss": 1.1897, "step": 750 }, { "epoch": 71.52380952380952, "grad_norm": 3.651684045791626, "learning_rate": 1.4055472263868068e-07, "loss": 1.2159, "step": 751 }, { "epoch": 71.61904761904762, "grad_norm": 4.854371070861816, "learning_rate": 1.4074212893553224e-07, "loss": 1.2089, "step": 752 }, { "epoch": 71.71428571428571, "grad_norm": 5.8277153968811035, "learning_rate": 1.4092953523238382e-07, "loss": 1.1586, "step": 753 }, { "epoch": 71.80952380952381, "grad_norm": 8.932456016540527, "learning_rate": 1.4111694152923537e-07, "loss": 1.1964, "step": 754 }, { "epoch": 71.9047619047619, "grad_norm": 7.3312482833862305, "learning_rate": 1.4130434782608695e-07, "loss": 1.2107, "step": 755 }, { "epoch": 72.0, "grad_norm": 4.927225589752197, "learning_rate": 1.4149175412293854e-07, "loss": 1.1468, "step": 756 }, { "epoch": 72.0, "eval_accuracy": 0.6961408767328587, "eval_f1": 0.7550588945937783, "eval_loss": 0.5926088690757751, "eval_precision": 0.7074136955291455, "eval_recall": 0.8095854922279793, "eval_roc_auc": 0.7370814622913068, "eval_runtime": 1.2983, "eval_samples_per_second": 2055.808, "eval_steps_per_second": 4.622, "step": 756 }, { "epoch": 72.0952380952381, "grad_norm": 4.906456470489502, "learning_rate": 1.4167916041979012e-07, "loss": 1.1828, "step": 757 }, { "epoch": 72.19047619047619, "grad_norm": 3.9079153537750244, "learning_rate": 1.418665667166417e-07, "loss": 1.2224, "step": 758 }, { "epoch": 72.28571428571429, "grad_norm": 6.527932167053223, "learning_rate": 1.4205397301349325e-07, "loss": 1.1986, "step": 759 }, { "epoch": 72.38095238095238, "grad_norm": 4.133279800415039, "learning_rate": 1.4224137931034483e-07, "loss": 1.1761, "step": 760 }, { "epoch": 72.47619047619048, "grad_norm": 4.781145095825195, "learning_rate": 1.4242878560719641e-07, "loss": 1.1717, "step": 761 }, { "epoch": 72.57142857142857, "grad_norm": 3.2274749279022217, "learning_rate": 1.42616191904048e-07, "loss": 1.1883, "step": 762 }, { "epoch": 72.66666666666667, "grad_norm": 4.167154312133789, "learning_rate": 1.4280359820089955e-07, "loss": 1.1845, "step": 763 }, { "epoch": 72.76190476190476, "grad_norm": 3.8813252449035645, "learning_rate": 1.4299100449775113e-07, "loss": 1.183, "step": 764 }, { "epoch": 72.85714285714286, "grad_norm": 6.702694892883301, "learning_rate": 1.431784107946027e-07, "loss": 1.2002, "step": 765 }, { "epoch": 72.95238095238095, "grad_norm": 5.558817386627197, "learning_rate": 1.4336581709145427e-07, "loss": 1.2337, "step": 766 }, { "epoch": 72.95238095238095, "eval_accuracy": 0.6972648932184339, "eval_f1": 0.756185878092939, "eval_loss": 0.5922711491584778, "eval_precision": 0.707909604519774, "eval_recall": 0.8115284974093264, "eval_roc_auc": 0.7372495682210709, "eval_runtime": 1.2573, "eval_samples_per_second": 2122.807, "eval_steps_per_second": 4.772, "step": 766 }, { "epoch": 73.04761904761905, "grad_norm": 3.9507691860198975, "learning_rate": 1.4355322338830587e-07, "loss": 1.1799, "step": 767 }, { "epoch": 73.14285714285714, "grad_norm": 4.559001445770264, "learning_rate": 1.4374062968515743e-07, "loss": 1.1582, "step": 768 }, { "epoch": 73.23809523809524, "grad_norm": 3.7043728828430176, "learning_rate": 1.43928035982009e-07, "loss": 1.1928, "step": 769 }, { "epoch": 73.33333333333333, "grad_norm": 4.907166957855225, "learning_rate": 1.4411544227886056e-07, "loss": 1.1562, "step": 770 }, { "epoch": 73.42857142857143, "grad_norm": 10.487092971801758, "learning_rate": 1.4430284857571214e-07, "loss": 1.2349, "step": 771 }, { "epoch": 73.52380952380952, "grad_norm": 5.899962425231934, "learning_rate": 1.4449025487256373e-07, "loss": 1.2333, "step": 772 }, { "epoch": 73.61904761904762, "grad_norm": 6.258727073669434, "learning_rate": 1.446776611694153e-07, "loss": 1.1558, "step": 773 }, { "epoch": 73.71428571428571, "grad_norm": 4.086488723754883, "learning_rate": 1.448650674662669e-07, "loss": 1.221, "step": 774 }, { "epoch": 73.80952380952381, "grad_norm": 6.162347793579102, "learning_rate": 1.4505247376311844e-07, "loss": 1.2575, "step": 775 }, { "epoch": 73.9047619047619, "grad_norm": 4.086270809173584, "learning_rate": 1.4523988005997002e-07, "loss": 1.1659, "step": 776 }, { "epoch": 74.0, "grad_norm": 5.596920967102051, "learning_rate": 1.4542728635682158e-07, "loss": 1.1919, "step": 777 }, { "epoch": 74.0, "eval_accuracy": 0.6965155488947171, "eval_f1": 0.7554347826086957, "eval_loss": 0.5918785333633423, "eval_precision": 0.707579185520362, "eval_recall": 0.8102331606217616, "eval_roc_auc": 0.7373598157743234, "eval_runtime": 1.1713, "eval_samples_per_second": 2278.735, "eval_steps_per_second": 5.123, "step": 777 }, { "epoch": 74.0952380952381, "grad_norm": 6.950702667236328, "learning_rate": 1.4561469265367316e-07, "loss": 1.2012, "step": 778 }, { "epoch": 74.19047619047619, "grad_norm": 6.536509037017822, "learning_rate": 1.4580209895052477e-07, "loss": 1.1679, "step": 779 }, { "epoch": 74.28571428571429, "grad_norm": 3.573523998260498, "learning_rate": 1.4598950524737632e-07, "loss": 1.2136, "step": 780 }, { "epoch": 74.38095238095238, "grad_norm": 5.456609725952148, "learning_rate": 1.461769115442279e-07, "loss": 1.21, "step": 781 }, { "epoch": 74.47619047619048, "grad_norm": 6.9601922035217285, "learning_rate": 1.4636431784107946e-07, "loss": 1.1881, "step": 782 }, { "epoch": 74.57142857142857, "grad_norm": 9.396449089050293, "learning_rate": 1.4655172413793104e-07, "loss": 1.2234, "step": 783 }, { "epoch": 74.66666666666667, "grad_norm": 7.922398567199707, "learning_rate": 1.4673913043478262e-07, "loss": 1.1364, "step": 784 }, { "epoch": 74.76190476190476, "grad_norm": 7.5325093269348145, "learning_rate": 1.469265367316342e-07, "loss": 1.189, "step": 785 }, { "epoch": 74.85714285714286, "grad_norm": 5.111273765563965, "learning_rate": 1.4711394302848578e-07, "loss": 1.2269, "step": 786 }, { "epoch": 74.95238095238095, "grad_norm": 3.6373484134674072, "learning_rate": 1.4730134932533733e-07, "loss": 1.1915, "step": 787 }, { "epoch": 74.95238095238095, "eval_accuracy": 0.6976395653802923, "eval_f1": 0.7565610859728507, "eval_loss": 0.5917426943778992, "eval_precision": 0.7080745341614907, "eval_recall": 0.8121761658031088, "eval_roc_auc": 0.7373359240069084, "eval_runtime": 1.1623, "eval_samples_per_second": 2296.364, "eval_steps_per_second": 5.162, "step": 787 }, { "epoch": 75.04761904761905, "grad_norm": 3.7085328102111816, "learning_rate": 1.4748875562218891e-07, "loss": 1.2211, "step": 788 }, { "epoch": 75.14285714285714, "grad_norm": 5.547911643981934, "learning_rate": 1.4767616191904047e-07, "loss": 1.2192, "step": 789 }, { "epoch": 75.23809523809524, "grad_norm": 5.634171485900879, "learning_rate": 1.4786356821589205e-07, "loss": 1.2382, "step": 790 }, { "epoch": 75.33333333333333, "grad_norm": 4.337423801422119, "learning_rate": 1.4805097451274363e-07, "loss": 1.1841, "step": 791 }, { "epoch": 75.42857142857143, "grad_norm": 3.4552102088928223, "learning_rate": 1.482383808095952e-07, "loss": 1.1838, "step": 792 }, { "epoch": 75.52380952380952, "grad_norm": 4.098034381866455, "learning_rate": 1.484257871064468e-07, "loss": 1.1774, "step": 793 }, { "epoch": 75.61904761904762, "grad_norm": 3.889285087585449, "learning_rate": 1.4861319340329835e-07, "loss": 1.1794, "step": 794 }, { "epoch": 75.71428571428571, "grad_norm": 6.518879413604736, "learning_rate": 1.4880059970014993e-07, "loss": 1.1664, "step": 795 }, { "epoch": 75.80952380952381, "grad_norm": 5.728705883026123, "learning_rate": 1.489880059970015e-07, "loss": 1.1829, "step": 796 }, { "epoch": 75.9047619047619, "grad_norm": 6.4669389724731445, "learning_rate": 1.491754122938531e-07, "loss": 1.1957, "step": 797 }, { "epoch": 76.0, "grad_norm": 3.6122305393218994, "learning_rate": 1.4936281859070467e-07, "loss": 1.1849, "step": 798 }, { "epoch": 76.0, "eval_accuracy": 0.6980142375421506, "eval_f1": 0.7572289156626506, "eval_loss": 0.5914971232414246, "eval_precision": 0.7077702702702703, "eval_recall": 0.814119170984456, "eval_roc_auc": 0.7374999999999999, "eval_runtime": 1.2208, "eval_samples_per_second": 2186.252, "eval_steps_per_second": 4.915, "step": 798 }, { "epoch": 76.0952380952381, "grad_norm": 5.138617038726807, "learning_rate": 1.4955022488755623e-07, "loss": 1.1999, "step": 799 }, { "epoch": 76.19047619047619, "grad_norm": 4.243399143218994, "learning_rate": 1.497376311844078e-07, "loss": 1.1639, "step": 800 }, { "epoch": 76.28571428571429, "grad_norm": 3.2358734607696533, "learning_rate": 1.4992503748125936e-07, "loss": 1.1862, "step": 801 }, { "epoch": 76.38095238095238, "grad_norm": 3.2951650619506836, "learning_rate": 1.5011244377811097e-07, "loss": 1.2081, "step": 802 }, { "epoch": 76.47619047619048, "grad_norm": 7.856222629547119, "learning_rate": 1.5029985007496252e-07, "loss": 1.1956, "step": 803 }, { "epoch": 76.57142857142857, "grad_norm": 5.153473377227783, "learning_rate": 1.504872563718141e-07, "loss": 1.1984, "step": 804 }, { "epoch": 76.66666666666667, "grad_norm": 5.476777076721191, "learning_rate": 1.5067466266866568e-07, "loss": 1.1677, "step": 805 }, { "epoch": 76.76190476190476, "grad_norm": 3.60621976852417, "learning_rate": 1.5086206896551724e-07, "loss": 1.1979, "step": 806 }, { "epoch": 76.85714285714286, "grad_norm": 11.043159484863281, "learning_rate": 1.5104947526236882e-07, "loss": 1.2544, "step": 807 }, { "epoch": 76.95238095238095, "grad_norm": 4.6011433601379395, "learning_rate": 1.512368815592204e-07, "loss": 1.1528, "step": 808 }, { "epoch": 76.95238095238095, "eval_accuracy": 0.6995129261895842, "eval_f1": 0.7600239377618193, "eval_loss": 0.5912597179412842, "eval_precision": 0.7063403781979978, "eval_recall": 0.822538860103627, "eval_roc_auc": 0.7378198042602189, "eval_runtime": 1.1765, "eval_samples_per_second": 2268.536, "eval_steps_per_second": 5.1, "step": 808 }, { "epoch": 77.04761904761905, "grad_norm": 7.566778659820557, "learning_rate": 1.5142428785607198e-07, "loss": 1.1806, "step": 809 }, { "epoch": 77.14285714285714, "grad_norm": 4.038211345672607, "learning_rate": 1.5161169415292354e-07, "loss": 1.174, "step": 810 }, { "epoch": 77.23809523809524, "grad_norm": 4.832150459289551, "learning_rate": 1.5179910044977512e-07, "loss": 1.1962, "step": 811 }, { "epoch": 77.33333333333333, "grad_norm": 3.9250776767730713, "learning_rate": 1.519865067466267e-07, "loss": 1.1681, "step": 812 }, { "epoch": 77.42857142857143, "grad_norm": 5.859079360961914, "learning_rate": 1.5217391304347825e-07, "loss": 1.1581, "step": 813 }, { "epoch": 77.52380952380952, "grad_norm": 5.626636981964111, "learning_rate": 1.5236131934032986e-07, "loss": 1.1526, "step": 814 }, { "epoch": 77.61904761904762, "grad_norm": 2.9289772510528564, "learning_rate": 1.5254872563718141e-07, "loss": 1.1899, "step": 815 }, { "epoch": 77.71428571428571, "grad_norm": 3.3265268802642822, "learning_rate": 1.52736131934033e-07, "loss": 1.1873, "step": 816 }, { "epoch": 77.80952380952381, "grad_norm": 3.9200990200042725, "learning_rate": 1.5292353823088455e-07, "loss": 1.1851, "step": 817 }, { "epoch": 77.9047619047619, "grad_norm": 4.96051549911499, "learning_rate": 1.5311094452773613e-07, "loss": 1.209, "step": 818 }, { "epoch": 78.0, "grad_norm": 4.609013557434082, "learning_rate": 1.532983508245877e-07, "loss": 1.2238, "step": 819 }, { "epoch": 78.0, "eval_accuracy": 0.6991382540277258, "eval_f1": 0.7584962406015038, "eval_loss": 0.5908147096633911, "eval_precision": 0.708029197080292, "eval_recall": 0.8167098445595855, "eval_roc_auc": 0.7379674726540012, "eval_runtime": 1.328, "eval_samples_per_second": 2009.856, "eval_steps_per_second": 4.518, "step": 819 }, { "epoch": 78.0952380952381, "grad_norm": 6.584000587463379, "learning_rate": 1.534857571214393e-07, "loss": 1.197, "step": 820 }, { "epoch": 78.19047619047619, "grad_norm": 4.348421573638916, "learning_rate": 1.5367316341829087e-07, "loss": 1.1639, "step": 821 }, { "epoch": 78.28571428571429, "grad_norm": 5.262395858764648, "learning_rate": 1.5386056971514243e-07, "loss": 1.2254, "step": 822 }, { "epoch": 78.38095238095238, "grad_norm": 4.977214813232422, "learning_rate": 1.54047976011994e-07, "loss": 1.1838, "step": 823 }, { "epoch": 78.47619047619048, "grad_norm": 6.306005001068115, "learning_rate": 1.5423538230884556e-07, "loss": 1.1657, "step": 824 }, { "epoch": 78.57142857142857, "grad_norm": 4.172593593597412, "learning_rate": 1.5442278860569717e-07, "loss": 1.1989, "step": 825 }, { "epoch": 78.66666666666667, "grad_norm": 4.104514122009277, "learning_rate": 1.5461019490254875e-07, "loss": 1.1957, "step": 826 }, { "epoch": 78.76190476190476, "grad_norm": 11.191329002380371, "learning_rate": 1.547976011994003e-07, "loss": 1.1628, "step": 827 }, { "epoch": 78.85714285714286, "grad_norm": 3.7192697525024414, "learning_rate": 1.549850074962519e-07, "loss": 1.1801, "step": 828 }, { "epoch": 78.95238095238095, "grad_norm": 4.374116897583008, "learning_rate": 1.5517241379310344e-07, "loss": 1.1667, "step": 829 }, { "epoch": 78.95238095238095, "eval_accuracy": 0.6980142375421506, "eval_f1": 0.758248350329934, "eval_loss": 0.590340793132782, "eval_precision": 0.7061452513966481, "eval_recall": 0.8186528497409327, "eval_roc_auc": 0.7383906160046056, "eval_runtime": 1.1274, "eval_samples_per_second": 2367.487, "eval_steps_per_second": 5.322, "step": 829 }, { "epoch": 79.04761904761905, "grad_norm": 6.362501621246338, "learning_rate": 1.5535982008995502e-07, "loss": 1.1866, "step": 830 }, { "epoch": 79.14285714285714, "grad_norm": 7.911940097808838, "learning_rate": 1.555472263868066e-07, "loss": 1.1772, "step": 831 }, { "epoch": 79.23809523809524, "grad_norm": 7.175947666168213, "learning_rate": 1.5573463268365819e-07, "loss": 1.1564, "step": 832 }, { "epoch": 79.33333333333333, "grad_norm": 10.416701316833496, "learning_rate": 1.5592203898050977e-07, "loss": 1.2142, "step": 833 }, { "epoch": 79.42857142857143, "grad_norm": 8.933462142944336, "learning_rate": 1.5610944527736132e-07, "loss": 1.2047, "step": 834 }, { "epoch": 79.52380952380952, "grad_norm": 4.097228527069092, "learning_rate": 1.562968515742129e-07, "loss": 1.1948, "step": 835 }, { "epoch": 79.61904761904762, "grad_norm": 3.429769277572632, "learning_rate": 1.5648425787106446e-07, "loss": 1.1977, "step": 836 }, { "epoch": 79.71428571428571, "grad_norm": 3.96598744392395, "learning_rate": 1.5667166416791606e-07, "loss": 1.1859, "step": 837 }, { "epoch": 79.80952380952381, "grad_norm": 2.8495140075683594, "learning_rate": 1.5685907046476762e-07, "loss": 1.1776, "step": 838 }, { "epoch": 79.9047619047619, "grad_norm": 5.458662509918213, "learning_rate": 1.570464767616192e-07, "loss": 1.1592, "step": 839 }, { "epoch": 80.0, "grad_norm": 4.803214073181152, "learning_rate": 1.5723388305847078e-07, "loss": 1.2071, "step": 840 }, { "epoch": 80.0, "eval_accuracy": 0.7002622705133008, "eval_f1": 0.7593261131167268, "eval_loss": 0.5900521874427795, "eval_precision": 0.7089887640449438, "eval_recall": 0.8173575129533679, "eval_roc_auc": 0.7384001151410478, "eval_runtime": 1.2067, "eval_samples_per_second": 2211.756, "eval_steps_per_second": 4.972, "step": 840 }, { "epoch": 80.0952380952381, "grad_norm": 3.455930709838867, "learning_rate": 1.5742128935532233e-07, "loss": 1.1699, "step": 841 }, { "epoch": 80.19047619047619, "grad_norm": 4.584237575531006, "learning_rate": 1.5760869565217392e-07, "loss": 1.2239, "step": 842 }, { "epoch": 80.28571428571429, "grad_norm": 2.777982711791992, "learning_rate": 1.577961019490255e-07, "loss": 1.1637, "step": 843 }, { "epoch": 80.38095238095238, "grad_norm": 4.465492248535156, "learning_rate": 1.5798350824587708e-07, "loss": 1.195, "step": 844 }, { "epoch": 80.47619047619048, "grad_norm": 4.4649481773376465, "learning_rate": 1.5817091454272863e-07, "loss": 1.1838, "step": 845 }, { "epoch": 80.57142857142857, "grad_norm": 4.6688337326049805, "learning_rate": 1.583583208395802e-07, "loss": 1.2007, "step": 846 }, { "epoch": 80.66666666666667, "grad_norm": 2.963181257247925, "learning_rate": 1.585457271364318e-07, "loss": 1.1718, "step": 847 }, { "epoch": 80.76190476190476, "grad_norm": 6.0897979736328125, "learning_rate": 1.5873313343328337e-07, "loss": 1.2046, "step": 848 }, { "epoch": 80.85714285714286, "grad_norm": 3.63608717918396, "learning_rate": 1.5892053973013496e-07, "loss": 1.1933, "step": 849 }, { "epoch": 80.95238095238095, "grad_norm": 3.738607883453369, "learning_rate": 1.591079460269865e-07, "loss": 1.2184, "step": 850 }, { "epoch": 80.95238095238095, "eval_accuracy": 0.6991382540277258, "eval_f1": 0.758205359831376, "eval_loss": 0.5898799300193787, "eval_precision": 0.7084974676420934, "eval_recall": 0.8154145077720207, "eval_roc_auc": 0.7383805411629245, "eval_runtime": 1.1853, "eval_samples_per_second": 2251.813, "eval_steps_per_second": 5.062, "step": 850 }, { "epoch": 81.04761904761905, "grad_norm": 4.331661701202393, "learning_rate": 1.592953523238381e-07, "loss": 1.1899, "step": 851 }, { "epoch": 81.14285714285714, "grad_norm": 5.725168704986572, "learning_rate": 1.5948275862068967e-07, "loss": 1.1629, "step": 852 }, { "epoch": 81.23809523809524, "grad_norm": 6.6885504722595215, "learning_rate": 1.5967016491754123e-07, "loss": 1.1414, "step": 853 }, { "epoch": 81.33333333333333, "grad_norm": 4.884006977081299, "learning_rate": 1.598575712143928e-07, "loss": 1.2167, "step": 854 }, { "epoch": 81.42857142857143, "grad_norm": 4.00887393951416, "learning_rate": 1.600449775112444e-07, "loss": 1.194, "step": 855 }, { "epoch": 81.52380952380952, "grad_norm": 9.518308639526367, "learning_rate": 1.6023238380809597e-07, "loss": 1.2198, "step": 856 }, { "epoch": 81.61904761904762, "grad_norm": 4.420541286468506, "learning_rate": 1.6041979010494752e-07, "loss": 1.1623, "step": 857 }, { "epoch": 81.71428571428571, "grad_norm": 4.352540016174316, "learning_rate": 1.606071964017991e-07, "loss": 1.1953, "step": 858 }, { "epoch": 81.80952380952381, "grad_norm": 4.800368309020996, "learning_rate": 1.6079460269865069e-07, "loss": 1.2255, "step": 859 }, { "epoch": 81.9047619047619, "grad_norm": 9.488035202026367, "learning_rate": 1.6098200899550227e-07, "loss": 1.1621, "step": 860 }, { "epoch": 82.0, "grad_norm": 2.658348321914673, "learning_rate": 1.6116941529235385e-07, "loss": 1.1856, "step": 861 }, { "epoch": 82.0, "eval_accuracy": 0.6995129261895842, "eval_f1": 0.7597363690832835, "eval_loss": 0.5896615386009216, "eval_precision": 0.7068004459308808, "eval_recall": 0.8212435233160622, "eval_roc_auc": 0.7387081174438688, "eval_runtime": 1.3138, "eval_samples_per_second": 2031.445, "eval_steps_per_second": 4.567, "step": 861 }, { "epoch": 82.0952380952381, "grad_norm": 4.096118927001953, "learning_rate": 1.613568215892054e-07, "loss": 1.168, "step": 862 }, { "epoch": 82.19047619047619, "grad_norm": 3.9486894607543945, "learning_rate": 1.6154422788605698e-07, "loss": 1.2038, "step": 863 }, { "epoch": 82.28571428571429, "grad_norm": 7.675639629364014, "learning_rate": 1.6173163418290854e-07, "loss": 1.2139, "step": 864 }, { "epoch": 82.38095238095238, "grad_norm": 3.3430981636047363, "learning_rate": 1.6191904047976012e-07, "loss": 1.1643, "step": 865 }, { "epoch": 82.47619047619048, "grad_norm": 4.858800888061523, "learning_rate": 1.621064467766117e-07, "loss": 1.2191, "step": 866 }, { "epoch": 82.57142857142857, "grad_norm": 3.3145360946655273, "learning_rate": 1.6229385307346328e-07, "loss": 1.1903, "step": 867 }, { "epoch": 82.66666666666667, "grad_norm": 2.669579267501831, "learning_rate": 1.6248125937031486e-07, "loss": 1.1716, "step": 868 }, { "epoch": 82.76190476190476, "grad_norm": 5.312630653381348, "learning_rate": 1.6266866566716642e-07, "loss": 1.1918, "step": 869 }, { "epoch": 82.85714285714286, "grad_norm": 9.223489761352539, "learning_rate": 1.62856071964018e-07, "loss": 1.185, "step": 870 }, { "epoch": 82.95238095238095, "grad_norm": 6.911379337310791, "learning_rate": 1.6304347826086955e-07, "loss": 1.1429, "step": 871 }, { "epoch": 82.95238095238095, "eval_accuracy": 0.6995129261895842, "eval_f1": 0.7604540023894862, "eval_loss": 0.5893028974533081, "eval_precision": 0.7056541019955654, "eval_recall": 0.8244818652849741, "eval_roc_auc": 0.7389502014968335, "eval_runtime": 1.1376, "eval_samples_per_second": 2346.142, "eval_steps_per_second": 5.274, "step": 871 }, { "epoch": 83.04761904761905, "grad_norm": 4.5483174324035645, "learning_rate": 1.6323088455772116e-07, "loss": 1.2073, "step": 872 }, { "epoch": 83.14285714285714, "grad_norm": 6.0994062423706055, "learning_rate": 1.6341829085457274e-07, "loss": 1.1722, "step": 873 }, { "epoch": 83.23809523809524, "grad_norm": 3.3759844303131104, "learning_rate": 1.636056971514243e-07, "loss": 1.1758, "step": 874 }, { "epoch": 83.33333333333333, "grad_norm": 4.4953227043151855, "learning_rate": 1.6379310344827587e-07, "loss": 1.2061, "step": 875 }, { "epoch": 83.42857142857143, "grad_norm": 4.761415958404541, "learning_rate": 1.6398050974512743e-07, "loss": 1.1749, "step": 876 }, { "epoch": 83.52380952380952, "grad_norm": 9.842004776000977, "learning_rate": 1.64167916041979e-07, "loss": 1.2147, "step": 877 }, { "epoch": 83.61904761904762, "grad_norm": 4.910158634185791, "learning_rate": 1.643553223388306e-07, "loss": 1.1608, "step": 878 }, { "epoch": 83.71428571428571, "grad_norm": 3.1456174850463867, "learning_rate": 1.6454272863568217e-07, "loss": 1.1959, "step": 879 }, { "epoch": 83.80952380952381, "grad_norm": 3.7636923789978027, "learning_rate": 1.6473013493253375e-07, "loss": 1.169, "step": 880 }, { "epoch": 83.9047619047619, "grad_norm": 2.942305564880371, "learning_rate": 1.649175412293853e-07, "loss": 1.1658, "step": 881 }, { "epoch": 84.0, "grad_norm": 4.029232025146484, "learning_rate": 1.651049475262369e-07, "loss": 1.1984, "step": 882 }, { "epoch": 84.0, "eval_accuracy": 0.6987635818658674, "eval_f1": 0.7591372079089275, "eval_loss": 0.5888885259628296, "eval_precision": 0.7062430323299889, "eval_recall": 0.8205958549222798, "eval_roc_auc": 0.739222797927461, "eval_runtime": 1.1564, "eval_samples_per_second": 2308.117, "eval_steps_per_second": 5.189, "step": 882 }, { "epoch": 84.0952380952381, "grad_norm": 5.96453857421875, "learning_rate": 1.6529235382308847e-07, "loss": 1.16, "step": 883 }, { "epoch": 84.19047619047619, "grad_norm": 3.663802146911621, "learning_rate": 1.6547976011994005e-07, "loss": 1.192, "step": 884 }, { "epoch": 84.28571428571429, "grad_norm": 6.514193534851074, "learning_rate": 1.656671664167916e-07, "loss": 1.1417, "step": 885 }, { "epoch": 84.38095238095238, "grad_norm": 2.786970376968384, "learning_rate": 1.6585457271364319e-07, "loss": 1.1705, "step": 886 }, { "epoch": 84.47619047619048, "grad_norm": 4.917025566101074, "learning_rate": 1.6604197901049477e-07, "loss": 1.1819, "step": 887 }, { "epoch": 84.57142857142857, "grad_norm": 7.882221221923828, "learning_rate": 1.6622938530734632e-07, "loss": 1.1895, "step": 888 }, { "epoch": 84.66666666666667, "grad_norm": 3.4152791500091553, "learning_rate": 1.664167916041979e-07, "loss": 1.1616, "step": 889 }, { "epoch": 84.76190476190476, "grad_norm": 4.9652485847473145, "learning_rate": 1.6660419790104948e-07, "loss": 1.1608, "step": 890 }, { "epoch": 84.85714285714286, "grad_norm": 4.204701900482178, "learning_rate": 1.6679160419790106e-07, "loss": 1.1934, "step": 891 }, { "epoch": 84.95238095238095, "grad_norm": 6.280852794647217, "learning_rate": 1.6697901049475262e-07, "loss": 1.1434, "step": 892 }, { "epoch": 84.95238095238095, "eval_accuracy": 0.701386286998876, "eval_f1": 0.7617339312406577, "eval_loss": 0.5886748433113098, "eval_precision": 0.7073847862298723, "eval_recall": 0.8251295336787565, "eval_roc_auc": 0.7393137593552102, "eval_runtime": 1.2673, "eval_samples_per_second": 2106.034, "eval_steps_per_second": 4.734, "step": 892 }, { "epoch": 85.04761904761905, "grad_norm": 3.2578413486480713, "learning_rate": 1.671664167916042e-07, "loss": 1.1724, "step": 893 }, { "epoch": 85.14285714285714, "grad_norm": 7.428417682647705, "learning_rate": 1.6735382308845578e-07, "loss": 1.2087, "step": 894 }, { "epoch": 85.23809523809524, "grad_norm": 6.177584171295166, "learning_rate": 1.6754122938530736e-07, "loss": 1.2465, "step": 895 }, { "epoch": 85.33333333333333, "grad_norm": 3.587831497192383, "learning_rate": 1.6772863568215894e-07, "loss": 1.2009, "step": 896 }, { "epoch": 85.42857142857143, "grad_norm": 5.211238384246826, "learning_rate": 1.679160419790105e-07, "loss": 1.1999, "step": 897 }, { "epoch": 85.52380952380952, "grad_norm": 3.0934226512908936, "learning_rate": 1.6810344827586208e-07, "loss": 1.19, "step": 898 }, { "epoch": 85.61904761904762, "grad_norm": 10.23063850402832, "learning_rate": 1.6829085457271363e-07, "loss": 1.1526, "step": 899 }, { "epoch": 85.71428571428571, "grad_norm": 3.947057008743286, "learning_rate": 1.684782608695652e-07, "loss": 1.155, "step": 900 }, { "epoch": 85.80952380952381, "grad_norm": 3.20998477935791, "learning_rate": 1.6866566716641682e-07, "loss": 1.1781, "step": 901 }, { "epoch": 85.9047619047619, "grad_norm": 4.5016093254089355, "learning_rate": 1.6885307346326838e-07, "loss": 1.1671, "step": 902 }, { "epoch": 86.0, "grad_norm": 4.235739707946777, "learning_rate": 1.6904047976011996e-07, "loss": 1.2128, "step": 903 }, { "epoch": 86.0, "eval_accuracy": 0.6998875983514425, "eval_f1": 0.7603948549207299, "eval_loss": 0.5884115695953369, "eval_precision": 0.7065036131183992, "eval_recall": 0.8231865284974094, "eval_roc_auc": 0.7395020149683362, "eval_runtime": 1.2146, "eval_samples_per_second": 2197.493, "eval_steps_per_second": 4.94, "step": 903 }, { "epoch": 86.0952380952381, "grad_norm": 3.380007266998291, "learning_rate": 1.692278860569715e-07, "loss": 1.1776, "step": 904 }, { "epoch": 86.19047619047619, "grad_norm": 3.720041036605835, "learning_rate": 1.694152923538231e-07, "loss": 1.1642, "step": 905 }, { "epoch": 86.28571428571429, "grad_norm": 7.0945234298706055, "learning_rate": 1.6960269865067467e-07, "loss": 1.2604, "step": 906 }, { "epoch": 86.38095238095238, "grad_norm": 5.684896469116211, "learning_rate": 1.6979010494752625e-07, "loss": 1.1482, "step": 907 }, { "epoch": 86.47619047619048, "grad_norm": 5.6247029304504395, "learning_rate": 1.6997751124437783e-07, "loss": 1.1838, "step": 908 }, { "epoch": 86.57142857142857, "grad_norm": 7.488827705383301, "learning_rate": 1.701649175412294e-07, "loss": 1.1694, "step": 909 }, { "epoch": 86.66666666666667, "grad_norm": 3.2675230503082275, "learning_rate": 1.7035232383808097e-07, "loss": 1.1989, "step": 910 }, { "epoch": 86.76190476190476, "grad_norm": 5.990199565887451, "learning_rate": 1.7053973013493252e-07, "loss": 1.2253, "step": 911 }, { "epoch": 86.85714285714286, "grad_norm": 4.462316513061523, "learning_rate": 1.707271364317841e-07, "loss": 1.1602, "step": 912 }, { "epoch": 86.95238095238095, "grad_norm": 6.762936592102051, "learning_rate": 1.709145427286357e-07, "loss": 1.1828, "step": 913 }, { "epoch": 86.95238095238095, "eval_accuracy": 0.7021356313225927, "eval_f1": 0.7624738571855393, "eval_loss": 0.5883358120918274, "eval_precision": 0.7077093732667776, "eval_recall": 0.8264248704663213, "eval_roc_auc": 0.7396272308578008, "eval_runtime": 1.3325, "eval_samples_per_second": 2002.931, "eval_steps_per_second": 4.503, "step": 913 }, { "epoch": 87.04761904761905, "grad_norm": 3.4856576919555664, "learning_rate": 1.7110194902548727e-07, "loss": 1.1861, "step": 914 }, { "epoch": 87.14285714285714, "grad_norm": 5.72012996673584, "learning_rate": 1.7128935532233885e-07, "loss": 1.1541, "step": 915 }, { "epoch": 87.23809523809524, "grad_norm": 2.709678888320923, "learning_rate": 1.714767616191904e-07, "loss": 1.2146, "step": 916 }, { "epoch": 87.33333333333333, "grad_norm": 3.5865318775177, "learning_rate": 1.7166416791604198e-07, "loss": 1.2038, "step": 917 }, { "epoch": 87.42857142857143, "grad_norm": 3.717111349105835, "learning_rate": 1.7185157421289356e-07, "loss": 1.152, "step": 918 }, { "epoch": 87.52380952380952, "grad_norm": 5.886796474456787, "learning_rate": 1.7203898050974515e-07, "loss": 1.2045, "step": 919 }, { "epoch": 87.61904761904762, "grad_norm": 5.211839199066162, "learning_rate": 1.7222638680659673e-07, "loss": 1.1633, "step": 920 }, { "epoch": 87.71428571428571, "grad_norm": 4.2755842208862305, "learning_rate": 1.7241379310344828e-07, "loss": 1.1599, "step": 921 }, { "epoch": 87.80952380952381, "grad_norm": 5.807846546173096, "learning_rate": 1.7260119940029986e-07, "loss": 1.2609, "step": 922 }, { "epoch": 87.9047619047619, "grad_norm": 6.997453212738037, "learning_rate": 1.7278860569715142e-07, "loss": 1.1704, "step": 923 }, { "epoch": 88.0, "grad_norm": 6.106979846954346, "learning_rate": 1.72976011994003e-07, "loss": 1.162, "step": 924 }, { "epoch": 88.0, "eval_accuracy": 0.7006369426751592, "eval_f1": 0.7602760276027603, "eval_loss": 0.5882436037063599, "eval_precision": 0.7082168809390721, "eval_recall": 0.8205958549222798, "eval_roc_auc": 0.739506908462867, "eval_runtime": 1.1639, "eval_samples_per_second": 2293.21, "eval_steps_per_second": 5.155, "step": 924 }, { "epoch": 88.0952380952381, "grad_norm": 2.476592779159546, "learning_rate": 1.7316341829085458e-07, "loss": 1.1604, "step": 925 }, { "epoch": 88.19047619047619, "grad_norm": 4.160835266113281, "learning_rate": 1.7335082458770616e-07, "loss": 1.1682, "step": 926 }, { "epoch": 88.28571428571429, "grad_norm": 5.423745155334473, "learning_rate": 1.7353823088455774e-07, "loss": 1.1931, "step": 927 }, { "epoch": 88.38095238095238, "grad_norm": 3.820173740386963, "learning_rate": 1.737256371814093e-07, "loss": 1.1994, "step": 928 }, { "epoch": 88.47619047619048, "grad_norm": 4.19473934173584, "learning_rate": 1.7391304347826088e-07, "loss": 1.211, "step": 929 }, { "epoch": 88.57142857142857, "grad_norm": 5.371788501739502, "learning_rate": 1.7410044977511246e-07, "loss": 1.1899, "step": 930 }, { "epoch": 88.66666666666667, "grad_norm": 7.475896835327148, "learning_rate": 1.7428785607196404e-07, "loss": 1.1901, "step": 931 }, { "epoch": 88.76190476190476, "grad_norm": 2.701341390609741, "learning_rate": 1.744752623688156e-07, "loss": 1.1609, "step": 932 }, { "epoch": 88.85714285714286, "grad_norm": 8.771926879882812, "learning_rate": 1.7466266866566717e-07, "loss": 1.2532, "step": 933 }, { "epoch": 88.95238095238095, "grad_norm": 3.1728227138519287, "learning_rate": 1.7485007496251875e-07, "loss": 1.1807, "step": 934 }, { "epoch": 88.95238095238095, "eval_accuracy": 0.704383664293743, "eval_f1": 0.7652484379648914, "eval_loss": 0.5880123972892761, "eval_precision": 0.7077600440286186, "eval_recall": 0.832901554404145, "eval_roc_auc": 0.7398719055843408, "eval_runtime": 1.4168, "eval_samples_per_second": 1883.871, "eval_steps_per_second": 4.235, "step": 934 }, { "epoch": 89.04761904761905, "grad_norm": 4.075814723968506, "learning_rate": 1.750374812593703e-07, "loss": 1.1809, "step": 935 }, { "epoch": 89.14285714285714, "grad_norm": 3.444646120071411, "learning_rate": 1.7522488755622192e-07, "loss": 1.1393, "step": 936 }, { "epoch": 89.23809523809524, "grad_norm": 8.94544792175293, "learning_rate": 1.7541229385307347e-07, "loss": 1.2097, "step": 937 }, { "epoch": 89.33333333333333, "grad_norm": 4.065417766571045, "learning_rate": 1.7559970014992505e-07, "loss": 1.199, "step": 938 }, { "epoch": 89.42857142857143, "grad_norm": 7.621069431304932, "learning_rate": 1.757871064467766e-07, "loss": 1.1531, "step": 939 }, { "epoch": 89.52380952380952, "grad_norm": 3.296189069747925, "learning_rate": 1.7597451274362819e-07, "loss": 1.172, "step": 940 }, { "epoch": 89.61904761904762, "grad_norm": 2.939818859100342, "learning_rate": 1.7616191904047977e-07, "loss": 1.1395, "step": 941 }, { "epoch": 89.71428571428571, "grad_norm": 3.8071563243865967, "learning_rate": 1.7634932533733135e-07, "loss": 1.1599, "step": 942 }, { "epoch": 89.80952380952381, "grad_norm": 5.116278171539307, "learning_rate": 1.7653673163418293e-07, "loss": 1.2588, "step": 943 }, { "epoch": 89.9047619047619, "grad_norm": 3.6566922664642334, "learning_rate": 1.7672413793103448e-07, "loss": 1.1885, "step": 944 }, { "epoch": 90.0, "grad_norm": 4.694438457489014, "learning_rate": 1.7691154422788606e-07, "loss": 1.2195, "step": 945 }, { "epoch": 90.0, "eval_accuracy": 0.7040089921318846, "eval_f1": 0.7633313361294188, "eval_loss": 0.5877653956413269, "eval_precision": 0.7101449275362319, "eval_recall": 0.8251295336787565, "eval_roc_auc": 0.7399142199194012, "eval_runtime": 1.2109, "eval_samples_per_second": 2204.135, "eval_steps_per_second": 4.955, "step": 945 }, { "epoch": 90.0952380952381, "grad_norm": 5.013833522796631, "learning_rate": 1.7709895052473762e-07, "loss": 1.1197, "step": 946 }, { "epoch": 90.19047619047619, "grad_norm": 6.55540657043457, "learning_rate": 1.772863568215892e-07, "loss": 1.1313, "step": 947 }, { "epoch": 90.28571428571429, "grad_norm": 3.288936138153076, "learning_rate": 1.774737631184408e-07, "loss": 1.1893, "step": 948 }, { "epoch": 90.38095238095238, "grad_norm": 4.897750377655029, "learning_rate": 1.7766116941529236e-07, "loss": 1.1797, "step": 949 }, { "epoch": 90.47619047619048, "grad_norm": 5.5732903480529785, "learning_rate": 1.7784857571214394e-07, "loss": 1.2339, "step": 950 }, { "epoch": 90.57142857142857, "grad_norm": 5.4378767013549805, "learning_rate": 1.780359820089955e-07, "loss": 1.2078, "step": 951 }, { "epoch": 90.66666666666667, "grad_norm": 4.127724647521973, "learning_rate": 1.7822338830584708e-07, "loss": 1.1732, "step": 952 }, { "epoch": 90.76190476190476, "grad_norm": 4.829543590545654, "learning_rate": 1.7841079460269866e-07, "loss": 1.2367, "step": 953 }, { "epoch": 90.85714285714286, "grad_norm": 3.430927038192749, "learning_rate": 1.7859820089955024e-07, "loss": 1.1363, "step": 954 }, { "epoch": 90.95238095238095, "grad_norm": 3.5088884830474854, "learning_rate": 1.7878560719640182e-07, "loss": 1.1859, "step": 955 }, { "epoch": 90.95238095238095, "eval_accuracy": 0.7047583364556014, "eval_f1": 0.7649164677804295, "eval_loss": 0.5876271724700928, "eval_precision": 0.709070796460177, "eval_recall": 0.8303108808290155, "eval_roc_auc": 0.740163788140472, "eval_runtime": 1.2241, "eval_samples_per_second": 2180.311, "eval_steps_per_second": 4.901, "step": 955 }, { "epoch": 91.04761904761905, "grad_norm": 4.03278923034668, "learning_rate": 1.7897301349325338e-07, "loss": 1.1953, "step": 956 }, { "epoch": 91.14285714285714, "grad_norm": 4.45944356918335, "learning_rate": 1.7916041979010496e-07, "loss": 1.1832, "step": 957 }, { "epoch": 91.23809523809524, "grad_norm": 5.994672775268555, "learning_rate": 1.793478260869565e-07, "loss": 1.1691, "step": 958 }, { "epoch": 91.33333333333333, "grad_norm": 5.616822719573975, "learning_rate": 1.7953523238380812e-07, "loss": 1.1598, "step": 959 }, { "epoch": 91.42857142857143, "grad_norm": 5.643009662628174, "learning_rate": 1.7972263868065967e-07, "loss": 1.1954, "step": 960 }, { "epoch": 91.52380952380952, "grad_norm": 3.8720247745513916, "learning_rate": 1.7991004497751125e-07, "loss": 1.1931, "step": 961 }, { "epoch": 91.61904761904762, "grad_norm": 4.31902551651001, "learning_rate": 1.8009745127436284e-07, "loss": 1.1827, "step": 962 }, { "epoch": 91.71428571428571, "grad_norm": 4.202095031738281, "learning_rate": 1.802848575712144e-07, "loss": 1.159, "step": 963 }, { "epoch": 91.80952380952381, "grad_norm": 5.559122562408447, "learning_rate": 1.8047226386806597e-07, "loss": 1.1767, "step": 964 }, { "epoch": 91.9047619047619, "grad_norm": 2.7115461826324463, "learning_rate": 1.8065967016491755e-07, "loss": 1.1845, "step": 965 }, { "epoch": 92.0, "grad_norm": 3.8898439407348633, "learning_rate": 1.8084707646176913e-07, "loss": 1.1827, "step": 966 }, { "epoch": 92.0, "eval_accuracy": 0.7055076807793181, "eval_f1": 0.7655131264916468, "eval_loss": 0.5875535607337952, "eval_precision": 0.7096238938053098, "eval_recall": 0.8309585492227979, "eval_roc_auc": 0.7402328727691423, "eval_runtime": 1.2339, "eval_samples_per_second": 2162.998, "eval_steps_per_second": 4.862, "step": 966 }, { "epoch": 92.0952380952381, "grad_norm": 4.475981712341309, "learning_rate": 1.8103448275862071e-07, "loss": 1.226, "step": 967 }, { "epoch": 92.19047619047619, "grad_norm": 12.51380729675293, "learning_rate": 1.8122188905547227e-07, "loss": 1.1773, "step": 968 }, { "epoch": 92.28571428571429, "grad_norm": 3.1769425868988037, "learning_rate": 1.8140929535232385e-07, "loss": 1.2155, "step": 969 }, { "epoch": 92.38095238095238, "grad_norm": 6.938444137573242, "learning_rate": 1.815967016491754e-07, "loss": 1.1839, "step": 970 }, { "epoch": 92.47619047619048, "grad_norm": 5.644467353820801, "learning_rate": 1.81784107946027e-07, "loss": 1.1665, "step": 971 }, { "epoch": 92.57142857142857, "grad_norm": 4.443206310272217, "learning_rate": 1.8197151424287857e-07, "loss": 1.1653, "step": 972 }, { "epoch": 92.66666666666667, "grad_norm": 4.741915702819824, "learning_rate": 1.8215892053973015e-07, "loss": 1.1543, "step": 973 }, { "epoch": 92.76190476190476, "grad_norm": 3.8203067779541016, "learning_rate": 1.8234632683658173e-07, "loss": 1.1586, "step": 974 }, { "epoch": 92.85714285714286, "grad_norm": 7.701940536499023, "learning_rate": 1.8253373313343328e-07, "loss": 1.1927, "step": 975 }, { "epoch": 92.95238095238095, "grad_norm": 4.172492027282715, "learning_rate": 1.8272113943028486e-07, "loss": 1.234, "step": 976 }, { "epoch": 92.95238095238095, "eval_accuracy": 0.7055076807793181, "eval_f1": 0.7649521531100478, "eval_loss": 0.5876275300979614, "eval_precision": 0.7105555555555556, "eval_recall": 0.8283678756476683, "eval_roc_auc": 0.7400630397236615, "eval_runtime": 1.1616, "eval_samples_per_second": 2297.727, "eval_steps_per_second": 5.165, "step": 976 }, { "epoch": 93.04761904761905, "grad_norm": 3.0559241771698, "learning_rate": 1.8290854572713644e-07, "loss": 1.1455, "step": 977 }, { "epoch": 93.14285714285714, "grad_norm": 3.401667833328247, "learning_rate": 1.8309595202398802e-07, "loss": 1.1993, "step": 978 }, { "epoch": 93.23809523809524, "grad_norm": 4.739910125732422, "learning_rate": 1.8328335832083958e-07, "loss": 1.2022, "step": 979 }, { "epoch": 93.33333333333333, "grad_norm": 5.978082180023193, "learning_rate": 1.8347076461769116e-07, "loss": 1.225, "step": 980 }, { "epoch": 93.42857142857143, "grad_norm": 3.4651079177856445, "learning_rate": 1.8365817091454274e-07, "loss": 1.1836, "step": 981 }, { "epoch": 93.52380952380952, "grad_norm": 3.436234951019287, "learning_rate": 1.8384557721139432e-07, "loss": 1.2121, "step": 982 }, { "epoch": 93.61904761904762, "grad_norm": 2.9314863681793213, "learning_rate": 1.840329835082459e-07, "loss": 1.1723, "step": 983 }, { "epoch": 93.71428571428571, "grad_norm": 9.982884407043457, "learning_rate": 1.8422038980509746e-07, "loss": 1.186, "step": 984 }, { "epoch": 93.80952380952381, "grad_norm": 3.900665283203125, "learning_rate": 1.8440779610194904e-07, "loss": 1.164, "step": 985 }, { "epoch": 93.9047619047619, "grad_norm": 3.7296035289764404, "learning_rate": 1.845952023988006e-07, "loss": 1.1702, "step": 986 }, { "epoch": 94.0, "grad_norm": 2.609611988067627, "learning_rate": 1.8478260869565217e-07, "loss": 1.1407, "step": 987 }, { "epoch": 94.0, "eval_accuracy": 0.7088797302360434, "eval_f1": 0.7701863354037267, "eval_loss": 0.58750981092453, "eval_precision": 0.708764289602613, "eval_recall": 0.8432642487046632, "eval_roc_auc": 0.7405644789867587, "eval_runtime": 1.1414, "eval_samples_per_second": 2338.278, "eval_steps_per_second": 5.257, "step": 987 }, { "epoch": 94.0952380952381, "grad_norm": 5.930230617523193, "learning_rate": 1.8497001499250375e-07, "loss": 1.183, "step": 988 }, { "epoch": 94.19047619047619, "grad_norm": 3.943761110305786, "learning_rate": 1.8515742128935534e-07, "loss": 1.1921, "step": 989 }, { "epoch": 94.28571428571429, "grad_norm": 9.60708999633789, "learning_rate": 1.8534482758620692e-07, "loss": 1.2425, "step": 990 }, { "epoch": 94.38095238095238, "grad_norm": 3.7731473445892334, "learning_rate": 1.8553223388305847e-07, "loss": 1.1399, "step": 991 }, { "epoch": 94.47619047619048, "grad_norm": 6.896683216094971, "learning_rate": 1.8571964017991005e-07, "loss": 1.239, "step": 992 }, { "epoch": 94.57142857142857, "grad_norm": 4.011725425720215, "learning_rate": 1.859070464767616e-07, "loss": 1.1741, "step": 993 }, { "epoch": 94.66666666666667, "grad_norm": 2.2900426387786865, "learning_rate": 1.8609445277361321e-07, "loss": 1.1527, "step": 994 }, { "epoch": 94.76190476190476, "grad_norm": 4.162290096282959, "learning_rate": 1.862818590704648e-07, "loss": 1.1454, "step": 995 }, { "epoch": 94.85714285714286, "grad_norm": 4.327588081359863, "learning_rate": 1.8646926536731635e-07, "loss": 1.1874, "step": 996 }, { "epoch": 94.95238095238095, "grad_norm": 3.36289119720459, "learning_rate": 1.8665667166416793e-07, "loss": 1.1501, "step": 997 }, { "epoch": 94.95238095238095, "eval_accuracy": 0.7070063694267515, "eval_f1": 0.767814726840855, "eval_loss": 0.5870710015296936, "eval_precision": 0.7088815789473685, "eval_recall": 0.8374352331606217, "eval_roc_auc": 0.7406669545192861, "eval_runtime": 1.2238, "eval_samples_per_second": 2180.834, "eval_steps_per_second": 4.903, "step": 997 }, { "epoch": 95.04761904761905, "grad_norm": 4.248078346252441, "learning_rate": 1.8684407796101948e-07, "loss": 1.2222, "step": 998 }, { "epoch": 95.14285714285714, "grad_norm": 5.069729328155518, "learning_rate": 1.8703148425787107e-07, "loss": 1.1808, "step": 999 }, { "epoch": 95.23809523809524, "grad_norm": 8.28036880493164, "learning_rate": 1.8721889055472265e-07, "loss": 1.2253, "step": 1000 }, { "epoch": 95.33333333333333, "grad_norm": 6.096401214599609, "learning_rate": 1.8740629685157423e-07, "loss": 1.1584, "step": 1001 }, { "epoch": 95.42857142857143, "grad_norm": 4.412257194519043, "learning_rate": 1.875937031484258e-07, "loss": 1.2083, "step": 1002 }, { "epoch": 95.52380952380952, "grad_norm": 4.404515743255615, "learning_rate": 1.8778110944527736e-07, "loss": 1.1689, "step": 1003 }, { "epoch": 95.61904761904762, "grad_norm": 3.2303812503814697, "learning_rate": 1.8796851574212894e-07, "loss": 1.1695, "step": 1004 }, { "epoch": 95.71428571428571, "grad_norm": 3.9076950550079346, "learning_rate": 1.8815592203898052e-07, "loss": 1.1857, "step": 1005 }, { "epoch": 95.80952380952381, "grad_norm": 4.176351547241211, "learning_rate": 1.883433283358321e-07, "loss": 1.1781, "step": 1006 }, { "epoch": 95.9047619047619, "grad_norm": 3.8240013122558594, "learning_rate": 1.8853073463268366e-07, "loss": 1.1576, "step": 1007 }, { "epoch": 96.0, "grad_norm": 2.9544076919555664, "learning_rate": 1.8871814092953524e-07, "loss": 1.1761, "step": 1008 }, { "epoch": 96.0, "eval_accuracy": 0.7066316972648932, "eval_f1": 0.7667560321715817, "eval_loss": 0.5868284702301025, "eval_precision": 0.709873138444567, "eval_recall": 0.8335492227979274, "eval_roc_auc": 0.740759067357513, "eval_runtime": 1.2315, "eval_samples_per_second": 2167.19, "eval_steps_per_second": 4.872, "step": 1008 }, { "epoch": 96.0952380952381, "grad_norm": 6.389705657958984, "learning_rate": 1.8890554722638682e-07, "loss": 1.2402, "step": 1009 }, { "epoch": 96.19047619047619, "grad_norm": 5.934082508087158, "learning_rate": 1.8909295352323838e-07, "loss": 1.1749, "step": 1010 }, { "epoch": 96.28571428571429, "grad_norm": 3.007215976715088, "learning_rate": 1.8928035982008996e-07, "loss": 1.1969, "step": 1011 }, { "epoch": 96.38095238095238, "grad_norm": 6.05702018737793, "learning_rate": 1.8946776611694154e-07, "loss": 1.1679, "step": 1012 }, { "epoch": 96.47619047619048, "grad_norm": 6.701571941375732, "learning_rate": 1.8965517241379312e-07, "loss": 1.1731, "step": 1013 }, { "epoch": 96.57142857142857, "grad_norm": 4.297491073608398, "learning_rate": 1.8984257871064467e-07, "loss": 1.1646, "step": 1014 }, { "epoch": 96.66666666666667, "grad_norm": 5.01285982131958, "learning_rate": 1.9002998500749625e-07, "loss": 1.189, "step": 1015 }, { "epoch": 96.76190476190476, "grad_norm": 2.779428482055664, "learning_rate": 1.9021739130434784e-07, "loss": 1.1289, "step": 1016 }, { "epoch": 96.85714285714286, "grad_norm": 2.592012882232666, "learning_rate": 1.9040479760119942e-07, "loss": 1.1656, "step": 1017 }, { "epoch": 96.95238095238095, "grad_norm": 6.2044291496276855, "learning_rate": 1.90592203898051e-07, "loss": 1.1965, "step": 1018 }, { "epoch": 96.95238095238095, "eval_accuracy": 0.7062570251030349, "eval_f1": 0.7665276950565812, "eval_loss": 0.5867811441421509, "eval_precision": 0.7094818081587652, "eval_recall": 0.8335492227979274, "eval_roc_auc": 0.7409081750143928, "eval_runtime": 1.3479, "eval_samples_per_second": 1980.164, "eval_steps_per_second": 4.451, "step": 1018 }, { "epoch": 97.04761904761905, "grad_norm": 2.637268304824829, "learning_rate": 1.9077961019490255e-07, "loss": 1.1588, "step": 1019 }, { "epoch": 97.14285714285714, "grad_norm": 7.145625591278076, "learning_rate": 1.9096701649175413e-07, "loss": 1.1809, "step": 1020 }, { "epoch": 97.23809523809524, "grad_norm": 3.879804849624634, "learning_rate": 1.9115442278860571e-07, "loss": 1.1216, "step": 1021 }, { "epoch": 97.33333333333333, "grad_norm": 3.9333603382110596, "learning_rate": 1.9134182908545727e-07, "loss": 1.2074, "step": 1022 }, { "epoch": 97.42857142857143, "grad_norm": 3.357753038406372, "learning_rate": 1.9152923538230885e-07, "loss": 1.2006, "step": 1023 }, { "epoch": 97.52380952380952, "grad_norm": 7.069590091705322, "learning_rate": 1.9171664167916043e-07, "loss": 1.2018, "step": 1024 }, { "epoch": 97.61904761904762, "grad_norm": 3.987957715988159, "learning_rate": 1.91904047976012e-07, "loss": 1.1415, "step": 1025 }, { "epoch": 97.71428571428571, "grad_norm": 4.330837249755859, "learning_rate": 1.9209145427286357e-07, "loss": 1.1873, "step": 1026 }, { "epoch": 97.80952380952381, "grad_norm": 2.9018101692199707, "learning_rate": 1.9227886056971515e-07, "loss": 1.1942, "step": 1027 }, { "epoch": 97.9047619047619, "grad_norm": 3.1628873348236084, "learning_rate": 1.9246626686656673e-07, "loss": 1.141, "step": 1028 }, { "epoch": 98.0, "grad_norm": 6.722561359405518, "learning_rate": 1.926536731634183e-07, "loss": 1.237, "step": 1029 }, { "epoch": 98.0, "eval_accuracy": 0.7096290745597602, "eval_f1": 0.7706422018348624, "eval_loss": 0.586707592010498, "eval_precision": 0.7095367847411445, "eval_recall": 0.8432642487046632, "eval_roc_auc": 0.7411977547495682, "eval_runtime": 1.1565, "eval_samples_per_second": 2307.874, "eval_steps_per_second": 5.188, "step": 1029 }, { "epoch": 98.0952380952381, "grad_norm": 4.370731830596924, "learning_rate": 1.928410794602699e-07, "loss": 1.1843, "step": 1030 }, { "epoch": 98.19047619047619, "grad_norm": 2.9520230293273926, "learning_rate": 1.9302848575712144e-07, "loss": 1.1888, "step": 1031 }, { "epoch": 98.28571428571429, "grad_norm": 3.1140427589416504, "learning_rate": 1.9321589205397303e-07, "loss": 1.1886, "step": 1032 }, { "epoch": 98.38095238095238, "grad_norm": 2.5182907581329346, "learning_rate": 1.9340329835082458e-07, "loss": 1.1701, "step": 1033 }, { "epoch": 98.47619047619048, "grad_norm": 7.141319274902344, "learning_rate": 1.9359070464767616e-07, "loss": 1.1674, "step": 1034 }, { "epoch": 98.57142857142857, "grad_norm": 3.4200496673583984, "learning_rate": 1.9377811094452777e-07, "loss": 1.1621, "step": 1035 }, { "epoch": 98.66666666666667, "grad_norm": 3.106863260269165, "learning_rate": 1.9396551724137932e-07, "loss": 1.1857, "step": 1036 }, { "epoch": 98.76190476190476, "grad_norm": 6.592114448547363, "learning_rate": 1.941529235382309e-07, "loss": 1.2144, "step": 1037 }, { "epoch": 98.85714285714286, "grad_norm": 4.127084255218506, "learning_rate": 1.9434032983508246e-07, "loss": 1.195, "step": 1038 }, { "epoch": 98.95238095238095, "grad_norm": 3.0792510509490967, "learning_rate": 1.9452773613193404e-07, "loss": 1.1611, "step": 1039 }, { "epoch": 98.95238095238095, "eval_accuracy": 0.7088797302360434, "eval_f1": 0.769504598042124, "eval_loss": 0.5864457488059998, "eval_precision": 0.7099069512862616, "eval_recall": 0.8400259067357513, "eval_roc_auc": 0.7414237190558434, "eval_runtime": 1.141, "eval_samples_per_second": 2339.082, "eval_steps_per_second": 5.258, "step": 1039 }, { "epoch": 99.04761904761905, "grad_norm": 4.860541820526123, "learning_rate": 1.9471514242878562e-07, "loss": 1.2126, "step": 1040 }, { "epoch": 99.14285714285714, "grad_norm": 5.0624566078186035, "learning_rate": 1.949025487256372e-07, "loss": 1.1765, "step": 1041 }, { "epoch": 99.23809523809524, "grad_norm": 3.882685422897339, "learning_rate": 1.9508995502248878e-07, "loss": 1.1533, "step": 1042 }, { "epoch": 99.33333333333333, "grad_norm": 3.081310749053955, "learning_rate": 1.9527736131934034e-07, "loss": 1.1557, "step": 1043 }, { "epoch": 99.42857142857143, "grad_norm": 3.0742149353027344, "learning_rate": 1.954647676161919e-07, "loss": 1.1964, "step": 1044 }, { "epoch": 99.52380952380952, "grad_norm": 2.9772236347198486, "learning_rate": 1.956521739130435e-07, "loss": 1.1767, "step": 1045 }, { "epoch": 99.61904761904762, "grad_norm": 3.278563976287842, "learning_rate": 1.9583958020989505e-07, "loss": 1.186, "step": 1046 }, { "epoch": 99.71428571428571, "grad_norm": 2.069175958633423, "learning_rate": 1.9602698650674663e-07, "loss": 1.1763, "step": 1047 }, { "epoch": 99.80952380952381, "grad_norm": 5.378757953643799, "learning_rate": 1.9621439280359824e-07, "loss": 1.2126, "step": 1048 }, { "epoch": 99.9047619047619, "grad_norm": 7.8624749183654785, "learning_rate": 1.964017991004498e-07, "loss": 1.1484, "step": 1049 }, { "epoch": 100.0, "grad_norm": 3.2688686847686768, "learning_rate": 1.9658920539730135e-07, "loss": 1.1375, "step": 1050 }, { "epoch": 100.0, "eval_accuracy": 0.7077557137504683, "eval_f1": 0.768270944741533, "eval_loss": 0.5862920880317688, "eval_precision": 0.7096597145993414, "eval_recall": 0.8374352331606217, "eval_roc_auc": 0.7416292458261371, "eval_runtime": 1.208, "eval_samples_per_second": 2209.442, "eval_steps_per_second": 4.967, "step": 1050 }, { "epoch": 100.0952380952381, "grad_norm": 4.440683364868164, "learning_rate": 1.967766116941529e-07, "loss": 1.184, "step": 1051 }, { "epoch": 100.19047619047619, "grad_norm": 2.988774061203003, "learning_rate": 1.969640179910045e-07, "loss": 1.1936, "step": 1052 }, { "epoch": 100.28571428571429, "grad_norm": 3.475858688354492, "learning_rate": 1.971514242878561e-07, "loss": 1.166, "step": 1053 }, { "epoch": 100.38095238095238, "grad_norm": 2.3044967651367188, "learning_rate": 1.9733883058470765e-07, "loss": 1.1795, "step": 1054 }, { "epoch": 100.47619047619048, "grad_norm": 2.6562399864196777, "learning_rate": 1.9752623688155925e-07, "loss": 1.1644, "step": 1055 }, { "epoch": 100.57142857142857, "grad_norm": 3.209141254425049, "learning_rate": 1.977136431784108e-07, "loss": 1.174, "step": 1056 }, { "epoch": 100.66666666666667, "grad_norm": 6.200804710388184, "learning_rate": 1.9790104947526236e-07, "loss": 1.1441, "step": 1057 }, { "epoch": 100.76190476190476, "grad_norm": 3.0925281047821045, "learning_rate": 1.9808845577211394e-07, "loss": 1.2014, "step": 1058 }, { "epoch": 100.85714285714286, "grad_norm": 3.244074821472168, "learning_rate": 1.9827586206896555e-07, "loss": 1.1827, "step": 1059 }, { "epoch": 100.95238095238095, "grad_norm": 4.83599853515625, "learning_rate": 1.984632683658171e-07, "loss": 1.187, "step": 1060 }, { "epoch": 100.95238095238095, "eval_accuracy": 0.7085050580741851, "eval_f1": 0.769139465875371, "eval_loss": 0.5860828757286072, "eval_precision": 0.7097480832420592, "eval_recall": 0.8393782383419689, "eval_roc_auc": 0.7419314910765689, "eval_runtime": 1.2084, "eval_samples_per_second": 2208.654, "eval_steps_per_second": 4.965, "step": 1060 }, { "epoch": 101.04761904761905, "grad_norm": 8.952556610107422, "learning_rate": 1.9865067466266866e-07, "loss": 1.2129, "step": 1061 }, { "epoch": 101.14285714285714, "grad_norm": 7.221254348754883, "learning_rate": 1.9883808095952027e-07, "loss": 1.1905, "step": 1062 }, { "epoch": 101.23809523809524, "grad_norm": 4.311481952667236, "learning_rate": 1.9902548725637182e-07, "loss": 1.2263, "step": 1063 }, { "epoch": 101.33333333333333, "grad_norm": 3.539215087890625, "learning_rate": 1.992128935532234e-07, "loss": 1.1666, "step": 1064 }, { "epoch": 101.42857142857143, "grad_norm": 3.2866811752319336, "learning_rate": 1.9940029985007496e-07, "loss": 1.2134, "step": 1065 }, { "epoch": 101.52380952380952, "grad_norm": 3.0386905670166016, "learning_rate": 1.9958770614692657e-07, "loss": 1.168, "step": 1066 }, { "epoch": 101.61904761904762, "grad_norm": 6.335067272186279, "learning_rate": 1.9977511244377812e-07, "loss": 1.1881, "step": 1067 }, { "epoch": 101.71428571428571, "grad_norm": 11.900758743286133, "learning_rate": 1.9996251874062967e-07, "loss": 1.1134, "step": 1068 }, { "epoch": 101.80952380952381, "grad_norm": 3.2636590003967285, "learning_rate": 2.0014992503748128e-07, "loss": 1.2082, "step": 1069 }, { "epoch": 101.9047619047619, "grad_norm": 8.951799392700195, "learning_rate": 2.0033733133433286e-07, "loss": 1.1496, "step": 1070 }, { "epoch": 102.0, "grad_norm": 4.132380962371826, "learning_rate": 2.0052473763118442e-07, "loss": 1.1816, "step": 1071 }, { "epoch": 102.0, "eval_accuracy": 0.7081303859123267, "eval_f1": 0.7689113022841887, "eval_loss": 0.5860377550125122, "eval_precision": 0.7093596059113301, "eval_recall": 0.8393782383419689, "eval_roc_auc": 0.7419801381692573, "eval_runtime": 1.2551, "eval_samples_per_second": 2126.441, "eval_steps_per_second": 4.78, "step": 1071 }, { "epoch": 102.0952380952381, "grad_norm": 2.5746874809265137, "learning_rate": 2.0071214392803597e-07, "loss": 1.1736, "step": 1072 }, { "epoch": 102.19047619047619, "grad_norm": 4.014669895172119, "learning_rate": 2.0089955022488758e-07, "loss": 1.1717, "step": 1073 }, { "epoch": 102.28571428571429, "grad_norm": 4.1914520263671875, "learning_rate": 2.0108695652173913e-07, "loss": 1.1752, "step": 1074 }, { "epoch": 102.38095238095238, "grad_norm": 4.514261245727539, "learning_rate": 2.0127436281859071e-07, "loss": 1.2236, "step": 1075 }, { "epoch": 102.47619047619048, "grad_norm": 3.665982961654663, "learning_rate": 2.014617691154423e-07, "loss": 1.1594, "step": 1076 }, { "epoch": 102.57142857142857, "grad_norm": 2.9374074935913086, "learning_rate": 2.0164917541229388e-07, "loss": 1.183, "step": 1077 }, { "epoch": 102.66666666666667, "grad_norm": 12.804777145385742, "learning_rate": 2.0183658170914543e-07, "loss": 1.2346, "step": 1078 }, { "epoch": 102.76190476190476, "grad_norm": 2.884666919708252, "learning_rate": 2.0202398800599699e-07, "loss": 1.1582, "step": 1079 }, { "epoch": 102.85714285714286, "grad_norm": 3.5441818237304688, "learning_rate": 2.022113943028486e-07, "loss": 1.15, "step": 1080 }, { "epoch": 102.95238095238095, "grad_norm": 3.3858766555786133, "learning_rate": 2.0239880059970015e-07, "loss": 1.1679, "step": 1081 }, { "epoch": 102.95238095238095, "eval_accuracy": 0.7100037467216186, "eval_f1": 0.7710059171597633, "eval_loss": 0.5860357284545898, "eval_precision": 0.7096949891067538, "eval_recall": 0.8439119170984456, "eval_roc_auc": 0.7421658031088082, "eval_runtime": 1.3927, "eval_samples_per_second": 1916.353, "eval_steps_per_second": 4.308, "step": 1081 }, { "epoch": 103.04761904761905, "grad_norm": 5.704709053039551, "learning_rate": 2.0258620689655173e-07, "loss": 1.1737, "step": 1082 }, { "epoch": 103.14285714285714, "grad_norm": 5.600817680358887, "learning_rate": 2.0277361319340334e-07, "loss": 1.2036, "step": 1083 }, { "epoch": 103.23809523809524, "grad_norm": 4.802742958068848, "learning_rate": 2.029610194902549e-07, "loss": 1.2017, "step": 1084 }, { "epoch": 103.33333333333333, "grad_norm": 3.191215991973877, "learning_rate": 2.0314842578710645e-07, "loss": 1.1863, "step": 1085 }, { "epoch": 103.42857142857143, "grad_norm": 3.547861337661743, "learning_rate": 2.0333583208395803e-07, "loss": 1.2072, "step": 1086 }, { "epoch": 103.52380952380952, "grad_norm": 3.1179792881011963, "learning_rate": 2.035232383808096e-07, "loss": 1.1715, "step": 1087 }, { "epoch": 103.61904761904762, "grad_norm": 3.455887794494629, "learning_rate": 2.037106446776612e-07, "loss": 1.1727, "step": 1088 }, { "epoch": 103.71428571428571, "grad_norm": 3.4433748722076416, "learning_rate": 2.0389805097451274e-07, "loss": 1.2025, "step": 1089 }, { "epoch": 103.80952380952381, "grad_norm": 3.5270094871520996, "learning_rate": 2.0408545727136435e-07, "loss": 1.1437, "step": 1090 }, { "epoch": 103.9047619047619, "grad_norm": 5.01560640335083, "learning_rate": 2.042728635682159e-07, "loss": 1.1163, "step": 1091 }, { "epoch": 104.0, "grad_norm": 2.7155611515045166, "learning_rate": 2.0446026986506746e-07, "loss": 1.1798, "step": 1092 }, { "epoch": 104.0, "eval_accuracy": 0.710378418883477, "eval_f1": 0.770962962962963, "eval_loss": 0.5859765410423279, "eval_precision": 0.7105406881485528, "eval_recall": 0.8426165803108808, "eval_roc_auc": 0.7421905584340817, "eval_runtime": 1.1491, "eval_samples_per_second": 2322.73, "eval_steps_per_second": 5.222, "step": 1092 }, { "epoch": 104.0952380952381, "grad_norm": 2.4141287803649902, "learning_rate": 2.0464767616191904e-07, "loss": 1.169, "step": 1093 }, { "epoch": 104.19047619047619, "grad_norm": 5.855286121368408, "learning_rate": 2.0483508245877065e-07, "loss": 1.1054, "step": 1094 }, { "epoch": 104.28571428571429, "grad_norm": 7.8229498863220215, "learning_rate": 2.050224887556222e-07, "loss": 1.1633, "step": 1095 }, { "epoch": 104.38095238095238, "grad_norm": 3.975707769393921, "learning_rate": 2.0520989505247376e-07, "loss": 1.2149, "step": 1096 }, { "epoch": 104.47619047619048, "grad_norm": 3.858734369277954, "learning_rate": 2.0539730134932536e-07, "loss": 1.2171, "step": 1097 }, { "epoch": 104.57142857142857, "grad_norm": 6.441440105438232, "learning_rate": 2.0558470764617692e-07, "loss": 1.1842, "step": 1098 }, { "epoch": 104.66666666666667, "grad_norm": 5.97361946105957, "learning_rate": 2.057721139430285e-07, "loss": 1.1458, "step": 1099 }, { "epoch": 104.76190476190476, "grad_norm": 5.021552562713623, "learning_rate": 2.0595952023988005e-07, "loss": 1.2462, "step": 1100 }, { "epoch": 104.85714285714286, "grad_norm": 9.252442359924316, "learning_rate": 2.0614692653673166e-07, "loss": 1.1586, "step": 1101 }, { "epoch": 104.95238095238095, "grad_norm": 4.486863613128662, "learning_rate": 2.0633433283358322e-07, "loss": 1.188, "step": 1102 }, { "epoch": 104.95238095238095, "eval_accuracy": 0.7081303859123267, "eval_f1": 0.768361581920904, "eval_loss": 0.5858822464942932, "eval_precision": 0.7102803738317757, "eval_recall": 0.8367875647668394, "eval_roc_auc": 0.7421781807714452, "eval_runtime": 1.176, "eval_samples_per_second": 2269.511, "eval_steps_per_second": 5.102, "step": 1102 }, { "epoch": 105.04761904761905, "grad_norm": 5.8441362380981445, "learning_rate": 2.0652173913043477e-07, "loss": 1.1511, "step": 1103 }, { "epoch": 105.14285714285714, "grad_norm": 6.34725284576416, "learning_rate": 2.0670914542728638e-07, "loss": 1.1635, "step": 1104 }, { "epoch": 105.23809523809524, "grad_norm": 2.6412837505340576, "learning_rate": 2.0689655172413796e-07, "loss": 1.2041, "step": 1105 }, { "epoch": 105.33333333333333, "grad_norm": 2.169490098953247, "learning_rate": 2.070839580209895e-07, "loss": 1.1938, "step": 1106 }, { "epoch": 105.42857142857143, "grad_norm": 3.8251116275787354, "learning_rate": 2.0727136431784107e-07, "loss": 1.2071, "step": 1107 }, { "epoch": 105.52380952380952, "grad_norm": 5.872073650360107, "learning_rate": 2.0745877061469267e-07, "loss": 1.0972, "step": 1108 }, { "epoch": 105.61904761904762, "grad_norm": 3.1469690799713135, "learning_rate": 2.0764617691154423e-07, "loss": 1.1162, "step": 1109 }, { "epoch": 105.71428571428571, "grad_norm": 2.4707601070404053, "learning_rate": 2.078335832083958e-07, "loss": 1.1861, "step": 1110 }, { "epoch": 105.80952380952381, "grad_norm": 4.59213924407959, "learning_rate": 2.080209895052474e-07, "loss": 1.1789, "step": 1111 }, { "epoch": 105.9047619047619, "grad_norm": 7.704277515411377, "learning_rate": 2.0820839580209897e-07, "loss": 1.2179, "step": 1112 }, { "epoch": 106.0, "grad_norm": 3.2231431007385254, "learning_rate": 2.0839580209895053e-07, "loss": 1.1869, "step": 1113 }, { "epoch": 106.0, "eval_accuracy": 0.7126264518546273, "eval_f1": 0.7735459108355477, "eval_loss": 0.585536777973175, "eval_precision": 0.7107976125881714, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.7428284398388025, "eval_runtime": 1.1995, "eval_samples_per_second": 2225.008, "eval_steps_per_second": 5.002, "step": 1113 }, { "epoch": 106.0952380952381, "grad_norm": 5.697627067565918, "learning_rate": 2.0858320839580208e-07, "loss": 1.1273, "step": 1114 }, { "epoch": 106.19047619047619, "grad_norm": 10.429737091064453, "learning_rate": 2.087706146926537e-07, "loss": 1.1915, "step": 1115 }, { "epoch": 106.28571428571429, "grad_norm": 3.9276132583618164, "learning_rate": 2.0895802098950527e-07, "loss": 1.1987, "step": 1116 }, { "epoch": 106.38095238095238, "grad_norm": 8.287455558776855, "learning_rate": 2.0914542728635682e-07, "loss": 1.2169, "step": 1117 }, { "epoch": 106.47619047619048, "grad_norm": 3.018214464187622, "learning_rate": 2.0933283358320843e-07, "loss": 1.1799, "step": 1118 }, { "epoch": 106.57142857142857, "grad_norm": 2.187699794769287, "learning_rate": 2.0952023988005999e-07, "loss": 1.1894, "step": 1119 }, { "epoch": 106.66666666666667, "grad_norm": 3.7179007530212402, "learning_rate": 2.0970764617691154e-07, "loss": 1.1598, "step": 1120 }, { "epoch": 106.76190476190476, "grad_norm": 10.05657958984375, "learning_rate": 2.0989505247376312e-07, "loss": 1.1403, "step": 1121 }, { "epoch": 106.85714285714286, "grad_norm": 2.488464117050171, "learning_rate": 2.100824587706147e-07, "loss": 1.1659, "step": 1122 }, { "epoch": 106.95238095238095, "grad_norm": 2.6016857624053955, "learning_rate": 2.1026986506746628e-07, "loss": 1.1786, "step": 1123 }, { "epoch": 106.95238095238095, "eval_accuracy": 0.7111277632071937, "eval_f1": 0.7715555555555556, "eval_loss": 0.5852952599525452, "eval_precision": 0.7110868377935554, "eval_recall": 0.8432642487046632, "eval_roc_auc": 0.7429594127806562, "eval_runtime": 1.2724, "eval_samples_per_second": 2097.676, "eval_steps_per_second": 4.716, "step": 1123 }, { "epoch": 107.04761904761905, "grad_norm": 12.497701644897461, "learning_rate": 2.1045727136431784e-07, "loss": 1.2145, "step": 1124 }, { "epoch": 107.14285714285714, "grad_norm": 3.5568621158599854, "learning_rate": 2.1064467766116944e-07, "loss": 1.2061, "step": 1125 }, { "epoch": 107.23809523809524, "grad_norm": 9.025201797485352, "learning_rate": 2.10832083958021e-07, "loss": 1.1153, "step": 1126 }, { "epoch": 107.33333333333333, "grad_norm": 5.04897928237915, "learning_rate": 2.1101949025487255e-07, "loss": 1.1678, "step": 1127 }, { "epoch": 107.42857142857143, "grad_norm": 3.4108526706695557, "learning_rate": 2.1120689655172413e-07, "loss": 1.1833, "step": 1128 }, { "epoch": 107.52380952380952, "grad_norm": 2.397312641143799, "learning_rate": 2.1139430284857574e-07, "loss": 1.1684, "step": 1129 }, { "epoch": 107.61904761904762, "grad_norm": 3.07975697517395, "learning_rate": 2.115817091454273e-07, "loss": 1.2202, "step": 1130 }, { "epoch": 107.71428571428571, "grad_norm": 3.9994544982910156, "learning_rate": 2.1176911544227885e-07, "loss": 1.1658, "step": 1131 }, { "epoch": 107.80952380952381, "grad_norm": 3.7984204292297363, "learning_rate": 2.1195652173913046e-07, "loss": 1.1601, "step": 1132 }, { "epoch": 107.9047619047619, "grad_norm": 3.4608612060546875, "learning_rate": 2.12143928035982e-07, "loss": 1.17, "step": 1133 }, { "epoch": 108.0, "grad_norm": 3.1976664066314697, "learning_rate": 2.123313343328336e-07, "loss": 1.1929, "step": 1134 }, { "epoch": 108.0, "eval_accuracy": 0.7115024353690521, "eval_f1": 0.771513353115727, "eval_loss": 0.5852454900741577, "eval_precision": 0.7119386637458927, "eval_recall": 0.8419689119170984, "eval_roc_auc": 0.7429919401266551, "eval_runtime": 1.1644, "eval_samples_per_second": 2292.254, "eval_steps_per_second": 5.153, "step": 1134 }, { "epoch": 108.0952380952381, "grad_norm": 5.28352689743042, "learning_rate": 2.1251874062968515e-07, "loss": 1.1692, "step": 1135 }, { "epoch": 108.19047619047619, "grad_norm": 5.02522611618042, "learning_rate": 2.1270614692653676e-07, "loss": 1.165, "step": 1136 }, { "epoch": 108.28571428571429, "grad_norm": 3.9818742275238037, "learning_rate": 2.128935532233883e-07, "loss": 1.1638, "step": 1137 }, { "epoch": 108.38095238095238, "grad_norm": 4.110209941864014, "learning_rate": 2.1308095952023986e-07, "loss": 1.197, "step": 1138 }, { "epoch": 108.47619047619048, "grad_norm": 4.014791965484619, "learning_rate": 2.1326836581709147e-07, "loss": 1.2021, "step": 1139 }, { "epoch": 108.57142857142857, "grad_norm": 3.4760916233062744, "learning_rate": 2.1345577211394305e-07, "loss": 1.2079, "step": 1140 }, { "epoch": 108.66666666666667, "grad_norm": 2.6710264682769775, "learning_rate": 2.136431784107946e-07, "loss": 1.1863, "step": 1141 }, { "epoch": 108.76190476190476, "grad_norm": 3.0839014053344727, "learning_rate": 2.1383058470764616e-07, "loss": 1.161, "step": 1142 }, { "epoch": 108.85714285714286, "grad_norm": 7.811803340911865, "learning_rate": 2.1401799100449777e-07, "loss": 1.156, "step": 1143 }, { "epoch": 108.95238095238095, "grad_norm": 2.7280216217041016, "learning_rate": 2.1420539730134932e-07, "loss": 1.1514, "step": 1144 }, { "epoch": 108.95238095238095, "eval_accuracy": 0.7122517796927689, "eval_f1": 0.7722419928825622, "eval_loss": 0.5852576494216919, "eval_precision": 0.712253829321663, "eval_recall": 0.8432642487046632, "eval_roc_auc": 0.7431142774899251, "eval_runtime": 1.1663, "eval_samples_per_second": 2288.452, "eval_steps_per_second": 5.145, "step": 1144 }, { "epoch": 109.04761904761905, "grad_norm": 3.729548454284668, "learning_rate": 2.143928035982009e-07, "loss": 1.2153, "step": 1145 }, { "epoch": 109.14285714285714, "grad_norm": 4.262019157409668, "learning_rate": 2.145802098950525e-07, "loss": 1.1541, "step": 1146 }, { "epoch": 109.23809523809524, "grad_norm": 4.890219688415527, "learning_rate": 2.1476761619190407e-07, "loss": 1.1869, "step": 1147 }, { "epoch": 109.33333333333333, "grad_norm": 4.600557804107666, "learning_rate": 2.1495502248875562e-07, "loss": 1.1686, "step": 1148 }, { "epoch": 109.42857142857143, "grad_norm": 3.3701367378234863, "learning_rate": 2.1514242878560718e-07, "loss": 1.1191, "step": 1149 }, { "epoch": 109.52380952380952, "grad_norm": 3.143719434738159, "learning_rate": 2.1532983508245878e-07, "loss": 1.1985, "step": 1150 }, { "epoch": 109.61904761904762, "grad_norm": 2.748405694961548, "learning_rate": 2.1551724137931036e-07, "loss": 1.1981, "step": 1151 }, { "epoch": 109.71428571428571, "grad_norm": 3.2317254543304443, "learning_rate": 2.1570464767616192e-07, "loss": 1.1714, "step": 1152 }, { "epoch": 109.80952380952381, "grad_norm": 3.7985424995422363, "learning_rate": 2.1589205397301353e-07, "loss": 1.2185, "step": 1153 }, { "epoch": 109.9047619047619, "grad_norm": 3.4442954063415527, "learning_rate": 2.1607946026986508e-07, "loss": 1.1895, "step": 1154 }, { "epoch": 110.0, "grad_norm": 7.063841342926025, "learning_rate": 2.1626686656671664e-07, "loss": 1.1805, "step": 1155 }, { "epoch": 110.0, "eval_accuracy": 0.7118771075309105, "eval_f1": 0.772821270310192, "eval_loss": 0.5851633548736572, "eval_precision": 0.710483432916893, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.743503742084053, "eval_runtime": 1.2251, "eval_samples_per_second": 2178.533, "eval_steps_per_second": 4.897, "step": 1155 }, { "epoch": 110.0952380952381, "grad_norm": 4.239227771759033, "learning_rate": 2.1645427286356824e-07, "loss": 1.1938, "step": 1156 }, { "epoch": 110.19047619047619, "grad_norm": 3.055314302444458, "learning_rate": 2.166416791604198e-07, "loss": 1.1715, "step": 1157 }, { "epoch": 110.28571428571429, "grad_norm": 4.560515880584717, "learning_rate": 2.1682908545727138e-07, "loss": 1.1353, "step": 1158 }, { "epoch": 110.38095238095238, "grad_norm": 3.283205509185791, "learning_rate": 2.1701649175412293e-07, "loss": 1.1551, "step": 1159 }, { "epoch": 110.47619047619048, "grad_norm": 3.285792589187622, "learning_rate": 2.1720389805097454e-07, "loss": 1.2105, "step": 1160 }, { "epoch": 110.57142857142857, "grad_norm": 7.5094380378723145, "learning_rate": 2.173913043478261e-07, "loss": 1.1471, "step": 1161 }, { "epoch": 110.66666666666667, "grad_norm": 4.275049209594727, "learning_rate": 2.1757871064467765e-07, "loss": 1.2339, "step": 1162 }, { "epoch": 110.76190476190476, "grad_norm": 4.221767902374268, "learning_rate": 2.1776611694152926e-07, "loss": 1.162, "step": 1163 }, { "epoch": 110.85714285714286, "grad_norm": 6.287664413452148, "learning_rate": 2.1795352323838084e-07, "loss": 1.1657, "step": 1164 }, { "epoch": 110.95238095238095, "grad_norm": 4.596139907836914, "learning_rate": 2.181409295352324e-07, "loss": 1.1674, "step": 1165 }, { "epoch": 110.95238095238095, "eval_accuracy": 0.710378418883477, "eval_f1": 0.7704187704187704, "eval_loss": 0.5850911140441895, "eval_precision": 0.7114646187602852, "eval_recall": 0.8400259067357513, "eval_roc_auc": 0.7432691421991939, "eval_runtime": 1.2191, "eval_samples_per_second": 2189.248, "eval_steps_per_second": 4.922, "step": 1165 }, { "epoch": 111.04761904761905, "grad_norm": 3.9053878784179688, "learning_rate": 2.1832833583208395e-07, "loss": 1.1676, "step": 1166 }, { "epoch": 111.14285714285714, "grad_norm": 2.1512110233306885, "learning_rate": 2.1851574212893555e-07, "loss": 1.1689, "step": 1167 }, { "epoch": 111.23809523809524, "grad_norm": 4.1650590896606445, "learning_rate": 2.187031484257871e-07, "loss": 1.2255, "step": 1168 }, { "epoch": 111.33333333333333, "grad_norm": 3.7853543758392334, "learning_rate": 2.188905547226387e-07, "loss": 1.1473, "step": 1169 }, { "epoch": 111.42857142857143, "grad_norm": 5.21444034576416, "learning_rate": 2.190779610194903e-07, "loss": 1.1541, "step": 1170 }, { "epoch": 111.52380952380952, "grad_norm": 2.9562485218048096, "learning_rate": 2.1926536731634185e-07, "loss": 1.1349, "step": 1171 }, { "epoch": 111.61904761904762, "grad_norm": 3.4188034534454346, "learning_rate": 2.194527736131934e-07, "loss": 1.2242, "step": 1172 }, { "epoch": 111.71428571428571, "grad_norm": 2.437049627304077, "learning_rate": 2.1964017991004496e-07, "loss": 1.1651, "step": 1173 }, { "epoch": 111.80952380952381, "grad_norm": 4.054041385650635, "learning_rate": 2.1982758620689657e-07, "loss": 1.2115, "step": 1174 }, { "epoch": 111.9047619047619, "grad_norm": 5.592970371246338, "learning_rate": 2.2001499250374815e-07, "loss": 1.1969, "step": 1175 }, { "epoch": 112.0, "grad_norm": 3.1093966960906982, "learning_rate": 2.202023988005997e-07, "loss": 1.1868, "step": 1176 }, { "epoch": 112.0, "eval_accuracy": 0.7111277632071937, "eval_f1": 0.7726334414626954, "eval_loss": 0.5847890377044678, "eval_precision": 0.7092582566323768, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.7440662061024754, "eval_runtime": 1.5656, "eval_samples_per_second": 1704.743, "eval_steps_per_second": 3.832, "step": 1176 }, { "epoch": 112.0952380952381, "grad_norm": 2.705421209335327, "learning_rate": 2.203898050974513e-07, "loss": 1.1709, "step": 1177 }, { "epoch": 112.19047619047619, "grad_norm": 2.8298819065093994, "learning_rate": 2.2057721139430286e-07, "loss": 1.1828, "step": 1178 }, { "epoch": 112.28571428571429, "grad_norm": 4.289648056030273, "learning_rate": 2.2076461769115442e-07, "loss": 1.1973, "step": 1179 }, { "epoch": 112.38095238095238, "grad_norm": 5.3589911460876465, "learning_rate": 2.20952023988006e-07, "loss": 1.1765, "step": 1180 }, { "epoch": 112.47619047619048, "grad_norm": 4.928300380706787, "learning_rate": 2.211394302848576e-07, "loss": 1.1323, "step": 1181 }, { "epoch": 112.57142857142857, "grad_norm": 3.292466878890991, "learning_rate": 2.2132683658170916e-07, "loss": 1.1578, "step": 1182 }, { "epoch": 112.66666666666667, "grad_norm": 3.1881377696990967, "learning_rate": 2.2151424287856072e-07, "loss": 1.2117, "step": 1183 }, { "epoch": 112.76190476190476, "grad_norm": 3.145686626434326, "learning_rate": 2.2170164917541232e-07, "loss": 1.1578, "step": 1184 }, { "epoch": 112.85714285714286, "grad_norm": 2.949483633041382, "learning_rate": 2.2188905547226388e-07, "loss": 1.1966, "step": 1185 }, { "epoch": 112.95238095238095, "grad_norm": 2.7736291885375977, "learning_rate": 2.2207646176911546e-07, "loss": 1.1991, "step": 1186 }, { "epoch": 112.95238095238095, "eval_accuracy": 0.710378418883477, "eval_f1": 0.7702823179791977, "eval_loss": 0.5846362113952637, "eval_precision": 0.7116968698517299, "eval_recall": 0.8393782383419689, "eval_roc_auc": 0.7440405872193436, "eval_runtime": 1.1484, "eval_samples_per_second": 2324.094, "eval_steps_per_second": 5.225, "step": 1186 }, { "epoch": 113.04761904761905, "grad_norm": 2.5959534645080566, "learning_rate": 2.2226386806596701e-07, "loss": 1.1329, "step": 1187 }, { "epoch": 113.14285714285714, "grad_norm": 5.067058086395264, "learning_rate": 2.2245127436281862e-07, "loss": 1.1448, "step": 1188 }, { "epoch": 113.23809523809524, "grad_norm": 3.178851842880249, "learning_rate": 2.2263868065967018e-07, "loss": 1.1859, "step": 1189 }, { "epoch": 113.33333333333333, "grad_norm": 4.485405921936035, "learning_rate": 2.2282608695652173e-07, "loss": 1.1334, "step": 1190 }, { "epoch": 113.42857142857143, "grad_norm": 5.1164350509643555, "learning_rate": 2.2301349325337334e-07, "loss": 1.1733, "step": 1191 }, { "epoch": 113.52380952380952, "grad_norm": 4.061066150665283, "learning_rate": 2.2320089955022492e-07, "loss": 1.1619, "step": 1192 }, { "epoch": 113.61904761904762, "grad_norm": 3.489614725112915, "learning_rate": 2.2338830584707647e-07, "loss": 1.1891, "step": 1193 }, { "epoch": 113.71428571428571, "grad_norm": 3.178351640701294, "learning_rate": 2.2357571214392803e-07, "loss": 1.1894, "step": 1194 }, { "epoch": 113.80952380952381, "grad_norm": 3.2042295932769775, "learning_rate": 2.2376311844077963e-07, "loss": 1.1812, "step": 1195 }, { "epoch": 113.9047619047619, "grad_norm": 5.772481441497803, "learning_rate": 2.239505247376312e-07, "loss": 1.1984, "step": 1196 }, { "epoch": 114.0, "grad_norm": 6.261301040649414, "learning_rate": 2.2413793103448277e-07, "loss": 1.1562, "step": 1197 }, { "epoch": 114.0, "eval_accuracy": 0.7122517796927689, "eval_f1": 0.7726465364120781, "eval_loss": 0.5846668481826782, "eval_precision": 0.7115594329334788, "eval_recall": 0.8452072538860104, "eval_roc_auc": 0.7440354058721933, "eval_runtime": 1.1159, "eval_samples_per_second": 2391.852, "eval_steps_per_second": 5.377, "step": 1197 }, { "epoch": 114.0952380952381, "grad_norm": 3.7072525024414062, "learning_rate": 2.2432533733133435e-07, "loss": 1.1596, "step": 1198 }, { "epoch": 114.19047619047619, "grad_norm": 4.127257823944092, "learning_rate": 2.2451274362818593e-07, "loss": 1.2198, "step": 1199 }, { "epoch": 114.28571428571429, "grad_norm": 5.291389465332031, "learning_rate": 2.247001499250375e-07, "loss": 1.214, "step": 1200 }, { "epoch": 114.38095238095238, "grad_norm": 4.877745628356934, "learning_rate": 2.2488755622188904e-07, "loss": 1.1677, "step": 1201 }, { "epoch": 114.47619047619048, "grad_norm": 7.776942253112793, "learning_rate": 2.2507496251874065e-07, "loss": 1.2202, "step": 1202 }, { "epoch": 114.57142857142857, "grad_norm": 5.472081661224365, "learning_rate": 2.252623688155922e-07, "loss": 1.1885, "step": 1203 }, { "epoch": 114.66666666666667, "grad_norm": 5.365962028503418, "learning_rate": 2.2544977511244378e-07, "loss": 1.115, "step": 1204 }, { "epoch": 114.76190476190476, "grad_norm": 3.9224565029144287, "learning_rate": 2.256371814092954e-07, "loss": 1.1652, "step": 1205 }, { "epoch": 114.85714285714286, "grad_norm": 4.8845133781433105, "learning_rate": 2.2582458770614695e-07, "loss": 1.1369, "step": 1206 }, { "epoch": 114.95238095238095, "grad_norm": 7.695840835571289, "learning_rate": 2.260119940029985e-07, "loss": 1.2119, "step": 1207 }, { "epoch": 114.95238095238095, "eval_accuracy": 0.7107530910453354, "eval_f1": 0.7720023626698169, "eval_loss": 0.5845794081687927, "eval_precision": 0.7095548317046688, "eval_recall": 0.8465025906735751, "eval_roc_auc": 0.744254749568221, "eval_runtime": 1.2453, "eval_samples_per_second": 2143.316, "eval_steps_per_second": 4.818, "step": 1207 }, { "epoch": 115.04761904761905, "grad_norm": 5.336812973022461, "learning_rate": 2.2619940029985005e-07, "loss": 1.216, "step": 1208 }, { "epoch": 115.14285714285714, "grad_norm": 5.411586761474609, "learning_rate": 2.2638680659670166e-07, "loss": 1.1411, "step": 1209 }, { "epoch": 115.23809523809524, "grad_norm": 3.4818155765533447, "learning_rate": 2.2657421289355324e-07, "loss": 1.2321, "step": 1210 }, { "epoch": 115.33333333333333, "grad_norm": 6.3038859367370605, "learning_rate": 2.267616191904048e-07, "loss": 1.105, "step": 1211 }, { "epoch": 115.42857142857143, "grad_norm": 3.4177987575531006, "learning_rate": 2.269490254872564e-07, "loss": 1.1846, "step": 1212 }, { "epoch": 115.52380952380952, "grad_norm": 6.946967124938965, "learning_rate": 2.2713643178410796e-07, "loss": 1.1919, "step": 1213 }, { "epoch": 115.61904761904762, "grad_norm": 3.6147685050964355, "learning_rate": 2.2732383808095951e-07, "loss": 1.1587, "step": 1214 }, { "epoch": 115.71428571428571, "grad_norm": 5.091091632843018, "learning_rate": 2.275112443778111e-07, "loss": 1.1811, "step": 1215 }, { "epoch": 115.80952380952381, "grad_norm": 2.9732019901275635, "learning_rate": 2.276986506746627e-07, "loss": 1.1818, "step": 1216 }, { "epoch": 115.9047619047619, "grad_norm": 3.7847113609313965, "learning_rate": 2.2788605697151426e-07, "loss": 1.1645, "step": 1217 }, { "epoch": 116.0, "grad_norm": 3.948373317718506, "learning_rate": 2.280734632683658e-07, "loss": 1.1802, "step": 1218 }, { "epoch": 116.0, "eval_accuracy": 0.7111277632071937, "eval_f1": 0.7714201008004744, "eval_loss": 0.5844592452049255, "eval_precision": 0.7113176599234554, "eval_recall": 0.8426165803108808, "eval_roc_auc": 0.7443549222797927, "eval_runtime": 1.2081, "eval_samples_per_second": 2209.313, "eval_steps_per_second": 4.967, "step": 1218 }, { "epoch": 116.0952380952381, "grad_norm": 4.794178485870361, "learning_rate": 2.2826086956521742e-07, "loss": 1.1632, "step": 1219 }, { "epoch": 116.19047619047619, "grad_norm": 4.711289882659912, "learning_rate": 2.2844827586206897e-07, "loss": 1.2005, "step": 1220 }, { "epoch": 116.28571428571429, "grad_norm": 3.6995790004730225, "learning_rate": 2.2863568215892055e-07, "loss": 1.173, "step": 1221 }, { "epoch": 116.38095238095238, "grad_norm": 7.084836959838867, "learning_rate": 2.288230884557721e-07, "loss": 1.1971, "step": 1222 }, { "epoch": 116.47619047619048, "grad_norm": 4.868923187255859, "learning_rate": 2.2901049475262372e-07, "loss": 1.1268, "step": 1223 }, { "epoch": 116.57142857142857, "grad_norm": 3.7296676635742188, "learning_rate": 2.2919790104947527e-07, "loss": 1.1685, "step": 1224 }, { "epoch": 116.66666666666667, "grad_norm": 1.99024498462677, "learning_rate": 2.2938530734632683e-07, "loss": 1.1571, "step": 1225 }, { "epoch": 116.76190476190476, "grad_norm": 4.100002765655518, "learning_rate": 2.2957271364317843e-07, "loss": 1.1668, "step": 1226 }, { "epoch": 116.85714285714286, "grad_norm": 3.0545077323913574, "learning_rate": 2.2976011994003001e-07, "loss": 1.1952, "step": 1227 }, { "epoch": 116.95238095238095, "grad_norm": 6.123928546905518, "learning_rate": 2.2994752623688157e-07, "loss": 1.1824, "step": 1228 }, { "epoch": 116.95238095238095, "eval_accuracy": 0.7107530910453354, "eval_f1": 0.7718676122931442, "eval_loss": 0.5843604207038879, "eval_precision": 0.7097826086956521, "eval_recall": 0.8458549222797928, "eval_roc_auc": 0.744713298791019, "eval_runtime": 1.3086, "eval_samples_per_second": 2039.52, "eval_steps_per_second": 4.585, "step": 1228 }, { "epoch": 117.04761904761905, "grad_norm": 5.737049102783203, "learning_rate": 2.3013493253373312e-07, "loss": 1.2047, "step": 1229 }, { "epoch": 117.14285714285714, "grad_norm": 3.4674408435821533, "learning_rate": 2.3032233883058473e-07, "loss": 1.175, "step": 1230 }, { "epoch": 117.23809523809524, "grad_norm": 3.057267427444458, "learning_rate": 2.3050974512743628e-07, "loss": 1.1479, "step": 1231 }, { "epoch": 117.33333333333333, "grad_norm": 6.359550476074219, "learning_rate": 2.3069715142428787e-07, "loss": 1.2121, "step": 1232 }, { "epoch": 117.42857142857143, "grad_norm": 4.31020450592041, "learning_rate": 2.3088455772113945e-07, "loss": 1.2132, "step": 1233 }, { "epoch": 117.52380952380952, "grad_norm": 6.0614705085754395, "learning_rate": 2.3107196401799103e-07, "loss": 1.1611, "step": 1234 }, { "epoch": 117.61904761904762, "grad_norm": 6.543889999389648, "learning_rate": 2.3125937031484258e-07, "loss": 1.1608, "step": 1235 }, { "epoch": 117.71428571428571, "grad_norm": 2.0105361938476562, "learning_rate": 2.3144677661169414e-07, "loss": 1.1534, "step": 1236 }, { "epoch": 117.80952380952381, "grad_norm": 4.8146443367004395, "learning_rate": 2.3163418290854574e-07, "loss": 1.1649, "step": 1237 }, { "epoch": 117.9047619047619, "grad_norm": 4.572299003601074, "learning_rate": 2.318215892053973e-07, "loss": 1.1401, "step": 1238 }, { "epoch": 118.0, "grad_norm": 2.9639434814453125, "learning_rate": 2.3200899550224888e-07, "loss": 1.17, "step": 1239 }, { "epoch": 118.0, "eval_accuracy": 0.7111277632071937, "eval_f1": 0.7715555555555556, "eval_loss": 0.584344208240509, "eval_precision": 0.7110868377935554, "eval_recall": 0.8432642487046632, "eval_roc_auc": 0.7446220495106506, "eval_runtime": 1.1134, "eval_samples_per_second": 2397.16, "eval_steps_per_second": 5.389, "step": 1239 }, { "epoch": 118.0952380952381, "grad_norm": 2.3465840816497803, "learning_rate": 2.3219640179910049e-07, "loss": 1.1434, "step": 1240 }, { "epoch": 118.19047619047619, "grad_norm": 2.926537275314331, "learning_rate": 2.3238380809595204e-07, "loss": 1.2079, "step": 1241 }, { "epoch": 118.28571428571429, "grad_norm": 2.285618305206299, "learning_rate": 2.325712143928036e-07, "loss": 1.1869, "step": 1242 }, { "epoch": 118.38095238095238, "grad_norm": 4.09526252746582, "learning_rate": 2.3275862068965515e-07, "loss": 1.188, "step": 1243 }, { "epoch": 118.47619047619048, "grad_norm": 2.2800137996673584, "learning_rate": 2.3294602698650676e-07, "loss": 1.199, "step": 1244 }, { "epoch": 118.57142857142857, "grad_norm": 2.6519763469696045, "learning_rate": 2.3313343328335834e-07, "loss": 1.1863, "step": 1245 }, { "epoch": 118.66666666666667, "grad_norm": 3.5764822959899902, "learning_rate": 2.333208395802099e-07, "loss": 1.2064, "step": 1246 }, { "epoch": 118.76190476190476, "grad_norm": 5.1671576499938965, "learning_rate": 2.335082458770615e-07, "loss": 1.1677, "step": 1247 }, { "epoch": 118.85714285714286, "grad_norm": 3.731257915496826, "learning_rate": 2.3369565217391305e-07, "loss": 1.1424, "step": 1248 }, { "epoch": 118.95238095238095, "grad_norm": 2.59311842918396, "learning_rate": 2.338830584707646e-07, "loss": 1.1511, "step": 1249 }, { "epoch": 118.95238095238095, "eval_accuracy": 0.7115024353690521, "eval_f1": 0.7720544701006513, "eval_loss": 0.5842468738555908, "eval_precision": 0.7110141766630316, "eval_recall": 0.844559585492228, "eval_roc_auc": 0.7448177892918826, "eval_runtime": 1.2423, "eval_samples_per_second": 2148.371, "eval_steps_per_second": 4.83, "step": 1249 }, { "epoch": 119.04761904761905, "grad_norm": 4.2373857498168945, "learning_rate": 2.340704647676162e-07, "loss": 1.1727, "step": 1250 }, { "epoch": 119.14285714285714, "grad_norm": 4.400254249572754, "learning_rate": 2.342578710644678e-07, "loss": 1.1225, "step": 1251 }, { "epoch": 119.23809523809524, "grad_norm": 4.6587419509887695, "learning_rate": 2.3444527736131935e-07, "loss": 1.1804, "step": 1252 }, { "epoch": 119.33333333333333, "grad_norm": 6.453824520111084, "learning_rate": 2.346326836581709e-07, "loss": 1.2251, "step": 1253 }, { "epoch": 119.42857142857143, "grad_norm": 4.208118915557861, "learning_rate": 2.3482008995502251e-07, "loss": 1.1708, "step": 1254 }, { "epoch": 119.52380952380952, "grad_norm": 6.1400346755981445, "learning_rate": 2.3500749625187407e-07, "loss": 1.1984, "step": 1255 }, { "epoch": 119.61904761904762, "grad_norm": 6.060001850128174, "learning_rate": 2.3519490254872565e-07, "loss": 1.1838, "step": 1256 }, { "epoch": 119.71428571428571, "grad_norm": 3.5763938426971436, "learning_rate": 2.353823088455772e-07, "loss": 1.1486, "step": 1257 }, { "epoch": 119.80952380952381, "grad_norm": 2.3884499073028564, "learning_rate": 2.355697151424288e-07, "loss": 1.1696, "step": 1258 }, { "epoch": 119.9047619047619, "grad_norm": 2.4123425483703613, "learning_rate": 2.3575712143928037e-07, "loss": 1.1633, "step": 1259 }, { "epoch": 120.0, "grad_norm": 3.5864171981811523, "learning_rate": 2.3594452773613192e-07, "loss": 1.1351, "step": 1260 }, { "epoch": 120.0, "eval_accuracy": 0.7100037467216186, "eval_f1": 0.7710059171597633, "eval_loss": 0.584304690361023, "eval_precision": 0.7096949891067538, "eval_recall": 0.8439119170984456, "eval_roc_auc": 0.7448684513529072, "eval_runtime": 1.2312, "eval_samples_per_second": 2167.728, "eval_steps_per_second": 4.873, "step": 1260 }, { "epoch": 120.0952380952381, "grad_norm": 4.860226631164551, "learning_rate": 2.3613193403298353e-07, "loss": 1.1599, "step": 1261 }, { "epoch": 120.19047619047619, "grad_norm": 3.0438621044158936, "learning_rate": 2.363193403298351e-07, "loss": 1.2055, "step": 1262 }, { "epoch": 120.28571428571429, "grad_norm": 3.7721710205078125, "learning_rate": 2.3650674662668666e-07, "loss": 1.1663, "step": 1263 }, { "epoch": 120.38095238095238, "grad_norm": 2.985271692276001, "learning_rate": 2.3669415292353827e-07, "loss": 1.2082, "step": 1264 }, { "epoch": 120.47619047619048, "grad_norm": 4.9321489334106445, "learning_rate": 2.3688155922038983e-07, "loss": 1.1306, "step": 1265 }, { "epoch": 120.57142857142857, "grad_norm": 4.529792308807373, "learning_rate": 2.3706896551724138e-07, "loss": 1.1922, "step": 1266 }, { "epoch": 120.66666666666667, "grad_norm": 3.7236716747283936, "learning_rate": 2.3725637181409296e-07, "loss": 1.191, "step": 1267 }, { "epoch": 120.76190476190476, "grad_norm": 2.2532882690429688, "learning_rate": 2.3744377811094454e-07, "loss": 1.1717, "step": 1268 }, { "epoch": 120.85714285714286, "grad_norm": 1.9853754043579102, "learning_rate": 2.3763118440779612e-07, "loss": 1.1676, "step": 1269 }, { "epoch": 120.95238095238095, "grad_norm": 3.591606378555298, "learning_rate": 2.3781859070464768e-07, "loss": 1.1982, "step": 1270 }, { "epoch": 120.95238095238095, "eval_accuracy": 0.7096290745597602, "eval_f1": 0.7702342128668841, "eval_loss": 0.5842850208282471, "eval_precision": 0.7102241662110443, "eval_recall": 0.841321243523316, "eval_roc_auc": 0.7447354634427173, "eval_runtime": 1.3384, "eval_samples_per_second": 1994.107, "eval_steps_per_second": 4.483, "step": 1270 }, { "epoch": 121.04761904761905, "grad_norm": 4.7795891761779785, "learning_rate": 2.3800599700149928e-07, "loss": 1.1261, "step": 1271 }, { "epoch": 121.14285714285714, "grad_norm": 4.506382465362549, "learning_rate": 2.3819340329835084e-07, "loss": 1.1857, "step": 1272 }, { "epoch": 121.23809523809524, "grad_norm": 6.346680641174316, "learning_rate": 2.3838080959520242e-07, "loss": 1.183, "step": 1273 }, { "epoch": 121.33333333333333, "grad_norm": 2.1210904121398926, "learning_rate": 2.3856821589205395e-07, "loss": 1.1398, "step": 1274 }, { "epoch": 121.42857142857143, "grad_norm": 8.61037540435791, "learning_rate": 2.3875562218890556e-07, "loss": 1.1491, "step": 1275 }, { "epoch": 121.52380952380952, "grad_norm": 4.891674041748047, "learning_rate": 2.389430284857571e-07, "loss": 1.1495, "step": 1276 }, { "epoch": 121.61904761904762, "grad_norm": 3.5965917110443115, "learning_rate": 2.391304347826087e-07, "loss": 1.1773, "step": 1277 }, { "epoch": 121.71428571428571, "grad_norm": 4.160679340362549, "learning_rate": 2.393178410794603e-07, "loss": 1.1941, "step": 1278 }, { "epoch": 121.80952380952381, "grad_norm": 5.527775287628174, "learning_rate": 2.395052473763119e-07, "loss": 1.1966, "step": 1279 }, { "epoch": 121.9047619047619, "grad_norm": 3.736419200897217, "learning_rate": 2.3969265367316343e-07, "loss": 1.1968, "step": 1280 }, { "epoch": 122.0, "grad_norm": 4.868208408355713, "learning_rate": 2.39880059970015e-07, "loss": 1.1798, "step": 1281 }, { "epoch": 122.0, "eval_accuracy": 0.7100037467216186, "eval_f1": 0.7716814159292036, "eval_loss": 0.5840305089950562, "eval_precision": 0.7085590465872156, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.745215889464594, "eval_runtime": 1.1693, "eval_samples_per_second": 2282.508, "eval_steps_per_second": 5.131, "step": 1281 }, { "epoch": 122.0952380952381, "grad_norm": 2.7348592281341553, "learning_rate": 2.400674662668666e-07, "loss": 1.2017, "step": 1282 }, { "epoch": 122.19047619047619, "grad_norm": 4.157247066497803, "learning_rate": 2.4025487256371815e-07, "loss": 1.1972, "step": 1283 }, { "epoch": 122.28571428571429, "grad_norm": 2.7334799766540527, "learning_rate": 2.404422788605697e-07, "loss": 1.1519, "step": 1284 }, { "epoch": 122.38095238095238, "grad_norm": 4.976047515869141, "learning_rate": 2.406296851574213e-07, "loss": 1.1174, "step": 1285 }, { "epoch": 122.47619047619048, "grad_norm": 2.340428352355957, "learning_rate": 2.4081709145427287e-07, "loss": 1.1801, "step": 1286 }, { "epoch": 122.57142857142857, "grad_norm": 5.766010284423828, "learning_rate": 2.410044977511244e-07, "loss": 1.1731, "step": 1287 }, { "epoch": 122.66666666666667, "grad_norm": 7.431637763977051, "learning_rate": 2.4119190404797603e-07, "loss": 1.1525, "step": 1288 }, { "epoch": 122.76190476190476, "grad_norm": 6.350968360900879, "learning_rate": 2.4137931034482764e-07, "loss": 1.1964, "step": 1289 }, { "epoch": 122.85714285714286, "grad_norm": 4.233700752258301, "learning_rate": 2.415667166416792e-07, "loss": 1.1762, "step": 1290 }, { "epoch": 122.95238095238095, "grad_norm": 3.7037289142608643, "learning_rate": 2.4175412293853074e-07, "loss": 1.2184, "step": 1291 }, { "epoch": 122.95238095238095, "eval_accuracy": 0.710378418883477, "eval_f1": 0.7717744316504281, "eval_loss": 0.5837914943695068, "eval_precision": 0.7091698317959848, "eval_recall": 0.8465025906735751, "eval_roc_auc": 0.7454545192861255, "eval_runtime": 1.4528, "eval_samples_per_second": 1837.107, "eval_steps_per_second": 4.13, "step": 1291 }, { "epoch": 123.04761904761905, "grad_norm": 4.702028751373291, "learning_rate": 2.4194152923538235e-07, "loss": 1.1751, "step": 1292 }, { "epoch": 123.14285714285714, "grad_norm": 4.630548000335693, "learning_rate": 2.421289355322339e-07, "loss": 1.1632, "step": 1293 }, { "epoch": 123.23809523809524, "grad_norm": 3.001009702682495, "learning_rate": 2.4231634182908546e-07, "loss": 1.147, "step": 1294 }, { "epoch": 123.33333333333333, "grad_norm": 2.9855480194091797, "learning_rate": 2.42503748125937e-07, "loss": 1.1795, "step": 1295 }, { "epoch": 123.42857142857143, "grad_norm": 4.929809093475342, "learning_rate": 2.426911544227886e-07, "loss": 1.199, "step": 1296 }, { "epoch": 123.52380952380952, "grad_norm": 3.593780994415283, "learning_rate": 2.428785607196402e-07, "loss": 1.2116, "step": 1297 }, { "epoch": 123.61904761904762, "grad_norm": 3.6316239833831787, "learning_rate": 2.4306596701649173e-07, "loss": 1.2109, "step": 1298 }, { "epoch": 123.71428571428571, "grad_norm": 4.772429466247559, "learning_rate": 2.4325337331334334e-07, "loss": 1.1818, "step": 1299 }, { "epoch": 123.80952380952381, "grad_norm": 2.6498239040374756, "learning_rate": 2.4344077961019495e-07, "loss": 1.1726, "step": 1300 }, { "epoch": 123.9047619047619, "grad_norm": 4.785904407501221, "learning_rate": 2.436281859070465e-07, "loss": 1.1772, "step": 1301 }, { "epoch": 124.0, "grad_norm": 5.397092819213867, "learning_rate": 2.4381559220389806e-07, "loss": 1.1346, "step": 1302 }, { "epoch": 124.0, "eval_accuracy": 0.7092544023979018, "eval_f1": 0.7700059276822763, "eval_loss": 0.583419919013977, "eval_precision": 0.7098360655737705, "eval_recall": 0.841321243523316, "eval_roc_auc": 0.7457625215889465, "eval_runtime": 1.1763, "eval_samples_per_second": 2269.028, "eval_steps_per_second": 5.101, "step": 1302 }, { "epoch": 124.0952380952381, "grad_norm": 3.3638992309570312, "learning_rate": 2.4400299850074966e-07, "loss": 1.1885, "step": 1303 }, { "epoch": 124.19047619047619, "grad_norm": 4.315061569213867, "learning_rate": 2.441904047976012e-07, "loss": 1.1324, "step": 1304 }, { "epoch": 124.28571428571429, "grad_norm": 1.9663842916488647, "learning_rate": 2.4437781109445277e-07, "loss": 1.1901, "step": 1305 }, { "epoch": 124.38095238095238, "grad_norm": 6.373414039611816, "learning_rate": 2.445652173913044e-07, "loss": 1.2036, "step": 1306 }, { "epoch": 124.47619047619048, "grad_norm": 4.554337978363037, "learning_rate": 2.4475262368815593e-07, "loss": 1.133, "step": 1307 }, { "epoch": 124.57142857142857, "grad_norm": 7.343845844268799, "learning_rate": 2.449400299850075e-07, "loss": 1.1857, "step": 1308 }, { "epoch": 124.66666666666667, "grad_norm": 3.1621153354644775, "learning_rate": 2.4512743628185904e-07, "loss": 1.1735, "step": 1309 }, { "epoch": 124.76190476190476, "grad_norm": 2.7307000160217285, "learning_rate": 2.4531484257871065e-07, "loss": 1.1837, "step": 1310 }, { "epoch": 124.85714285714286, "grad_norm": 4.455824851989746, "learning_rate": 2.455022488755622e-07, "loss": 1.1798, "step": 1311 }, { "epoch": 124.95238095238095, "grad_norm": 8.043598175048828, "learning_rate": 2.456896551724138e-07, "loss": 1.1953, "step": 1312 }, { "epoch": 124.95238095238095, "eval_accuracy": 0.7115024353690521, "eval_f1": 0.7737955346650999, "eval_loss": 0.5832556486129761, "eval_precision": 0.7080645161290322, "eval_recall": 0.852979274611399, "eval_roc_auc": 0.7464021301093839, "eval_runtime": 1.2072, "eval_samples_per_second": 2210.932, "eval_steps_per_second": 4.97, "step": 1312 }, { "epoch": 125.04761904761905, "grad_norm": 3.0496273040771484, "learning_rate": 2.458770614692654e-07, "loss": 1.1428, "step": 1313 }, { "epoch": 125.14285714285714, "grad_norm": 2.3517754077911377, "learning_rate": 2.46064467766117e-07, "loss": 1.1795, "step": 1314 }, { "epoch": 125.23809523809524, "grad_norm": 4.751769065856934, "learning_rate": 2.4625187406296853e-07, "loss": 1.1206, "step": 1315 }, { "epoch": 125.33333333333333, "grad_norm": 3.0632271766662598, "learning_rate": 2.464392803598201e-07, "loss": 1.196, "step": 1316 }, { "epoch": 125.42857142857143, "grad_norm": 7.766650199890137, "learning_rate": 2.466266866566717e-07, "loss": 1.2012, "step": 1317 }, { "epoch": 125.52380952380952, "grad_norm": 3.9962661266326904, "learning_rate": 2.4681409295352324e-07, "loss": 1.1835, "step": 1318 }, { "epoch": 125.61904761904762, "grad_norm": 4.209256649017334, "learning_rate": 2.470014992503748e-07, "loss": 1.1662, "step": 1319 }, { "epoch": 125.71428571428571, "grad_norm": 3.4023072719573975, "learning_rate": 2.471889055472264e-07, "loss": 1.1473, "step": 1320 }, { "epoch": 125.80952380952381, "grad_norm": 3.4137771129608154, "learning_rate": 2.4737631184407796e-07, "loss": 1.1915, "step": 1321 }, { "epoch": 125.9047619047619, "grad_norm": 3.2793519496917725, "learning_rate": 2.475637181409295e-07, "loss": 1.2126, "step": 1322 }, { "epoch": 126.0, "grad_norm": 4.163229465484619, "learning_rate": 2.477511244377811e-07, "loss": 1.126, "step": 1323 }, { "epoch": 126.0, "eval_accuracy": 0.7115024353690521, "eval_f1": 0.7719194312796208, "eval_loss": 0.582944929599762, "eval_precision": 0.7112445414847162, "eval_recall": 0.8439119170984456, "eval_roc_auc": 0.7463943580886586, "eval_runtime": 1.3269, "eval_samples_per_second": 2011.487, "eval_steps_per_second": 4.522, "step": 1323 }, { "epoch": 126.0952380952381, "grad_norm": 2.1432619094848633, "learning_rate": 2.4793853073463273e-07, "loss": 1.199, "step": 1324 }, { "epoch": 126.19047619047619, "grad_norm": 3.3765182495117188, "learning_rate": 2.481259370314843e-07, "loss": 1.1606, "step": 1325 }, { "epoch": 126.28571428571429, "grad_norm": 3.6215734481811523, "learning_rate": 2.4831334332833584e-07, "loss": 1.2013, "step": 1326 }, { "epoch": 126.38095238095238, "grad_norm": 3.0442678928375244, "learning_rate": 2.4850074962518745e-07, "loss": 1.1996, "step": 1327 }, { "epoch": 126.47619047619048, "grad_norm": 3.2260961532592773, "learning_rate": 2.48688155922039e-07, "loss": 1.1508, "step": 1328 }, { "epoch": 126.57142857142857, "grad_norm": 8.212839126586914, "learning_rate": 2.4887556221889056e-07, "loss": 1.0953, "step": 1329 }, { "epoch": 126.66666666666667, "grad_norm": 3.9986352920532227, "learning_rate": 2.490629685157421e-07, "loss": 1.2293, "step": 1330 }, { "epoch": 126.76190476190476, "grad_norm": 3.904066324234009, "learning_rate": 2.492503748125937e-07, "loss": 1.1291, "step": 1331 }, { "epoch": 126.85714285714286, "grad_norm": 8.378240585327148, "learning_rate": 2.4943778110944527e-07, "loss": 1.1579, "step": 1332 }, { "epoch": 126.95238095238095, "grad_norm": 3.0266950130462646, "learning_rate": 2.4962518740629683e-07, "loss": 1.2049, "step": 1333 }, { "epoch": 126.95238095238095, "eval_accuracy": 0.7107530910453354, "eval_f1": 0.7713270142180095, "eval_loss": 0.5828430652618408, "eval_precision": 0.7106986899563319, "eval_recall": 0.8432642487046632, "eval_roc_auc": 0.7465633275762809, "eval_runtime": 1.1244, "eval_samples_per_second": 2373.785, "eval_steps_per_second": 5.336, "step": 1333 }, { "epoch": 127.04761904761905, "grad_norm": 6.547031402587891, "learning_rate": 2.4981259370314843e-07, "loss": 1.1745, "step": 1334 }, { "epoch": 127.14285714285714, "grad_norm": 5.5182013511657715, "learning_rate": 2.5000000000000004e-07, "loss": 1.1564, "step": 1335 }, { "epoch": 127.23809523809524, "grad_norm": 2.4143738746643066, "learning_rate": 2.501874062968516e-07, "loss": 1.1717, "step": 1336 }, { "epoch": 127.33333333333333, "grad_norm": 4.863409042358398, "learning_rate": 2.5037481259370315e-07, "loss": 1.1933, "step": 1337 }, { "epoch": 127.42857142857143, "grad_norm": 3.1126413345336914, "learning_rate": 2.5056221889055476e-07, "loss": 1.1717, "step": 1338 }, { "epoch": 127.52380952380952, "grad_norm": 4.216578006744385, "learning_rate": 2.507496251874063e-07, "loss": 1.2029, "step": 1339 }, { "epoch": 127.61904761904762, "grad_norm": 2.9910659790039062, "learning_rate": 2.5093703148425787e-07, "loss": 1.1934, "step": 1340 }, { "epoch": 127.71428571428571, "grad_norm": 2.8089070320129395, "learning_rate": 2.511244377811095e-07, "loss": 1.1704, "step": 1341 }, { "epoch": 127.80952380952381, "grad_norm": 5.142420291900635, "learning_rate": 2.5131184407796103e-07, "loss": 1.1208, "step": 1342 }, { "epoch": 127.9047619047619, "grad_norm": 5.560968399047852, "learning_rate": 2.514992503748126e-07, "loss": 1.1662, "step": 1343 }, { "epoch": 128.0, "grad_norm": 4.953802108764648, "learning_rate": 2.5168665667166414e-07, "loss": 1.2133, "step": 1344 }, { "epoch": 128.0, "eval_accuracy": 0.7115024353690521, "eval_f1": 0.7729952830188679, "eval_loss": 0.5828237533569336, "eval_precision": 0.7094155844155844, "eval_recall": 0.8490932642487047, "eval_roc_auc": 0.7466491076568796, "eval_runtime": 1.1537, "eval_samples_per_second": 2313.46, "eval_steps_per_second": 5.201, "step": 1344 }, { "epoch": 128.0952380952381, "grad_norm": 2.2175867557525635, "learning_rate": 2.5187406296851575e-07, "loss": 1.1923, "step": 1345 }, { "epoch": 128.1904761904762, "grad_norm": 3.569380760192871, "learning_rate": 2.5206146926536735e-07, "loss": 1.1585, "step": 1346 }, { "epoch": 128.28571428571428, "grad_norm": 3.4245896339416504, "learning_rate": 2.522488755622189e-07, "loss": 1.1983, "step": 1347 }, { "epoch": 128.38095238095238, "grad_norm": 2.583385467529297, "learning_rate": 2.524362818590705e-07, "loss": 1.1682, "step": 1348 }, { "epoch": 128.47619047619048, "grad_norm": 3.7909340858459473, "learning_rate": 2.5262368815592207e-07, "loss": 1.1604, "step": 1349 }, { "epoch": 128.57142857142858, "grad_norm": 4.447571754455566, "learning_rate": 2.528110944527736e-07, "loss": 1.1556, "step": 1350 }, { "epoch": 128.66666666666666, "grad_norm": 4.511852264404297, "learning_rate": 2.529985007496252e-07, "loss": 1.1578, "step": 1351 }, { "epoch": 128.76190476190476, "grad_norm": 3.8867123126983643, "learning_rate": 2.531859070464768e-07, "loss": 1.1497, "step": 1352 }, { "epoch": 128.85714285714286, "grad_norm": 4.38187313079834, "learning_rate": 2.5337331334332834e-07, "loss": 1.1803, "step": 1353 }, { "epoch": 128.95238095238096, "grad_norm": 3.493037462234497, "learning_rate": 2.535607196401799e-07, "loss": 1.1846, "step": 1354 }, { "epoch": 128.95238095238096, "eval_accuracy": 0.710378418883477, "eval_f1": 0.7710986082321587, "eval_loss": 0.5827430486679077, "eval_precision": 0.7103109656301145, "eval_recall": 0.8432642487046632, "eval_roc_auc": 0.7467225100748416, "eval_runtime": 1.1675, "eval_samples_per_second": 2285.99, "eval_steps_per_second": 5.139, "step": 1354 }, { "epoch": 129.04761904761904, "grad_norm": 6.4895548820495605, "learning_rate": 2.537481259370315e-07, "loss": 1.1259, "step": 1355 }, { "epoch": 129.14285714285714, "grad_norm": 3.8797340393066406, "learning_rate": 2.5393553223388306e-07, "loss": 1.1476, "step": 1356 }, { "epoch": 129.23809523809524, "grad_norm": 4.010172367095947, "learning_rate": 2.541229385307346e-07, "loss": 1.1496, "step": 1357 }, { "epoch": 129.33333333333334, "grad_norm": 4.8416595458984375, "learning_rate": 2.543103448275862e-07, "loss": 1.146, "step": 1358 }, { "epoch": 129.42857142857142, "grad_norm": 2.402299642562866, "learning_rate": 2.544977511244378e-07, "loss": 1.187, "step": 1359 }, { "epoch": 129.52380952380952, "grad_norm": 4.096165657043457, "learning_rate": 2.546851574212894e-07, "loss": 1.1673, "step": 1360 }, { "epoch": 129.61904761904762, "grad_norm": 6.944141864776611, "learning_rate": 2.5487256371814093e-07, "loss": 1.1702, "step": 1361 }, { "epoch": 129.71428571428572, "grad_norm": 3.276299476623535, "learning_rate": 2.5505997001499254e-07, "loss": 1.1306, "step": 1362 }, { "epoch": 129.8095238095238, "grad_norm": 9.009763717651367, "learning_rate": 2.552473763118441e-07, "loss": 1.2419, "step": 1363 }, { "epoch": 129.9047619047619, "grad_norm": 3.6103036403656006, "learning_rate": 2.5543478260869565e-07, "loss": 1.172, "step": 1364 }, { "epoch": 130.0, "grad_norm": 3.216857433319092, "learning_rate": 2.556221889055472e-07, "loss": 1.1975, "step": 1365 }, { "epoch": 130.0, "eval_accuracy": 0.7111277632071937, "eval_f1": 0.7726334414626954, "eval_loss": 0.5825395584106445, "eval_precision": 0.7092582566323768, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.7471263672999424, "eval_runtime": 1.3705, "eval_samples_per_second": 1947.421, "eval_steps_per_second": 4.378, "step": 1365 }, { "epoch": 130.0952380952381, "grad_norm": 3.9114797115325928, "learning_rate": 2.558095952023988e-07, "loss": 1.1589, "step": 1366 }, { "epoch": 130.1904761904762, "grad_norm": 2.176995038986206, "learning_rate": 2.5599700149925037e-07, "loss": 1.1442, "step": 1367 }, { "epoch": 130.28571428571428, "grad_norm": 2.248365879058838, "learning_rate": 2.561844077961019e-07, "loss": 1.1856, "step": 1368 }, { "epoch": 130.38095238095238, "grad_norm": 2.4582395553588867, "learning_rate": 2.5637181409295353e-07, "loss": 1.1695, "step": 1369 }, { "epoch": 130.47619047619048, "grad_norm": 2.164710283279419, "learning_rate": 2.5655922038980514e-07, "loss": 1.1698, "step": 1370 }, { "epoch": 130.57142857142858, "grad_norm": 2.6641697883605957, "learning_rate": 2.567466266866567e-07, "loss": 1.1838, "step": 1371 }, { "epoch": 130.66666666666666, "grad_norm": 2.3906073570251465, "learning_rate": 2.5693403298350825e-07, "loss": 1.161, "step": 1372 }, { "epoch": 130.76190476190476, "grad_norm": 2.098816394805908, "learning_rate": 2.5712143928035985e-07, "loss": 1.1615, "step": 1373 }, { "epoch": 130.85714285714286, "grad_norm": 1.8485771417617798, "learning_rate": 2.573088455772114e-07, "loss": 1.1923, "step": 1374 }, { "epoch": 130.95238095238096, "grad_norm": 3.726562261581421, "learning_rate": 2.5749625187406296e-07, "loss": 1.1684, "step": 1375 }, { "epoch": 130.95238095238096, "eval_accuracy": 0.7100037467216186, "eval_f1": 0.7710059171597633, "eval_loss": 0.5824764370918274, "eval_precision": 0.7096949891067538, "eval_recall": 0.8439119170984456, "eval_roc_auc": 0.7473465745538285, "eval_runtime": 1.1899, "eval_samples_per_second": 2242.957, "eval_steps_per_second": 5.042, "step": 1375 }, { "epoch": 131.04761904761904, "grad_norm": 3.8663816452026367, "learning_rate": 2.5768365817091457e-07, "loss": 1.1569, "step": 1376 }, { "epoch": 131.14285714285714, "grad_norm": 3.5459115505218506, "learning_rate": 2.578710644677661e-07, "loss": 1.1762, "step": 1377 }, { "epoch": 131.23809523809524, "grad_norm": 3.518721580505371, "learning_rate": 2.580584707646177e-07, "loss": 1.1732, "step": 1378 }, { "epoch": 131.33333333333334, "grad_norm": 4.939159393310547, "learning_rate": 2.582458770614693e-07, "loss": 1.1596, "step": 1379 }, { "epoch": 131.42857142857142, "grad_norm": 3.7521708011627197, "learning_rate": 2.5843328335832084e-07, "loss": 1.1283, "step": 1380 }, { "epoch": 131.52380952380952, "grad_norm": 2.7267091274261475, "learning_rate": 2.5862068965517245e-07, "loss": 1.1724, "step": 1381 }, { "epoch": 131.61904761904762, "grad_norm": 6.932819366455078, "learning_rate": 2.58808095952024e-07, "loss": 1.1694, "step": 1382 }, { "epoch": 131.71428571428572, "grad_norm": 2.6388416290283203, "learning_rate": 2.589955022488756e-07, "loss": 1.171, "step": 1383 }, { "epoch": 131.8095238095238, "grad_norm": 2.6707115173339844, "learning_rate": 2.5918290854572716e-07, "loss": 1.2032, "step": 1384 }, { "epoch": 131.9047619047619, "grad_norm": 2.9166555404663086, "learning_rate": 2.593703148425787e-07, "loss": 1.1955, "step": 1385 }, { "epoch": 132.0, "grad_norm": 8.50768756866455, "learning_rate": 2.595577211394303e-07, "loss": 1.1999, "step": 1386 }, { "epoch": 132.0, "eval_accuracy": 0.7122517796927689, "eval_f1": 0.7743830787309048, "eval_loss": 0.5823792815208435, "eval_precision": 0.7086021505376344, "eval_recall": 0.8536269430051814, "eval_roc_auc": 0.7480109383995394, "eval_runtime": 1.1519, "eval_samples_per_second": 2316.954, "eval_steps_per_second": 5.209, "step": 1386 }, { "epoch": 132.0952380952381, "grad_norm": 2.550859212875366, "learning_rate": 2.597451274362819e-07, "loss": 1.1689, "step": 1387 }, { "epoch": 132.1904761904762, "grad_norm": 3.9516186714172363, "learning_rate": 2.5993253373313343e-07, "loss": 1.1599, "step": 1388 }, { "epoch": 132.28571428571428, "grad_norm": 8.39937686920166, "learning_rate": 2.60119940029985e-07, "loss": 1.2044, "step": 1389 }, { "epoch": 132.38095238095238, "grad_norm": 9.259173393249512, "learning_rate": 2.603073463268366e-07, "loss": 1.1766, "step": 1390 }, { "epoch": 132.47619047619048, "grad_norm": 3.003726005554199, "learning_rate": 2.6049475262368815e-07, "loss": 1.1569, "step": 1391 }, { "epoch": 132.57142857142858, "grad_norm": 3.2033815383911133, "learning_rate": 2.606821589205397e-07, "loss": 1.1957, "step": 1392 }, { "epoch": 132.66666666666666, "grad_norm": 2.2865841388702393, "learning_rate": 2.608695652173913e-07, "loss": 1.1646, "step": 1393 }, { "epoch": 132.76190476190476, "grad_norm": 3.505711317062378, "learning_rate": 2.610569715142429e-07, "loss": 1.1766, "step": 1394 }, { "epoch": 132.85714285714286, "grad_norm": 5.692092418670654, "learning_rate": 2.612443778110945e-07, "loss": 1.1545, "step": 1395 }, { "epoch": 132.95238095238096, "grad_norm": 6.441229343414307, "learning_rate": 2.6143178410794603e-07, "loss": 1.1214, "step": 1396 }, { "epoch": 132.95238095238096, "eval_accuracy": 0.7107530910453354, "eval_f1": 0.7713270142180095, "eval_loss": 0.5824524760246277, "eval_precision": 0.7106986899563319, "eval_recall": 0.8432642487046632, "eval_roc_auc": 0.7477320092112838, "eval_runtime": 1.162, "eval_samples_per_second": 2296.861, "eval_steps_per_second": 5.163, "step": 1396 }, { "epoch": 133.04761904761904, "grad_norm": 3.6968135833740234, "learning_rate": 2.6161919040479764e-07, "loss": 1.1982, "step": 1397 }, { "epoch": 133.14285714285714, "grad_norm": 2.27152156829834, "learning_rate": 2.618065967016492e-07, "loss": 1.1361, "step": 1398 }, { "epoch": 133.23809523809524, "grad_norm": 5.761772155761719, "learning_rate": 2.6199400299850075e-07, "loss": 1.2019, "step": 1399 }, { "epoch": 133.33333333333334, "grad_norm": 6.545323848724365, "learning_rate": 2.6218140929535235e-07, "loss": 1.1749, "step": 1400 }, { "epoch": 133.42857142857142, "grad_norm": 10.077860832214355, "learning_rate": 2.623688155922039e-07, "loss": 1.2102, "step": 1401 }, { "epoch": 133.52380952380952, "grad_norm": 2.281158208847046, "learning_rate": 2.6255622188905546e-07, "loss": 1.1869, "step": 1402 }, { "epoch": 133.61904761904762, "grad_norm": 3.5024521350860596, "learning_rate": 2.62743628185907e-07, "loss": 1.1421, "step": 1403 }, { "epoch": 133.71428571428572, "grad_norm": 2.298860549926758, "learning_rate": 2.629310344827586e-07, "loss": 1.1538, "step": 1404 }, { "epoch": 133.8095238095238, "grad_norm": 4.36176872253418, "learning_rate": 2.6311844077961023e-07, "loss": 1.1518, "step": 1405 }, { "epoch": 133.9047619047619, "grad_norm": 2.632678508758545, "learning_rate": 2.633058470764618e-07, "loss": 1.1745, "step": 1406 }, { "epoch": 134.0, "grad_norm": 4.977126598358154, "learning_rate": 2.634932533733134e-07, "loss": 1.1899, "step": 1407 }, { "epoch": 134.0, "eval_accuracy": 0.7115024353690521, "eval_f1": 0.7725930301240401, "eval_loss": 0.5824618339538574, "eval_precision": 0.7100977198697068, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.7481289579735175, "eval_runtime": 1.2194, "eval_samples_per_second": 2188.826, "eval_steps_per_second": 4.921, "step": 1407 }, { "epoch": 134.0952380952381, "grad_norm": 3.8728256225585938, "learning_rate": 2.6368065967016495e-07, "loss": 1.1517, "step": 1408 }, { "epoch": 134.1904761904762, "grad_norm": 6.059358596801758, "learning_rate": 2.638680659670165e-07, "loss": 1.1919, "step": 1409 }, { "epoch": 134.28571428571428, "grad_norm": 4.765652656555176, "learning_rate": 2.6405547226386806e-07, "loss": 1.2029, "step": 1410 }, { "epoch": 134.38095238095238, "grad_norm": 4.513894557952881, "learning_rate": 2.6424287856071966e-07, "loss": 1.1677, "step": 1411 }, { "epoch": 134.47619047619048, "grad_norm": 5.389193534851074, "learning_rate": 2.644302848575712e-07, "loss": 1.1616, "step": 1412 }, { "epoch": 134.57142857142858, "grad_norm": 2.583965301513672, "learning_rate": 2.6461769115442277e-07, "loss": 1.1809, "step": 1413 }, { "epoch": 134.66666666666666, "grad_norm": 3.9450693130493164, "learning_rate": 2.648050974512744e-07, "loss": 1.1799, "step": 1414 }, { "epoch": 134.76190476190476, "grad_norm": 6.976175308227539, "learning_rate": 2.6499250374812594e-07, "loss": 1.1621, "step": 1415 }, { "epoch": 134.85714285714286, "grad_norm": 2.458829879760742, "learning_rate": 2.6517991004497754e-07, "loss": 1.181, "step": 1416 }, { "epoch": 134.95238095238096, "grad_norm": 3.2024972438812256, "learning_rate": 2.653673163418291e-07, "loss": 1.1247, "step": 1417 }, { "epoch": 134.95238095238096, "eval_accuracy": 0.710378418883477, "eval_f1": 0.7721780135573239, "eval_loss": 0.5821589827537537, "eval_precision": 0.7084910762574365, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.7485892343120323, "eval_runtime": 1.1491, "eval_samples_per_second": 2322.722, "eval_steps_per_second": 5.222, "step": 1417 }, { "epoch": 135.04761904761904, "grad_norm": 3.9485716819763184, "learning_rate": 2.655547226386807e-07, "loss": 1.1981, "step": 1418 }, { "epoch": 135.14285714285714, "grad_norm": 3.075244665145874, "learning_rate": 2.6574212893553226e-07, "loss": 1.1506, "step": 1419 }, { "epoch": 135.23809523809524, "grad_norm": 3.1542418003082275, "learning_rate": 2.659295352323838e-07, "loss": 1.2158, "step": 1420 }, { "epoch": 135.33333333333334, "grad_norm": 2.9404492378234863, "learning_rate": 2.661169415292354e-07, "loss": 1.1874, "step": 1421 }, { "epoch": 135.42857142857142, "grad_norm": 2.844932794570923, "learning_rate": 2.66304347826087e-07, "loss": 1.1846, "step": 1422 }, { "epoch": 135.52380952380952, "grad_norm": 3.8234691619873047, "learning_rate": 2.6649175412293853e-07, "loss": 1.1572, "step": 1423 }, { "epoch": 135.61904761904762, "grad_norm": 2.9241669178009033, "learning_rate": 2.666791604197901e-07, "loss": 1.1255, "step": 1424 }, { "epoch": 135.71428571428572, "grad_norm": 6.175973892211914, "learning_rate": 2.668665667166417e-07, "loss": 1.1565, "step": 1425 }, { "epoch": 135.8095238095238, "grad_norm": 7.453120708465576, "learning_rate": 2.6705397301349325e-07, "loss": 1.1774, "step": 1426 }, { "epoch": 135.9047619047619, "grad_norm": 5.209527969360352, "learning_rate": 2.6724137931034485e-07, "loss": 1.149, "step": 1427 }, { "epoch": 136.0, "grad_norm": 7.697098255157471, "learning_rate": 2.674287856071964e-07, "loss": 1.2117, "step": 1428 }, { "epoch": 136.0, "eval_accuracy": 0.7118771075309105, "eval_f1": 0.7737569873492204, "eval_loss": 0.5818696618080139, "eval_precision": 0.7088948787061995, "eval_recall": 0.8516839378238342, "eval_roc_auc": 0.7490256188831319, "eval_runtime": 1.1367, "eval_samples_per_second": 2348.011, "eval_steps_per_second": 5.278, "step": 1428 }, { "epoch": 136.0952380952381, "grad_norm": 6.136030673980713, "learning_rate": 2.67616191904048e-07, "loss": 1.2038, "step": 1429 }, { "epoch": 136.1904761904762, "grad_norm": 2.5705273151397705, "learning_rate": 2.6780359820089957e-07, "loss": 1.1662, "step": 1430 }, { "epoch": 136.28571428571428, "grad_norm": 5.877147674560547, "learning_rate": 2.679910044977511e-07, "loss": 1.1392, "step": 1431 }, { "epoch": 136.38095238095238, "grad_norm": 2.156723976135254, "learning_rate": 2.6817841079460273e-07, "loss": 1.1652, "step": 1432 }, { "epoch": 136.47619047619048, "grad_norm": 3.0156610012054443, "learning_rate": 2.683658170914543e-07, "loss": 1.1567, "step": 1433 }, { "epoch": 136.57142857142858, "grad_norm": 2.8673226833343506, "learning_rate": 2.6855322338830584e-07, "loss": 1.1952, "step": 1434 }, { "epoch": 136.66666666666666, "grad_norm": 3.6061689853668213, "learning_rate": 2.6874062968515745e-07, "loss": 1.1871, "step": 1435 }, { "epoch": 136.76190476190476, "grad_norm": 3.9926493167877197, "learning_rate": 2.68928035982009e-07, "loss": 1.1449, "step": 1436 }, { "epoch": 136.85714285714286, "grad_norm": 4.736005783081055, "learning_rate": 2.6911544227886056e-07, "loss": 1.2134, "step": 1437 }, { "epoch": 136.95238095238096, "grad_norm": 2.764103651046753, "learning_rate": 2.693028485757121e-07, "loss": 1.1678, "step": 1438 }, { "epoch": 136.95238095238096, "eval_accuracy": 0.7096290745597602, "eval_f1": 0.7698247698247698, "eval_loss": 0.5817188620567322, "eval_precision": 0.7109160724081185, "eval_recall": 0.8393782383419689, "eval_roc_auc": 0.7489231433506045, "eval_runtime": 1.193, "eval_samples_per_second": 2237.207, "eval_steps_per_second": 5.029, "step": 1438 }, { "epoch": 137.04761904761904, "grad_norm": 3.620636224746704, "learning_rate": 2.694902548725637e-07, "loss": 1.1886, "step": 1439 }, { "epoch": 137.14285714285714, "grad_norm": 4.410121440887451, "learning_rate": 2.6967766116941533e-07, "loss": 1.1758, "step": 1440 }, { "epoch": 137.23809523809524, "grad_norm": 2.61210036277771, "learning_rate": 2.698650674662669e-07, "loss": 1.1506, "step": 1441 }, { "epoch": 137.33333333333334, "grad_norm": 3.4050190448760986, "learning_rate": 2.700524737631185e-07, "loss": 1.1437, "step": 1442 }, { "epoch": 137.42857142857142, "grad_norm": 2.654712438583374, "learning_rate": 2.7023988005997004e-07, "loss": 1.1457, "step": 1443 }, { "epoch": 137.52380952380952, "grad_norm": 3.8472094535827637, "learning_rate": 2.704272863568216e-07, "loss": 1.1954, "step": 1444 }, { "epoch": 137.61904761904762, "grad_norm": 3.686859130859375, "learning_rate": 2.7061469265367315e-07, "loss": 1.1606, "step": 1445 }, { "epoch": 137.71428571428572, "grad_norm": 2.689823627471924, "learning_rate": 2.7080209895052476e-07, "loss": 1.1483, "step": 1446 }, { "epoch": 137.8095238095238, "grad_norm": 3.9015514850616455, "learning_rate": 2.709895052473763e-07, "loss": 1.1997, "step": 1447 }, { "epoch": 137.9047619047619, "grad_norm": 3.536795139312744, "learning_rate": 2.7117691154422787e-07, "loss": 1.2187, "step": 1448 }, { "epoch": 138.0, "grad_norm": 3.393120288848877, "learning_rate": 2.713643178410795e-07, "loss": 1.1405, "step": 1449 }, { "epoch": 138.0, "eval_accuracy": 0.7100037467216186, "eval_f1": 0.7718160377358491, "eval_loss": 0.5816904902458191, "eval_precision": 0.7083333333333334, "eval_recall": 0.8477979274611399, "eval_roc_auc": 0.7490673575129533, "eval_runtime": 1.2137, "eval_samples_per_second": 2199.026, "eval_steps_per_second": 4.943, "step": 1449 }, { "epoch": 138.0952380952381, "grad_norm": 6.070676803588867, "learning_rate": 2.7155172413793103e-07, "loss": 1.188, "step": 1450 }, { "epoch": 138.1904761904762, "grad_norm": 3.7084219455718994, "learning_rate": 2.7173913043478264e-07, "loss": 1.1275, "step": 1451 }, { "epoch": 138.28571428571428, "grad_norm": 3.660405397415161, "learning_rate": 2.719265367316342e-07, "loss": 1.1814, "step": 1452 }, { "epoch": 138.38095238095238, "grad_norm": 5.310576915740967, "learning_rate": 2.721139430284858e-07, "loss": 1.2151, "step": 1453 }, { "epoch": 138.47619047619048, "grad_norm": 3.6512293815612793, "learning_rate": 2.7230134932533735e-07, "loss": 1.1324, "step": 1454 }, { "epoch": 138.57142857142858, "grad_norm": 5.349870204925537, "learning_rate": 2.724887556221889e-07, "loss": 1.1598, "step": 1455 }, { "epoch": 138.66666666666666, "grad_norm": 3.535796880722046, "learning_rate": 2.726761619190405e-07, "loss": 1.2146, "step": 1456 }, { "epoch": 138.76190476190476, "grad_norm": 3.3005921840667725, "learning_rate": 2.7286356821589207e-07, "loss": 1.1762, "step": 1457 }, { "epoch": 138.85714285714286, "grad_norm": 5.350109577178955, "learning_rate": 2.730509745127436e-07, "loss": 1.1281, "step": 1458 }, { "epoch": 138.95238095238096, "grad_norm": 2.827852487564087, "learning_rate": 2.732383808095952e-07, "loss": 1.1908, "step": 1459 }, { "epoch": 138.95238095238096, "eval_accuracy": 0.7107530910453354, "eval_f1": 0.7720023626698169, "eval_loss": 0.5818772315979004, "eval_precision": 0.7095548317046688, "eval_recall": 0.8465025906735751, "eval_roc_auc": 0.7488477259643063, "eval_runtime": 1.3464, "eval_samples_per_second": 1982.292, "eval_steps_per_second": 4.456, "step": 1459 }, { "epoch": 139.04761904761904, "grad_norm": 3.779970645904541, "learning_rate": 2.734257871064468e-07, "loss": 1.1934, "step": 1460 }, { "epoch": 139.14285714285714, "grad_norm": 4.349908828735352, "learning_rate": 2.7361319340329834e-07, "loss": 1.1311, "step": 1461 }, { "epoch": 139.23809523809524, "grad_norm": 6.623378753662109, "learning_rate": 2.7380059970014995e-07, "loss": 1.1791, "step": 1462 }, { "epoch": 139.33333333333334, "grad_norm": 5.597513198852539, "learning_rate": 2.739880059970015e-07, "loss": 1.1463, "step": 1463 }, { "epoch": 139.42857142857142, "grad_norm": 2.972928047180176, "learning_rate": 2.741754122938531e-07, "loss": 1.1486, "step": 1464 }, { "epoch": 139.52380952380952, "grad_norm": 5.351190567016602, "learning_rate": 2.7436281859070467e-07, "loss": 1.2003, "step": 1465 }, { "epoch": 139.61904761904762, "grad_norm": 2.219123363494873, "learning_rate": 2.745502248875562e-07, "loss": 1.1734, "step": 1466 }, { "epoch": 139.71428571428572, "grad_norm": 2.1666510105133057, "learning_rate": 2.7473763118440783e-07, "loss": 1.1937, "step": 1467 }, { "epoch": 139.8095238095238, "grad_norm": 3.52593994140625, "learning_rate": 2.749250374812594e-07, "loss": 1.1555, "step": 1468 }, { "epoch": 139.9047619047619, "grad_norm": 3.0705714225769043, "learning_rate": 2.7511244377811094e-07, "loss": 1.1667, "step": 1469 }, { "epoch": 140.0, "grad_norm": 2.8801708221435547, "learning_rate": 2.7529985007496254e-07, "loss": 1.1483, "step": 1470 }, { "epoch": 140.0, "eval_accuracy": 0.7111277632071937, "eval_f1": 0.7727674624226348, "eval_loss": 0.5820427536964417, "eval_precision": 0.7090319091400757, "eval_recall": 0.8490932642487047, "eval_roc_auc": 0.7490020149683363, "eval_runtime": 1.1074, "eval_samples_per_second": 2410.145, "eval_steps_per_second": 5.418, "step": 1470 }, { "epoch": 140.0952380952381, "grad_norm": 2.7389588356018066, "learning_rate": 2.754872563718141e-07, "loss": 1.2047, "step": 1471 }, { "epoch": 140.1904761904762, "grad_norm": 2.8530631065368652, "learning_rate": 2.7567466266866565e-07, "loss": 1.19, "step": 1472 }, { "epoch": 140.28571428571428, "grad_norm": 2.755894899368286, "learning_rate": 2.758620689655172e-07, "loss": 1.1528, "step": 1473 }, { "epoch": 140.38095238095238, "grad_norm": 4.2909464836120605, "learning_rate": 2.760494752623688e-07, "loss": 1.1439, "step": 1474 }, { "epoch": 140.47619047619048, "grad_norm": 4.200039863586426, "learning_rate": 2.762368815592204e-07, "loss": 1.1652, "step": 1475 }, { "epoch": 140.57142857142858, "grad_norm": 6.071081638336182, "learning_rate": 2.76424287856072e-07, "loss": 1.1684, "step": 1476 }, { "epoch": 140.66666666666666, "grad_norm": 3.9966976642608643, "learning_rate": 2.766116941529236e-07, "loss": 1.1467, "step": 1477 }, { "epoch": 140.76190476190476, "grad_norm": 2.3274450302124023, "learning_rate": 2.7679910044977514e-07, "loss": 1.1746, "step": 1478 }, { "epoch": 140.85714285714286, "grad_norm": 5.671662330627441, "learning_rate": 2.769865067466267e-07, "loss": 1.153, "step": 1479 }, { "epoch": 140.95238095238096, "grad_norm": 4.11067533493042, "learning_rate": 2.7717391304347825e-07, "loss": 1.1295, "step": 1480 }, { "epoch": 140.95238095238096, "eval_accuracy": 0.7118771075309105, "eval_f1": 0.7733569112879458, "eval_loss": 0.581862211227417, "eval_precision": 0.709572742022715, "eval_recall": 0.8497409326424871, "eval_roc_auc": 0.7493468624064479, "eval_runtime": 1.1773, "eval_samples_per_second": 2267.004, "eval_steps_per_second": 5.096, "step": 1480 }, { "epoch": 141.04761904761904, "grad_norm": 3.680772542953491, "learning_rate": 2.7736131934032985e-07, "loss": 1.1456, "step": 1481 }, { "epoch": 141.14285714285714, "grad_norm": 3.461637258529663, "learning_rate": 2.775487256371814e-07, "loss": 1.1318, "step": 1482 }, { "epoch": 141.23809523809524, "grad_norm": 7.937395095825195, "learning_rate": 2.7773613193403296e-07, "loss": 1.1882, "step": 1483 }, { "epoch": 141.33333333333334, "grad_norm": 2.5857739448547363, "learning_rate": 2.7792353823088457e-07, "loss": 1.155, "step": 1484 }, { "epoch": 141.42857142857142, "grad_norm": 4.173889636993408, "learning_rate": 2.781109445277361e-07, "loss": 1.144, "step": 1485 }, { "epoch": 141.52380952380952, "grad_norm": 2.58803653717041, "learning_rate": 2.7829835082458773e-07, "loss": 1.1905, "step": 1486 }, { "epoch": 141.61904761904762, "grad_norm": 7.0563530921936035, "learning_rate": 2.7848575712143934e-07, "loss": 1.181, "step": 1487 }, { "epoch": 141.71428571428572, "grad_norm": 4.463258743286133, "learning_rate": 2.786731634182909e-07, "loss": 1.2167, "step": 1488 }, { "epoch": 141.8095238095238, "grad_norm": 3.7728588581085205, "learning_rate": 2.7886056971514245e-07, "loss": 1.1736, "step": 1489 }, { "epoch": 141.9047619047619, "grad_norm": 3.2672605514526367, "learning_rate": 2.79047976011994e-07, "loss": 1.1645, "step": 1490 }, { "epoch": 142.0, "grad_norm": 4.211281776428223, "learning_rate": 2.792353823088456e-07, "loss": 1.1652, "step": 1491 }, { "epoch": 142.0, "eval_accuracy": 0.7122517796927689, "eval_f1": 0.7741176470588236, "eval_loss": 0.5816420316696167, "eval_precision": 0.709051724137931, "eval_recall": 0.8523316062176166, "eval_roc_auc": 0.7496450777202072, "eval_runtime": 1.2197, "eval_samples_per_second": 2188.197, "eval_steps_per_second": 4.919, "step": 1491 }, { "epoch": 142.0952380952381, "grad_norm": 5.845593452453613, "learning_rate": 2.7942278860569717e-07, "loss": 1.1628, "step": 1492 }, { "epoch": 142.1904761904762, "grad_norm": 4.2243170738220215, "learning_rate": 2.796101949025487e-07, "loss": 1.1997, "step": 1493 }, { "epoch": 142.28571428571428, "grad_norm": 2.975741147994995, "learning_rate": 2.7979760119940033e-07, "loss": 1.1822, "step": 1494 }, { "epoch": 142.38095238095238, "grad_norm": 3.2676541805267334, "learning_rate": 2.799850074962519e-07, "loss": 1.1531, "step": 1495 }, { "epoch": 142.47619047619048, "grad_norm": 3.6477251052856445, "learning_rate": 2.8017241379310344e-07, "loss": 1.1689, "step": 1496 }, { "epoch": 142.57142857142858, "grad_norm": 3.187495231628418, "learning_rate": 2.8035982008995504e-07, "loss": 1.1464, "step": 1497 }, { "epoch": 142.66666666666666, "grad_norm": 8.654559135437012, "learning_rate": 2.805472263868066e-07, "loss": 1.2071, "step": 1498 }, { "epoch": 142.76190476190476, "grad_norm": 3.998518228530884, "learning_rate": 2.807346326836582e-07, "loss": 1.1739, "step": 1499 }, { "epoch": 142.85714285714286, "grad_norm": 2.40251088142395, "learning_rate": 2.8092203898050976e-07, "loss": 1.1866, "step": 1500 }, { "epoch": 142.95238095238096, "grad_norm": 6.565639019012451, "learning_rate": 2.8110944527736137e-07, "loss": 1.1773, "step": 1501 }, { "epoch": 142.95238095238096, "eval_accuracy": 0.7118771075309105, "eval_f1": 0.7734904270986745, "eval_loss": 0.5813953280448914, "eval_precision": 0.7093462992976769, "eval_recall": 0.8503886010362695, "eval_roc_auc": 0.7498618307426598, "eval_runtime": 1.3528, "eval_samples_per_second": 1972.9, "eval_steps_per_second": 4.435, "step": 1501 }, { "epoch": 143.04761904761904, "grad_norm": 2.458740472793579, "learning_rate": 2.812968515742129e-07, "loss": 1.1938, "step": 1502 }, { "epoch": 143.14285714285714, "grad_norm": 7.570618629455566, "learning_rate": 2.814842578710645e-07, "loss": 1.2069, "step": 1503 }, { "epoch": 143.23809523809524, "grad_norm": 6.107430934906006, "learning_rate": 2.8167166416791603e-07, "loss": 1.176, "step": 1504 }, { "epoch": 143.33333333333334, "grad_norm": 4.109244346618652, "learning_rate": 2.8185907046476764e-07, "loss": 1.1538, "step": 1505 }, { "epoch": 143.42857142857142, "grad_norm": 3.977546453475952, "learning_rate": 2.820464767616192e-07, "loss": 1.1966, "step": 1506 }, { "epoch": 143.52380952380952, "grad_norm": 5.082589626312256, "learning_rate": 2.8223388305847075e-07, "loss": 1.117, "step": 1507 }, { "epoch": 143.61904761904762, "grad_norm": 6.457265377044678, "learning_rate": 2.8242128935532235e-07, "loss": 1.146, "step": 1508 }, { "epoch": 143.71428571428572, "grad_norm": 4.3683648109436035, "learning_rate": 2.826086956521739e-07, "loss": 1.1888, "step": 1509 }, { "epoch": 143.8095238095238, "grad_norm": 2.4352548122406006, "learning_rate": 2.827961019490255e-07, "loss": 1.1526, "step": 1510 }, { "epoch": 143.9047619047619, "grad_norm": 8.339164733886719, "learning_rate": 2.8298350824587707e-07, "loss": 1.1664, "step": 1511 }, { "epoch": 144.0, "grad_norm": 3.4991281032562256, "learning_rate": 2.831709145427287e-07, "loss": 1.1515, "step": 1512 }, { "epoch": 144.0, "eval_accuracy": 0.7096290745597602, "eval_f1": 0.7714538484222944, "eval_loss": 0.5811057686805725, "eval_precision": 0.7081754195993503, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.750195739781232, "eval_runtime": 1.1549, "eval_samples_per_second": 2311.007, "eval_steps_per_second": 5.195, "step": 1512 }, { "epoch": 144.0952380952381, "grad_norm": 9.736416816711426, "learning_rate": 2.8335832083958023e-07, "loss": 1.2106, "step": 1513 }, { "epoch": 144.1904761904762, "grad_norm": 3.4234447479248047, "learning_rate": 2.835457271364318e-07, "loss": 1.164, "step": 1514 }, { "epoch": 144.28571428571428, "grad_norm": 7.636989593505859, "learning_rate": 2.837331334332834e-07, "loss": 1.2098, "step": 1515 }, { "epoch": 144.38095238095238, "grad_norm": 3.0599682331085205, "learning_rate": 2.8392053973013495e-07, "loss": 1.1746, "step": 1516 }, { "epoch": 144.47619047619048, "grad_norm": 3.2300186157226562, "learning_rate": 2.841079460269865e-07, "loss": 1.1701, "step": 1517 }, { "epoch": 144.57142857142858, "grad_norm": 2.491485118865967, "learning_rate": 2.8429535232383806e-07, "loss": 1.1558, "step": 1518 }, { "epoch": 144.66666666666666, "grad_norm": 5.057804584503174, "learning_rate": 2.8448275862068967e-07, "loss": 1.1231, "step": 1519 }, { "epoch": 144.76190476190476, "grad_norm": 3.5403995513916016, "learning_rate": 2.846701649175412e-07, "loss": 1.1828, "step": 1520 }, { "epoch": 144.85714285714286, "grad_norm": 2.31864333152771, "learning_rate": 2.8485757121439283e-07, "loss": 1.1614, "step": 1521 }, { "epoch": 144.95238095238096, "grad_norm": 6.159409999847412, "learning_rate": 2.8504497751124444e-07, "loss": 1.1686, "step": 1522 }, { "epoch": 144.95238095238096, "eval_accuracy": 0.7107530910453354, "eval_f1": 0.7720023626698169, "eval_loss": 0.5807437896728516, "eval_precision": 0.7095548317046688, "eval_recall": 0.8465025906735751, "eval_roc_auc": 0.7504884858952218, "eval_runtime": 1.1783, "eval_samples_per_second": 2265.14, "eval_steps_per_second": 5.092, "step": 1522 }, { "epoch": 145.04761904761904, "grad_norm": 6.86565637588501, "learning_rate": 2.85232383808096e-07, "loss": 1.1791, "step": 1523 }, { "epoch": 145.14285714285714, "grad_norm": 3.2620322704315186, "learning_rate": 2.8541979010494754e-07, "loss": 1.1668, "step": 1524 }, { "epoch": 145.23809523809524, "grad_norm": 3.2191574573516846, "learning_rate": 2.856071964017991e-07, "loss": 1.1789, "step": 1525 }, { "epoch": 145.33333333333334, "grad_norm": 2.270550012588501, "learning_rate": 2.857946026986507e-07, "loss": 1.2023, "step": 1526 }, { "epoch": 145.42857142857142, "grad_norm": 4.476311206817627, "learning_rate": 2.8598200899550226e-07, "loss": 1.1375, "step": 1527 }, { "epoch": 145.52380952380952, "grad_norm": 3.982914447784424, "learning_rate": 2.861694152923538e-07, "loss": 1.1816, "step": 1528 }, { "epoch": 145.61904761904762, "grad_norm": 4.971301555633545, "learning_rate": 2.863568215892054e-07, "loss": 1.1739, "step": 1529 }, { "epoch": 145.71428571428572, "grad_norm": 4.544134616851807, "learning_rate": 2.86544227886057e-07, "loss": 1.1737, "step": 1530 }, { "epoch": 145.8095238095238, "grad_norm": 2.5624587535858154, "learning_rate": 2.8673163418290853e-07, "loss": 1.1578, "step": 1531 }, { "epoch": 145.9047619047619, "grad_norm": 4.983476638793945, "learning_rate": 2.8691904047976014e-07, "loss": 1.1557, "step": 1532 }, { "epoch": 146.0, "grad_norm": 4.180739402770996, "learning_rate": 2.8710644677661175e-07, "loss": 1.1512, "step": 1533 }, { "epoch": 146.0, "eval_accuracy": 0.7137504683402023, "eval_f1": 0.7744982290436836, "eval_loss": 0.5804224610328674, "eval_precision": 0.7114967462039046, "eval_recall": 0.8497409326424871, "eval_roc_auc": 0.7510523891767416, "eval_runtime": 1.1812, "eval_samples_per_second": 2259.473, "eval_steps_per_second": 5.079, "step": 1533 }, { "epoch": 146.0952380952381, "grad_norm": 3.5563807487487793, "learning_rate": 2.872938530734633e-07, "loss": 1.1534, "step": 1534 }, { "epoch": 146.1904761904762, "grad_norm": 2.703789710998535, "learning_rate": 2.8748125937031486e-07, "loss": 1.1653, "step": 1535 }, { "epoch": 146.28571428571428, "grad_norm": 7.2478928565979, "learning_rate": 2.8766866566716646e-07, "loss": 1.172, "step": 1536 }, { "epoch": 146.38095238095238, "grad_norm": 6.74290132522583, "learning_rate": 2.87856071964018e-07, "loss": 1.1539, "step": 1537 }, { "epoch": 146.47619047619048, "grad_norm": 4.319555759429932, "learning_rate": 2.8804347826086957e-07, "loss": 1.1777, "step": 1538 }, { "epoch": 146.57142857142858, "grad_norm": 3.823532819747925, "learning_rate": 2.882308845577211e-07, "loss": 1.1957, "step": 1539 }, { "epoch": 146.66666666666666, "grad_norm": 5.92212438583374, "learning_rate": 2.8841829085457273e-07, "loss": 1.2175, "step": 1540 }, { "epoch": 146.76190476190476, "grad_norm": 2.487151622772217, "learning_rate": 2.886056971514243e-07, "loss": 1.1577, "step": 1541 }, { "epoch": 146.85714285714286, "grad_norm": 4.248367786407471, "learning_rate": 2.8879310344827584e-07, "loss": 1.153, "step": 1542 }, { "epoch": 146.95238095238096, "grad_norm": 5.908459663391113, "learning_rate": 2.8898050974512745e-07, "loss": 1.1667, "step": 1543 }, { "epoch": 146.95238095238096, "eval_accuracy": 0.7118771075309105, "eval_f1": 0.7721481481481481, "eval_loss": 0.5804580450057983, "eval_precision": 0.7116329874385582, "eval_recall": 0.8439119170984456, "eval_roc_auc": 0.7509289004029935, "eval_runtime": 1.3128, "eval_samples_per_second": 2033.01, "eval_steps_per_second": 4.57, "step": 1543 }, { "epoch": 147.04761904761904, "grad_norm": 3.983474016189575, "learning_rate": 2.89167916041979e-07, "loss": 1.1745, "step": 1544 }, { "epoch": 147.14285714285714, "grad_norm": 5.25045108795166, "learning_rate": 2.893553223388306e-07, "loss": 1.1795, "step": 1545 }, { "epoch": 147.23809523809524, "grad_norm": 6.433871269226074, "learning_rate": 2.8954272863568217e-07, "loss": 1.1582, "step": 1546 }, { "epoch": 147.33333333333334, "grad_norm": 2.469536542892456, "learning_rate": 2.897301349325338e-07, "loss": 1.1536, "step": 1547 }, { "epoch": 147.42857142857142, "grad_norm": 7.244877338409424, "learning_rate": 2.8991754122938533e-07, "loss": 1.1583, "step": 1548 }, { "epoch": 147.52380952380952, "grad_norm": 5.108227729797363, "learning_rate": 2.901049475262369e-07, "loss": 1.1873, "step": 1549 }, { "epoch": 147.61904761904762, "grad_norm": 4.663844585418701, "learning_rate": 2.902923538230885e-07, "loss": 1.1375, "step": 1550 }, { "epoch": 147.71428571428572, "grad_norm": 9.764266014099121, "learning_rate": 2.9047976011994004e-07, "loss": 1.1745, "step": 1551 }, { "epoch": 147.8095238095238, "grad_norm": 2.4404590129852295, "learning_rate": 2.906671664167916e-07, "loss": 1.1535, "step": 1552 }, { "epoch": 147.9047619047619, "grad_norm": 4.609780788421631, "learning_rate": 2.9085457271364315e-07, "loss": 1.1226, "step": 1553 }, { "epoch": 148.0, "grad_norm": 7.716403961181641, "learning_rate": 2.9104197901049476e-07, "loss": 1.275, "step": 1554 }, { "epoch": 148.0, "eval_accuracy": 0.7126264518546273, "eval_f1": 0.7746106376726418, "eval_loss": 0.5806289315223694, "eval_precision": 0.7089833243679398, "eval_recall": 0.8536269430051814, "eval_roc_auc": 0.7512006332757629, "eval_runtime": 1.139, "eval_samples_per_second": 2343.255, "eval_steps_per_second": 5.268, "step": 1554 }, { "epoch": 148.0952380952381, "grad_norm": 3.309723138809204, "learning_rate": 2.912293853073463e-07, "loss": 1.135, "step": 1555 }, { "epoch": 148.1904761904762, "grad_norm": 4.461905479431152, "learning_rate": 2.914167916041979e-07, "loss": 1.1913, "step": 1556 }, { "epoch": 148.28571428571428, "grad_norm": 4.881902694702148, "learning_rate": 2.9160419790104953e-07, "loss": 1.2228, "step": 1557 }, { "epoch": 148.38095238095238, "grad_norm": 4.563226699829102, "learning_rate": 2.917916041979011e-07, "loss": 1.1685, "step": 1558 }, { "epoch": 148.47619047619048, "grad_norm": 2.4159016609191895, "learning_rate": 2.9197901049475264e-07, "loss": 1.1282, "step": 1559 }, { "epoch": 148.57142857142858, "grad_norm": 2.5408880710601807, "learning_rate": 2.921664167916042e-07, "loss": 1.1528, "step": 1560 }, { "epoch": 148.66666666666666, "grad_norm": 2.2992091178894043, "learning_rate": 2.923538230884558e-07, "loss": 1.1591, "step": 1561 }, { "epoch": 148.76190476190476, "grad_norm": 3.7680423259735107, "learning_rate": 2.9254122938530736e-07, "loss": 1.1838, "step": 1562 }, { "epoch": 148.85714285714286, "grad_norm": 3.443899631500244, "learning_rate": 2.927286356821589e-07, "loss": 1.1458, "step": 1563 }, { "epoch": 148.95238095238096, "grad_norm": 3.9514212608337402, "learning_rate": 2.929160419790105e-07, "loss": 1.1756, "step": 1564 }, { "epoch": 148.95238095238096, "eval_accuracy": 0.7107530910453354, "eval_f1": 0.7718676122931442, "eval_loss": 0.5807510018348694, "eval_precision": 0.7097826086956521, "eval_recall": 0.8458549222797928, "eval_roc_auc": 0.7508871617731722, "eval_runtime": 1.1401, "eval_samples_per_second": 2341.037, "eval_steps_per_second": 5.263, "step": 1564 }, { "epoch": 149.04761904761904, "grad_norm": 3.019251823425293, "learning_rate": 2.9310344827586207e-07, "loss": 1.1705, "step": 1565 }, { "epoch": 149.14285714285714, "grad_norm": 6.118655204772949, "learning_rate": 2.9329085457271363e-07, "loss": 1.1902, "step": 1566 }, { "epoch": 149.23809523809524, "grad_norm": 5.084479331970215, "learning_rate": 2.9347826086956523e-07, "loss": 1.1595, "step": 1567 }, { "epoch": 149.33333333333334, "grad_norm": 3.2713265419006348, "learning_rate": 2.9366566716641684e-07, "loss": 1.1787, "step": 1568 }, { "epoch": 149.42857142857142, "grad_norm": 4.392632961273193, "learning_rate": 2.938530734632684e-07, "loss": 1.1612, "step": 1569 }, { "epoch": 149.52380952380952, "grad_norm": 5.749429225921631, "learning_rate": 2.9404047976011995e-07, "loss": 1.1431, "step": 1570 }, { "epoch": 149.61904761904762, "grad_norm": 3.3047690391540527, "learning_rate": 2.9422788605697156e-07, "loss": 1.1407, "step": 1571 }, { "epoch": 149.71428571428572, "grad_norm": 4.179772853851318, "learning_rate": 2.944152923538231e-07, "loss": 1.1356, "step": 1572 }, { "epoch": 149.8095238095238, "grad_norm": 6.8627448081970215, "learning_rate": 2.9460269865067467e-07, "loss": 1.2415, "step": 1573 }, { "epoch": 149.9047619047619, "grad_norm": 2.2968685626983643, "learning_rate": 2.947901049475262e-07, "loss": 1.1914, "step": 1574 }, { "epoch": 150.0, "grad_norm": 6.251117706298828, "learning_rate": 2.9497751124437783e-07, "loss": 1.1623, "step": 1575 }, { "epoch": 150.0, "eval_accuracy": 0.7133757961783439, "eval_f1": 0.7761194029850746, "eval_loss": 0.580627977848053, "eval_precision": 0.7079551521623064, "eval_recall": 0.8588082901554405, "eval_roc_auc": 0.7516729994242948, "eval_runtime": 1.1794, "eval_samples_per_second": 2262.925, "eval_steps_per_second": 5.087, "step": 1575 }, { "epoch": 150.0952380952381, "grad_norm": 4.730234146118164, "learning_rate": 2.951649175412294e-07, "loss": 1.1716, "step": 1576 }, { "epoch": 150.1904761904762, "grad_norm": 3.1547439098358154, "learning_rate": 2.9535232383808094e-07, "loss": 1.1722, "step": 1577 }, { "epoch": 150.28571428571428, "grad_norm": 5.73432731628418, "learning_rate": 2.9553973013493255e-07, "loss": 1.1863, "step": 1578 }, { "epoch": 150.38095238095238, "grad_norm": 3.081118583679199, "learning_rate": 2.957271364317841e-07, "loss": 1.1773, "step": 1579 }, { "epoch": 150.47619047619048, "grad_norm": 6.794926643371582, "learning_rate": 2.959145427286357e-07, "loss": 1.1139, "step": 1580 }, { "epoch": 150.57142857142858, "grad_norm": 2.378877639770508, "learning_rate": 2.9610194902548726e-07, "loss": 1.1698, "step": 1581 }, { "epoch": 150.66666666666666, "grad_norm": 6.612598419189453, "learning_rate": 2.9628935532233887e-07, "loss": 1.2078, "step": 1582 }, { "epoch": 150.76190476190476, "grad_norm": 3.649820327758789, "learning_rate": 2.964767616191904e-07, "loss": 1.1628, "step": 1583 }, { "epoch": 150.85714285714286, "grad_norm": 9.336405754089355, "learning_rate": 2.96664167916042e-07, "loss": 1.1628, "step": 1584 }, { "epoch": 150.95238095238096, "grad_norm": 2.9944679737091064, "learning_rate": 2.968515742128936e-07, "loss": 1.1524, "step": 1585 }, { "epoch": 150.95238095238096, "eval_accuracy": 0.7122517796927689, "eval_f1": 0.7735849056603774, "eval_loss": 0.5800833106040955, "eval_precision": 0.70995670995671, "eval_recall": 0.8497409326424871, "eval_roc_auc": 0.75201065054692, "eval_runtime": 1.193, "eval_samples_per_second": 2237.236, "eval_steps_per_second": 5.029, "step": 1585 }, { "epoch": 151.04761904761904, "grad_norm": 3.0176329612731934, "learning_rate": 2.9703898050974514e-07, "loss": 1.1367, "step": 1586 }, { "epoch": 151.14285714285714, "grad_norm": 3.8548378944396973, "learning_rate": 2.972263868065967e-07, "loss": 1.1625, "step": 1587 }, { "epoch": 151.23809523809524, "grad_norm": 3.6817703247070312, "learning_rate": 2.9741379310344825e-07, "loss": 1.1805, "step": 1588 }, { "epoch": 151.33333333333334, "grad_norm": 2.0584304332733154, "learning_rate": 2.9760119940029986e-07, "loss": 1.165, "step": 1589 }, { "epoch": 151.42857142857142, "grad_norm": 3.7080583572387695, "learning_rate": 2.977886056971514e-07, "loss": 1.1501, "step": 1590 }, { "epoch": 151.52380952380952, "grad_norm": 4.056233882904053, "learning_rate": 2.97976011994003e-07, "loss": 1.1399, "step": 1591 }, { "epoch": 151.61904761904762, "grad_norm": 3.2380008697509766, "learning_rate": 2.981634182908546e-07, "loss": 1.1614, "step": 1592 }, { "epoch": 151.71428571428572, "grad_norm": 5.078243732452393, "learning_rate": 2.983508245877062e-07, "loss": 1.1707, "step": 1593 }, { "epoch": 151.8095238095238, "grad_norm": 5.057522296905518, "learning_rate": 2.9853823088455773e-07, "loss": 1.1609, "step": 1594 }, { "epoch": 151.9047619047619, "grad_norm": 6.1785078048706055, "learning_rate": 2.9872563718140934e-07, "loss": 1.1885, "step": 1595 }, { "epoch": 152.0, "grad_norm": 4.74155330657959, "learning_rate": 2.989130434782609e-07, "loss": 1.1702, "step": 1596 }, { "epoch": 152.0, "eval_accuracy": 0.7126264518546273, "eval_f1": 0.7739463601532567, "eval_loss": 0.5797269344329834, "eval_precision": 0.7101135749053542, "eval_recall": 0.8503886010362695, "eval_roc_auc": 0.7524715025906736, "eval_runtime": 1.1764, "eval_samples_per_second": 2268.706, "eval_steps_per_second": 5.1, "step": 1596 }, { "epoch": 152.0952380952381, "grad_norm": 3.2207934856414795, "learning_rate": 2.9910044977511245e-07, "loss": 1.1762, "step": 1597 }, { "epoch": 152.1904761904762, "grad_norm": 2.9277608394622803, "learning_rate": 2.99287856071964e-07, "loss": 1.1555, "step": 1598 }, { "epoch": 152.28571428571428, "grad_norm": 6.294363021850586, "learning_rate": 2.994752623688156e-07, "loss": 1.2192, "step": 1599 }, { "epoch": 152.38095238095238, "grad_norm": 2.1523854732513428, "learning_rate": 2.9966266866566717e-07, "loss": 1.1764, "step": 1600 }, { "epoch": 152.47619047619048, "grad_norm": 3.6600348949432373, "learning_rate": 2.998500749625187e-07, "loss": 1.166, "step": 1601 }, { "epoch": 152.57142857142858, "grad_norm": 4.593667507171631, "learning_rate": 3.0003748125937033e-07, "loss": 1.2047, "step": 1602 }, { "epoch": 152.66666666666666, "grad_norm": 5.261059761047363, "learning_rate": 3.0022488755622194e-07, "loss": 1.1187, "step": 1603 }, { "epoch": 152.76190476190476, "grad_norm": 4.2113213539123535, "learning_rate": 3.004122938530735e-07, "loss": 1.1643, "step": 1604 }, { "epoch": 152.85714285714286, "grad_norm": 4.283077239990234, "learning_rate": 3.0059970014992505e-07, "loss": 1.1297, "step": 1605 }, { "epoch": 152.95238095238096, "grad_norm": 4.8893961906433105, "learning_rate": 3.0078710644677665e-07, "loss": 1.1703, "step": 1606 }, { "epoch": 152.95238095238096, "eval_accuracy": 0.7126264518546273, "eval_f1": 0.7738130345030965, "eval_loss": 0.5795263648033142, "eval_precision": 0.7103410936654033, "eval_recall": 0.8497409326424871, "eval_roc_auc": 0.7529726540011514, "eval_runtime": 1.1454, "eval_samples_per_second": 2330.141, "eval_steps_per_second": 5.238, "step": 1606 }, { "epoch": 153.04761904761904, "grad_norm": 4.8148722648620605, "learning_rate": 3.009745127436282e-07, "loss": 1.1685, "step": 1607 }, { "epoch": 153.14285714285714, "grad_norm": 3.824242115020752, "learning_rate": 3.0116191904047976e-07, "loss": 1.155, "step": 1608 }, { "epoch": 153.23809523809524, "grad_norm": 3.3369882106781006, "learning_rate": 3.0134932533733137e-07, "loss": 1.1803, "step": 1609 }, { "epoch": 153.33333333333334, "grad_norm": 5.317166805267334, "learning_rate": 3.015367316341829e-07, "loss": 1.1508, "step": 1610 }, { "epoch": 153.42857142857142, "grad_norm": 2.601602077484131, "learning_rate": 3.017241379310345e-07, "loss": 1.1432, "step": 1611 }, { "epoch": 153.52380952380952, "grad_norm": 4.989608287811279, "learning_rate": 3.0191154422788603e-07, "loss": 1.165, "step": 1612 }, { "epoch": 153.61904761904762, "grad_norm": 3.7194533348083496, "learning_rate": 3.0209895052473764e-07, "loss": 1.1652, "step": 1613 }, { "epoch": 153.71428571428572, "grad_norm": 2.8490591049194336, "learning_rate": 3.0228635682158925e-07, "loss": 1.1779, "step": 1614 }, { "epoch": 153.8095238095238, "grad_norm": 3.7863845825195312, "learning_rate": 3.024737631184408e-07, "loss": 1.1378, "step": 1615 }, { "epoch": 153.9047619047619, "grad_norm": 3.58205246925354, "learning_rate": 3.026611694152924e-07, "loss": 1.1786, "step": 1616 }, { "epoch": 154.0, "grad_norm": 2.69224214553833, "learning_rate": 3.0284857571214396e-07, "loss": 1.1933, "step": 1617 }, { "epoch": 154.0, "eval_accuracy": 0.7133757961783439, "eval_f1": 0.7741364038972542, "eval_loss": 0.5793300867080688, "eval_precision": 0.711340206185567, "eval_recall": 0.8490932642487047, "eval_roc_auc": 0.7533151986183074, "eval_runtime": 1.2676, "eval_samples_per_second": 2105.495, "eval_steps_per_second": 4.733, "step": 1617 }, { "epoch": 154.0952380952381, "grad_norm": 8.547765731811523, "learning_rate": 3.030359820089955e-07, "loss": 1.1543, "step": 1618 }, { "epoch": 154.1904761904762, "grad_norm": 4.940852642059326, "learning_rate": 3.0322338830584707e-07, "loss": 1.1645, "step": 1619 }, { "epoch": 154.28571428571428, "grad_norm": 2.6271817684173584, "learning_rate": 3.034107946026987e-07, "loss": 1.1482, "step": 1620 }, { "epoch": 154.38095238095238, "grad_norm": 3.078421115875244, "learning_rate": 3.0359820089955023e-07, "loss": 1.1222, "step": 1621 }, { "epoch": 154.47619047619048, "grad_norm": 6.029754638671875, "learning_rate": 3.037856071964018e-07, "loss": 1.2054, "step": 1622 }, { "epoch": 154.57142857142858, "grad_norm": 4.2551188468933105, "learning_rate": 3.039730134932534e-07, "loss": 1.1916, "step": 1623 }, { "epoch": 154.66666666666666, "grad_norm": 3.1497421264648438, "learning_rate": 3.0416041979010495e-07, "loss": 1.1322, "step": 1624 }, { "epoch": 154.76190476190476, "grad_norm": 3.4132707118988037, "learning_rate": 3.043478260869565e-07, "loss": 1.1689, "step": 1625 }, { "epoch": 154.85714285714286, "grad_norm": 3.04034161567688, "learning_rate": 3.045352323838081e-07, "loss": 1.1495, "step": 1626 }, { "epoch": 154.95238095238096, "grad_norm": 3.6474931240081787, "learning_rate": 3.047226386806597e-07, "loss": 1.1827, "step": 1627 }, { "epoch": 154.95238095238096, "eval_accuracy": 0.7141251405020607, "eval_f1": 0.7753900500441566, "eval_loss": 0.5792854428291321, "eval_precision": 0.710739341608203, "eval_recall": 0.852979274611399, "eval_roc_auc": 0.7535290731145654, "eval_runtime": 1.1995, "eval_samples_per_second": 2225.127, "eval_steps_per_second": 5.002, "step": 1627 }, { "epoch": 155.04761904761904, "grad_norm": 5.743706703186035, "learning_rate": 3.049100449775113e-07, "loss": 1.1394, "step": 1628 }, { "epoch": 155.14285714285714, "grad_norm": 2.847236394882202, "learning_rate": 3.0509745127436283e-07, "loss": 1.1703, "step": 1629 }, { "epoch": 155.23809523809524, "grad_norm": 3.0275094509124756, "learning_rate": 3.0528485757121444e-07, "loss": 1.196, "step": 1630 }, { "epoch": 155.33333333333334, "grad_norm": 4.455320358276367, "learning_rate": 3.05472263868066e-07, "loss": 1.1521, "step": 1631 }, { "epoch": 155.42857142857142, "grad_norm": 4.467602252960205, "learning_rate": 3.0565967016491755e-07, "loss": 1.1466, "step": 1632 }, { "epoch": 155.52380952380952, "grad_norm": 4.637279510498047, "learning_rate": 3.058470764617691e-07, "loss": 1.1421, "step": 1633 }, { "epoch": 155.61904761904762, "grad_norm": 7.024368762969971, "learning_rate": 3.060344827586207e-07, "loss": 1.1524, "step": 1634 }, { "epoch": 155.71428571428572, "grad_norm": 2.950413942337036, "learning_rate": 3.0622188905547226e-07, "loss": 1.1879, "step": 1635 }, { "epoch": 155.8095238095238, "grad_norm": 3.2424774169921875, "learning_rate": 3.064092953523238e-07, "loss": 1.1723, "step": 1636 }, { "epoch": 155.9047619047619, "grad_norm": 3.6627702713012695, "learning_rate": 3.065967016491754e-07, "loss": 1.1706, "step": 1637 }, { "epoch": 156.0, "grad_norm": 4.732444763183594, "learning_rate": 3.0678410794602703e-07, "loss": 1.1414, "step": 1638 }, { "epoch": 156.0, "eval_accuracy": 0.7141251405020607, "eval_f1": 0.7744605379840378, "eval_loss": 0.5791009664535522, "eval_precision": 0.7123436650353453, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.7538658606793321, "eval_runtime": 1.2898, "eval_samples_per_second": 2069.323, "eval_steps_per_second": 4.652, "step": 1638 }, { "epoch": 156.0952380952381, "grad_norm": 8.923810958862305, "learning_rate": 3.069715142428786e-07, "loss": 1.1463, "step": 1639 }, { "epoch": 156.1904761904762, "grad_norm": 4.258830547332764, "learning_rate": 3.0715892053973014e-07, "loss": 1.1602, "step": 1640 }, { "epoch": 156.28571428571428, "grad_norm": 5.375497341156006, "learning_rate": 3.0734632683658175e-07, "loss": 1.1538, "step": 1641 }, { "epoch": 156.38095238095238, "grad_norm": 2.5572433471679688, "learning_rate": 3.075337331334333e-07, "loss": 1.1395, "step": 1642 }, { "epoch": 156.47619047619048, "grad_norm": 4.052694797515869, "learning_rate": 3.0772113943028486e-07, "loss": 1.2135, "step": 1643 }, { "epoch": 156.57142857142858, "grad_norm": 5.803094387054443, "learning_rate": 3.0790854572713646e-07, "loss": 1.2036, "step": 1644 }, { "epoch": 156.66666666666666, "grad_norm": 4.393676280975342, "learning_rate": 3.08095952023988e-07, "loss": 1.1575, "step": 1645 }, { "epoch": 156.76190476190476, "grad_norm": 2.8207361698150635, "learning_rate": 3.0828335832083957e-07, "loss": 1.172, "step": 1646 }, { "epoch": 156.85714285714286, "grad_norm": 3.8662214279174805, "learning_rate": 3.0847076461769113e-07, "loss": 1.1986, "step": 1647 }, { "epoch": 156.95238095238096, "grad_norm": 2.2252655029296875, "learning_rate": 3.0865817091454274e-07, "loss": 1.1686, "step": 1648 }, { "epoch": 156.95238095238096, "eval_accuracy": 0.7122517796927689, "eval_f1": 0.7735849056603774, "eval_loss": 0.5791285634040833, "eval_precision": 0.70995670995671, "eval_recall": 0.8497409326424871, "eval_roc_auc": 0.7541652274035694, "eval_runtime": 1.132, "eval_samples_per_second": 2357.715, "eval_steps_per_second": 5.3, "step": 1648 }, { "epoch": 157.04761904761904, "grad_norm": 5.072911262512207, "learning_rate": 3.0884557721139434e-07, "loss": 1.1583, "step": 1649 }, { "epoch": 157.14285714285714, "grad_norm": 3.5795886516571045, "learning_rate": 3.090329835082459e-07, "loss": 1.1602, "step": 1650 }, { "epoch": 157.23809523809524, "grad_norm": 2.649064064025879, "learning_rate": 3.092203898050975e-07, "loss": 1.1591, "step": 1651 }, { "epoch": 157.33333333333334, "grad_norm": 2.5507094860076904, "learning_rate": 3.0940779610194906e-07, "loss": 1.1871, "step": 1652 }, { "epoch": 157.42857142857142, "grad_norm": 4.117077827453613, "learning_rate": 3.095952023988006e-07, "loss": 1.1838, "step": 1653 }, { "epoch": 157.52380952380952, "grad_norm": 3.221205949783325, "learning_rate": 3.0978260869565217e-07, "loss": 1.1684, "step": 1654 }, { "epoch": 157.61904761904762, "grad_norm": 8.151176452636719, "learning_rate": 3.099700149925038e-07, "loss": 1.1948, "step": 1655 }, { "epoch": 157.71428571428572, "grad_norm": 3.3436615467071533, "learning_rate": 3.1015742128935533e-07, "loss": 1.182, "step": 1656 }, { "epoch": 157.8095238095238, "grad_norm": 5.598202228546143, "learning_rate": 3.103448275862069e-07, "loss": 1.1113, "step": 1657 }, { "epoch": 157.9047619047619, "grad_norm": 3.317392587661743, "learning_rate": 3.105322338830585e-07, "loss": 1.132, "step": 1658 }, { "epoch": 158.0, "grad_norm": 3.3303115367889404, "learning_rate": 3.1071964017991005e-07, "loss": 1.2094, "step": 1659 }, { "epoch": 158.0, "eval_accuracy": 0.7130011240164855, "eval_f1": 0.7735068007096393, "eval_loss": 0.5792306661605835, "eval_precision": 0.7116430903155604, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.7539985607369027, "eval_runtime": 1.2239, "eval_samples_per_second": 2180.666, "eval_steps_per_second": 4.902, "step": 1659 }, { "epoch": 158.0952380952381, "grad_norm": 3.01957631111145, "learning_rate": 3.109070464767616e-07, "loss": 1.1732, "step": 1660 }, { "epoch": 158.1904761904762, "grad_norm": 4.130393981933594, "learning_rate": 3.110944527736132e-07, "loss": 1.1752, "step": 1661 }, { "epoch": 158.28571428571428, "grad_norm": 3.2094545364379883, "learning_rate": 3.112818590704648e-07, "loss": 1.1735, "step": 1662 }, { "epoch": 158.38095238095238, "grad_norm": 6.156533718109131, "learning_rate": 3.1146926536731637e-07, "loss": 1.1488, "step": 1663 }, { "epoch": 158.47619047619048, "grad_norm": 14.892172813415527, "learning_rate": 3.116566716641679e-07, "loss": 1.2048, "step": 1664 }, { "epoch": 158.57142857142858, "grad_norm": 3.576772689819336, "learning_rate": 3.1184407796101953e-07, "loss": 1.1629, "step": 1665 }, { "epoch": 158.66666666666666, "grad_norm": 5.326858997344971, "learning_rate": 3.120314842578711e-07, "loss": 1.1625, "step": 1666 }, { "epoch": 158.76190476190476, "grad_norm": 7.5055084228515625, "learning_rate": 3.1221889055472264e-07, "loss": 1.113, "step": 1667 }, { "epoch": 158.85714285714286, "grad_norm": 5.809722423553467, "learning_rate": 3.124062968515742e-07, "loss": 1.1523, "step": 1668 }, { "epoch": 158.95238095238096, "grad_norm": 5.568952560424805, "learning_rate": 3.125937031484258e-07, "loss": 1.1823, "step": 1669 }, { "epoch": 158.95238095238096, "eval_accuracy": 0.7107530910453354, "eval_f1": 0.7726737338044759, "eval_loss": 0.5794424414634705, "eval_precision": 0.7084233261339092, "eval_recall": 0.8497409326424871, "eval_roc_auc": 0.7538808290155441, "eval_runtime": 1.2192, "eval_samples_per_second": 2189.195, "eval_steps_per_second": 4.921, "step": 1669 }, { "epoch": 159.04761904761904, "grad_norm": 2.202739953994751, "learning_rate": 3.1278110944527736e-07, "loss": 1.1819, "step": 1670 }, { "epoch": 159.14285714285714, "grad_norm": 3.3776562213897705, "learning_rate": 3.129685157421289e-07, "loss": 1.1367, "step": 1671 }, { "epoch": 159.23809523809524, "grad_norm": 7.199225425720215, "learning_rate": 3.131559220389805e-07, "loss": 1.144, "step": 1672 }, { "epoch": 159.33333333333334, "grad_norm": 3.1565022468566895, "learning_rate": 3.1334332833583213e-07, "loss": 1.2118, "step": 1673 }, { "epoch": 159.42857142857142, "grad_norm": 12.221293449401855, "learning_rate": 3.135307346326837e-07, "loss": 1.1934, "step": 1674 }, { "epoch": 159.52380952380952, "grad_norm": 6.696208477020264, "learning_rate": 3.1371814092953524e-07, "loss": 1.1831, "step": 1675 }, { "epoch": 159.61904761904762, "grad_norm": 3.944971799850464, "learning_rate": 3.1390554722638684e-07, "loss": 1.1751, "step": 1676 }, { "epoch": 159.71428571428572, "grad_norm": 5.407956123352051, "learning_rate": 3.140929535232384e-07, "loss": 1.1373, "step": 1677 }, { "epoch": 159.8095238095238, "grad_norm": 3.2926764488220215, "learning_rate": 3.1428035982008995e-07, "loss": 1.1734, "step": 1678 }, { "epoch": 159.9047619047619, "grad_norm": 4.692826747894287, "learning_rate": 3.1446776611694156e-07, "loss": 1.1429, "step": 1679 }, { "epoch": 160.0, "grad_norm": 5.594501495361328, "learning_rate": 3.146551724137931e-07, "loss": 1.1453, "step": 1680 }, { "epoch": 160.0, "eval_accuracy": 0.7130011240164855, "eval_f1": 0.7735068007096393, "eval_loss": 0.5794059038162231, "eval_precision": 0.7116430903155604, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.7536447898675878, "eval_runtime": 1.3207, "eval_samples_per_second": 2020.963, "eval_steps_per_second": 4.543, "step": 1680 }, { "epoch": 160.0952380952381, "grad_norm": 2.961744785308838, "learning_rate": 3.1484257871064467e-07, "loss": 1.1718, "step": 1681 }, { "epoch": 160.1904761904762, "grad_norm": 4.337584972381592, "learning_rate": 3.150299850074962e-07, "loss": 1.2078, "step": 1682 }, { "epoch": 160.28571428571428, "grad_norm": 3.1441948413848877, "learning_rate": 3.1521739130434783e-07, "loss": 1.1365, "step": 1683 }, { "epoch": 160.38095238095238, "grad_norm": 10.010917663574219, "learning_rate": 3.1540479760119944e-07, "loss": 1.1623, "step": 1684 }, { "epoch": 160.47619047619048, "grad_norm": 3.047410249710083, "learning_rate": 3.15592203898051e-07, "loss": 1.1479, "step": 1685 }, { "epoch": 160.57142857142858, "grad_norm": 2.7744319438934326, "learning_rate": 3.157796101949026e-07, "loss": 1.1684, "step": 1686 }, { "epoch": 160.66666666666666, "grad_norm": 2.880967617034912, "learning_rate": 3.1596701649175415e-07, "loss": 1.1694, "step": 1687 }, { "epoch": 160.76190476190476, "grad_norm": 3.9865102767944336, "learning_rate": 3.161544227886057e-07, "loss": 1.1591, "step": 1688 }, { "epoch": 160.85714285714286, "grad_norm": 9.290629386901855, "learning_rate": 3.1634182908545726e-07, "loss": 1.225, "step": 1689 }, { "epoch": 160.95238095238096, "grad_norm": 6.068443775177002, "learning_rate": 3.1652923538230887e-07, "loss": 1.1401, "step": 1690 }, { "epoch": 160.95238095238096, "eval_accuracy": 0.7126264518546273, "eval_f1": 0.7735459108355477, "eval_loss": 0.5790919661521912, "eval_precision": 0.7107976125881714, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.7541301093839954, "eval_runtime": 1.1361, "eval_samples_per_second": 2349.34, "eval_steps_per_second": 5.281, "step": 1690 }, { "epoch": 161.04761904761904, "grad_norm": 3.733458995819092, "learning_rate": 3.167166416791604e-07, "loss": 1.1656, "step": 1691 }, { "epoch": 161.14285714285714, "grad_norm": 3.664858102798462, "learning_rate": 3.16904047976012e-07, "loss": 1.1442, "step": 1692 }, { "epoch": 161.23809523809524, "grad_norm": 2.717442274093628, "learning_rate": 3.170914542728636e-07, "loss": 1.1454, "step": 1693 }, { "epoch": 161.33333333333334, "grad_norm": 3.2815489768981934, "learning_rate": 3.1727886056971514e-07, "loss": 1.1479, "step": 1694 }, { "epoch": 161.42857142857142, "grad_norm": 2.4982147216796875, "learning_rate": 3.1746626686656675e-07, "loss": 1.1653, "step": 1695 }, { "epoch": 161.52380952380952, "grad_norm": 4.64665412902832, "learning_rate": 3.176536731634183e-07, "loss": 1.1571, "step": 1696 }, { "epoch": 161.61904761904762, "grad_norm": 3.416707992553711, "learning_rate": 3.178410794602699e-07, "loss": 1.1982, "step": 1697 }, { "epoch": 161.71428571428572, "grad_norm": 4.434908390045166, "learning_rate": 3.1802848575712147e-07, "loss": 1.1707, "step": 1698 }, { "epoch": 161.8095238095238, "grad_norm": 7.077025890350342, "learning_rate": 3.18215892053973e-07, "loss": 1.1793, "step": 1699 }, { "epoch": 161.9047619047619, "grad_norm": 3.968093156814575, "learning_rate": 3.1840329835082463e-07, "loss": 1.1527, "step": 1700 }, { "epoch": 162.0, "grad_norm": 2.842799663543701, "learning_rate": 3.185907046476762e-07, "loss": 1.1789, "step": 1701 }, { "epoch": 162.0, "eval_accuracy": 0.7133757961783439, "eval_f1": 0.7738693467336684, "eval_loss": 0.5786438584327698, "eval_precision": 0.711799891245242, "eval_recall": 0.8477979274611399, "eval_roc_auc": 0.7546603339090385, "eval_runtime": 1.2166, "eval_samples_per_second": 2193.76, "eval_steps_per_second": 4.932, "step": 1701 }, { "epoch": 162.0952380952381, "grad_norm": 2.8696420192718506, "learning_rate": 3.1877811094452774e-07, "loss": 1.1815, "step": 1702 }, { "epoch": 162.1904761904762, "grad_norm": 5.638867378234863, "learning_rate": 3.1896551724137934e-07, "loss": 1.1527, "step": 1703 }, { "epoch": 162.28571428571428, "grad_norm": 3.586515426635742, "learning_rate": 3.191529235382309e-07, "loss": 1.1662, "step": 1704 }, { "epoch": 162.38095238095238, "grad_norm": 3.2161989212036133, "learning_rate": 3.1934032983508245e-07, "loss": 1.2009, "step": 1705 }, { "epoch": 162.47619047619048, "grad_norm": 5.910765647888184, "learning_rate": 3.19527736131934e-07, "loss": 1.1426, "step": 1706 }, { "epoch": 162.57142857142858, "grad_norm": 4.062796592712402, "learning_rate": 3.197151424287856e-07, "loss": 1.1538, "step": 1707 }, { "epoch": 162.66666666666666, "grad_norm": 3.743438243865967, "learning_rate": 3.199025487256372e-07, "loss": 1.1472, "step": 1708 }, { "epoch": 162.76190476190476, "grad_norm": 7.2488508224487305, "learning_rate": 3.200899550224888e-07, "loss": 1.1939, "step": 1709 }, { "epoch": 162.85714285714286, "grad_norm": 3.9524292945861816, "learning_rate": 3.202773613193404e-07, "loss": 1.1322, "step": 1710 }, { "epoch": 162.95238095238096, "grad_norm": 5.516472339630127, "learning_rate": 3.2046476761619194e-07, "loss": 1.1396, "step": 1711 }, { "epoch": 162.95238095238096, "eval_accuracy": 0.7118771075309105, "eval_f1": 0.7721481481481481, "eval_loss": 0.5783283710479736, "eval_precision": 0.7116329874385582, "eval_recall": 0.8439119170984456, "eval_roc_auc": 0.755014968336212, "eval_runtime": 1.2621, "eval_samples_per_second": 2114.736, "eval_steps_per_second": 4.754, "step": 1711 }, { "epoch": 163.04761904761904, "grad_norm": 5.999850749969482, "learning_rate": 3.206521739130435e-07, "loss": 1.1333, "step": 1712 }, { "epoch": 163.14285714285714, "grad_norm": 5.138979911804199, "learning_rate": 3.2083958020989505e-07, "loss": 1.1356, "step": 1713 }, { "epoch": 163.23809523809524, "grad_norm": 2.94199275970459, "learning_rate": 3.2102698650674665e-07, "loss": 1.206, "step": 1714 }, { "epoch": 163.33333333333334, "grad_norm": 4.085175037384033, "learning_rate": 3.212143928035982e-07, "loss": 1.1372, "step": 1715 }, { "epoch": 163.42857142857142, "grad_norm": 2.2926993370056152, "learning_rate": 3.2140179910044976e-07, "loss": 1.1509, "step": 1716 }, { "epoch": 163.52380952380952, "grad_norm": 3.0990068912506104, "learning_rate": 3.2158920539730137e-07, "loss": 1.1416, "step": 1717 }, { "epoch": 163.61904761904762, "grad_norm": 5.597672462463379, "learning_rate": 3.217766116941529e-07, "loss": 1.1744, "step": 1718 }, { "epoch": 163.71428571428572, "grad_norm": 2.897651433944702, "learning_rate": 3.2196401799100453e-07, "loss": 1.158, "step": 1719 }, { "epoch": 163.8095238095238, "grad_norm": 6.378485202789307, "learning_rate": 3.221514242878561e-07, "loss": 1.1771, "step": 1720 }, { "epoch": 163.9047619047619, "grad_norm": 9.879870414733887, "learning_rate": 3.223388305847077e-07, "loss": 1.2047, "step": 1721 }, { "epoch": 164.0, "grad_norm": 5.101013660430908, "learning_rate": 3.2252623688155925e-07, "loss": 1.1571, "step": 1722 }, { "epoch": 164.0, "eval_accuracy": 0.7126264518546273, "eval_f1": 0.774345395704619, "eval_loss": 0.5781943202018738, "eval_precision": 0.7094339622641509, "eval_recall": 0.8523316062176166, "eval_roc_auc": 0.7555529648819804, "eval_runtime": 1.3698, "eval_samples_per_second": 1948.521, "eval_steps_per_second": 4.38, "step": 1722 }, { "epoch": 164.0952380952381, "grad_norm": 2.68703556060791, "learning_rate": 3.227136431784108e-07, "loss": 1.1588, "step": 1723 }, { "epoch": 164.1904761904762, "grad_norm": 5.951897621154785, "learning_rate": 3.229010494752624e-07, "loss": 1.1672, "step": 1724 }, { "epoch": 164.28571428571428, "grad_norm": 6.9368438720703125, "learning_rate": 3.2308845577211397e-07, "loss": 1.1562, "step": 1725 }, { "epoch": 164.38095238095238, "grad_norm": 4.164420127868652, "learning_rate": 3.232758620689655e-07, "loss": 1.1505, "step": 1726 }, { "epoch": 164.47619047619048, "grad_norm": 2.9614076614379883, "learning_rate": 3.234632683658171e-07, "loss": 1.1773, "step": 1727 }, { "epoch": 164.57142857142858, "grad_norm": 4.118006706237793, "learning_rate": 3.236506746626687e-07, "loss": 1.125, "step": 1728 }, { "epoch": 164.66666666666666, "grad_norm": 6.172893524169922, "learning_rate": 3.2383808095952024e-07, "loss": 1.1511, "step": 1729 }, { "epoch": 164.76190476190476, "grad_norm": 2.903033494949341, "learning_rate": 3.2402548725637184e-07, "loss": 1.1505, "step": 1730 }, { "epoch": 164.85714285714286, "grad_norm": 4.236114025115967, "learning_rate": 3.242128935532234e-07, "loss": 1.1901, "step": 1731 }, { "epoch": 164.95238095238096, "grad_norm": 5.413357734680176, "learning_rate": 3.24400299850075e-07, "loss": 1.1835, "step": 1732 }, { "epoch": 164.95238095238096, "eval_accuracy": 0.7130011240164855, "eval_f1": 0.7725653206650831, "eval_loss": 0.5783714056015015, "eval_precision": 0.7132675438596491, "eval_recall": 0.8426165803108808, "eval_roc_auc": 0.7549363845710997, "eval_runtime": 1.1442, "eval_samples_per_second": 2332.686, "eval_steps_per_second": 5.244, "step": 1732 }, { "epoch": 165.04761904761904, "grad_norm": 5.139907360076904, "learning_rate": 3.2458770614692656e-07, "loss": 1.1825, "step": 1733 }, { "epoch": 165.14285714285714, "grad_norm": 7.035548686981201, "learning_rate": 3.247751124437781e-07, "loss": 1.1158, "step": 1734 }, { "epoch": 165.23809523809524, "grad_norm": 2.559943199157715, "learning_rate": 3.249625187406297e-07, "loss": 1.165, "step": 1735 }, { "epoch": 165.33333333333334, "grad_norm": 4.389877796173096, "learning_rate": 3.251499250374813e-07, "loss": 1.1688, "step": 1736 }, { "epoch": 165.42857142857142, "grad_norm": 3.601215362548828, "learning_rate": 3.2533733133433283e-07, "loss": 1.2054, "step": 1737 }, { "epoch": 165.52380952380952, "grad_norm": 3.479674816131592, "learning_rate": 3.2552473763118444e-07, "loss": 1.1752, "step": 1738 }, { "epoch": 165.61904761904762, "grad_norm": 4.253512382507324, "learning_rate": 3.25712143928036e-07, "loss": 1.1698, "step": 1739 }, { "epoch": 165.71428571428572, "grad_norm": 6.709036350250244, "learning_rate": 3.2589955022488755e-07, "loss": 1.1617, "step": 1740 }, { "epoch": 165.8095238095238, "grad_norm": 4.812006950378418, "learning_rate": 3.260869565217391e-07, "loss": 1.2067, "step": 1741 }, { "epoch": 165.9047619047619, "grad_norm": 3.5801570415496826, "learning_rate": 3.262743628185907e-07, "loss": 1.1455, "step": 1742 }, { "epoch": 166.0, "grad_norm": 5.324729919433594, "learning_rate": 3.264617691154423e-07, "loss": 1.1339, "step": 1743 }, { "epoch": 166.0, "eval_accuracy": 0.7118771075309105, "eval_f1": 0.7732232379828959, "eval_loss": 0.5782414674758911, "eval_precision": 0.7097996751488901, "eval_recall": 0.8490932642487047, "eval_roc_auc": 0.7554231433506045, "eval_runtime": 1.2109, "eval_samples_per_second": 2204.106, "eval_steps_per_second": 4.955, "step": 1743 }, { "epoch": 166.0952380952381, "grad_norm": 6.025104522705078, "learning_rate": 3.2664917541229387e-07, "loss": 1.1132, "step": 1744 }, { "epoch": 166.1904761904762, "grad_norm": 4.197454929351807, "learning_rate": 3.268365817091455e-07, "loss": 1.2025, "step": 1745 }, { "epoch": 166.28571428571428, "grad_norm": 2.0884721279144287, "learning_rate": 3.2702398800599703e-07, "loss": 1.1522, "step": 1746 }, { "epoch": 166.38095238095238, "grad_norm": 5.595632553100586, "learning_rate": 3.272113943028486e-07, "loss": 1.1931, "step": 1747 }, { "epoch": 166.47619047619048, "grad_norm": 2.5613150596618652, "learning_rate": 3.2739880059970014e-07, "loss": 1.1487, "step": 1748 }, { "epoch": 166.57142857142858, "grad_norm": 4.494848728179932, "learning_rate": 3.2758620689655175e-07, "loss": 1.1442, "step": 1749 }, { "epoch": 166.66666666666666, "grad_norm": 4.147763729095459, "learning_rate": 3.277736131934033e-07, "loss": 1.1473, "step": 1750 }, { "epoch": 166.76190476190476, "grad_norm": 4.883570194244385, "learning_rate": 3.2796101949025486e-07, "loss": 1.1481, "step": 1751 }, { "epoch": 166.85714285714286, "grad_norm": 3.538252353668213, "learning_rate": 3.2814842578710647e-07, "loss": 1.1876, "step": 1752 }, { "epoch": 166.95238095238096, "grad_norm": 3.5304596424102783, "learning_rate": 3.28335832083958e-07, "loss": 1.1672, "step": 1753 }, { "epoch": 166.95238095238096, "eval_accuracy": 0.7122517796927689, "eval_f1": 0.7730496453900709, "eval_loss": 0.5780826807022095, "eval_precision": 0.7108695652173913, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.7556171560161197, "eval_runtime": 1.23, "eval_samples_per_second": 2169.958, "eval_steps_per_second": 4.878, "step": 1753 }, { "epoch": 167.04761904761904, "grad_norm": 6.027864456176758, "learning_rate": 3.2852323838080963e-07, "loss": 1.115, "step": 1754 }, { "epoch": 167.14285714285714, "grad_norm": 4.775788307189941, "learning_rate": 3.287106446776612e-07, "loss": 1.1697, "step": 1755 }, { "epoch": 167.23809523809524, "grad_norm": 5.333404064178467, "learning_rate": 3.288980509745128e-07, "loss": 1.2077, "step": 1756 }, { "epoch": 167.33333333333334, "grad_norm": 5.563086032867432, "learning_rate": 3.2908545727136434e-07, "loss": 1.156, "step": 1757 }, { "epoch": 167.42857142857142, "grad_norm": 5.439138412475586, "learning_rate": 3.292728635682159e-07, "loss": 1.172, "step": 1758 }, { "epoch": 167.52380952380952, "grad_norm": 3.481065511703491, "learning_rate": 3.294602698650675e-07, "loss": 1.1268, "step": 1759 }, { "epoch": 167.61904761904762, "grad_norm": 2.863832473754883, "learning_rate": 3.2964767616191906e-07, "loss": 1.1691, "step": 1760 }, { "epoch": 167.71428571428572, "grad_norm": 7.240921974182129, "learning_rate": 3.298350824587706e-07, "loss": 1.2117, "step": 1761 }, { "epoch": 167.8095238095238, "grad_norm": 3.0119240283966064, "learning_rate": 3.3002248875562217e-07, "loss": 1.1734, "step": 1762 }, { "epoch": 167.9047619047619, "grad_norm": 5.17049503326416, "learning_rate": 3.302098950524738e-07, "loss": 1.125, "step": 1763 }, { "epoch": 168.0, "grad_norm": 2.893317222595215, "learning_rate": 3.3039730134932533e-07, "loss": 1.1594, "step": 1764 }, { "epoch": 168.0, "eval_accuracy": 0.7159985013113526, "eval_f1": 0.7767962308598351, "eval_loss": 0.5774693489074707, "eval_precision": 0.7122030237580994, "eval_recall": 0.8542746113989638, "eval_roc_auc": 0.7565512377662635, "eval_runtime": 1.25, "eval_samples_per_second": 2135.174, "eval_steps_per_second": 4.8, "step": 1764 }, { "epoch": 168.0952380952381, "grad_norm": 4.347754955291748, "learning_rate": 3.3058470764617694e-07, "loss": 1.1856, "step": 1765 }, { "epoch": 168.1904761904762, "grad_norm": 3.9195492267608643, "learning_rate": 3.307721139430285e-07, "loss": 1.1709, "step": 1766 }, { "epoch": 168.28571428571428, "grad_norm": 6.3589887619018555, "learning_rate": 3.309595202398801e-07, "loss": 1.1727, "step": 1767 }, { "epoch": 168.38095238095238, "grad_norm": 2.310558319091797, "learning_rate": 3.3114692653673166e-07, "loss": 1.1734, "step": 1768 }, { "epoch": 168.47619047619048, "grad_norm": 3.736511707305908, "learning_rate": 3.313343328335832e-07, "loss": 1.1803, "step": 1769 }, { "epoch": 168.57142857142858, "grad_norm": 3.2041683197021484, "learning_rate": 3.315217391304348e-07, "loss": 1.2179, "step": 1770 }, { "epoch": 168.66666666666666, "grad_norm": 2.9342873096466064, "learning_rate": 3.3170914542728637e-07, "loss": 1.1647, "step": 1771 }, { "epoch": 168.76190476190476, "grad_norm": 4.083154201507568, "learning_rate": 3.318965517241379e-07, "loss": 1.1399, "step": 1772 }, { "epoch": 168.85714285714286, "grad_norm": 3.029041290283203, "learning_rate": 3.3208395802098953e-07, "loss": 1.1488, "step": 1773 }, { "epoch": 168.95238095238096, "grad_norm": 6.299585819244385, "learning_rate": 3.322713643178411e-07, "loss": 1.1108, "step": 1774 }, { "epoch": 168.95238095238096, "eval_accuracy": 0.7152491569876358, "eval_f1": 0.7744807121661721, "eval_loss": 0.5772000551223755, "eval_precision": 0.7146768893756845, "eval_recall": 0.8452072538860104, "eval_roc_auc": 0.75660103626943, "eval_runtime": 1.159, "eval_samples_per_second": 2302.943, "eval_steps_per_second": 5.177, "step": 1774 }, { "epoch": 169.04761904761904, "grad_norm": 3.145608901977539, "learning_rate": 3.3245877061469264e-07, "loss": 1.1624, "step": 1775 }, { "epoch": 169.14285714285714, "grad_norm": 3.2308030128479004, "learning_rate": 3.3264617691154425e-07, "loss": 1.1641, "step": 1776 }, { "epoch": 169.23809523809524, "grad_norm": 2.888650417327881, "learning_rate": 3.328335832083958e-07, "loss": 1.1488, "step": 1777 }, { "epoch": 169.33333333333334, "grad_norm": 6.054225444793701, "learning_rate": 3.330209895052474e-07, "loss": 1.1517, "step": 1778 }, { "epoch": 169.42857142857142, "grad_norm": 10.32518482208252, "learning_rate": 3.3320839580209897e-07, "loss": 1.1155, "step": 1779 }, { "epoch": 169.52380952380952, "grad_norm": 4.094943523406982, "learning_rate": 3.333958020989506e-07, "loss": 1.159, "step": 1780 }, { "epoch": 169.61904761904762, "grad_norm": 4.512625217437744, "learning_rate": 3.3358320839580213e-07, "loss": 1.148, "step": 1781 }, { "epoch": 169.71428571428572, "grad_norm": 3.9920194149017334, "learning_rate": 3.337706146926537e-07, "loss": 1.2131, "step": 1782 }, { "epoch": 169.8095238095238, "grad_norm": 3.589287519454956, "learning_rate": 3.3395802098950524e-07, "loss": 1.1572, "step": 1783 }, { "epoch": 169.9047619047619, "grad_norm": 5.493167400360107, "learning_rate": 3.3414542728635684e-07, "loss": 1.1631, "step": 1784 }, { "epoch": 170.0, "grad_norm": 3.7833290100097656, "learning_rate": 3.343328335832084e-07, "loss": 1.1449, "step": 1785 }, { "epoch": 170.0, "eval_accuracy": 0.7178718621206445, "eval_f1": 0.7775480059084195, "eval_loss": 0.577059268951416, "eval_precision": 0.714828897338403, "eval_recall": 0.8523316062176166, "eval_roc_auc": 0.7568560736902705, "eval_runtime": 1.1623, "eval_samples_per_second": 2296.241, "eval_steps_per_second": 5.162, "step": 1785 }, { "epoch": 170.0952380952381, "grad_norm": 3.7169711589813232, "learning_rate": 3.3452023988005995e-07, "loss": 1.1807, "step": 1786 }, { "epoch": 170.1904761904762, "grad_norm": 3.6557819843292236, "learning_rate": 3.3470764617691156e-07, "loss": 1.1478, "step": 1787 }, { "epoch": 170.28571428571428, "grad_norm": 2.8102827072143555, "learning_rate": 3.348950524737631e-07, "loss": 1.1509, "step": 1788 }, { "epoch": 170.38095238095238, "grad_norm": 4.472448348999023, "learning_rate": 3.350824587706147e-07, "loss": 1.2002, "step": 1789 }, { "epoch": 170.47619047619048, "grad_norm": 3.8644332885742188, "learning_rate": 3.352698650674663e-07, "loss": 1.1361, "step": 1790 }, { "epoch": 170.57142857142858, "grad_norm": 5.075953960418701, "learning_rate": 3.354572713643179e-07, "loss": 1.1837, "step": 1791 }, { "epoch": 170.66666666666666, "grad_norm": 2.4229235649108887, "learning_rate": 3.3564467766116944e-07, "loss": 1.1228, "step": 1792 }, { "epoch": 170.76190476190476, "grad_norm": 3.281778573989868, "learning_rate": 3.35832083958021e-07, "loss": 1.1924, "step": 1793 }, { "epoch": 170.85714285714286, "grad_norm": 8.002859115600586, "learning_rate": 3.360194902548726e-07, "loss": 1.1562, "step": 1794 }, { "epoch": 170.95238095238096, "grad_norm": 3.903043270111084, "learning_rate": 3.3620689655172416e-07, "loss": 1.1253, "step": 1795 }, { "epoch": 170.95238095238096, "eval_accuracy": 0.7171225177969277, "eval_f1": 0.7774830533451224, "eval_loss": 0.5771528482437134, "eval_precision": 0.7133585722011898, "eval_recall": 0.8542746113989638, "eval_roc_auc": 0.7570040299366724, "eval_runtime": 1.2237, "eval_samples_per_second": 2181.027, "eval_steps_per_second": 4.903, "step": 1795 }, { "epoch": 171.04761904761904, "grad_norm": 3.943495035171509, "learning_rate": 3.363943028485757e-07, "loss": 1.1992, "step": 1796 }, { "epoch": 171.14285714285714, "grad_norm": 7.058687210083008, "learning_rate": 3.3658170914542726e-07, "loss": 1.1109, "step": 1797 }, { "epoch": 171.23809523809524, "grad_norm": 5.804258823394775, "learning_rate": 3.3676911544227887e-07, "loss": 1.2426, "step": 1798 }, { "epoch": 171.33333333333334, "grad_norm": 6.50364875793457, "learning_rate": 3.369565217391304e-07, "loss": 1.1299, "step": 1799 }, { "epoch": 171.42857142857142, "grad_norm": 10.158699989318848, "learning_rate": 3.3714392803598203e-07, "loss": 1.1628, "step": 1800 }, { "epoch": 171.52380952380952, "grad_norm": 4.90827751159668, "learning_rate": 3.3733133433283364e-07, "loss": 1.1583, "step": 1801 }, { "epoch": 171.61904761904762, "grad_norm": 3.285327672958374, "learning_rate": 3.375187406296852e-07, "loss": 1.1732, "step": 1802 }, { "epoch": 171.71428571428572, "grad_norm": 6.1621222496032715, "learning_rate": 3.3770614692653675e-07, "loss": 1.1565, "step": 1803 }, { "epoch": 171.8095238095238, "grad_norm": 5.6082892417907715, "learning_rate": 3.378935532233883e-07, "loss": 1.1558, "step": 1804 }, { "epoch": 171.9047619047619, "grad_norm": 6.2096052169799805, "learning_rate": 3.380809595202399e-07, "loss": 1.1366, "step": 1805 }, { "epoch": 172.0, "grad_norm": 3.6200644969940186, "learning_rate": 3.3826836581709147e-07, "loss": 1.166, "step": 1806 }, { "epoch": 172.0, "eval_accuracy": 0.7141251405020607, "eval_f1": 0.7736576683476714, "eval_loss": 0.577314555644989, "eval_precision": 0.7137383689107827, "eval_recall": 0.844559585492228, "eval_roc_auc": 0.7566957397812321, "eval_runtime": 1.327, "eval_samples_per_second": 2011.312, "eval_steps_per_second": 4.521, "step": 1806 }, { "epoch": 172.0952380952381, "grad_norm": 2.324643135070801, "learning_rate": 3.38455772113943e-07, "loss": 1.1627, "step": 1807 }, { "epoch": 172.1904761904762, "grad_norm": 7.590202808380127, "learning_rate": 3.3864317841079463e-07, "loss": 1.1201, "step": 1808 }, { "epoch": 172.28571428571428, "grad_norm": 4.35092306137085, "learning_rate": 3.388305847076462e-07, "loss": 1.1914, "step": 1809 }, { "epoch": 172.38095238095238, "grad_norm": 4.167804718017578, "learning_rate": 3.3901799100449774e-07, "loss": 1.165, "step": 1810 }, { "epoch": 172.47619047619048, "grad_norm": 4.640730381011963, "learning_rate": 3.3920539730134934e-07, "loss": 1.1631, "step": 1811 }, { "epoch": 172.57142857142858, "grad_norm": 5.027839183807373, "learning_rate": 3.393928035982009e-07, "loss": 1.2238, "step": 1812 }, { "epoch": 172.66666666666666, "grad_norm": 3.8021483421325684, "learning_rate": 3.395802098950525e-07, "loss": 1.1714, "step": 1813 }, { "epoch": 172.76190476190476, "grad_norm": 5.696237564086914, "learning_rate": 3.3976761619190406e-07, "loss": 1.1689, "step": 1814 }, { "epoch": 172.85714285714286, "grad_norm": 4.191372871398926, "learning_rate": 3.3995502248875567e-07, "loss": 1.1471, "step": 1815 }, { "epoch": 172.95238095238096, "grad_norm": 2.8746988773345947, "learning_rate": 3.401424287856072e-07, "loss": 1.1472, "step": 1816 }, { "epoch": 172.95238095238096, "eval_accuracy": 0.7152491569876358, "eval_f1": 0.7764705882352941, "eval_loss": 0.5772579312324524, "eval_precision": 0.7112068965517241, "eval_recall": 0.8549222797927462, "eval_roc_auc": 0.7572544617156016, "eval_runtime": 1.1525, "eval_samples_per_second": 2315.776, "eval_steps_per_second": 5.206, "step": 1816 }, { "epoch": 173.04761904761904, "grad_norm": 5.165085315704346, "learning_rate": 3.403298350824588e-07, "loss": 1.1126, "step": 1817 }, { "epoch": 173.14285714285714, "grad_norm": 8.25855541229248, "learning_rate": 3.405172413793104e-07, "loss": 1.1671, "step": 1818 }, { "epoch": 173.23809523809524, "grad_norm": 5.805681228637695, "learning_rate": 3.4070464767616194e-07, "loss": 1.1901, "step": 1819 }, { "epoch": 173.33333333333334, "grad_norm": 5.523385047912598, "learning_rate": 3.408920539730135e-07, "loss": 1.1544, "step": 1820 }, { "epoch": 173.42857142857142, "grad_norm": 3.4633982181549072, "learning_rate": 3.4107946026986505e-07, "loss": 1.1649, "step": 1821 }, { "epoch": 173.52380952380952, "grad_norm": 3.0569186210632324, "learning_rate": 3.4126686656671666e-07, "loss": 1.1594, "step": 1822 }, { "epoch": 173.61904761904762, "grad_norm": 11.102819442749023, "learning_rate": 3.414542728635682e-07, "loss": 1.1987, "step": 1823 }, { "epoch": 173.71428571428572, "grad_norm": 7.979638576507568, "learning_rate": 3.416416791604198e-07, "loss": 1.1861, "step": 1824 }, { "epoch": 173.8095238095238, "grad_norm": 4.374381065368652, "learning_rate": 3.418290854572714e-07, "loss": 1.1493, "step": 1825 }, { "epoch": 173.9047619047619, "grad_norm": 4.893923282623291, "learning_rate": 3.42016491754123e-07, "loss": 1.1151, "step": 1826 }, { "epoch": 174.0, "grad_norm": 5.232106685638428, "learning_rate": 3.4220389805097453e-07, "loss": 1.1677, "step": 1827 }, { "epoch": 174.0, "eval_accuracy": 0.7167478456350693, "eval_f1": 0.7773851590106007, "eval_loss": 0.5769217014312744, "eval_precision": 0.712742980561555, "eval_recall": 0.8549222797927462, "eval_roc_auc": 0.7575754173862982, "eval_runtime": 1.2067, "eval_samples_per_second": 2211.813, "eval_steps_per_second": 4.972, "step": 1827 }, { "epoch": 174.0952380952381, "grad_norm": 2.487565040588379, "learning_rate": 3.423913043478261e-07, "loss": 1.1533, "step": 1828 }, { "epoch": 174.1904761904762, "grad_norm": 2.702059030532837, "learning_rate": 3.425787106446777e-07, "loss": 1.1271, "step": 1829 }, { "epoch": 174.28571428571428, "grad_norm": 4.765565395355225, "learning_rate": 3.4276611694152925e-07, "loss": 1.1576, "step": 1830 }, { "epoch": 174.38095238095238, "grad_norm": 4.310859680175781, "learning_rate": 3.429535232383808e-07, "loss": 1.1629, "step": 1831 }, { "epoch": 174.47619047619048, "grad_norm": 2.900235652923584, "learning_rate": 3.431409295352324e-07, "loss": 1.145, "step": 1832 }, { "epoch": 174.57142857142858, "grad_norm": 5.54879903793335, "learning_rate": 3.4332833583208397e-07, "loss": 1.1828, "step": 1833 }, { "epoch": 174.66666666666666, "grad_norm": 3.8463099002838135, "learning_rate": 3.435157421289355e-07, "loss": 1.2078, "step": 1834 }, { "epoch": 174.76190476190476, "grad_norm": 4.029839992523193, "learning_rate": 3.4370314842578713e-07, "loss": 1.176, "step": 1835 }, { "epoch": 174.85714285714286, "grad_norm": 6.263148307800293, "learning_rate": 3.4389055472263874e-07, "loss": 1.1302, "step": 1836 }, { "epoch": 174.95238095238096, "grad_norm": 3.3856215476989746, "learning_rate": 3.440779610194903e-07, "loss": 1.1422, "step": 1837 }, { "epoch": 174.95238095238096, "eval_accuracy": 0.7137504683402023, "eval_f1": 0.7732937685459941, "eval_loss": 0.5764679908752441, "eval_precision": 0.7135815991237678, "eval_recall": 0.8439119170984456, "eval_roc_auc": 0.7579099021301094, "eval_runtime": 1.2138, "eval_samples_per_second": 2198.854, "eval_steps_per_second": 4.943, "step": 1837 }, { "epoch": 175.04761904761904, "grad_norm": 4.67045783996582, "learning_rate": 3.4426536731634185e-07, "loss": 1.1808, "step": 1838 }, { "epoch": 175.14285714285714, "grad_norm": 5.133697986602783, "learning_rate": 3.4445277361319345e-07, "loss": 1.1475, "step": 1839 }, { "epoch": 175.23809523809524, "grad_norm": 2.8523051738739014, "learning_rate": 3.44640179910045e-07, "loss": 1.1904, "step": 1840 }, { "epoch": 175.33333333333334, "grad_norm": 4.153690814971924, "learning_rate": 3.4482758620689656e-07, "loss": 1.125, "step": 1841 }, { "epoch": 175.42857142857142, "grad_norm": 2.2273285388946533, "learning_rate": 3.450149925037481e-07, "loss": 1.1406, "step": 1842 }, { "epoch": 175.52380952380952, "grad_norm": 7.832995891571045, "learning_rate": 3.452023988005997e-07, "loss": 1.0953, "step": 1843 }, { "epoch": 175.61904761904762, "grad_norm": 5.448593616485596, "learning_rate": 3.453898050974513e-07, "loss": 1.208, "step": 1844 }, { "epoch": 175.71428571428572, "grad_norm": 5.135989665985107, "learning_rate": 3.4557721139430283e-07, "loss": 1.1912, "step": 1845 }, { "epoch": 175.8095238095238, "grad_norm": 5.489715099334717, "learning_rate": 3.4576461769115444e-07, "loss": 1.1702, "step": 1846 }, { "epoch": 175.9047619047619, "grad_norm": 4.431760311126709, "learning_rate": 3.45952023988006e-07, "loss": 1.1431, "step": 1847 }, { "epoch": 176.0, "grad_norm": 3.1945583820343018, "learning_rate": 3.461394302848576e-07, "loss": 1.1766, "step": 1848 }, { "epoch": 176.0, "eval_accuracy": 0.7174971899587861, "eval_f1": 0.7779740871613663, "eval_loss": 0.5763475894927979, "eval_precision": 0.7132829373650108, "eval_recall": 0.8555699481865285, "eval_roc_auc": 0.7584815774323548, "eval_runtime": 1.3454, "eval_samples_per_second": 1983.803, "eval_steps_per_second": 4.46, "step": 1848 }, { "epoch": 176.0952380952381, "grad_norm": 5.7543439865112305, "learning_rate": 3.4632683658170916e-07, "loss": 1.1441, "step": 1849 }, { "epoch": 176.1904761904762, "grad_norm": 2.080615520477295, "learning_rate": 3.4651424287856076e-07, "loss": 1.1376, "step": 1850 }, { "epoch": 176.28571428571428, "grad_norm": 11.54956340789795, "learning_rate": 3.467016491754123e-07, "loss": 1.2058, "step": 1851 }, { "epoch": 176.38095238095238, "grad_norm": 6.846175193786621, "learning_rate": 3.4688905547226387e-07, "loss": 1.2177, "step": 1852 }, { "epoch": 176.47619047619048, "grad_norm": 5.839255332946777, "learning_rate": 3.470764617691155e-07, "loss": 1.1688, "step": 1853 }, { "epoch": 176.57142857142858, "grad_norm": 2.9963738918304443, "learning_rate": 3.4726386806596703e-07, "loss": 1.1329, "step": 1854 }, { "epoch": 176.66666666666666, "grad_norm": 3.8140134811401367, "learning_rate": 3.474512743628186e-07, "loss": 1.1437, "step": 1855 }, { "epoch": 176.76190476190476, "grad_norm": 4.6968207359313965, "learning_rate": 3.4763868065967014e-07, "loss": 1.1591, "step": 1856 }, { "epoch": 176.85714285714286, "grad_norm": 6.985243797302246, "learning_rate": 3.4782608695652175e-07, "loss": 1.1364, "step": 1857 }, { "epoch": 176.95238095238096, "grad_norm": 4.726886749267578, "learning_rate": 3.480134932533733e-07, "loss": 1.1693, "step": 1858 }, { "epoch": 176.95238095238096, "eval_accuracy": 0.7159985013113526, "eval_f1": 0.7761370348493798, "eval_loss": 0.5765255093574524, "eval_precision": 0.7133550488599348, "eval_recall": 0.8510362694300518, "eval_roc_auc": 0.758339378238342, "eval_runtime": 1.1569, "eval_samples_per_second": 2307.045, "eval_steps_per_second": 5.186, "step": 1858 }, { "epoch": 177.04761904761904, "grad_norm": 11.07115364074707, "learning_rate": 3.482008995502249e-07, "loss": 1.1426, "step": 1859 }, { "epoch": 177.14285714285714, "grad_norm": 2.7573394775390625, "learning_rate": 3.483883058470765e-07, "loss": 1.1399, "step": 1860 }, { "epoch": 177.23809523809524, "grad_norm": 5.692921161651611, "learning_rate": 3.485757121439281e-07, "loss": 1.1824, "step": 1861 }, { "epoch": 177.33333333333334, "grad_norm": 4.1265339851379395, "learning_rate": 3.4876311844077963e-07, "loss": 1.1755, "step": 1862 }, { "epoch": 177.42857142857142, "grad_norm": 6.006172180175781, "learning_rate": 3.489505247376312e-07, "loss": 1.1776, "step": 1863 }, { "epoch": 177.52380952380952, "grad_norm": 4.846124649047852, "learning_rate": 3.491379310344828e-07, "loss": 1.1543, "step": 1864 }, { "epoch": 177.61904761904762, "grad_norm": 3.1988956928253174, "learning_rate": 3.4932533733133435e-07, "loss": 1.1367, "step": 1865 }, { "epoch": 177.71428571428572, "grad_norm": 5.961909770965576, "learning_rate": 3.495127436281859e-07, "loss": 1.1896, "step": 1866 }, { "epoch": 177.8095238095238, "grad_norm": 3.8654661178588867, "learning_rate": 3.497001499250375e-07, "loss": 1.1268, "step": 1867 }, { "epoch": 177.9047619047619, "grad_norm": 4.253927707672119, "learning_rate": 3.4988755622188906e-07, "loss": 1.1478, "step": 1868 }, { "epoch": 178.0, "grad_norm": 11.02048397064209, "learning_rate": 3.500749625187406e-07, "loss": 1.146, "step": 1869 }, { "epoch": 178.0, "eval_accuracy": 0.7156238291494942, "eval_f1": 0.7747105966162066, "eval_loss": 0.5769871473312378, "eval_precision": 0.7150684931506849, "eval_recall": 0.8452072538860104, "eval_roc_auc": 0.7574484743811168, "eval_runtime": 1.1238, "eval_samples_per_second": 2374.888, "eval_steps_per_second": 5.339, "step": 1869 }, { "epoch": 178.0952380952381, "grad_norm": 4.868641376495361, "learning_rate": 3.502623688155922e-07, "loss": 1.1933, "step": 1870 }, { "epoch": 178.1904761904762, "grad_norm": 5.7350335121154785, "learning_rate": 3.5044977511244383e-07, "loss": 1.1308, "step": 1871 }, { "epoch": 178.28571428571428, "grad_norm": 2.5435409545898438, "learning_rate": 3.506371814092954e-07, "loss": 1.158, "step": 1872 }, { "epoch": 178.38095238095238, "grad_norm": 5.791324138641357, "learning_rate": 3.5082458770614694e-07, "loss": 1.167, "step": 1873 }, { "epoch": 178.47619047619048, "grad_norm": 5.436519622802734, "learning_rate": 3.5101199400299855e-07, "loss": 1.1139, "step": 1874 }, { "epoch": 178.57142857142858, "grad_norm": 3.6552703380584717, "learning_rate": 3.511994002998501e-07, "loss": 1.1787, "step": 1875 }, { "epoch": 178.66666666666666, "grad_norm": 5.235700607299805, "learning_rate": 3.5138680659670166e-07, "loss": 1.175, "step": 1876 }, { "epoch": 178.76190476190476, "grad_norm": 7.033854007720947, "learning_rate": 3.515742128935532e-07, "loss": 1.1755, "step": 1877 }, { "epoch": 178.85714285714286, "grad_norm": 6.690187931060791, "learning_rate": 3.517616191904048e-07, "loss": 1.1441, "step": 1878 }, { "epoch": 178.95238095238096, "grad_norm": 12.126110076904297, "learning_rate": 3.5194902548725637e-07, "loss": 1.189, "step": 1879 }, { "epoch": 178.95238095238096, "eval_accuracy": 0.7159985013113526, "eval_f1": 0.7771898883009994, "eval_loss": 0.5770087242126465, "eval_precision": 0.7115177610333692, "eval_recall": 0.8562176165803109, "eval_roc_auc": 0.757912204951065, "eval_runtime": 1.1826, "eval_samples_per_second": 2256.886, "eval_steps_per_second": 5.074, "step": 1879 }, { "epoch": 179.04761904761904, "grad_norm": 6.9428205490112305, "learning_rate": 3.5213643178410793e-07, "loss": 1.1226, "step": 1880 }, { "epoch": 179.14285714285714, "grad_norm": 4.363292217254639, "learning_rate": 3.5232383808095954e-07, "loss": 1.2072, "step": 1881 }, { "epoch": 179.23809523809524, "grad_norm": 4.244919776916504, "learning_rate": 3.5251124437781114e-07, "loss": 1.1241, "step": 1882 }, { "epoch": 179.33333333333334, "grad_norm": 3.1588680744171143, "learning_rate": 3.526986506746627e-07, "loss": 1.1578, "step": 1883 }, { "epoch": 179.42857142857142, "grad_norm": 3.038062334060669, "learning_rate": 3.5288605697151425e-07, "loss": 1.1442, "step": 1884 }, { "epoch": 179.52380952380952, "grad_norm": 6.6813788414001465, "learning_rate": 3.5307346326836586e-07, "loss": 1.1435, "step": 1885 }, { "epoch": 179.61904761904762, "grad_norm": 3.848639488220215, "learning_rate": 3.532608695652174e-07, "loss": 1.1718, "step": 1886 }, { "epoch": 179.71428571428572, "grad_norm": 5.688107490539551, "learning_rate": 3.5344827586206897e-07, "loss": 1.166, "step": 1887 }, { "epoch": 179.8095238095238, "grad_norm": 11.533185005187988, "learning_rate": 3.536356821589206e-07, "loss": 1.1493, "step": 1888 }, { "epoch": 179.9047619047619, "grad_norm": 11.271069526672363, "learning_rate": 3.5382308845577213e-07, "loss": 1.1494, "step": 1889 }, { "epoch": 180.0, "grad_norm": 5.525691509246826, "learning_rate": 3.540104947526237e-07, "loss": 1.189, "step": 1890 }, { "epoch": 180.0, "eval_accuracy": 0.7159985013113526, "eval_f1": 0.7746730083234245, "eval_loss": 0.5767568349838257, "eval_precision": 0.7159340659340659, "eval_recall": 0.8439119170984456, "eval_roc_auc": 0.75776597582038, "eval_runtime": 1.1887, "eval_samples_per_second": 2245.287, "eval_steps_per_second": 5.047, "step": 1890 }, { "epoch": 180.0952380952381, "grad_norm": 3.933689832687378, "learning_rate": 3.5419790104947524e-07, "loss": 1.1657, "step": 1891 }, { "epoch": 180.1904761904762, "grad_norm": 5.561554431915283, "learning_rate": 3.5438530734632685e-07, "loss": 1.1765, "step": 1892 }, { "epoch": 180.28571428571428, "grad_norm": 3.654136896133423, "learning_rate": 3.545727136431784e-07, "loss": 1.1255, "step": 1893 }, { "epoch": 180.38095238095238, "grad_norm": 6.423804759979248, "learning_rate": 3.5476011994003e-07, "loss": 1.1659, "step": 1894 }, { "epoch": 180.47619047619048, "grad_norm": 2.809739351272583, "learning_rate": 3.549475262368816e-07, "loss": 1.1434, "step": 1895 }, { "epoch": 180.57142857142858, "grad_norm": 4.307884693145752, "learning_rate": 3.5513493253373317e-07, "loss": 1.1903, "step": 1896 }, { "epoch": 180.66666666666666, "grad_norm": 5.794278144836426, "learning_rate": 3.553223388305847e-07, "loss": 1.1238, "step": 1897 }, { "epoch": 180.76190476190476, "grad_norm": 6.3453450202941895, "learning_rate": 3.555097451274363e-07, "loss": 1.1934, "step": 1898 }, { "epoch": 180.85714285714286, "grad_norm": 2.430403232574463, "learning_rate": 3.556971514242879e-07, "loss": 1.1543, "step": 1899 }, { "epoch": 180.95238095238096, "grad_norm": 10.58128833770752, "learning_rate": 3.5588455772113944e-07, "loss": 1.1845, "step": 1900 }, { "epoch": 180.95238095238096, "eval_accuracy": 0.7156238291494942, "eval_f1": 0.7753773305711749, "eval_loss": 0.5764981508255005, "eval_precision": 0.7138964577656676, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.7583736327000575, "eval_runtime": 1.2787, "eval_samples_per_second": 2087.334, "eval_steps_per_second": 4.692, "step": 1900 }, { "epoch": 181.04761904761904, "grad_norm": 2.947765827178955, "learning_rate": 3.56071964017991e-07, "loss": 1.1545, "step": 1901 }, { "epoch": 181.14285714285714, "grad_norm": 4.267097473144531, "learning_rate": 3.562593703148426e-07, "loss": 1.1873, "step": 1902 }, { "epoch": 181.23809523809524, "grad_norm": 12.315367698669434, "learning_rate": 3.5644677661169416e-07, "loss": 1.116, "step": 1903 }, { "epoch": 181.33333333333334, "grad_norm": 7.7876715660095215, "learning_rate": 3.566341829085457e-07, "loss": 1.1758, "step": 1904 }, { "epoch": 181.42857142857142, "grad_norm": 7.434537410736084, "learning_rate": 3.568215892053973e-07, "loss": 1.1752, "step": 1905 }, { "epoch": 181.52380952380952, "grad_norm": 5.361802101135254, "learning_rate": 3.570089955022489e-07, "loss": 1.1933, "step": 1906 }, { "epoch": 181.61904761904762, "grad_norm": 2.9396021366119385, "learning_rate": 3.571964017991005e-07, "loss": 1.1814, "step": 1907 }, { "epoch": 181.71428571428572, "grad_norm": 9.127396583557129, "learning_rate": 3.5738380809595204e-07, "loss": 1.131, "step": 1908 }, { "epoch": 181.8095238095238, "grad_norm": 7.989266872406006, "learning_rate": 3.5757121439280364e-07, "loss": 1.1635, "step": 1909 }, { "epoch": 181.9047619047619, "grad_norm": 5.933679103851318, "learning_rate": 3.577586206896552e-07, "loss": 1.149, "step": 1910 }, { "epoch": 182.0, "grad_norm": 4.823862552642822, "learning_rate": 3.5794602698650675e-07, "loss": 1.1308, "step": 1911 }, { "epoch": 182.0, "eval_accuracy": 0.7159985013113526, "eval_f1": 0.7757396449704143, "eval_loss": 0.5764167308807373, "eval_precision": 0.7140522875816994, "eval_recall": 0.8490932642487047, "eval_roc_auc": 0.758552677029361, "eval_runtime": 1.1291, "eval_samples_per_second": 2363.802, "eval_steps_per_second": 5.314, "step": 1911 }, { "epoch": 182.0952380952381, "grad_norm": 3.4051706790924072, "learning_rate": 3.581334332833583e-07, "loss": 1.1716, "step": 1912 }, { "epoch": 182.1904761904762, "grad_norm": 3.0059852600097656, "learning_rate": 3.583208395802099e-07, "loss": 1.1393, "step": 1913 }, { "epoch": 182.28571428571428, "grad_norm": 3.3431499004364014, "learning_rate": 3.5850824587706147e-07, "loss": 1.1219, "step": 1914 }, { "epoch": 182.38095238095238, "grad_norm": 3.8559226989746094, "learning_rate": 3.58695652173913e-07, "loss": 1.1325, "step": 1915 }, { "epoch": 182.47619047619048, "grad_norm": 4.502316474914551, "learning_rate": 3.5888305847076463e-07, "loss": 1.1543, "step": 1916 }, { "epoch": 182.57142857142858, "grad_norm": 8.332260131835938, "learning_rate": 3.5907046476761624e-07, "loss": 1.1884, "step": 1917 }, { "epoch": 182.66666666666666, "grad_norm": 6.321863174438477, "learning_rate": 3.592578710644678e-07, "loss": 1.191, "step": 1918 }, { "epoch": 182.76190476190476, "grad_norm": 3.683375597000122, "learning_rate": 3.5944527736131935e-07, "loss": 1.1066, "step": 1919 }, { "epoch": 182.85714285714286, "grad_norm": 5.32153844833374, "learning_rate": 3.5963268365817095e-07, "loss": 1.1706, "step": 1920 }, { "epoch": 182.95238095238096, "grad_norm": 4.957334995269775, "learning_rate": 3.598200899550225e-07, "loss": 1.1953, "step": 1921 }, { "epoch": 182.95238095238096, "eval_accuracy": 0.7152491569876358, "eval_f1": 0.7760754272245138, "eval_loss": 0.576198160648346, "eval_precision": 0.7118918918918918, "eval_recall": 0.852979274611399, "eval_roc_auc": 0.7590506620610248, "eval_runtime": 1.2035, "eval_samples_per_second": 2217.773, "eval_steps_per_second": 4.986, "step": 1921 }, { "epoch": 183.04761904761904, "grad_norm": 6.652534484863281, "learning_rate": 3.6000749625187406e-07, "loss": 1.2154, "step": 1922 }, { "epoch": 183.14285714285714, "grad_norm": 2.489819288253784, "learning_rate": 3.6019490254872567e-07, "loss": 1.149, "step": 1923 }, { "epoch": 183.23809523809524, "grad_norm": 5.232963562011719, "learning_rate": 3.603823088455772e-07, "loss": 1.2149, "step": 1924 }, { "epoch": 183.33333333333334, "grad_norm": 5.250478267669678, "learning_rate": 3.605697151424288e-07, "loss": 1.1563, "step": 1925 }, { "epoch": 183.42857142857142, "grad_norm": 8.206533432006836, "learning_rate": 3.607571214392804e-07, "loss": 1.1054, "step": 1926 }, { "epoch": 183.52380952380952, "grad_norm": 3.369680166244507, "learning_rate": 3.6094452773613194e-07, "loss": 1.1832, "step": 1927 }, { "epoch": 183.61904761904762, "grad_norm": 4.8924078941345215, "learning_rate": 3.611319340329835e-07, "loss": 1.1753, "step": 1928 }, { "epoch": 183.71428571428572, "grad_norm": 4.222039222717285, "learning_rate": 3.613193403298351e-07, "loss": 1.1668, "step": 1929 }, { "epoch": 183.8095238095238, "grad_norm": 2.895860433578491, "learning_rate": 3.615067466266867e-07, "loss": 1.1363, "step": 1930 }, { "epoch": 183.9047619047619, "grad_norm": 5.144315719604492, "learning_rate": 3.6169415292353826e-07, "loss": 1.1378, "step": 1931 }, { "epoch": 184.0, "grad_norm": 8.487016677856445, "learning_rate": 3.618815592203898e-07, "loss": 1.1334, "step": 1932 }, { "epoch": 184.0, "eval_accuracy": 0.7167478456350693, "eval_f1": 0.7763313609467456, "eval_loss": 0.5758945941925049, "eval_precision": 0.7145969498910676, "eval_recall": 0.8497409326424871, "eval_roc_auc": 0.7593747841105355, "eval_runtime": 1.2153, "eval_samples_per_second": 2196.076, "eval_steps_per_second": 4.937, "step": 1932 }, { "epoch": 184.0952380952381, "grad_norm": 6.197401523590088, "learning_rate": 3.6206896551724143e-07, "loss": 1.1115, "step": 1933 }, { "epoch": 184.1904761904762, "grad_norm": 8.295247077941895, "learning_rate": 3.62256371814093e-07, "loss": 1.1771, "step": 1934 }, { "epoch": 184.28571428571428, "grad_norm": 3.6977932453155518, "learning_rate": 3.6244377811094454e-07, "loss": 1.1489, "step": 1935 }, { "epoch": 184.38095238095238, "grad_norm": 6.017064094543457, "learning_rate": 3.626311844077961e-07, "loss": 1.1236, "step": 1936 }, { "epoch": 184.47619047619048, "grad_norm": 4.377641677856445, "learning_rate": 3.628185907046477e-07, "loss": 1.1521, "step": 1937 }, { "epoch": 184.57142857142858, "grad_norm": 12.319416999816895, "learning_rate": 3.6300599700149925e-07, "loss": 1.1957, "step": 1938 }, { "epoch": 184.66666666666666, "grad_norm": 5.208367824554443, "learning_rate": 3.631934032983508e-07, "loss": 1.1538, "step": 1939 }, { "epoch": 184.76190476190476, "grad_norm": 8.780491828918457, "learning_rate": 3.633808095952024e-07, "loss": 1.165, "step": 1940 }, { "epoch": 184.85714285714286, "grad_norm": 12.607026100158691, "learning_rate": 3.63568215892054e-07, "loss": 1.1552, "step": 1941 }, { "epoch": 184.95238095238096, "grad_norm": 7.559422492980957, "learning_rate": 3.637556221889056e-07, "loss": 1.2093, "step": 1942 }, { "epoch": 184.95238095238096, "eval_accuracy": 0.7159985013113526, "eval_f1": 0.7782328847279111, "eval_loss": 0.575957179069519, "eval_precision": 0.7097118463180363, "eval_recall": 0.8613989637305699, "eval_roc_auc": 0.7598085780080599, "eval_runtime": 1.3194, "eval_samples_per_second": 2022.932, "eval_steps_per_second": 4.548, "step": 1942 }, { "epoch": 185.04761904761904, "grad_norm": 4.233537197113037, "learning_rate": 3.6394302848575713e-07, "loss": 1.1932, "step": 1943 }, { "epoch": 185.14285714285714, "grad_norm": 7.4202752113342285, "learning_rate": 3.6413043478260874e-07, "loss": 1.1603, "step": 1944 }, { "epoch": 185.23809523809524, "grad_norm": 2.546308755874634, "learning_rate": 3.643178410794603e-07, "loss": 1.1637, "step": 1945 }, { "epoch": 185.33333333333334, "grad_norm": 2.575289011001587, "learning_rate": 3.6450524737631185e-07, "loss": 1.2095, "step": 1946 }, { "epoch": 185.42857142857142, "grad_norm": 3.5033421516418457, "learning_rate": 3.6469265367316345e-07, "loss": 1.1268, "step": 1947 }, { "epoch": 185.52380952380952, "grad_norm": 7.139731407165527, "learning_rate": 3.64880059970015e-07, "loss": 1.1341, "step": 1948 }, { "epoch": 185.61904761904762, "grad_norm": 2.049715995788574, "learning_rate": 3.6506746626686656e-07, "loss": 1.1605, "step": 1949 }, { "epoch": 185.71428571428572, "grad_norm": 3.4298887252807617, "learning_rate": 3.652548725637181e-07, "loss": 1.1748, "step": 1950 }, { "epoch": 185.8095238095238, "grad_norm": 11.024620056152344, "learning_rate": 3.654422788605697e-07, "loss": 1.1378, "step": 1951 }, { "epoch": 185.9047619047619, "grad_norm": 3.8029916286468506, "learning_rate": 3.6562968515742133e-07, "loss": 1.1897, "step": 1952 }, { "epoch": 186.0, "grad_norm": 3.457836627960205, "learning_rate": 3.658170914542729e-07, "loss": 1.1673, "step": 1953 }, { "epoch": 186.0, "eval_accuracy": 0.7156238291494942, "eval_f1": 0.77390527256479, "eval_loss": 0.5756555795669556, "eval_precision": 0.716492002206288, "eval_recall": 0.841321243523316, "eval_roc_auc": 0.7596646516983304, "eval_runtime": 1.192, "eval_samples_per_second": 2239.091, "eval_steps_per_second": 5.034, "step": 1953 }, { "epoch": 186.0952380952381, "grad_norm": 3.491661787033081, "learning_rate": 3.660044977511245e-07, "loss": 1.1534, "step": 1954 }, { "epoch": 186.1904761904762, "grad_norm": 5.940413951873779, "learning_rate": 3.6619190404797605e-07, "loss": 1.1502, "step": 1955 }, { "epoch": 186.28571428571428, "grad_norm": 3.201045036315918, "learning_rate": 3.663793103448276e-07, "loss": 1.1731, "step": 1956 }, { "epoch": 186.38095238095238, "grad_norm": 3.6759390830993652, "learning_rate": 3.6656671664167916e-07, "loss": 1.1626, "step": 1957 }, { "epoch": 186.47619047619048, "grad_norm": 2.4864330291748047, "learning_rate": 3.6675412293853077e-07, "loss": 1.1512, "step": 1958 }, { "epoch": 186.57142857142858, "grad_norm": 3.656336545944214, "learning_rate": 3.669415292353823e-07, "loss": 1.1399, "step": 1959 }, { "epoch": 186.66666666666666, "grad_norm": 2.4883368015289307, "learning_rate": 3.671289355322339e-07, "loss": 1.1748, "step": 1960 }, { "epoch": 186.76190476190476, "grad_norm": 5.662504196166992, "learning_rate": 3.673163418290855e-07, "loss": 1.1648, "step": 1961 }, { "epoch": 186.85714285714286, "grad_norm": 3.2387423515319824, "learning_rate": 3.6750374812593704e-07, "loss": 1.1512, "step": 1962 }, { "epoch": 186.95238095238096, "grad_norm": 5.380378723144531, "learning_rate": 3.6769115442278864e-07, "loss": 1.1682, "step": 1963 }, { "epoch": 186.95238095238096, "eval_accuracy": 0.7174971899587861, "eval_f1": 0.7779740871613663, "eval_loss": 0.5752643942832947, "eval_precision": 0.7132829373650108, "eval_recall": 0.8555699481865285, "eval_roc_auc": 0.7605184225676453, "eval_runtime": 1.2174, "eval_samples_per_second": 2192.344, "eval_steps_per_second": 4.928, "step": 1963 }, { "epoch": 187.04761904761904, "grad_norm": 4.980207443237305, "learning_rate": 3.678785607196402e-07, "loss": 1.2062, "step": 1964 }, { "epoch": 187.14285714285714, "grad_norm": 3.1846394538879395, "learning_rate": 3.680659670164918e-07, "loss": 1.1531, "step": 1965 }, { "epoch": 187.23809523809524, "grad_norm": 2.922837495803833, "learning_rate": 3.6825337331334336e-07, "loss": 1.1524, "step": 1966 }, { "epoch": 187.33333333333334, "grad_norm": 3.404749870300293, "learning_rate": 3.684407796101949e-07, "loss": 1.1296, "step": 1967 }, { "epoch": 187.42857142857142, "grad_norm": 3.8887367248535156, "learning_rate": 3.686281859070465e-07, "loss": 1.1537, "step": 1968 }, { "epoch": 187.52380952380952, "grad_norm": 4.6107611656188965, "learning_rate": 3.688155922038981e-07, "loss": 1.1757, "step": 1969 }, { "epoch": 187.61904761904762, "grad_norm": 4.161106109619141, "learning_rate": 3.6900299850074963e-07, "loss": 1.1987, "step": 1970 }, { "epoch": 187.71428571428572, "grad_norm": 6.394876480102539, "learning_rate": 3.691904047976012e-07, "loss": 1.1902, "step": 1971 }, { "epoch": 187.8095238095238, "grad_norm": 8.40281867980957, "learning_rate": 3.693778110944528e-07, "loss": 1.151, "step": 1972 }, { "epoch": 187.9047619047619, "grad_norm": 5.9665937423706055, "learning_rate": 3.6956521739130435e-07, "loss": 1.1155, "step": 1973 }, { "epoch": 188.0, "grad_norm": 3.7505180835723877, "learning_rate": 3.697526236881559e-07, "loss": 1.187, "step": 1974 }, { "epoch": 188.0, "eval_accuracy": 0.7152491569876358, "eval_f1": 0.7740784780023782, "eval_loss": 0.5748009085655212, "eval_precision": 0.7153846153846154, "eval_recall": 0.8432642487046632, "eval_roc_auc": 0.7608298791018998, "eval_runtime": 1.1991, "eval_samples_per_second": 2225.752, "eval_steps_per_second": 5.004, "step": 1974 }, { "epoch": 188.0952380952381, "grad_norm": 2.0834028720855713, "learning_rate": 3.699400299850075e-07, "loss": 1.1773, "step": 1975 }, { "epoch": 188.1904761904762, "grad_norm": 5.331453800201416, "learning_rate": 3.701274362818591e-07, "loss": 1.1259, "step": 1976 }, { "epoch": 188.28571428571428, "grad_norm": 5.906782150268555, "learning_rate": 3.7031484257871067e-07, "loss": 1.2037, "step": 1977 }, { "epoch": 188.38095238095238, "grad_norm": 4.825325012207031, "learning_rate": 3.705022488755622e-07, "loss": 1.1547, "step": 1978 }, { "epoch": 188.47619047619048, "grad_norm": 6.54678201675415, "learning_rate": 3.7068965517241383e-07, "loss": 1.148, "step": 1979 }, { "epoch": 188.57142857142858, "grad_norm": 4.087493419647217, "learning_rate": 3.708770614692654e-07, "loss": 1.1481, "step": 1980 }, { "epoch": 188.66666666666666, "grad_norm": 4.3248467445373535, "learning_rate": 3.7106446776611694e-07, "loss": 1.1438, "step": 1981 }, { "epoch": 188.76190476190476, "grad_norm": 2.789849281311035, "learning_rate": 3.7125187406296855e-07, "loss": 1.1641, "step": 1982 }, { "epoch": 188.85714285714286, "grad_norm": 5.392822742462158, "learning_rate": 3.714392803598201e-07, "loss": 1.1577, "step": 1983 }, { "epoch": 188.95238095238096, "grad_norm": 3.5019123554229736, "learning_rate": 3.7162668665667166e-07, "loss": 1.1604, "step": 1984 }, { "epoch": 188.95238095238096, "eval_accuracy": 0.7148744848257774, "eval_f1": 0.7746520580396802, "eval_loss": 0.5750032067298889, "eval_precision": 0.7135842880523732, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.760961139896373, "eval_runtime": 1.2975, "eval_samples_per_second": 2056.973, "eval_steps_per_second": 4.624, "step": 1984 }, { "epoch": 189.04761904761904, "grad_norm": 3.6215627193450928, "learning_rate": 3.718140929535232e-07, "loss": 1.195, "step": 1985 }, { "epoch": 189.14285714285714, "grad_norm": 5.709612846374512, "learning_rate": 3.720014992503748e-07, "loss": 1.1404, "step": 1986 }, { "epoch": 189.23809523809524, "grad_norm": 8.219498634338379, "learning_rate": 3.7218890554722643e-07, "loss": 1.1572, "step": 1987 }, { "epoch": 189.33333333333334, "grad_norm": 6.0207061767578125, "learning_rate": 3.72376311844078e-07, "loss": 1.1864, "step": 1988 }, { "epoch": 189.42857142857142, "grad_norm": 7.10655403137207, "learning_rate": 3.725637181409296e-07, "loss": 1.23, "step": 1989 }, { "epoch": 189.52380952380952, "grad_norm": 3.2644829750061035, "learning_rate": 3.7275112443778114e-07, "loss": 1.147, "step": 1990 }, { "epoch": 189.61904761904762, "grad_norm": 4.245572090148926, "learning_rate": 3.729385307346327e-07, "loss": 1.137, "step": 1991 }, { "epoch": 189.71428571428572, "grad_norm": 4.594895362854004, "learning_rate": 3.7312593703148425e-07, "loss": 1.1319, "step": 1992 }, { "epoch": 189.8095238095238, "grad_norm": 3.6682186126708984, "learning_rate": 3.7331334332833586e-07, "loss": 1.162, "step": 1993 }, { "epoch": 189.9047619047619, "grad_norm": 3.349700689315796, "learning_rate": 3.735007496251874e-07, "loss": 1.1492, "step": 1994 }, { "epoch": 190.0, "grad_norm": 4.069940090179443, "learning_rate": 3.7368815592203897e-07, "loss": 1.1501, "step": 1995 }, { "epoch": 190.0, "eval_accuracy": 0.7118771075309105, "eval_f1": 0.7724178751109796, "eval_loss": 0.5751973390579224, "eval_precision": 0.7111716621253406, "eval_recall": 0.8452072538860104, "eval_roc_auc": 0.7608937823834199, "eval_runtime": 1.1744, "eval_samples_per_second": 2272.561, "eval_steps_per_second": 5.109, "step": 1995 }, { "epoch": 190.0952380952381, "grad_norm": 7.315029621124268, "learning_rate": 3.738755622188906e-07, "loss": 1.1588, "step": 1996 }, { "epoch": 190.1904761904762, "grad_norm": 3.7769927978515625, "learning_rate": 3.7406296851574213e-07, "loss": 1.1753, "step": 1997 }, { "epoch": 190.28571428571428, "grad_norm": 5.542825698852539, "learning_rate": 3.7425037481259374e-07, "loss": 1.1572, "step": 1998 }, { "epoch": 190.38095238095238, "grad_norm": 4.059653282165527, "learning_rate": 3.744377811094453e-07, "loss": 1.1302, "step": 1999 }, { "epoch": 190.47619047619048, "grad_norm": 6.358081340789795, "learning_rate": 3.746251874062969e-07, "loss": 1.1556, "step": 2000 }, { "epoch": 190.57142857142858, "grad_norm": 3.3779191970825195, "learning_rate": 3.7481259370314846e-07, "loss": 1.181, "step": 2001 }, { "epoch": 190.66666666666666, "grad_norm": 2.771360158920288, "learning_rate": 3.75e-07, "loss": 1.1661, "step": 2002 }, { "epoch": 190.76190476190476, "grad_norm": 4.921844482421875, "learning_rate": 3.751874062968516e-07, "loss": 1.1727, "step": 2003 }, { "epoch": 190.85714285714286, "grad_norm": 3.3652825355529785, "learning_rate": 3.7537481259370317e-07, "loss": 1.1663, "step": 2004 }, { "epoch": 190.95238095238096, "grad_norm": 7.287751197814941, "learning_rate": 3.755622188905547e-07, "loss": 1.1232, "step": 2005 }, { "epoch": 190.95238095238096, "eval_accuracy": 0.7137504683402023, "eval_f1": 0.7746312684365781, "eval_loss": 0.5749582052230835, "eval_precision": 0.7112676056338029, "eval_recall": 0.8503886010362695, "eval_roc_auc": 0.7615135290731145, "eval_runtime": 1.1582, "eval_samples_per_second": 2304.46, "eval_steps_per_second": 5.181, "step": 2005 }, { "epoch": 191.04761904761904, "grad_norm": 3.0846879482269287, "learning_rate": 3.757496251874063e-07, "loss": 1.1737, "step": 2006 }, { "epoch": 191.14285714285714, "grad_norm": 2.902621030807495, "learning_rate": 3.759370314842579e-07, "loss": 1.1728, "step": 2007 }, { "epoch": 191.23809523809524, "grad_norm": 3.8469674587249756, "learning_rate": 3.7612443778110944e-07, "loss": 1.1821, "step": 2008 }, { "epoch": 191.33333333333334, "grad_norm": 4.116859436035156, "learning_rate": 3.7631184407796105e-07, "loss": 1.1174, "step": 2009 }, { "epoch": 191.42857142857142, "grad_norm": 7.507976055145264, "learning_rate": 3.764992503748126e-07, "loss": 1.2025, "step": 2010 }, { "epoch": 191.52380952380952, "grad_norm": 6.259975433349609, "learning_rate": 3.766866566716642e-07, "loss": 1.1568, "step": 2011 }, { "epoch": 191.61904761904762, "grad_norm": 5.180998802185059, "learning_rate": 3.7687406296851577e-07, "loss": 1.1603, "step": 2012 }, { "epoch": 191.71428571428572, "grad_norm": 3.2812700271606445, "learning_rate": 3.770614692653673e-07, "loss": 1.157, "step": 2013 }, { "epoch": 191.8095238095238, "grad_norm": 4.7818379402160645, "learning_rate": 3.7724887556221893e-07, "loss": 1.1923, "step": 2014 }, { "epoch": 191.9047619047619, "grad_norm": 8.186214447021484, "learning_rate": 3.774362818590705e-07, "loss": 1.1419, "step": 2015 }, { "epoch": 192.0, "grad_norm": 6.474642753601074, "learning_rate": 3.7762368815592204e-07, "loss": 1.1409, "step": 2016 }, { "epoch": 192.0, "eval_accuracy": 0.7137504683402023, "eval_f1": 0.7719402985074627, "eval_loss": 0.5745519995689392, "eval_precision": 0.7159468438538206, "eval_recall": 0.8374352331606217, "eval_roc_auc": 0.7616039147956246, "eval_runtime": 1.1466, "eval_samples_per_second": 2327.785, "eval_steps_per_second": 5.233, "step": 2016 }, { "epoch": 192.0952380952381, "grad_norm": 6.402791976928711, "learning_rate": 3.7781109445277364e-07, "loss": 1.195, "step": 2017 }, { "epoch": 192.1904761904762, "grad_norm": 4.038552284240723, "learning_rate": 3.779985007496252e-07, "loss": 1.1443, "step": 2018 }, { "epoch": 192.28571428571428, "grad_norm": 3.914491653442383, "learning_rate": 3.7818590704647675e-07, "loss": 1.1565, "step": 2019 }, { "epoch": 192.38095238095238, "grad_norm": 4.010745525360107, "learning_rate": 3.783733133433283e-07, "loss": 1.1719, "step": 2020 }, { "epoch": 192.47619047619048, "grad_norm": 6.152637958526611, "learning_rate": 3.785607196401799e-07, "loss": 1.1406, "step": 2021 }, { "epoch": 192.57142857142858, "grad_norm": 5.661224842071533, "learning_rate": 3.787481259370315e-07, "loss": 1.1677, "step": 2022 }, { "epoch": 192.66666666666666, "grad_norm": 3.715280294418335, "learning_rate": 3.789355322338831e-07, "loss": 1.1605, "step": 2023 }, { "epoch": 192.76190476190476, "grad_norm": 4.92529296875, "learning_rate": 3.791229385307347e-07, "loss": 1.1522, "step": 2024 }, { "epoch": 192.85714285714286, "grad_norm": 4.248322486877441, "learning_rate": 3.7931034482758624e-07, "loss": 1.1564, "step": 2025 }, { "epoch": 192.95238095238096, "grad_norm": 4.95744514465332, "learning_rate": 3.794977511244378e-07, "loss": 1.1798, "step": 2026 }, { "epoch": 192.95238095238096, "eval_accuracy": 0.7182465342825028, "eval_f1": 0.7779090372120496, "eval_loss": 0.5743523836135864, "eval_precision": 0.7149837133550488, "eval_recall": 0.852979274611399, "eval_roc_auc": 0.7618511801957397, "eval_runtime": 1.239, "eval_samples_per_second": 2154.151, "eval_steps_per_second": 4.843, "step": 2026 }, { "epoch": 193.04761904761904, "grad_norm": 3.4746222496032715, "learning_rate": 3.7968515742128935e-07, "loss": 1.1368, "step": 2027 }, { "epoch": 193.14285714285714, "grad_norm": 3.15362548828125, "learning_rate": 3.7987256371814096e-07, "loss": 1.1771, "step": 2028 }, { "epoch": 193.23809523809524, "grad_norm": 5.005309104919434, "learning_rate": 3.800599700149925e-07, "loss": 1.1872, "step": 2029 }, { "epoch": 193.33333333333334, "grad_norm": 3.0793089866638184, "learning_rate": 3.8024737631184406e-07, "loss": 1.128, "step": 2030 }, { "epoch": 193.42857142857142, "grad_norm": 2.29117751121521, "learning_rate": 3.8043478260869567e-07, "loss": 1.1459, "step": 2031 }, { "epoch": 193.52380952380952, "grad_norm": 4.069672107696533, "learning_rate": 3.806221889055472e-07, "loss": 1.1598, "step": 2032 }, { "epoch": 193.61904761904762, "grad_norm": 4.900834083557129, "learning_rate": 3.8080959520239883e-07, "loss": 1.1229, "step": 2033 }, { "epoch": 193.71428571428572, "grad_norm": 3.2819442749023438, "learning_rate": 3.809970014992504e-07, "loss": 1.1383, "step": 2034 }, { "epoch": 193.8095238095238, "grad_norm": 6.044792175292969, "learning_rate": 3.81184407796102e-07, "loss": 1.1592, "step": 2035 }, { "epoch": 193.9047619047619, "grad_norm": 3.2246363162994385, "learning_rate": 3.8137181409295355e-07, "loss": 1.1529, "step": 2036 }, { "epoch": 194.0, "grad_norm": 4.810468673706055, "learning_rate": 3.815592203898051e-07, "loss": 1.1537, "step": 2037 }, { "epoch": 194.0, "eval_accuracy": 0.7152491569876358, "eval_f1": 0.7755463673951565, "eval_loss": 0.5744901299476624, "eval_precision": 0.7128121606948968, "eval_recall": 0.8503886010362695, "eval_roc_auc": 0.7617599309153713, "eval_runtime": 1.1555, "eval_samples_per_second": 2309.841, "eval_steps_per_second": 5.193, "step": 2037 }, { "epoch": 194.0952380952381, "grad_norm": 3.0019352436065674, "learning_rate": 3.817466266866567e-07, "loss": 1.1464, "step": 2038 }, { "epoch": 194.1904761904762, "grad_norm": 6.727803707122803, "learning_rate": 3.8193403298350827e-07, "loss": 1.1953, "step": 2039 }, { "epoch": 194.28571428571428, "grad_norm": 6.521770000457764, "learning_rate": 3.821214392803598e-07, "loss": 1.1938, "step": 2040 }, { "epoch": 194.38095238095238, "grad_norm": 5.184866905212402, "learning_rate": 3.8230884557721143e-07, "loss": 1.1416, "step": 2041 }, { "epoch": 194.47619047619048, "grad_norm": 2.739201784133911, "learning_rate": 3.82496251874063e-07, "loss": 1.1729, "step": 2042 }, { "epoch": 194.57142857142858, "grad_norm": 4.125956058502197, "learning_rate": 3.8268365817091454e-07, "loss": 1.1504, "step": 2043 }, { "epoch": 194.66666666666666, "grad_norm": 7.283012390136719, "learning_rate": 3.8287106446776614e-07, "loss": 1.143, "step": 2044 }, { "epoch": 194.76190476190476, "grad_norm": 5.7457122802734375, "learning_rate": 3.830584707646177e-07, "loss": 1.1596, "step": 2045 }, { "epoch": 194.85714285714286, "grad_norm": 3.988215446472168, "learning_rate": 3.832458770614693e-07, "loss": 1.1372, "step": 2046 }, { "epoch": 194.95238095238096, "grad_norm": 3.1475110054016113, "learning_rate": 3.8343328335832086e-07, "loss": 1.1506, "step": 2047 }, { "epoch": 194.95238095238096, "eval_accuracy": 0.7156238291494942, "eval_f1": 0.774844259863542, "eval_loss": 0.5744240283966064, "eval_precision": 0.7148330596606459, "eval_recall": 0.8458549222797928, "eval_roc_auc": 0.7617547495682211, "eval_runtime": 1.1349, "eval_samples_per_second": 2351.667, "eval_steps_per_second": 5.287, "step": 2047 }, { "epoch": 195.04761904761904, "grad_norm": 3.0281283855438232, "learning_rate": 3.8362068965517247e-07, "loss": 1.1494, "step": 2048 }, { "epoch": 195.14285714285714, "grad_norm": 4.342933177947998, "learning_rate": 3.83808095952024e-07, "loss": 1.1916, "step": 2049 }, { "epoch": 195.23809523809524, "grad_norm": 3.2373242378234863, "learning_rate": 3.839955022488756e-07, "loss": 1.1375, "step": 2050 }, { "epoch": 195.33333333333334, "grad_norm": 3.446120262145996, "learning_rate": 3.8418290854572713e-07, "loss": 1.1064, "step": 2051 }, { "epoch": 195.42857142857142, "grad_norm": 2.7731194496154785, "learning_rate": 3.8437031484257874e-07, "loss": 1.1574, "step": 2052 }, { "epoch": 195.52380952380952, "grad_norm": 2.7649197578430176, "learning_rate": 3.845577211394303e-07, "loss": 1.1417, "step": 2053 }, { "epoch": 195.61904761904762, "grad_norm": 6.727789402008057, "learning_rate": 3.8474512743628185e-07, "loss": 1.1949, "step": 2054 }, { "epoch": 195.71428571428572, "grad_norm": 7.244948863983154, "learning_rate": 3.8493253373313346e-07, "loss": 1.1084, "step": 2055 }, { "epoch": 195.8095238095238, "grad_norm": 4.702371120452881, "learning_rate": 3.85119940029985e-07, "loss": 1.1538, "step": 2056 }, { "epoch": 195.9047619047619, "grad_norm": 7.4241557121276855, "learning_rate": 3.853073463268366e-07, "loss": 1.1966, "step": 2057 }, { "epoch": 196.0, "grad_norm": 3.8166346549987793, "learning_rate": 3.8549475262368817e-07, "loss": 1.1731, "step": 2058 }, { "epoch": 196.0, "eval_accuracy": 0.7182465342825028, "eval_f1": 0.7768545994065282, "eval_loss": 0.5741073489189148, "eval_precision": 0.7168674698795181, "eval_recall": 0.8477979274611399, "eval_roc_auc": 0.7620805987334485, "eval_runtime": 1.2068, "eval_samples_per_second": 2211.58, "eval_steps_per_second": 4.972, "step": 2058 }, { "epoch": 196.0952380952381, "grad_norm": 3.794862985610962, "learning_rate": 3.856821589205398e-07, "loss": 1.1682, "step": 2059 }, { "epoch": 196.1904761904762, "grad_norm": 4.032334804534912, "learning_rate": 3.8586956521739133e-07, "loss": 1.1684, "step": 2060 }, { "epoch": 196.28571428571428, "grad_norm": 2.493825674057007, "learning_rate": 3.860569715142429e-07, "loss": 1.1872, "step": 2061 }, { "epoch": 196.38095238095238, "grad_norm": 6.8107171058654785, "learning_rate": 3.862443778110945e-07, "loss": 1.1476, "step": 2062 }, { "epoch": 196.47619047619048, "grad_norm": 5.340165615081787, "learning_rate": 3.8643178410794605e-07, "loss": 1.1115, "step": 2063 }, { "epoch": 196.57142857142858, "grad_norm": 8.906362533569336, "learning_rate": 3.866191904047976e-07, "loss": 1.1214, "step": 2064 }, { "epoch": 196.66666666666666, "grad_norm": 5.633921146392822, "learning_rate": 3.8680659670164916e-07, "loss": 1.1858, "step": 2065 }, { "epoch": 196.76190476190476, "grad_norm": 3.9064207077026367, "learning_rate": 3.8699400299850077e-07, "loss": 1.1631, "step": 2066 }, { "epoch": 196.85714285714286, "grad_norm": 3.3096764087677, "learning_rate": 3.871814092953523e-07, "loss": 1.1868, "step": 2067 }, { "epoch": 196.95238095238096, "grad_norm": 6.722177982330322, "learning_rate": 3.8736881559220393e-07, "loss": 1.1433, "step": 2068 }, { "epoch": 196.95238095238096, "eval_accuracy": 0.7171225177969277, "eval_f1": 0.7756315007429421, "eval_loss": 0.573854386806488, "eval_precision": 0.7166392092257001, "eval_recall": 0.8452072538860104, "eval_roc_auc": 0.7624821531375935, "eval_runtime": 1.3412, "eval_samples_per_second": 1990.066, "eval_steps_per_second": 4.474, "step": 2068 }, { "epoch": 197.04761904761904, "grad_norm": 4.195196628570557, "learning_rate": 3.8755622188905554e-07, "loss": 1.1479, "step": 2069 }, { "epoch": 197.14285714285714, "grad_norm": 9.604462623596191, "learning_rate": 3.877436281859071e-07, "loss": 1.173, "step": 2070 }, { "epoch": 197.23809523809524, "grad_norm": 3.1296794414520264, "learning_rate": 3.8793103448275865e-07, "loss": 1.1208, "step": 2071 }, { "epoch": 197.33333333333334, "grad_norm": 4.265172004699707, "learning_rate": 3.881184407796102e-07, "loss": 1.1395, "step": 2072 }, { "epoch": 197.42857142857142, "grad_norm": 4.306982040405273, "learning_rate": 3.883058470764618e-07, "loss": 1.207, "step": 2073 }, { "epoch": 197.52380952380952, "grad_norm": 2.888141632080078, "learning_rate": 3.8849325337331336e-07, "loss": 1.1417, "step": 2074 }, { "epoch": 197.61904761904762, "grad_norm": 4.370743274688721, "learning_rate": 3.886806596701649e-07, "loss": 1.1533, "step": 2075 }, { "epoch": 197.71428571428572, "grad_norm": 8.061007499694824, "learning_rate": 3.888680659670165e-07, "loss": 1.1919, "step": 2076 }, { "epoch": 197.8095238095238, "grad_norm": 3.11177659034729, "learning_rate": 3.890554722638681e-07, "loss": 1.1521, "step": 2077 }, { "epoch": 197.9047619047619, "grad_norm": 3.6492860317230225, "learning_rate": 3.8924287856071963e-07, "loss": 1.1524, "step": 2078 }, { "epoch": 198.0, "grad_norm": 5.121402740478516, "learning_rate": 3.8943028485757124e-07, "loss": 1.1617, "step": 2079 }, { "epoch": 198.0, "eval_accuracy": 0.7174971899587861, "eval_f1": 0.7763938315539739, "eval_loss": 0.5734204053878784, "eval_precision": 0.7160831509846827, "eval_recall": 0.8477979274611399, "eval_roc_auc": 0.763346862406448, "eval_runtime": 1.1368, "eval_samples_per_second": 2347.743, "eval_steps_per_second": 5.278, "step": 2079 }, { "epoch": 198.0952380952381, "grad_norm": 5.118983745574951, "learning_rate": 3.896176911544228e-07, "loss": 1.1499, "step": 2080 }, { "epoch": 198.1904761904762, "grad_norm": 5.3729400634765625, "learning_rate": 3.898050974512744e-07, "loss": 1.1532, "step": 2081 }, { "epoch": 198.28571428571428, "grad_norm": 3.7796311378479004, "learning_rate": 3.8999250374812596e-07, "loss": 1.1294, "step": 2082 }, { "epoch": 198.38095238095238, "grad_norm": 2.8317065238952637, "learning_rate": 3.9017991004497756e-07, "loss": 1.153, "step": 2083 }, { "epoch": 198.47619047619048, "grad_norm": 6.204058647155762, "learning_rate": 3.903673163418291e-07, "loss": 1.175, "step": 2084 }, { "epoch": 198.57142857142858, "grad_norm": 4.600681781768799, "learning_rate": 3.9055472263868067e-07, "loss": 1.1812, "step": 2085 }, { "epoch": 198.66666666666666, "grad_norm": 3.409691095352173, "learning_rate": 3.9074212893553223e-07, "loss": 1.1537, "step": 2086 }, { "epoch": 198.76190476190476, "grad_norm": 5.143957138061523, "learning_rate": 3.909295352323838e-07, "loss": 1.1778, "step": 2087 }, { "epoch": 198.85714285714286, "grad_norm": 6.1059393882751465, "learning_rate": 3.9111694152923544e-07, "loss": 1.1434, "step": 2088 }, { "epoch": 198.95238095238096, "grad_norm": 5.029159069061279, "learning_rate": 3.91304347826087e-07, "loss": 1.1449, "step": 2089 }, { "epoch": 198.95238095238096, "eval_accuracy": 0.7178718621206445, "eval_f1": 0.7772848269742679, "eval_loss": 0.5733014345169067, "eval_precision": 0.7152966793685357, "eval_recall": 0.8510362694300518, "eval_roc_auc": 0.7635958549222799, "eval_runtime": 1.22, "eval_samples_per_second": 2187.731, "eval_steps_per_second": 4.918, "step": 2089 }, { "epoch": 199.04761904761904, "grad_norm": 4.662865161895752, "learning_rate": 3.9149175412293855e-07, "loss": 1.1717, "step": 2090 }, { "epoch": 199.14285714285714, "grad_norm": 4.18621826171875, "learning_rate": 3.916791604197901e-07, "loss": 1.2011, "step": 2091 }, { "epoch": 199.23809523809524, "grad_norm": 4.175061225891113, "learning_rate": 3.918665667166417e-07, "loss": 1.1323, "step": 2092 }, { "epoch": 199.33333333333334, "grad_norm": 4.9943928718566895, "learning_rate": 3.9205397301349327e-07, "loss": 1.1413, "step": 2093 }, { "epoch": 199.42857142857142, "grad_norm": 4.654736042022705, "learning_rate": 3.922413793103448e-07, "loss": 1.1301, "step": 2094 }, { "epoch": 199.52380952380952, "grad_norm": 4.7878098487854, "learning_rate": 3.924287856071965e-07, "loss": 1.1916, "step": 2095 }, { "epoch": 199.61904761904762, "grad_norm": 4.815639495849609, "learning_rate": 3.9261619190404804e-07, "loss": 1.2155, "step": 2096 }, { "epoch": 199.71428571428572, "grad_norm": 4.483081817626953, "learning_rate": 3.928035982008996e-07, "loss": 1.1616, "step": 2097 }, { "epoch": 199.8095238095238, "grad_norm": 3.760834217071533, "learning_rate": 3.9299100449775115e-07, "loss": 1.1406, "step": 2098 }, { "epoch": 199.9047619047619, "grad_norm": 4.239285945892334, "learning_rate": 3.931784107946027e-07, "loss": 1.1427, "step": 2099 }, { "epoch": 200.0, "grad_norm": 3.784797430038452, "learning_rate": 3.9336581709145425e-07, "loss": 1.1143, "step": 2100 }, { "epoch": 200.0, "eval_accuracy": 0.7174971899587861, "eval_f1": 0.7781047675103002, "eval_loss": 0.5735935568809509, "eval_precision": 0.7130528586839266, "eval_recall": 0.8562176165803109, "eval_roc_auc": 0.7635814622913067, "eval_runtime": 1.1863, "eval_samples_per_second": 2249.924, "eval_steps_per_second": 5.058, "step": 2100 }, { "epoch": 200.0952380952381, "grad_norm": 4.269444942474365, "learning_rate": 3.935532233883058e-07, "loss": 1.1332, "step": 2101 }, { "epoch": 200.1904761904762, "grad_norm": 9.206201553344727, "learning_rate": 3.9374062968515747e-07, "loss": 1.1922, "step": 2102 }, { "epoch": 200.28571428571428, "grad_norm": 4.745060443878174, "learning_rate": 3.93928035982009e-07, "loss": 1.0933, "step": 2103 }, { "epoch": 200.38095238095238, "grad_norm": 2.75970458984375, "learning_rate": 3.9411544227886063e-07, "loss": 1.209, "step": 2104 }, { "epoch": 200.47619047619048, "grad_norm": 7.039388179779053, "learning_rate": 3.943028485757122e-07, "loss": 1.1453, "step": 2105 }, { "epoch": 200.57142857142858, "grad_norm": 2.3920702934265137, "learning_rate": 3.9449025487256374e-07, "loss": 1.1373, "step": 2106 }, { "epoch": 200.66666666666666, "grad_norm": 3.3662586212158203, "learning_rate": 3.946776611694153e-07, "loss": 1.1773, "step": 2107 }, { "epoch": 200.76190476190476, "grad_norm": 3.275402784347534, "learning_rate": 3.9486506746626685e-07, "loss": 1.1561, "step": 2108 }, { "epoch": 200.85714285714286, "grad_norm": 2.953057289123535, "learning_rate": 3.950524737631185e-07, "loss": 1.1738, "step": 2109 }, { "epoch": 200.95238095238096, "grad_norm": 2.5332555770874023, "learning_rate": 3.9523988005997006e-07, "loss": 1.1239, "step": 2110 }, { "epoch": 200.95238095238096, "eval_accuracy": 0.7174971899587861, "eval_f1": 0.7766587677725119, "eval_loss": 0.573513925075531, "eval_precision": 0.7156113537117904, "eval_recall": 0.8490932642487047, "eval_roc_auc": 0.7633212435233161, "eval_runtime": 1.2693, "eval_samples_per_second": 2102.79, "eval_steps_per_second": 4.727, "step": 2110 }, { "epoch": 201.04761904761904, "grad_norm": 3.6361196041107178, "learning_rate": 3.954272863568216e-07, "loss": 1.1166, "step": 2111 }, { "epoch": 201.14285714285714, "grad_norm": 1.9977929592132568, "learning_rate": 3.9561469265367317e-07, "loss": 1.1666, "step": 2112 }, { "epoch": 201.23809523809524, "grad_norm": 3.9000375270843506, "learning_rate": 3.9580209895052473e-07, "loss": 1.1772, "step": 2113 }, { "epoch": 201.33333333333334, "grad_norm": 5.172115802764893, "learning_rate": 3.9598950524737633e-07, "loss": 1.1216, "step": 2114 }, { "epoch": 201.42857142857142, "grad_norm": 4.378371238708496, "learning_rate": 3.961769115442279e-07, "loss": 1.1377, "step": 2115 }, { "epoch": 201.52380952380952, "grad_norm": 3.344632863998413, "learning_rate": 3.963643178410795e-07, "loss": 1.149, "step": 2116 }, { "epoch": 201.61904761904762, "grad_norm": 6.246377944946289, "learning_rate": 3.965517241379311e-07, "loss": 1.1658, "step": 2117 }, { "epoch": 201.71428571428572, "grad_norm": 5.464715003967285, "learning_rate": 3.9673913043478266e-07, "loss": 1.1611, "step": 2118 }, { "epoch": 201.8095238095238, "grad_norm": 8.447710990905762, "learning_rate": 3.969265367316342e-07, "loss": 1.1469, "step": 2119 }, { "epoch": 201.9047619047619, "grad_norm": 5.849113464355469, "learning_rate": 3.9711394302848577e-07, "loss": 1.1664, "step": 2120 }, { "epoch": 202.0, "grad_norm": 2.994105339050293, "learning_rate": 3.973013493253373e-07, "loss": 1.1456, "step": 2121 }, { "epoch": 202.0, "eval_accuracy": 0.7178718621206445, "eval_f1": 0.7755588673621461, "eval_loss": 0.573491096496582, "eval_precision": 0.718387631143015, "eval_recall": 0.8426165803108808, "eval_roc_auc": 0.7629887737478411, "eval_runtime": 1.1257, "eval_samples_per_second": 2370.919, "eval_steps_per_second": 5.33, "step": 2121 }, { "epoch": 202.0952380952381, "grad_norm": 9.809347152709961, "learning_rate": 3.974887556221889e-07, "loss": 1.1572, "step": 2122 }, { "epoch": 202.1904761904762, "grad_norm": 3.824449300765991, "learning_rate": 3.9767616191904054e-07, "loss": 1.1721, "step": 2123 }, { "epoch": 202.28571428571428, "grad_norm": 3.7709100246429443, "learning_rate": 3.978635682158921e-07, "loss": 1.1496, "step": 2124 }, { "epoch": 202.38095238095238, "grad_norm": 8.865727424621582, "learning_rate": 3.9805097451274365e-07, "loss": 1.1732, "step": 2125 }, { "epoch": 202.47619047619048, "grad_norm": 6.136362552642822, "learning_rate": 3.982383808095952e-07, "loss": 1.1341, "step": 2126 }, { "epoch": 202.57142857142858, "grad_norm": 4.306957244873047, "learning_rate": 3.984257871064468e-07, "loss": 1.1121, "step": 2127 }, { "epoch": 202.66666666666666, "grad_norm": 4.527181148529053, "learning_rate": 3.9861319340329836e-07, "loss": 1.1743, "step": 2128 }, { "epoch": 202.76190476190476, "grad_norm": 5.983310222625732, "learning_rate": 3.988005997001499e-07, "loss": 1.1754, "step": 2129 }, { "epoch": 202.85714285714286, "grad_norm": 4.577535629272461, "learning_rate": 3.989880059970016e-07, "loss": 1.1163, "step": 2130 }, { "epoch": 202.95238095238096, "grad_norm": 5.112128257751465, "learning_rate": 3.9917541229385313e-07, "loss": 1.1802, "step": 2131 }, { "epoch": 202.95238095238096, "eval_accuracy": 0.7178718621206445, "eval_f1": 0.7783338239623196, "eval_loss": 0.5732585191726685, "eval_precision": 0.7134376686454398, "eval_recall": 0.8562176165803109, "eval_roc_auc": 0.7635374208405297, "eval_runtime": 1.2737, "eval_samples_per_second": 2095.482, "eval_steps_per_second": 4.711, "step": 2131 }, { "epoch": 203.04761904761904, "grad_norm": 5.311213970184326, "learning_rate": 3.993628185907047e-07, "loss": 1.1193, "step": 2132 }, { "epoch": 203.14285714285714, "grad_norm": 4.955393314361572, "learning_rate": 3.9955022488755624e-07, "loss": 1.1466, "step": 2133 }, { "epoch": 203.23809523809524, "grad_norm": 4.441088676452637, "learning_rate": 3.997376311844078e-07, "loss": 1.1374, "step": 2134 }, { "epoch": 203.33333333333334, "grad_norm": 7.389732837677002, "learning_rate": 3.9992503748125935e-07, "loss": 1.1618, "step": 2135 }, { "epoch": 203.42857142857142, "grad_norm": 4.27824592590332, "learning_rate": 4.001124437781109e-07, "loss": 1.1846, "step": 2136 }, { "epoch": 203.52380952380952, "grad_norm": 3.7284393310546875, "learning_rate": 4.0029985007496256e-07, "loss": 1.1396, "step": 2137 }, { "epoch": 203.61904761904762, "grad_norm": 3.8060035705566406, "learning_rate": 4.004872563718141e-07, "loss": 1.1884, "step": 2138 }, { "epoch": 203.71428571428572, "grad_norm": 5.725419521331787, "learning_rate": 4.006746626686657e-07, "loss": 1.1493, "step": 2139 }, { "epoch": 203.8095238095238, "grad_norm": 5.268664836883545, "learning_rate": 4.008620689655173e-07, "loss": 1.1579, "step": 2140 }, { "epoch": 203.9047619047619, "grad_norm": 2.8941152095794678, "learning_rate": 4.0104947526236884e-07, "loss": 1.1393, "step": 2141 }, { "epoch": 204.0, "grad_norm": 8.16684627532959, "learning_rate": 4.012368815592204e-07, "loss": 1.1049, "step": 2142 }, { "epoch": 204.0, "eval_accuracy": 0.7182465342825028, "eval_f1": 0.7764565992865636, "eval_loss": 0.5726208090782166, "eval_precision": 0.7175824175824176, "eval_recall": 0.8458549222797928, "eval_roc_auc": 0.7639162348877375, "eval_runtime": 1.2147, "eval_samples_per_second": 2197.252, "eval_steps_per_second": 4.939, "step": 2142 }, { "epoch": 204.0952380952381, "grad_norm": 6.156484603881836, "learning_rate": 4.0142428785607194e-07, "loss": 1.2162, "step": 2143 }, { "epoch": 204.1904761904762, "grad_norm": 4.6590681076049805, "learning_rate": 4.016116941529236e-07, "loss": 1.1895, "step": 2144 }, { "epoch": 204.28571428571428, "grad_norm": 4.852943420410156, "learning_rate": 4.0179910044977516e-07, "loss": 1.1787, "step": 2145 }, { "epoch": 204.38095238095238, "grad_norm": 5.074846267700195, "learning_rate": 4.019865067466267e-07, "loss": 1.1044, "step": 2146 }, { "epoch": 204.47619047619048, "grad_norm": 5.913023471832275, "learning_rate": 4.0217391304347827e-07, "loss": 1.1562, "step": 2147 }, { "epoch": 204.57142857142858, "grad_norm": 4.1200971603393555, "learning_rate": 4.023613193403298e-07, "loss": 1.1657, "step": 2148 }, { "epoch": 204.66666666666666, "grad_norm": 6.686506271362305, "learning_rate": 4.0254872563718143e-07, "loss": 1.1429, "step": 2149 }, { "epoch": 204.76190476190476, "grad_norm": 4.119016647338867, "learning_rate": 4.02736131934033e-07, "loss": 1.1531, "step": 2150 }, { "epoch": 204.85714285714286, "grad_norm": 6.244527339935303, "learning_rate": 4.029235382308846e-07, "loss": 1.1425, "step": 2151 }, { "epoch": 204.95238095238096, "grad_norm": 9.093226432800293, "learning_rate": 4.031109445277362e-07, "loss": 1.1276, "step": 2152 }, { "epoch": 204.95238095238096, "eval_accuracy": 0.7174971899587861, "eval_f1": 0.7767910005920663, "eval_loss": 0.5725477933883667, "eval_precision": 0.7153762268266085, "eval_recall": 0.8497409326424871, "eval_roc_auc": 0.76413298791019, "eval_runtime": 1.1696, "eval_samples_per_second": 2282.001, "eval_steps_per_second": 5.13, "step": 2152 }, { "epoch": 205.04761904761904, "grad_norm": 5.017244815826416, "learning_rate": 4.0329835082458775e-07, "loss": 1.1546, "step": 2153 }, { "epoch": 205.14285714285714, "grad_norm": 8.460247039794922, "learning_rate": 4.034857571214393e-07, "loss": 1.1757, "step": 2154 }, { "epoch": 205.23809523809524, "grad_norm": 4.2112908363342285, "learning_rate": 4.0367316341829086e-07, "loss": 1.1581, "step": 2155 }, { "epoch": 205.33333333333334, "grad_norm": 6.3861236572265625, "learning_rate": 4.038605697151424e-07, "loss": 1.1644, "step": 2156 }, { "epoch": 205.42857142857142, "grad_norm": 3.080798387527466, "learning_rate": 4.0404797601199397e-07, "loss": 1.1655, "step": 2157 }, { "epoch": 205.52380952380952, "grad_norm": 2.398893356323242, "learning_rate": 4.0423538230884563e-07, "loss": 1.162, "step": 2158 }, { "epoch": 205.61904761904762, "grad_norm": 3.430669069290161, "learning_rate": 4.044227886056972e-07, "loss": 1.1521, "step": 2159 }, { "epoch": 205.71428571428572, "grad_norm": 7.12416934967041, "learning_rate": 4.0461019490254874e-07, "loss": 1.1749, "step": 2160 }, { "epoch": 205.8095238095238, "grad_norm": 5.870452404022217, "learning_rate": 4.047976011994003e-07, "loss": 1.1205, "step": 2161 }, { "epoch": 205.9047619047619, "grad_norm": 4.407711982727051, "learning_rate": 4.049850074962519e-07, "loss": 1.1705, "step": 2162 }, { "epoch": 206.0, "grad_norm": 9.45398235321045, "learning_rate": 4.0517241379310346e-07, "loss": 1.0999, "step": 2163 }, { "epoch": 206.0, "eval_accuracy": 0.716373173473211, "eval_f1": 0.7738273080370481, "eval_loss": 0.5725975036621094, "eval_precision": 0.7182473655019412, "eval_recall": 0.8387305699481865, "eval_roc_auc": 0.7642725964306275, "eval_runtime": 1.1717, "eval_samples_per_second": 2277.879, "eval_steps_per_second": 5.121, "step": 2163 }, { "epoch": 206.0952380952381, "grad_norm": 3.9617202281951904, "learning_rate": 4.05359820089955e-07, "loss": 1.1539, "step": 2164 }, { "epoch": 206.1904761904762, "grad_norm": 4.726264476776123, "learning_rate": 4.0554722638680667e-07, "loss": 1.1628, "step": 2165 }, { "epoch": 206.28571428571428, "grad_norm": 4.4896697998046875, "learning_rate": 4.0573463268365823e-07, "loss": 1.1192, "step": 2166 }, { "epoch": 206.38095238095238, "grad_norm": 7.291873931884766, "learning_rate": 4.059220389805098e-07, "loss": 1.1721, "step": 2167 }, { "epoch": 206.47619047619048, "grad_norm": 7.347695827484131, "learning_rate": 4.0610944527736134e-07, "loss": 1.0977, "step": 2168 }, { "epoch": 206.57142857142858, "grad_norm": 6.2318572998046875, "learning_rate": 4.062968515742129e-07, "loss": 1.1896, "step": 2169 }, { "epoch": 206.66666666666666, "grad_norm": 12.115691184997559, "learning_rate": 4.0648425787106444e-07, "loss": 1.1579, "step": 2170 }, { "epoch": 206.76190476190476, "grad_norm": 3.2223641872406006, "learning_rate": 4.0667166416791605e-07, "loss": 1.1495, "step": 2171 }, { "epoch": 206.85714285714286, "grad_norm": 8.225313186645508, "learning_rate": 4.0685907046476766e-07, "loss": 1.1384, "step": 2172 }, { "epoch": 206.95238095238096, "grad_norm": 11.196160316467285, "learning_rate": 4.070464767616192e-07, "loss": 1.1882, "step": 2173 }, { "epoch": 206.95238095238096, "eval_accuracy": 0.7178718621206445, "eval_f1": 0.7780725022104332, "eval_loss": 0.5724543333053589, "eval_precision": 0.7138994050838291, "eval_recall": 0.8549222797927462, "eval_roc_auc": 0.7647561888313184, "eval_runtime": 1.166, "eval_samples_per_second": 2288.928, "eval_steps_per_second": 5.146, "step": 2173 }, { "epoch": 207.04761904761904, "grad_norm": 4.099627494812012, "learning_rate": 4.072338830584708e-07, "loss": 1.1492, "step": 2174 }, { "epoch": 207.14285714285714, "grad_norm": 7.178406238555908, "learning_rate": 4.074212893553224e-07, "loss": 1.1689, "step": 2175 }, { "epoch": 207.23809523809524, "grad_norm": 3.8665335178375244, "learning_rate": 4.0760869565217393e-07, "loss": 1.1435, "step": 2176 }, { "epoch": 207.33333333333334, "grad_norm": 5.064719200134277, "learning_rate": 4.077961019490255e-07, "loss": 1.1111, "step": 2177 }, { "epoch": 207.42857142857142, "grad_norm": 6.1857147216796875, "learning_rate": 4.0798350824587704e-07, "loss": 1.1411, "step": 2178 }, { "epoch": 207.52380952380952, "grad_norm": 6.367858409881592, "learning_rate": 4.081709145427287e-07, "loss": 1.1286, "step": 2179 }, { "epoch": 207.61904761904762, "grad_norm": 12.050115585327148, "learning_rate": 4.0835832083958025e-07, "loss": 1.2575, "step": 2180 }, { "epoch": 207.71428571428572, "grad_norm": 8.755186080932617, "learning_rate": 4.085457271364318e-07, "loss": 1.1639, "step": 2181 }, { "epoch": 207.8095238095238, "grad_norm": 5.138454914093018, "learning_rate": 4.0873313343328336e-07, "loss": 1.1696, "step": 2182 }, { "epoch": 207.9047619047619, "grad_norm": 5.218094348907471, "learning_rate": 4.089205397301349e-07, "loss": 1.1381, "step": 2183 }, { "epoch": 208.0, "grad_norm": 8.238438606262207, "learning_rate": 4.091079460269865e-07, "loss": 1.1741, "step": 2184 }, { "epoch": 208.0, "eval_accuracy": 0.7174971899587861, "eval_f1": 0.7766587677725119, "eval_loss": 0.5723700523376465, "eval_precision": 0.7156113537117904, "eval_recall": 0.8490932642487047, "eval_roc_auc": 0.7646459412780657, "eval_runtime": 1.3691, "eval_samples_per_second": 1949.417, "eval_steps_per_second": 4.382, "step": 2184 }, { "epoch": 208.0952380952381, "grad_norm": 4.4855804443359375, "learning_rate": 4.092953523238381e-07, "loss": 1.18, "step": 2185 }, { "epoch": 208.1904761904762, "grad_norm": 4.409901142120361, "learning_rate": 4.094827586206897e-07, "loss": 1.1369, "step": 2186 }, { "epoch": 208.28571428571428, "grad_norm": 3.015300750732422, "learning_rate": 4.096701649175413e-07, "loss": 1.1921, "step": 2187 }, { "epoch": 208.38095238095238, "grad_norm": 5.867238521575928, "learning_rate": 4.0985757121439285e-07, "loss": 1.1966, "step": 2188 }, { "epoch": 208.47619047619048, "grad_norm": 9.443415641784668, "learning_rate": 4.100449775112444e-07, "loss": 1.1324, "step": 2189 }, { "epoch": 208.57142857142858, "grad_norm": 5.5111589431762695, "learning_rate": 4.1023238380809596e-07, "loss": 1.1466, "step": 2190 }, { "epoch": 208.66666666666666, "grad_norm": 10.333208084106445, "learning_rate": 4.104197901049475e-07, "loss": 1.1346, "step": 2191 }, { "epoch": 208.76190476190476, "grad_norm": 7.5379462242126465, "learning_rate": 4.1060719640179907e-07, "loss": 1.1363, "step": 2192 }, { "epoch": 208.85714285714286, "grad_norm": 3.4411845207214355, "learning_rate": 4.1079460269865073e-07, "loss": 1.136, "step": 2193 }, { "epoch": 208.95238095238096, "grad_norm": 10.017407417297363, "learning_rate": 4.109820089955023e-07, "loss": 1.1699, "step": 2194 }, { "epoch": 208.95238095238096, "eval_accuracy": 0.716373173473211, "eval_f1": 0.7768936044798114, "eval_loss": 0.5722417235374451, "eval_precision": 0.7128177393185505, "eval_recall": 0.8536269430051814, "eval_roc_auc": 0.7651404720782958, "eval_runtime": 1.1633, "eval_samples_per_second": 2294.263, "eval_steps_per_second": 5.158, "step": 2194 }, { "epoch": 209.04761904761904, "grad_norm": 8.25883960723877, "learning_rate": 4.1116941529235384e-07, "loss": 1.1607, "step": 2195 }, { "epoch": 209.14285714285714, "grad_norm": 4.923328876495361, "learning_rate": 4.1135682158920544e-07, "loss": 1.1483, "step": 2196 }, { "epoch": 209.23809523809524, "grad_norm": 3.406498670578003, "learning_rate": 4.11544227886057e-07, "loss": 1.1324, "step": 2197 }, { "epoch": 209.33333333333334, "grad_norm": 3.716740131378174, "learning_rate": 4.1173163418290855e-07, "loss": 1.1369, "step": 2198 }, { "epoch": 209.42857142857142, "grad_norm": 10.365269660949707, "learning_rate": 4.119190404797601e-07, "loss": 1.1487, "step": 2199 }, { "epoch": 209.52380952380952, "grad_norm": 2.718311309814453, "learning_rate": 4.1210644677661177e-07, "loss": 1.1306, "step": 2200 }, { "epoch": 209.61904761904762, "grad_norm": 9.89725112915039, "learning_rate": 4.122938530734633e-07, "loss": 1.1405, "step": 2201 }, { "epoch": 209.71428571428572, "grad_norm": 3.5114660263061523, "learning_rate": 4.124812593703149e-07, "loss": 1.1697, "step": 2202 }, { "epoch": 209.8095238095238, "grad_norm": 10.346630096435547, "learning_rate": 4.1266866566716643e-07, "loss": 1.2071, "step": 2203 }, { "epoch": 209.9047619047619, "grad_norm": 9.676673889160156, "learning_rate": 4.12856071964018e-07, "loss": 1.2243, "step": 2204 }, { "epoch": 210.0, "grad_norm": 7.612486839294434, "learning_rate": 4.1304347826086954e-07, "loss": 1.1319, "step": 2205 }, { "epoch": 210.0, "eval_accuracy": 0.7171225177969277, "eval_f1": 0.7761636525348354, "eval_loss": 0.5721729397773743, "eval_precision": 0.7156916347731, "eval_recall": 0.8477979274611399, "eval_roc_auc": 0.7652625215889464, "eval_runtime": 1.1999, "eval_samples_per_second": 2224.387, "eval_steps_per_second": 5.0, "step": 2205 }, { "epoch": 210.0952380952381, "grad_norm": 3.142979383468628, "learning_rate": 4.1323088455772115e-07, "loss": 1.1685, "step": 2206 }, { "epoch": 210.1904761904762, "grad_norm": 4.474756240844727, "learning_rate": 4.1341829085457275e-07, "loss": 1.1643, "step": 2207 }, { "epoch": 210.28571428571428, "grad_norm": 3.771099805831909, "learning_rate": 4.136056971514243e-07, "loss": 1.1463, "step": 2208 }, { "epoch": 210.38095238095238, "grad_norm": 7.966793060302734, "learning_rate": 4.137931034482759e-07, "loss": 1.0928, "step": 2209 }, { "epoch": 210.47619047619048, "grad_norm": 3.9173672199249268, "learning_rate": 4.1398050974512747e-07, "loss": 1.2006, "step": 2210 }, { "epoch": 210.57142857142858, "grad_norm": 4.2888875007629395, "learning_rate": 4.14167916041979e-07, "loss": 1.1902, "step": 2211 }, { "epoch": 210.66666666666666, "grad_norm": 5.334024906158447, "learning_rate": 4.143553223388306e-07, "loss": 1.1327, "step": 2212 }, { "epoch": 210.76190476190476, "grad_norm": 3.1423180103302, "learning_rate": 4.1454272863568213e-07, "loss": 1.1423, "step": 2213 }, { "epoch": 210.85714285714286, "grad_norm": 3.1354405879974365, "learning_rate": 4.147301349325338e-07, "loss": 1.1604, "step": 2214 }, { "epoch": 210.95238095238096, "grad_norm": 5.210052490234375, "learning_rate": 4.1491754122938535e-07, "loss": 1.1864, "step": 2215 }, { "epoch": 210.95238095238096, "eval_accuracy": 0.7178718621206445, "eval_f1": 0.776092774308653, "eval_loss": 0.5724236965179443, "eval_precision": 0.7174271577789995, "eval_recall": 0.8452072538860104, "eval_roc_auc": 0.7650028785261946, "eval_runtime": 1.1778, "eval_samples_per_second": 2266.118, "eval_steps_per_second": 5.094, "step": 2215 }, { "epoch": 211.04761904761904, "grad_norm": 7.045891761779785, "learning_rate": 4.151049475262369e-07, "loss": 1.1503, "step": 2216 }, { "epoch": 211.14285714285714, "grad_norm": 3.449984550476074, "learning_rate": 4.1529235382308846e-07, "loss": 1.1661, "step": 2217 }, { "epoch": 211.23809523809524, "grad_norm": 4.770975589752197, "learning_rate": 4.1547976011994e-07, "loss": 1.205, "step": 2218 }, { "epoch": 211.33333333333334, "grad_norm": 6.622992515563965, "learning_rate": 4.156671664167916e-07, "loss": 1.0991, "step": 2219 }, { "epoch": 211.42857142857142, "grad_norm": 5.019271373748779, "learning_rate": 4.158545727136432e-07, "loss": 1.1856, "step": 2220 }, { "epoch": 211.52380952380952, "grad_norm": 3.0830953121185303, "learning_rate": 4.160419790104948e-07, "loss": 1.1469, "step": 2221 }, { "epoch": 211.61904761904762, "grad_norm": 3.6828019618988037, "learning_rate": 4.162293853073464e-07, "loss": 1.161, "step": 2222 }, { "epoch": 211.71428571428572, "grad_norm": 3.6262388229370117, "learning_rate": 4.1641679160419794e-07, "loss": 1.1006, "step": 2223 }, { "epoch": 211.8095238095238, "grad_norm": 4.846446990966797, "learning_rate": 4.166041979010495e-07, "loss": 1.1494, "step": 2224 }, { "epoch": 211.9047619047619, "grad_norm": 3.820887804031372, "learning_rate": 4.1679160419790105e-07, "loss": 1.151, "step": 2225 }, { "epoch": 212.0, "grad_norm": 3.8886525630950928, "learning_rate": 4.169790104947526e-07, "loss": 1.1847, "step": 2226 }, { "epoch": 212.0, "eval_accuracy": 0.7174971899587861, "eval_f1": 0.7777122641509434, "eval_loss": 0.5727176666259766, "eval_precision": 0.7137445887445888, "eval_recall": 0.8542746113989638, "eval_roc_auc": 0.7648865860679332, "eval_runtime": 1.2503, "eval_samples_per_second": 2134.72, "eval_steps_per_second": 4.799, "step": 2226 }, { "epoch": 212.0952380952381, "grad_norm": 8.59073543548584, "learning_rate": 4.1716641679160416e-07, "loss": 1.1629, "step": 2227 }, { "epoch": 212.1904761904762, "grad_norm": 2.888920783996582, "learning_rate": 4.173538230884558e-07, "loss": 1.127, "step": 2228 }, { "epoch": 212.28571428571428, "grad_norm": 5.506545066833496, "learning_rate": 4.175412293853074e-07, "loss": 1.1072, "step": 2229 }, { "epoch": 212.38095238095238, "grad_norm": 4.807139873504639, "learning_rate": 4.1772863568215893e-07, "loss": 1.1414, "step": 2230 }, { "epoch": 212.47619047619048, "grad_norm": 6.0634965896606445, "learning_rate": 4.1791604197901054e-07, "loss": 1.1252, "step": 2231 }, { "epoch": 212.57142857142858, "grad_norm": 4.370108127593994, "learning_rate": 4.181034482758621e-07, "loss": 1.2013, "step": 2232 }, { "epoch": 212.66666666666666, "grad_norm": 4.020362377166748, "learning_rate": 4.1829085457271365e-07, "loss": 1.1788, "step": 2233 }, { "epoch": 212.76190476190476, "grad_norm": 4.844730854034424, "learning_rate": 4.184782608695652e-07, "loss": 1.1873, "step": 2234 }, { "epoch": 212.85714285714286, "grad_norm": 6.405012607574463, "learning_rate": 4.1866566716641686e-07, "loss": 1.1471, "step": 2235 }, { "epoch": 212.95238095238096, "grad_norm": 7.924243927001953, "learning_rate": 4.188530734632684e-07, "loss": 1.1486, "step": 2236 }, { "epoch": 212.95238095238096, "eval_accuracy": 0.7178718621206445, "eval_f1": 0.776624147137348, "eval_loss": 0.5724982023239136, "eval_precision": 0.7164750957854407, "eval_recall": 0.8477979274611399, "eval_roc_auc": 0.7647668393782383, "eval_runtime": 1.1569, "eval_samples_per_second": 2306.976, "eval_steps_per_second": 5.186, "step": 2236 }, { "epoch": 213.04761904761904, "grad_norm": 7.97761344909668, "learning_rate": 4.1904047976011997e-07, "loss": 1.1542, "step": 2237 }, { "epoch": 213.14285714285714, "grad_norm": 2.720165252685547, "learning_rate": 4.192278860569715e-07, "loss": 1.1536, "step": 2238 }, { "epoch": 213.23809523809524, "grad_norm": 2.4307806491851807, "learning_rate": 4.194152923538231e-07, "loss": 1.1604, "step": 2239 }, { "epoch": 213.33333333333334, "grad_norm": 10.833077430725098, "learning_rate": 4.1960269865067463e-07, "loss": 1.1664, "step": 2240 }, { "epoch": 213.42857142857142, "grad_norm": 6.367500305175781, "learning_rate": 4.1979010494752624e-07, "loss": 1.2025, "step": 2241 }, { "epoch": 213.52380952380952, "grad_norm": 2.9368419647216797, "learning_rate": 4.1997751124437785e-07, "loss": 1.1574, "step": 2242 }, { "epoch": 213.61904761904762, "grad_norm": 4.824771881103516, "learning_rate": 4.201649175412294e-07, "loss": 1.1246, "step": 2243 }, { "epoch": 213.71428571428572, "grad_norm": 7.3083696365356445, "learning_rate": 4.20352323838081e-07, "loss": 1.1425, "step": 2244 }, { "epoch": 213.8095238095238, "grad_norm": 4.5281453132629395, "learning_rate": 4.2053973013493257e-07, "loss": 1.1185, "step": 2245 }, { "epoch": 213.9047619047619, "grad_norm": 5.061934947967529, "learning_rate": 4.207271364317841e-07, "loss": 1.1074, "step": 2246 }, { "epoch": 214.0, "grad_norm": 5.4383673667907715, "learning_rate": 4.209145427286357e-07, "loss": 1.1617, "step": 2247 }, { "epoch": 214.0, "eval_accuracy": 0.7171225177969277, "eval_f1": 0.775897892549718, "eval_loss": 0.5720090270042419, "eval_precision": 0.7161643835616438, "eval_recall": 0.8465025906735751, "eval_roc_auc": 0.7653572251007483, "eval_runtime": 1.1624, "eval_samples_per_second": 2296.067, "eval_steps_per_second": 5.162, "step": 2247 }, { "epoch": 214.0952380952381, "grad_norm": 3.1440367698669434, "learning_rate": 4.2110194902548723e-07, "loss": 1.1204, "step": 2248 }, { "epoch": 214.1904761904762, "grad_norm": 3.43729567527771, "learning_rate": 4.212893553223389e-07, "loss": 1.1387, "step": 2249 }, { "epoch": 214.28571428571428, "grad_norm": 6.375138282775879, "learning_rate": 4.2147676161919044e-07, "loss": 1.1185, "step": 2250 }, { "epoch": 214.38095238095238, "grad_norm": 6.120760917663574, "learning_rate": 4.21664167916042e-07, "loss": 1.1553, "step": 2251 }, { "epoch": 214.47619047619048, "grad_norm": 8.098929405212402, "learning_rate": 4.2185157421289355e-07, "loss": 1.1866, "step": 2252 }, { "epoch": 214.57142857142858, "grad_norm": 5.230090141296387, "learning_rate": 4.220389805097451e-07, "loss": 1.2084, "step": 2253 }, { "epoch": 214.66666666666666, "grad_norm": 5.517597198486328, "learning_rate": 4.222263868065967e-07, "loss": 1.1486, "step": 2254 }, { "epoch": 214.76190476190476, "grad_norm": 7.162447452545166, "learning_rate": 4.2241379310344827e-07, "loss": 1.1461, "step": 2255 }, { "epoch": 214.85714285714286, "grad_norm": 4.703765392303467, "learning_rate": 4.2260119940029993e-07, "loss": 1.1885, "step": 2256 }, { "epoch": 214.95238095238096, "grad_norm": 3.9778876304626465, "learning_rate": 4.227886056971515e-07, "loss": 1.1438, "step": 2257 }, { "epoch": 214.95238095238096, "eval_accuracy": 0.7182465342825028, "eval_f1": 0.7777777777777778, "eval_loss": 0.57195645570755, "eval_precision": 0.7152173913043478, "eval_recall": 0.8523316062176166, "eval_roc_auc": 0.7654749568221071, "eval_runtime": 1.2576, "eval_samples_per_second": 2122.364, "eval_steps_per_second": 4.771, "step": 2257 }, { "epoch": 215.04761904761904, "grad_norm": 4.2935051918029785, "learning_rate": 4.2297601199400304e-07, "loss": 1.1877, "step": 2258 }, { "epoch": 215.14285714285714, "grad_norm": 7.638199806213379, "learning_rate": 4.231634182908546e-07, "loss": 1.116, "step": 2259 }, { "epoch": 215.23809523809524, "grad_norm": 4.469337463378906, "learning_rate": 4.2335082458770615e-07, "loss": 1.169, "step": 2260 }, { "epoch": 215.33333333333334, "grad_norm": 4.260531902313232, "learning_rate": 4.235382308845577e-07, "loss": 1.1045, "step": 2261 }, { "epoch": 215.42857142857142, "grad_norm": 5.3834638595581055, "learning_rate": 4.2372563718140926e-07, "loss": 1.0919, "step": 2262 }, { "epoch": 215.52380952380952, "grad_norm": 7.487053871154785, "learning_rate": 4.239130434782609e-07, "loss": 1.1892, "step": 2263 }, { "epoch": 215.61904761904762, "grad_norm": 8.624944686889648, "learning_rate": 4.2410044977511247e-07, "loss": 1.152, "step": 2264 }, { "epoch": 215.71428571428572, "grad_norm": 4.9617438316345215, "learning_rate": 4.24287856071964e-07, "loss": 1.1661, "step": 2265 }, { "epoch": 215.8095238095238, "grad_norm": 4.929933071136475, "learning_rate": 4.2447526236881563e-07, "loss": 1.1726, "step": 2266 }, { "epoch": 215.9047619047619, "grad_norm": 3.8353214263916016, "learning_rate": 4.246626686656672e-07, "loss": 1.1477, "step": 2267 }, { "epoch": 216.0, "grad_norm": 3.108581304550171, "learning_rate": 4.2485007496251874e-07, "loss": 1.184, "step": 2268 }, { "epoch": 216.0, "eval_accuracy": 0.716373173473211, "eval_f1": 0.7761017450458444, "eval_loss": 0.5715641975402832, "eval_precision": 0.7142079477408819, "eval_recall": 0.8497409326424871, "eval_roc_auc": 0.7657878526194588, "eval_runtime": 1.2948, "eval_samples_per_second": 2061.335, "eval_steps_per_second": 4.634, "step": 2268 }, { "epoch": 216.0952380952381, "grad_norm": 6.062177658081055, "learning_rate": 4.250374812593703e-07, "loss": 1.1456, "step": 2269 }, { "epoch": 216.1904761904762, "grad_norm": 6.099572658538818, "learning_rate": 4.2522488755622196e-07, "loss": 1.1492, "step": 2270 }, { "epoch": 216.28571428571428, "grad_norm": 7.1180291175842285, "learning_rate": 4.254122938530735e-07, "loss": 1.1307, "step": 2271 }, { "epoch": 216.38095238095238, "grad_norm": 3.0259740352630615, "learning_rate": 4.2559970014992507e-07, "loss": 1.1654, "step": 2272 }, { "epoch": 216.47619047619048, "grad_norm": 4.901364803314209, "learning_rate": 4.257871064467766e-07, "loss": 1.1515, "step": 2273 }, { "epoch": 216.57142857142858, "grad_norm": 4.846545696258545, "learning_rate": 4.259745127436282e-07, "loss": 1.1758, "step": 2274 }, { "epoch": 216.66666666666666, "grad_norm": 8.551472663879395, "learning_rate": 4.2616191904047973e-07, "loss": 1.1208, "step": 2275 }, { "epoch": 216.76190476190476, "grad_norm": 4.555139541625977, "learning_rate": 4.2634932533733134e-07, "loss": 1.1495, "step": 2276 }, { "epoch": 216.85714285714286, "grad_norm": 10.477103233337402, "learning_rate": 4.2653673163418294e-07, "loss": 1.1901, "step": 2277 }, { "epoch": 216.95238095238096, "grad_norm": 8.654388427734375, "learning_rate": 4.267241379310345e-07, "loss": 1.1739, "step": 2278 }, { "epoch": 216.95238095238096, "eval_accuracy": 0.7174971899587861, "eval_f1": 0.7754615842763549, "eval_loss": 0.5713890790939331, "eval_precision": 0.7177508269018743, "eval_recall": 0.8432642487046632, "eval_roc_auc": 0.7659024179620034, "eval_runtime": 1.1422, "eval_samples_per_second": 2336.711, "eval_steps_per_second": 5.253, "step": 2278 }, { "epoch": 217.04761904761904, "grad_norm": 14.373431205749512, "learning_rate": 4.269115442278861e-07, "loss": 1.1315, "step": 2279 }, { "epoch": 217.14285714285714, "grad_norm": 6.774090766906738, "learning_rate": 4.2709895052473766e-07, "loss": 1.1362, "step": 2280 }, { "epoch": 217.23809523809524, "grad_norm": 4.8597259521484375, "learning_rate": 4.272863568215892e-07, "loss": 1.1534, "step": 2281 }, { "epoch": 217.33333333333334, "grad_norm": 6.150089740753174, "learning_rate": 4.2747376311844077e-07, "loss": 1.1144, "step": 2282 }, { "epoch": 217.42857142857142, "grad_norm": 10.673328399658203, "learning_rate": 4.276611694152923e-07, "loss": 1.1573, "step": 2283 }, { "epoch": 217.52380952380952, "grad_norm": 6.223097324371338, "learning_rate": 4.27848575712144e-07, "loss": 1.1323, "step": 2284 }, { "epoch": 217.61904761904762, "grad_norm": 9.125777244567871, "learning_rate": 4.2803598200899554e-07, "loss": 1.1752, "step": 2285 }, { "epoch": 217.71428571428572, "grad_norm": 4.172426700592041, "learning_rate": 4.282233883058471e-07, "loss": 1.1602, "step": 2286 }, { "epoch": 217.8095238095238, "grad_norm": 4.502418041229248, "learning_rate": 4.2841079460269865e-07, "loss": 1.1482, "step": 2287 }, { "epoch": 217.9047619047619, "grad_norm": 13.205572128295898, "learning_rate": 4.285982008995502e-07, "loss": 1.1785, "step": 2288 }, { "epoch": 218.0, "grad_norm": 7.098892688751221, "learning_rate": 4.287856071964018e-07, "loss": 1.1286, "step": 2289 }, { "epoch": 218.0, "eval_accuracy": 0.7182465342825028, "eval_f1": 0.7760571768910065, "eval_loss": 0.5713024139404297, "eval_precision": 0.7183020948180816, "eval_recall": 0.8439119170984456, "eval_roc_auc": 0.7658497409326425, "eval_runtime": 1.1913, "eval_samples_per_second": 2240.34, "eval_steps_per_second": 5.036, "step": 2289 }, { "epoch": 218.0952380952381, "grad_norm": 2.8342089653015137, "learning_rate": 4.2897301349325336e-07, "loss": 1.1516, "step": 2290 }, { "epoch": 218.1904761904762, "grad_norm": 8.66518783569336, "learning_rate": 4.29160419790105e-07, "loss": 1.1909, "step": 2291 }, { "epoch": 218.28571428571428, "grad_norm": 5.65387487411499, "learning_rate": 4.293478260869566e-07, "loss": 1.1667, "step": 2292 }, { "epoch": 218.38095238095238, "grad_norm": 2.9471826553344727, "learning_rate": 4.2953523238380813e-07, "loss": 1.1703, "step": 2293 }, { "epoch": 218.47619047619048, "grad_norm": 5.311910629272461, "learning_rate": 4.297226386806597e-07, "loss": 1.1337, "step": 2294 }, { "epoch": 218.57142857142858, "grad_norm": 3.465955972671509, "learning_rate": 4.2991004497751124e-07, "loss": 1.1064, "step": 2295 }, { "epoch": 218.66666666666666, "grad_norm": 3.5405027866363525, "learning_rate": 4.300974512743628e-07, "loss": 1.1456, "step": 2296 }, { "epoch": 218.76190476190476, "grad_norm": 4.713162899017334, "learning_rate": 4.3028485757121435e-07, "loss": 1.1944, "step": 2297 }, { "epoch": 218.85714285714286, "grad_norm": 3.6023991107940674, "learning_rate": 4.30472263868066e-07, "loss": 1.1501, "step": 2298 }, { "epoch": 218.95238095238096, "grad_norm": 4.573559761047363, "learning_rate": 4.3065967016491757e-07, "loss": 1.12, "step": 2299 }, { "epoch": 218.95238095238096, "eval_accuracy": 0.7186212064443612, "eval_f1": 0.7772174428952833, "eval_loss": 0.5710164904594421, "eval_precision": 0.7170224411603722, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.7663013816925734, "eval_runtime": 1.2075, "eval_samples_per_second": 2210.389, "eval_steps_per_second": 4.969, "step": 2299 }, { "epoch": 219.04761904761904, "grad_norm": 6.432830810546875, "learning_rate": 4.308470764617691e-07, "loss": 1.1894, "step": 2300 }, { "epoch": 219.14285714285714, "grad_norm": 9.601896286010742, "learning_rate": 4.3103448275862073e-07, "loss": 1.1693, "step": 2301 }, { "epoch": 219.23809523809524, "grad_norm": 3.743082284927368, "learning_rate": 4.312218890554723e-07, "loss": 1.1238, "step": 2302 }, { "epoch": 219.33333333333334, "grad_norm": 4.7529168128967285, "learning_rate": 4.3140929535232384e-07, "loss": 1.1241, "step": 2303 }, { "epoch": 219.42857142857142, "grad_norm": 11.7463960647583, "learning_rate": 4.315967016491754e-07, "loss": 1.1282, "step": 2304 }, { "epoch": 219.52380952380952, "grad_norm": 4.231055736541748, "learning_rate": 4.3178410794602705e-07, "loss": 1.1524, "step": 2305 }, { "epoch": 219.61904761904762, "grad_norm": 3.1641948223114014, "learning_rate": 4.319715142428786e-07, "loss": 1.1592, "step": 2306 }, { "epoch": 219.71428571428572, "grad_norm": 3.844000816345215, "learning_rate": 4.3215892053973016e-07, "loss": 1.1645, "step": 2307 }, { "epoch": 219.8095238095238, "grad_norm": 10.697543144226074, "learning_rate": 4.323463268365817e-07, "loss": 1.1261, "step": 2308 }, { "epoch": 219.9047619047619, "grad_norm": 8.771400451660156, "learning_rate": 4.3253373313343327e-07, "loss": 1.1431, "step": 2309 }, { "epoch": 220.0, "grad_norm": 3.680692195892334, "learning_rate": 4.327211394302848e-07, "loss": 1.1709, "step": 2310 }, { "epoch": 220.0, "eval_accuracy": 0.7182465342825028, "eval_f1": 0.7761904761904762, "eval_loss": 0.5708158612251282, "eval_precision": 0.7180616740088106, "eval_recall": 0.844559585492228, "eval_roc_auc": 0.7665362694300518, "eval_runtime": 1.2926, "eval_samples_per_second": 2064.826, "eval_steps_per_second": 4.642, "step": 2310 }, { "epoch": 220.0952380952381, "grad_norm": 5.360668182373047, "learning_rate": 4.329085457271365e-07, "loss": 1.155, "step": 2311 }, { "epoch": 220.1904761904762, "grad_norm": 2.6076276302337646, "learning_rate": 4.3309595202398804e-07, "loss": 1.1352, "step": 2312 }, { "epoch": 220.28571428571428, "grad_norm": 2.816897392272949, "learning_rate": 4.332833583208396e-07, "loss": 1.1547, "step": 2313 }, { "epoch": 220.38095238095238, "grad_norm": 9.164076805114746, "learning_rate": 4.334707646176912e-07, "loss": 1.1507, "step": 2314 }, { "epoch": 220.47619047619048, "grad_norm": 9.430002212524414, "learning_rate": 4.3365817091454276e-07, "loss": 1.1124, "step": 2315 }, { "epoch": 220.57142857142858, "grad_norm": 8.547354698181152, "learning_rate": 4.338455772113943e-07, "loss": 1.1953, "step": 2316 }, { "epoch": 220.66666666666666, "grad_norm": 3.7488644123077393, "learning_rate": 4.3403298350824586e-07, "loss": 1.1528, "step": 2317 }, { "epoch": 220.76190476190476, "grad_norm": 3.1749069690704346, "learning_rate": 4.342203898050975e-07, "loss": 1.1311, "step": 2318 }, { "epoch": 220.85714285714286, "grad_norm": 3.318031072616577, "learning_rate": 4.344077961019491e-07, "loss": 1.1468, "step": 2319 }, { "epoch": 220.95238095238096, "grad_norm": 3.5835556983947754, "learning_rate": 4.3459520239880063e-07, "loss": 1.1506, "step": 2320 }, { "epoch": 220.95238095238096, "eval_accuracy": 0.7186212064443612, "eval_f1": 0.7772174428952833, "eval_loss": 0.5706951022148132, "eval_precision": 0.7170224411603722, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.7666669545192862, "eval_runtime": 1.1476, "eval_samples_per_second": 2325.798, "eval_steps_per_second": 5.228, "step": 2320 }, { "epoch": 221.04761904761904, "grad_norm": 6.415576457977295, "learning_rate": 4.347826086956522e-07, "loss": 1.1645, "step": 2321 }, { "epoch": 221.14285714285714, "grad_norm": 6.643709659576416, "learning_rate": 4.3497001499250374e-07, "loss": 1.1505, "step": 2322 }, { "epoch": 221.23809523809524, "grad_norm": 4.2586822509765625, "learning_rate": 4.351574212893553e-07, "loss": 1.1772, "step": 2323 }, { "epoch": 221.33333333333334, "grad_norm": 4.354816913604736, "learning_rate": 4.353448275862069e-07, "loss": 1.1163, "step": 2324 }, { "epoch": 221.42857142857142, "grad_norm": 3.812659502029419, "learning_rate": 4.355322338830585e-07, "loss": 1.1893, "step": 2325 }, { "epoch": 221.52380952380952, "grad_norm": 2.639324188232422, "learning_rate": 4.357196401799101e-07, "loss": 1.1589, "step": 2326 }, { "epoch": 221.61904761904762, "grad_norm": 8.526213645935059, "learning_rate": 4.359070464767617e-07, "loss": 1.1101, "step": 2327 }, { "epoch": 221.71428571428572, "grad_norm": 8.683570861816406, "learning_rate": 4.3609445277361323e-07, "loss": 1.1543, "step": 2328 }, { "epoch": 221.8095238095238, "grad_norm": 4.670089244842529, "learning_rate": 4.362818590704648e-07, "loss": 1.1489, "step": 2329 }, { "epoch": 221.9047619047619, "grad_norm": 3.6284923553466797, "learning_rate": 4.3646926536731634e-07, "loss": 1.1382, "step": 2330 }, { "epoch": 222.0, "grad_norm": 6.06208610534668, "learning_rate": 4.366566716641679e-07, "loss": 1.1784, "step": 2331 }, { "epoch": 222.0, "eval_accuracy": 0.7186212064443612, "eval_f1": 0.7749475576865448, "eval_loss": 0.5703732371330261, "eval_precision": 0.7211377579475738, "eval_recall": 0.8374352331606217, "eval_roc_auc": 0.7669130685089234, "eval_runtime": 1.4477, "eval_samples_per_second": 1843.605, "eval_steps_per_second": 4.144, "step": 2331 }, { "epoch": 222.0952380952381, "grad_norm": 5.371791362762451, "learning_rate": 4.3684407796101955e-07, "loss": 1.124, "step": 2332 }, { "epoch": 222.1904761904762, "grad_norm": 6.109635829925537, "learning_rate": 4.370314842578711e-07, "loss": 1.1417, "step": 2333 }, { "epoch": 222.28571428571428, "grad_norm": 5.1480560302734375, "learning_rate": 4.3721889055472266e-07, "loss": 1.1479, "step": 2334 }, { "epoch": 222.38095238095238, "grad_norm": 3.191652536392212, "learning_rate": 4.374062968515742e-07, "loss": 1.1171, "step": 2335 }, { "epoch": 222.47619047619048, "grad_norm": 7.5411272048950195, "learning_rate": 4.375937031484258e-07, "loss": 1.1868, "step": 2336 }, { "epoch": 222.57142857142858, "grad_norm": 4.1391401290893555, "learning_rate": 4.377811094452774e-07, "loss": 1.143, "step": 2337 }, { "epoch": 222.66666666666666, "grad_norm": 8.584721565246582, "learning_rate": 4.3796851574212893e-07, "loss": 1.1658, "step": 2338 }, { "epoch": 222.76190476190476, "grad_norm": 8.805205345153809, "learning_rate": 4.381559220389806e-07, "loss": 1.1905, "step": 2339 }, { "epoch": 222.85714285714286, "grad_norm": 4.957940101623535, "learning_rate": 4.3834332833583215e-07, "loss": 1.1784, "step": 2340 }, { "epoch": 222.95238095238096, "grad_norm": 9.811948776245117, "learning_rate": 4.385307346326837e-07, "loss": 1.1905, "step": 2341 }, { "epoch": 222.95238095238096, "eval_accuracy": 0.7189958786062196, "eval_f1": 0.775851763299462, "eval_loss": 0.570548951625824, "eval_precision": 0.7203107658157603, "eval_recall": 0.8406735751295337, "eval_roc_auc": 0.7670463442717328, "eval_runtime": 1.2238, "eval_samples_per_second": 2180.979, "eval_steps_per_second": 4.903, "step": 2341 }, { "epoch": 223.04761904761904, "grad_norm": 8.00674819946289, "learning_rate": 4.3871814092953526e-07, "loss": 1.1571, "step": 2342 }, { "epoch": 223.14285714285714, "grad_norm": 8.01071834564209, "learning_rate": 4.389055472263868e-07, "loss": 1.105, "step": 2343 }, { "epoch": 223.23809523809524, "grad_norm": 8.743189811706543, "learning_rate": 4.3909295352323837e-07, "loss": 1.1469, "step": 2344 }, { "epoch": 223.33333333333334, "grad_norm": 3.163163900375366, "learning_rate": 4.392803598200899e-07, "loss": 1.155, "step": 2345 }, { "epoch": 223.42857142857142, "grad_norm": 5.545464515686035, "learning_rate": 4.394677661169416e-07, "loss": 1.1703, "step": 2346 }, { "epoch": 223.52380952380952, "grad_norm": 6.351076126098633, "learning_rate": 4.3965517241379313e-07, "loss": 1.1415, "step": 2347 }, { "epoch": 223.61904761904762, "grad_norm": 4.026891231536865, "learning_rate": 4.398425787106447e-07, "loss": 1.1376, "step": 2348 }, { "epoch": 223.71428571428572, "grad_norm": 9.977435111999512, "learning_rate": 4.400299850074963e-07, "loss": 1.1836, "step": 2349 }, { "epoch": 223.8095238095238, "grad_norm": 3.645195722579956, "learning_rate": 4.4021739130434785e-07, "loss": 1.1816, "step": 2350 }, { "epoch": 223.9047619047619, "grad_norm": 3.4103715419769287, "learning_rate": 4.404047976011994e-07, "loss": 1.1381, "step": 2351 }, { "epoch": 224.0, "grad_norm": 3.77848744392395, "learning_rate": 4.4059220389805096e-07, "loss": 1.1195, "step": 2352 }, { "epoch": 224.0, "eval_accuracy": 0.7197452229299363, "eval_f1": 0.7783046828689982, "eval_loss": 0.5704892873764038, "eval_precision": 0.7174863387978142, "eval_recall": 0.8503886010362695, "eval_roc_auc": 0.7676044905008634, "eval_runtime": 1.424, "eval_samples_per_second": 1874.259, "eval_steps_per_second": 4.213, "step": 2352 }, { "epoch": 224.0952380952381, "grad_norm": 4.599103927612305, "learning_rate": 4.407796101949026e-07, "loss": 1.1545, "step": 2353 }, { "epoch": 224.1904761904762, "grad_norm": 4.559905052185059, "learning_rate": 4.409670164917542e-07, "loss": 1.1492, "step": 2354 }, { "epoch": 224.28571428571428, "grad_norm": 4.591866493225098, "learning_rate": 4.4115442278860573e-07, "loss": 1.077, "step": 2355 }, { "epoch": 224.38095238095238, "grad_norm": 5.925033092498779, "learning_rate": 4.413418290854573e-07, "loss": 1.1578, "step": 2356 }, { "epoch": 224.47619047619048, "grad_norm": 9.684907913208008, "learning_rate": 4.4152923538230884e-07, "loss": 1.1807, "step": 2357 }, { "epoch": 224.57142857142858, "grad_norm": 4.006957530975342, "learning_rate": 4.4171664167916045e-07, "loss": 1.1713, "step": 2358 }, { "epoch": 224.66666666666666, "grad_norm": 5.357590198516846, "learning_rate": 4.41904047976012e-07, "loss": 1.1408, "step": 2359 }, { "epoch": 224.76190476190476, "grad_norm": 7.220717906951904, "learning_rate": 4.420914542728636e-07, "loss": 1.2085, "step": 2360 }, { "epoch": 224.85714285714286, "grad_norm": 3.5976977348327637, "learning_rate": 4.422788605697152e-07, "loss": 1.1425, "step": 2361 }, { "epoch": 224.95238095238096, "grad_norm": 6.680579662322998, "learning_rate": 4.4246626686656677e-07, "loss": 1.1477, "step": 2362 }, { "epoch": 224.95238095238096, "eval_accuracy": 0.7193705507680779, "eval_f1": 0.7772821885221528, "eval_loss": 0.5701494216918945, "eval_precision": 0.7185266630016492, "eval_recall": 0.8465025906735751, "eval_roc_auc": 0.7679289004029937, "eval_runtime": 1.1883, "eval_samples_per_second": 2245.985, "eval_steps_per_second": 5.049, "step": 2362 }, { "epoch": 225.04761904761904, "grad_norm": 2.512918472290039, "learning_rate": 4.426536731634183e-07, "loss": 1.1161, "step": 2363 }, { "epoch": 225.14285714285714, "grad_norm": 4.432623863220215, "learning_rate": 4.428410794602699e-07, "loss": 1.141, "step": 2364 }, { "epoch": 225.23809523809524, "grad_norm": 2.8132100105285645, "learning_rate": 4.4302848575712143e-07, "loss": 1.1529, "step": 2365 }, { "epoch": 225.33333333333334, "grad_norm": 5.2681097984313965, "learning_rate": 4.43215892053973e-07, "loss": 1.1792, "step": 2366 }, { "epoch": 225.42857142857142, "grad_norm": 2.881300210952759, "learning_rate": 4.4340329835082465e-07, "loss": 1.1431, "step": 2367 }, { "epoch": 225.52380952380952, "grad_norm": 3.3978936672210693, "learning_rate": 4.435907046476762e-07, "loss": 1.1452, "step": 2368 }, { "epoch": 225.61904761904762, "grad_norm": 2.8875365257263184, "learning_rate": 4.4377811094452776e-07, "loss": 1.1367, "step": 2369 }, { "epoch": 225.71428571428572, "grad_norm": 5.989372730255127, "learning_rate": 4.439655172413793e-07, "loss": 1.1804, "step": 2370 }, { "epoch": 225.8095238095238, "grad_norm": 2.5179331302642822, "learning_rate": 4.441529235382309e-07, "loss": 1.1437, "step": 2371 }, { "epoch": 225.9047619047619, "grad_norm": 2.771777391433716, "learning_rate": 4.4434032983508247e-07, "loss": 1.132, "step": 2372 }, { "epoch": 226.0, "grad_norm": 3.8657586574554443, "learning_rate": 4.4452773613193403e-07, "loss": 1.1608, "step": 2373 }, { "epoch": 226.0, "eval_accuracy": 0.7171225177969277, "eval_f1": 0.7760308513794126, "eval_loss": 0.5697022080421448, "eval_precision": 0.715927750410509, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.768458549222798, "eval_runtime": 1.1924, "eval_samples_per_second": 2238.258, "eval_steps_per_second": 5.032, "step": 2373 }, { "epoch": 226.0952380952381, "grad_norm": 6.003801345825195, "learning_rate": 4.447151424287857e-07, "loss": 1.1063, "step": 2374 }, { "epoch": 226.1904761904762, "grad_norm": 4.66921329498291, "learning_rate": 4.4490254872563724e-07, "loss": 1.145, "step": 2375 }, { "epoch": 226.28571428571428, "grad_norm": 3.6423964500427246, "learning_rate": 4.450899550224888e-07, "loss": 1.0927, "step": 2376 }, { "epoch": 226.38095238095238, "grad_norm": 5.096931457519531, "learning_rate": 4.4527736131934035e-07, "loss": 1.1127, "step": 2377 }, { "epoch": 226.47619047619048, "grad_norm": 8.15682601928711, "learning_rate": 4.454647676161919e-07, "loss": 1.1469, "step": 2378 }, { "epoch": 226.57142857142858, "grad_norm": 2.7751176357269287, "learning_rate": 4.4565217391304346e-07, "loss": 1.2006, "step": 2379 }, { "epoch": 226.66666666666666, "grad_norm": 4.12217378616333, "learning_rate": 4.45839580209895e-07, "loss": 1.162, "step": 2380 }, { "epoch": 226.76190476190476, "grad_norm": 4.791848182678223, "learning_rate": 4.460269865067467e-07, "loss": 1.154, "step": 2381 }, { "epoch": 226.85714285714286, "grad_norm": 5.367868423461914, "learning_rate": 4.4621439280359823e-07, "loss": 1.2002, "step": 2382 }, { "epoch": 226.95238095238096, "grad_norm": 2.4993393421173096, "learning_rate": 4.4640179910044984e-07, "loss": 1.1519, "step": 2383 }, { "epoch": 226.95238095238096, "eval_accuracy": 0.720494567253653, "eval_f1": 0.776378896882494, "eval_loss": 0.5695188045501709, "eval_precision": 0.72265625, "eval_recall": 0.8387305699481865, "eval_roc_auc": 0.7686076568796776, "eval_runtime": 1.1904, "eval_samples_per_second": 2242.195, "eval_steps_per_second": 5.041, "step": 2383 }, { "epoch": 227.04761904761904, "grad_norm": 5.448750019073486, "learning_rate": 4.465892053973014e-07, "loss": 1.1544, "step": 2384 }, { "epoch": 227.14285714285714, "grad_norm": 9.408559799194336, "learning_rate": 4.4677661169415295e-07, "loss": 1.1194, "step": 2385 }, { "epoch": 227.23809523809524, "grad_norm": 5.749595642089844, "learning_rate": 4.469640179910045e-07, "loss": 1.1666, "step": 2386 }, { "epoch": 227.33333333333334, "grad_norm": 3.787705183029175, "learning_rate": 4.4715142428785606e-07, "loss": 1.1724, "step": 2387 }, { "epoch": 227.42857142857142, "grad_norm": 3.9639134407043457, "learning_rate": 4.473388305847077e-07, "loss": 1.1584, "step": 2388 }, { "epoch": 227.52380952380952, "grad_norm": 3.05351185798645, "learning_rate": 4.4752623688155927e-07, "loss": 1.1573, "step": 2389 }, { "epoch": 227.61904761904762, "grad_norm": 7.798318862915039, "learning_rate": 4.477136431784108e-07, "loss": 1.1385, "step": 2390 }, { "epoch": 227.71428571428572, "grad_norm": 4.202153205871582, "learning_rate": 4.479010494752624e-07, "loss": 1.1519, "step": 2391 }, { "epoch": 227.8095238095238, "grad_norm": 9.05360221862793, "learning_rate": 4.4808845577211393e-07, "loss": 1.151, "step": 2392 }, { "epoch": 227.9047619047619, "grad_norm": 5.996427536010742, "learning_rate": 4.4827586206896554e-07, "loss": 1.1745, "step": 2393 }, { "epoch": 228.0, "grad_norm": 3.683551073074341, "learning_rate": 4.484632683658171e-07, "loss": 1.0939, "step": 2394 }, { "epoch": 228.0, "eval_accuracy": 0.7174971899587861, "eval_f1": 0.7759952465834818, "eval_loss": 0.5693243145942688, "eval_precision": 0.716794731064764, "eval_recall": 0.8458549222797928, "eval_roc_auc": 0.7688845710995971, "eval_runtime": 1.223, "eval_samples_per_second": 2182.303, "eval_steps_per_second": 4.906, "step": 2394 }, { "epoch": 228.0952380952381, "grad_norm": 4.042111396789551, "learning_rate": 4.486506746626687e-07, "loss": 1.1741, "step": 2395 }, { "epoch": 228.1904761904762, "grad_norm": 3.9594829082489014, "learning_rate": 4.488380809595203e-07, "loss": 1.1649, "step": 2396 }, { "epoch": 228.28571428571428, "grad_norm": 5.861592769622803, "learning_rate": 4.4902548725637186e-07, "loss": 1.1348, "step": 2397 }, { "epoch": 228.38095238095238, "grad_norm": 4.552924156188965, "learning_rate": 4.492128935532234e-07, "loss": 1.1528, "step": 2398 }, { "epoch": 228.47619047619048, "grad_norm": 3.354738712310791, "learning_rate": 4.49400299850075e-07, "loss": 1.158, "step": 2399 }, { "epoch": 228.57142857142858, "grad_norm": 3.973210573196411, "learning_rate": 4.4958770614692653e-07, "loss": 1.1165, "step": 2400 }, { "epoch": 228.66666666666666, "grad_norm": 8.278904914855957, "learning_rate": 4.497751124437781e-07, "loss": 1.1517, "step": 2401 }, { "epoch": 228.76190476190476, "grad_norm": 5.11875581741333, "learning_rate": 4.4996251874062974e-07, "loss": 1.1558, "step": 2402 }, { "epoch": 228.85714285714286, "grad_norm": 8.316819190979004, "learning_rate": 4.501499250374813e-07, "loss": 1.1962, "step": 2403 }, { "epoch": 228.95238095238096, "grad_norm": 4.274223327636719, "learning_rate": 4.5033733133433285e-07, "loss": 1.1209, "step": 2404 }, { "epoch": 228.95238095238096, "eval_accuracy": 0.7189958786062196, "eval_f1": 0.7765196662693683, "eval_loss": 0.5691817402839661, "eval_precision": 0.7190949227373068, "eval_recall": 0.8439119170984456, "eval_roc_auc": 0.7688310305123778, "eval_runtime": 1.1439, "eval_samples_per_second": 2333.192, "eval_steps_per_second": 5.245, "step": 2404 }, { "epoch": 229.04761904761904, "grad_norm": 3.7137949466705322, "learning_rate": 4.505247376311844e-07, "loss": 1.1322, "step": 2405 }, { "epoch": 229.14285714285714, "grad_norm": 4.170365810394287, "learning_rate": 4.50712143928036e-07, "loss": 1.116, "step": 2406 }, { "epoch": 229.23809523809524, "grad_norm": 3.865349054336548, "learning_rate": 4.5089955022488757e-07, "loss": 1.1371, "step": 2407 }, { "epoch": 229.33333333333334, "grad_norm": 4.7025909423828125, "learning_rate": 4.510869565217391e-07, "loss": 1.1646, "step": 2408 }, { "epoch": 229.42857142857142, "grad_norm": 4.561888694763184, "learning_rate": 4.512743628185908e-07, "loss": 1.1553, "step": 2409 }, { "epoch": 229.52380952380952, "grad_norm": 4.621860504150391, "learning_rate": 4.5146176911544234e-07, "loss": 1.1162, "step": 2410 }, { "epoch": 229.61904761904762, "grad_norm": 6.832645893096924, "learning_rate": 4.516491754122939e-07, "loss": 1.1645, "step": 2411 }, { "epoch": 229.71428571428572, "grad_norm": 7.709583759307861, "learning_rate": 4.5183658170914545e-07, "loss": 1.1843, "step": 2412 }, { "epoch": 229.8095238095238, "grad_norm": 6.104475498199463, "learning_rate": 4.52023988005997e-07, "loss": 1.1422, "step": 2413 }, { "epoch": 229.9047619047619, "grad_norm": 5.455144882202148, "learning_rate": 4.5221139430284856e-07, "loss": 1.1822, "step": 2414 }, { "epoch": 230.0, "grad_norm": 9.39183235168457, "learning_rate": 4.523988005997001e-07, "loss": 1.2096, "step": 2415 }, { "epoch": 230.0, "eval_accuracy": 0.7174971899587861, "eval_f1": 0.7750596658711217, "eval_loss": 0.5693327188491821, "eval_precision": 0.7184734513274337, "eval_recall": 0.841321243523316, "eval_roc_auc": 0.7688477259643063, "eval_runtime": 1.1519, "eval_samples_per_second": 2317.07, "eval_steps_per_second": 5.209, "step": 2415 }, { "epoch": 230.0952380952381, "grad_norm": 5.667901515960693, "learning_rate": 4.5258620689655177e-07, "loss": 1.1667, "step": 2416 }, { "epoch": 230.1904761904762, "grad_norm": 5.791493892669678, "learning_rate": 4.527736131934033e-07, "loss": 1.0979, "step": 2417 }, { "epoch": 230.28571428571428, "grad_norm": 3.5346291065216064, "learning_rate": 4.5296101949025493e-07, "loss": 1.1542, "step": 2418 }, { "epoch": 230.38095238095238, "grad_norm": 5.434539318084717, "learning_rate": 4.531484257871065e-07, "loss": 1.1854, "step": 2419 }, { "epoch": 230.47619047619048, "grad_norm": 5.917377948760986, "learning_rate": 4.5333583208395804e-07, "loss": 1.1524, "step": 2420 }, { "epoch": 230.57142857142858, "grad_norm": 7.932279586791992, "learning_rate": 4.535232383808096e-07, "loss": 1.1634, "step": 2421 }, { "epoch": 230.66666666666666, "grad_norm": 12.676122665405273, "learning_rate": 4.5371064467766115e-07, "loss": 1.1323, "step": 2422 }, { "epoch": 230.76190476190476, "grad_norm": 8.025681495666504, "learning_rate": 4.538980509745128e-07, "loss": 1.141, "step": 2423 }, { "epoch": 230.85714285714286, "grad_norm": 5.724260330200195, "learning_rate": 4.5408545727136436e-07, "loss": 1.1401, "step": 2424 }, { "epoch": 230.95238095238096, "grad_norm": 3.4070522785186768, "learning_rate": 4.542728635682159e-07, "loss": 1.1843, "step": 2425 }, { "epoch": 230.95238095238096, "eval_accuracy": 0.7182465342825028, "eval_f1": 0.7772511848341233, "eval_loss": 0.5696398019790649, "eval_precision": 0.7161572052401747, "eval_recall": 0.8497409326424871, "eval_roc_auc": 0.7688304548071386, "eval_runtime": 1.1655, "eval_samples_per_second": 2289.921, "eval_steps_per_second": 5.148, "step": 2425 }, { "epoch": 231.04761904761904, "grad_norm": 3.4953441619873047, "learning_rate": 4.544602698650675e-07, "loss": 1.1192, "step": 2426 }, { "epoch": 231.14285714285714, "grad_norm": 5.022072792053223, "learning_rate": 4.5464767616191903e-07, "loss": 1.1471, "step": 2427 }, { "epoch": 231.23809523809524, "grad_norm": 7.191939353942871, "learning_rate": 4.5483508245877064e-07, "loss": 1.1319, "step": 2428 }, { "epoch": 231.33333333333334, "grad_norm": 7.880967140197754, "learning_rate": 4.550224887556222e-07, "loss": 1.1201, "step": 2429 }, { "epoch": 231.42857142857142, "grad_norm": 12.768455505371094, "learning_rate": 4.552098950524738e-07, "loss": 1.2004, "step": 2430 }, { "epoch": 231.52380952380952, "grad_norm": 7.712562084197998, "learning_rate": 4.553973013493254e-07, "loss": 1.1585, "step": 2431 }, { "epoch": 231.61904761904762, "grad_norm": 4.747276782989502, "learning_rate": 4.5558470764617696e-07, "loss": 1.1559, "step": 2432 }, { "epoch": 231.71428571428572, "grad_norm": 8.367605209350586, "learning_rate": 4.557721139430285e-07, "loss": 1.1298, "step": 2433 }, { "epoch": 231.8095238095238, "grad_norm": 6.592958927154541, "learning_rate": 4.5595952023988007e-07, "loss": 1.1743, "step": 2434 }, { "epoch": 231.9047619047619, "grad_norm": 4.433559417724609, "learning_rate": 4.561469265367316e-07, "loss": 1.1075, "step": 2435 }, { "epoch": 232.0, "grad_norm": 8.43216609954834, "learning_rate": 4.563343328335832e-07, "loss": 1.12, "step": 2436 }, { "epoch": 232.0, "eval_accuracy": 0.7171225177969277, "eval_f1": 0.774828511780495, "eval_loss": 0.5698753595352173, "eval_precision": 0.7180762852404643, "eval_recall": 0.841321243523316, "eval_roc_auc": 0.768350604490501, "eval_runtime": 1.3776, "eval_samples_per_second": 1937.376, "eval_steps_per_second": 4.355, "step": 2436 }, { "epoch": 232.0952380952381, "grad_norm": 4.639498233795166, "learning_rate": 4.5652173913043484e-07, "loss": 1.131, "step": 2437 }, { "epoch": 232.1904761904762, "grad_norm": 4.520444393157959, "learning_rate": 4.567091454272864e-07, "loss": 1.1549, "step": 2438 }, { "epoch": 232.28571428571428, "grad_norm": 3.809060573577881, "learning_rate": 4.5689655172413795e-07, "loss": 1.1717, "step": 2439 }, { "epoch": 232.38095238095238, "grad_norm": 3.0320441722869873, "learning_rate": 4.570839580209895e-07, "loss": 1.1552, "step": 2440 }, { "epoch": 232.47619047619048, "grad_norm": 7.700618743896484, "learning_rate": 4.572713643178411e-07, "loss": 1.1288, "step": 2441 }, { "epoch": 232.57142857142858, "grad_norm": 8.786882400512695, "learning_rate": 4.5745877061469266e-07, "loss": 1.1354, "step": 2442 }, { "epoch": 232.66666666666666, "grad_norm": 13.372679710388184, "learning_rate": 4.576461769115442e-07, "loss": 1.1771, "step": 2443 }, { "epoch": 232.76190476190476, "grad_norm": 3.7465243339538574, "learning_rate": 4.578335832083959e-07, "loss": 1.1508, "step": 2444 }, { "epoch": 232.85714285714286, "grad_norm": 3.863992214202881, "learning_rate": 4.5802098950524743e-07, "loss": 1.1347, "step": 2445 }, { "epoch": 232.95238095238096, "grad_norm": 4.447786331176758, "learning_rate": 4.58208395802099e-07, "loss": 1.1635, "step": 2446 }, { "epoch": 232.95238095238096, "eval_accuracy": 0.7189958786062196, "eval_f1": 0.7781065088757396, "eval_loss": 0.5697467923164368, "eval_precision": 0.7162309368191722, "eval_recall": 0.8516839378238342, "eval_roc_auc": 0.7686997697179043, "eval_runtime": 1.1824, "eval_samples_per_second": 2257.22, "eval_steps_per_second": 5.074, "step": 2446 }, { "epoch": 233.04761904761904, "grad_norm": 4.46320104598999, "learning_rate": 4.5839580209895054e-07, "loss": 1.1732, "step": 2447 }, { "epoch": 233.14285714285714, "grad_norm": 6.758903980255127, "learning_rate": 4.585832083958021e-07, "loss": 1.1669, "step": 2448 }, { "epoch": 233.23809523809524, "grad_norm": 12.404580116271973, "learning_rate": 4.5877061469265365e-07, "loss": 1.1597, "step": 2449 }, { "epoch": 233.33333333333334, "grad_norm": 5.045426368713379, "learning_rate": 4.589580209895052e-07, "loss": 1.1608, "step": 2450 }, { "epoch": 233.42857142857142, "grad_norm": 5.315583229064941, "learning_rate": 4.5914542728635687e-07, "loss": 1.1361, "step": 2451 }, { "epoch": 233.52380952380952, "grad_norm": 6.253724575042725, "learning_rate": 4.593328335832084e-07, "loss": 1.1571, "step": 2452 }, { "epoch": 233.61904761904762, "grad_norm": 3.5962157249450684, "learning_rate": 4.5952023988006003e-07, "loss": 1.1451, "step": 2453 }, { "epoch": 233.71428571428572, "grad_norm": 5.8950514793396, "learning_rate": 4.597076461769116e-07, "loss": 1.137, "step": 2454 }, { "epoch": 233.8095238095238, "grad_norm": 3.1741690635681152, "learning_rate": 4.5989505247376314e-07, "loss": 1.1279, "step": 2455 }, { "epoch": 233.9047619047619, "grad_norm": 3.401339054107666, "learning_rate": 4.600824587706147e-07, "loss": 1.1507, "step": 2456 }, { "epoch": 234.0, "grad_norm": 4.254356384277344, "learning_rate": 4.6026986506746625e-07, "loss": 1.1214, "step": 2457 }, { "epoch": 234.0, "eval_accuracy": 0.7197452229299363, "eval_f1": 0.7759137207908927, "eval_loss": 0.569048285484314, "eval_precision": 0.721850613154961, "eval_recall": 0.8387305699481865, "eval_roc_auc": 0.7692043753598157, "eval_runtime": 1.1584, "eval_samples_per_second": 2303.977, "eval_steps_per_second": 5.179, "step": 2457 }, { "epoch": 234.0952380952381, "grad_norm": 4.8844146728515625, "learning_rate": 4.604572713643179e-07, "loss": 1.1469, "step": 2458 }, { "epoch": 234.1904761904762, "grad_norm": 4.128713607788086, "learning_rate": 4.6064467766116946e-07, "loss": 1.1274, "step": 2459 }, { "epoch": 234.28571428571428, "grad_norm": 3.244645118713379, "learning_rate": 4.60832083958021e-07, "loss": 1.1395, "step": 2460 }, { "epoch": 234.38095238095238, "grad_norm": 5.353634834289551, "learning_rate": 4.6101949025487257e-07, "loss": 1.1475, "step": 2461 }, { "epoch": 234.47619047619048, "grad_norm": 5.806576728820801, "learning_rate": 4.612068965517241e-07, "loss": 1.1432, "step": 2462 }, { "epoch": 234.57142857142858, "grad_norm": 13.247061729431152, "learning_rate": 4.6139430284857573e-07, "loss": 1.1144, "step": 2463 }, { "epoch": 234.66666666666666, "grad_norm": 2.7329390048980713, "learning_rate": 4.615817091454273e-07, "loss": 1.1431, "step": 2464 }, { "epoch": 234.76190476190476, "grad_norm": 3.864218235015869, "learning_rate": 4.617691154422789e-07, "loss": 1.1806, "step": 2465 }, { "epoch": 234.85714285714286, "grad_norm": 14.171060562133789, "learning_rate": 4.619565217391305e-07, "loss": 1.1975, "step": 2466 }, { "epoch": 234.95238095238096, "grad_norm": 8.89664077758789, "learning_rate": 4.6214392803598205e-07, "loss": 1.139, "step": 2467 }, { "epoch": 234.95238095238096, "eval_accuracy": 0.720494567253653, "eval_f1": 0.7783719548425431, "eval_loss": 0.5684369206428528, "eval_precision": 0.7189901207464325, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.7698618307426597, "eval_runtime": 1.1725, "eval_samples_per_second": 2276.408, "eval_steps_per_second": 5.117, "step": 2467 }, { "epoch": 235.04761904761904, "grad_norm": 7.042123794555664, "learning_rate": 4.623313343328336e-07, "loss": 1.1587, "step": 2468 }, { "epoch": 235.14285714285714, "grad_norm": 6.67264986038208, "learning_rate": 4.6251874062968516e-07, "loss": 1.155, "step": 2469 }, { "epoch": 235.23809523809524, "grad_norm": 2.7996301651000977, "learning_rate": 4.627061469265367e-07, "loss": 1.1606, "step": 2470 }, { "epoch": 235.33333333333334, "grad_norm": 6.432344913482666, "learning_rate": 4.6289355322338827e-07, "loss": 1.1244, "step": 2471 }, { "epoch": 235.42857142857142, "grad_norm": 7.238835334777832, "learning_rate": 4.6308095952023993e-07, "loss": 1.1404, "step": 2472 }, { "epoch": 235.52380952380952, "grad_norm": 7.994748592376709, "learning_rate": 4.632683658170915e-07, "loss": 1.1516, "step": 2473 }, { "epoch": 235.61904761904762, "grad_norm": 9.119189262390137, "learning_rate": 4.6345577211394304e-07, "loss": 1.1525, "step": 2474 }, { "epoch": 235.71428571428572, "grad_norm": 3.28615403175354, "learning_rate": 4.636431784107946e-07, "loss": 1.1559, "step": 2475 }, { "epoch": 235.8095238095238, "grad_norm": 3.7294490337371826, "learning_rate": 4.638305847076462e-07, "loss": 1.157, "step": 2476 }, { "epoch": 235.9047619047619, "grad_norm": 7.549288272857666, "learning_rate": 4.6401799100449776e-07, "loss": 1.1369, "step": 2477 }, { "epoch": 236.0, "grad_norm": 6.290277481079102, "learning_rate": 4.642053973013493e-07, "loss": 1.0902, "step": 2478 }, { "epoch": 236.0, "eval_accuracy": 0.720494567253653, "eval_f1": 0.7765128819652487, "eval_loss": 0.5682975053787231, "eval_precision": 0.7224080267558528, "eval_recall": 0.8393782383419689, "eval_roc_auc": 0.7701263672999424, "eval_runtime": 1.2309, "eval_samples_per_second": 2168.419, "eval_steps_per_second": 4.875, "step": 2478 }, { "epoch": 236.0952380952381, "grad_norm": 5.862534523010254, "learning_rate": 4.6439280359820097e-07, "loss": 1.1472, "step": 2479 }, { "epoch": 236.1904761904762, "grad_norm": 4.279983997344971, "learning_rate": 4.6458020989505253e-07, "loss": 1.1246, "step": 2480 }, { "epoch": 236.28571428571428, "grad_norm": 14.398469924926758, "learning_rate": 4.647676161919041e-07, "loss": 1.1189, "step": 2481 }, { "epoch": 236.38095238095238, "grad_norm": 3.601273536682129, "learning_rate": 4.6495502248875564e-07, "loss": 1.1493, "step": 2482 }, { "epoch": 236.47619047619048, "grad_norm": 8.719027519226074, "learning_rate": 4.651424287856072e-07, "loss": 1.1419, "step": 2483 }, { "epoch": 236.57142857142858, "grad_norm": 3.2955257892608643, "learning_rate": 4.6532983508245875e-07, "loss": 1.1642, "step": 2484 }, { "epoch": 236.66666666666666, "grad_norm": 3.568007707595825, "learning_rate": 4.655172413793103e-07, "loss": 1.1462, "step": 2485 }, { "epoch": 236.76190476190476, "grad_norm": 5.648521900177002, "learning_rate": 4.6570464767616196e-07, "loss": 1.1876, "step": 2486 }, { "epoch": 236.85714285714286, "grad_norm": 6.08394193649292, "learning_rate": 4.658920539730135e-07, "loss": 1.1548, "step": 2487 }, { "epoch": 236.95238095238096, "grad_norm": 3.1771278381347656, "learning_rate": 4.660794602698651e-07, "loss": 1.1594, "step": 2488 }, { "epoch": 236.95238095238096, "eval_accuracy": 0.7212439115773698, "eval_f1": 0.7785714285714286, "eval_loss": 0.5684502720832825, "eval_precision": 0.7202643171806168, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.7701934369602764, "eval_runtime": 1.2586, "eval_samples_per_second": 2120.603, "eval_steps_per_second": 4.767, "step": 2488 }, { "epoch": 237.04761904761904, "grad_norm": 7.633711814880371, "learning_rate": 4.662668665667167e-07, "loss": 1.1075, "step": 2489 }, { "epoch": 237.14285714285714, "grad_norm": 7.139071464538574, "learning_rate": 4.6645427286356823e-07, "loss": 1.112, "step": 2490 }, { "epoch": 237.23809523809524, "grad_norm": 4.67210054397583, "learning_rate": 4.666416791604198e-07, "loss": 1.1502, "step": 2491 }, { "epoch": 237.33333333333334, "grad_norm": 5.977503299713135, "learning_rate": 4.6682908545727134e-07, "loss": 1.1348, "step": 2492 }, { "epoch": 237.42857142857142, "grad_norm": 7.3450236320495605, "learning_rate": 4.67016491754123e-07, "loss": 1.1331, "step": 2493 }, { "epoch": 237.52380952380952, "grad_norm": 12.501540184020996, "learning_rate": 4.6720389805097456e-07, "loss": 1.1568, "step": 2494 }, { "epoch": 237.61904761904762, "grad_norm": 4.337953090667725, "learning_rate": 4.673913043478261e-07, "loss": 1.1957, "step": 2495 }, { "epoch": 237.71428571428572, "grad_norm": 7.36350679397583, "learning_rate": 4.6757871064467766e-07, "loss": 1.1367, "step": 2496 }, { "epoch": 237.8095238095238, "grad_norm": 12.35847282409668, "learning_rate": 4.677661169415292e-07, "loss": 1.1597, "step": 2497 }, { "epoch": 237.9047619047619, "grad_norm": 4.463048458099365, "learning_rate": 4.679535232383808e-07, "loss": 1.178, "step": 2498 }, { "epoch": 238.0, "grad_norm": 5.635746479034424, "learning_rate": 4.681409295352324e-07, "loss": 1.1515, "step": 2499 }, { "epoch": 238.0, "eval_accuracy": 0.7208692394155114, "eval_f1": 0.777810915597972, "eval_loss": 0.568373441696167, "eval_precision": 0.720840243228303, "eval_recall": 0.844559585492228, "eval_roc_auc": 0.7703940702360392, "eval_runtime": 1.1859, "eval_samples_per_second": 2250.588, "eval_steps_per_second": 5.059, "step": 2499 }, { "epoch": 238.0952380952381, "grad_norm": 3.2132604122161865, "learning_rate": 4.68328335832084e-07, "loss": 1.1247, "step": 2500 }, { "epoch": 238.1904761904762, "grad_norm": 5.9469194412231445, "learning_rate": 4.685157421289356e-07, "loss": 1.1052, "step": 2501 }, { "epoch": 238.28571428571428, "grad_norm": 6.368658542633057, "learning_rate": 4.6870314842578715e-07, "loss": 1.159, "step": 2502 }, { "epoch": 238.38095238095238, "grad_norm": 4.543058395385742, "learning_rate": 4.688905547226387e-07, "loss": 1.1733, "step": 2503 }, { "epoch": 238.47619047619048, "grad_norm": 8.83027172088623, "learning_rate": 4.6907796101949026e-07, "loss": 1.1485, "step": 2504 }, { "epoch": 238.57142857142858, "grad_norm": 4.797172546386719, "learning_rate": 4.692653673163418e-07, "loss": 1.1442, "step": 2505 }, { "epoch": 238.66666666666666, "grad_norm": 7.15238094329834, "learning_rate": 4.6945277361319337e-07, "loss": 1.1412, "step": 2506 }, { "epoch": 238.76190476190476, "grad_norm": 9.403074264526367, "learning_rate": 4.6964017991004503e-07, "loss": 1.1514, "step": 2507 }, { "epoch": 238.85714285714286, "grad_norm": 7.695036888122559, "learning_rate": 4.698275862068966e-07, "loss": 1.1721, "step": 2508 }, { "epoch": 238.95238095238096, "grad_norm": 5.925418853759766, "learning_rate": 4.7001499250374814e-07, "loss": 1.1613, "step": 2509 }, { "epoch": 238.95238095238096, "eval_accuracy": 0.7231172723866617, "eval_f1": 0.7807772174428953, "eval_loss": 0.568254292011261, "eval_precision": 0.7203065134099617, "eval_recall": 0.8523316062176166, "eval_roc_auc": 0.7708552101324121, "eval_runtime": 1.1677, "eval_samples_per_second": 2285.74, "eval_steps_per_second": 5.138, "step": 2509 }, { "epoch": 239.04761904761904, "grad_norm": 4.571139812469482, "learning_rate": 4.702023988005997e-07, "loss": 1.1747, "step": 2510 }, { "epoch": 239.14285714285714, "grad_norm": 3.523711919784546, "learning_rate": 4.703898050974513e-07, "loss": 1.1276, "step": 2511 }, { "epoch": 239.23809523809524, "grad_norm": 3.364738941192627, "learning_rate": 4.7057721139430285e-07, "loss": 1.1753, "step": 2512 }, { "epoch": 239.33333333333334, "grad_norm": 3.596714973449707, "learning_rate": 4.707646176911544e-07, "loss": 1.15, "step": 2513 }, { "epoch": 239.42857142857142, "grad_norm": 10.352349281311035, "learning_rate": 4.7095202398800607e-07, "loss": 1.1382, "step": 2514 }, { "epoch": 239.52380952380952, "grad_norm": 10.572864532470703, "learning_rate": 4.711394302848576e-07, "loss": 1.1505, "step": 2515 }, { "epoch": 239.61904761904762, "grad_norm": 3.3369085788726807, "learning_rate": 4.713268365817092e-07, "loss": 1.136, "step": 2516 }, { "epoch": 239.71428571428572, "grad_norm": 7.171133041381836, "learning_rate": 4.7151424287856073e-07, "loss": 1.1219, "step": 2517 }, { "epoch": 239.8095238095238, "grad_norm": 7.1187238693237305, "learning_rate": 4.717016491754123e-07, "loss": 1.0975, "step": 2518 }, { "epoch": 239.9047619047619, "grad_norm": 12.227962493896484, "learning_rate": 4.7188905547226384e-07, "loss": 1.1898, "step": 2519 }, { "epoch": 240.0, "grad_norm": 3.9097506999969482, "learning_rate": 4.7207646176911545e-07, "loss": 1.1471, "step": 2520 }, { "epoch": 240.0, "eval_accuracy": 0.7219932559010865, "eval_f1": 0.7795603089720736, "eval_loss": 0.5678714513778687, "eval_precision": 0.7200878155872668, "eval_recall": 0.8497409326424871, "eval_roc_auc": 0.7711871042026482, "eval_runtime": 1.2783, "eval_samples_per_second": 2087.875, "eval_steps_per_second": 4.694, "step": 2520 }, { "epoch": 240.0952380952381, "grad_norm": 7.612576961517334, "learning_rate": 4.7226386806596706e-07, "loss": 1.0953, "step": 2521 }, { "epoch": 240.1904761904762, "grad_norm": 9.153794288635254, "learning_rate": 4.724512743628186e-07, "loss": 1.1538, "step": 2522 }, { "epoch": 240.28571428571428, "grad_norm": 3.632295608520508, "learning_rate": 4.726386806596702e-07, "loss": 1.178, "step": 2523 }, { "epoch": 240.38095238095238, "grad_norm": 3.858614444732666, "learning_rate": 4.7282608695652177e-07, "loss": 1.1294, "step": 2524 }, { "epoch": 240.47619047619048, "grad_norm": 6.603476524353027, "learning_rate": 4.730134932533733e-07, "loss": 1.1404, "step": 2525 }, { "epoch": 240.57142857142858, "grad_norm": 6.5844550132751465, "learning_rate": 4.732008995502249e-07, "loss": 1.1337, "step": 2526 }, { "epoch": 240.66666666666666, "grad_norm": 6.402451038360596, "learning_rate": 4.7338830584707654e-07, "loss": 1.1919, "step": 2527 }, { "epoch": 240.76190476190476, "grad_norm": 4.313340187072754, "learning_rate": 4.735757121439281e-07, "loss": 1.1371, "step": 2528 }, { "epoch": 240.85714285714286, "grad_norm": 2.5007331371307373, "learning_rate": 4.7376311844077965e-07, "loss": 1.1315, "step": 2529 }, { "epoch": 240.95238095238096, "grad_norm": 3.1889805793762207, "learning_rate": 4.739505247376312e-07, "loss": 1.124, "step": 2530 }, { "epoch": 240.95238095238096, "eval_accuracy": 0.7223679280629449, "eval_f1": 0.7767399819222658, "eval_loss": 0.5677710175514221, "eval_precision": 0.7261971830985916, "eval_recall": 0.8348445595854922, "eval_roc_auc": 0.7709075993091538, "eval_runtime": 1.1464, "eval_samples_per_second": 2328.123, "eval_steps_per_second": 5.234, "step": 2530 }, { "epoch": 241.04761904761904, "grad_norm": 3.370150327682495, "learning_rate": 4.7413793103448276e-07, "loss": 1.1566, "step": 2531 }, { "epoch": 241.14285714285714, "grad_norm": 7.428020000457764, "learning_rate": 4.743253373313343e-07, "loss": 1.1453, "step": 2532 }, { "epoch": 241.23809523809524, "grad_norm": 4.833296775817871, "learning_rate": 4.745127436281859e-07, "loss": 1.2207, "step": 2533 }, { "epoch": 241.33333333333334, "grad_norm": 7.765406131744385, "learning_rate": 4.7470014992503753e-07, "loss": 1.1642, "step": 2534 }, { "epoch": 241.42857142857142, "grad_norm": 12.157771110534668, "learning_rate": 4.748875562218891e-07, "loss": 1.1321, "step": 2535 }, { "epoch": 241.52380952380952, "grad_norm": 4.821937084197998, "learning_rate": 4.750749625187407e-07, "loss": 1.1655, "step": 2536 }, { "epoch": 241.61904761904762, "grad_norm": 4.60735559463501, "learning_rate": 4.7526236881559224e-07, "loss": 1.1469, "step": 2537 }, { "epoch": 241.71428571428572, "grad_norm": 3.312079668045044, "learning_rate": 4.754497751124438e-07, "loss": 1.1382, "step": 2538 }, { "epoch": 241.8095238095238, "grad_norm": 6.541538238525391, "learning_rate": 4.7563718140929535e-07, "loss": 1.1609, "step": 2539 }, { "epoch": 241.9047619047619, "grad_norm": 3.9310693740844727, "learning_rate": 4.758245877061469e-07, "loss": 1.1411, "step": 2540 }, { "epoch": 242.0, "grad_norm": 7.887485980987549, "learning_rate": 4.7601199400299857e-07, "loss": 1.1084, "step": 2541 }, { "epoch": 242.0, "eval_accuracy": 0.7246159610340952, "eval_f1": 0.7822222222222223, "eval_loss": 0.5681803226470947, "eval_precision": 0.7209175314036046, "eval_recall": 0.8549222797927462, "eval_roc_auc": 0.7706738629821532, "eval_runtime": 1.1999, "eval_samples_per_second": 2224.376, "eval_steps_per_second": 5.0, "step": 2541 }, { "epoch": 242.0952380952381, "grad_norm": 3.5262506008148193, "learning_rate": 4.761994002998501e-07, "loss": 1.1124, "step": 2542 }, { "epoch": 242.1904761904762, "grad_norm": 4.377519130706787, "learning_rate": 4.763868065967017e-07, "loss": 1.1453, "step": 2543 }, { "epoch": 242.28571428571428, "grad_norm": 3.5326547622680664, "learning_rate": 4.7657421289355323e-07, "loss": 1.1554, "step": 2544 }, { "epoch": 242.38095238095238, "grad_norm": 11.76237964630127, "learning_rate": 4.7676161919040484e-07, "loss": 1.1544, "step": 2545 }, { "epoch": 242.47619047619048, "grad_norm": 10.268291473388672, "learning_rate": 4.769490254872563e-07, "loss": 1.1011, "step": 2546 }, { "epoch": 242.57142857142858, "grad_norm": 5.779625415802002, "learning_rate": 4.771364317841079e-07, "loss": 1.108, "step": 2547 }, { "epoch": 242.66666666666666, "grad_norm": 9.959461212158203, "learning_rate": 4.773238380809596e-07, "loss": 1.16, "step": 2548 }, { "epoch": 242.76190476190476, "grad_norm": 7.101002216339111, "learning_rate": 4.775112443778111e-07, "loss": 1.151, "step": 2549 }, { "epoch": 242.85714285714286, "grad_norm": 7.493264675140381, "learning_rate": 4.776986506746627e-07, "loss": 1.1914, "step": 2550 }, { "epoch": 242.95238095238096, "grad_norm": 5.301811695098877, "learning_rate": 4.778860569715142e-07, "loss": 1.15, "step": 2551 }, { "epoch": 242.95238095238096, "eval_accuracy": 0.7212439115773698, "eval_f1": 0.7785714285714286, "eval_loss": 0.5683825016021729, "eval_precision": 0.7202643171806168, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.7700987334484745, "eval_runtime": 1.2247, "eval_samples_per_second": 2179.256, "eval_steps_per_second": 4.899, "step": 2551 }, { "epoch": 243.04761904761904, "grad_norm": 10.990437507629395, "learning_rate": 4.780734632683659e-07, "loss": 1.1112, "step": 2552 }, { "epoch": 243.14285714285714, "grad_norm": 6.5883660316467285, "learning_rate": 4.782608695652174e-07, "loss": 1.186, "step": 2553 }, { "epoch": 243.23809523809524, "grad_norm": 5.139063358306885, "learning_rate": 4.78448275862069e-07, "loss": 1.1898, "step": 2554 }, { "epoch": 243.33333333333334, "grad_norm": 5.769028186798096, "learning_rate": 4.786356821589206e-07, "loss": 1.1261, "step": 2555 }, { "epoch": 243.42857142857142, "grad_norm": 2.6461703777313232, "learning_rate": 4.788230884557722e-07, "loss": 1.1643, "step": 2556 }, { "epoch": 243.52380952380952, "grad_norm": 8.833353996276855, "learning_rate": 4.790104947526238e-07, "loss": 1.0969, "step": 2557 }, { "epoch": 243.61904761904762, "grad_norm": 8.292926788330078, "learning_rate": 4.791979010494753e-07, "loss": 1.1508, "step": 2558 }, { "epoch": 243.71428571428572, "grad_norm": 5.664553642272949, "learning_rate": 4.793853073463269e-07, "loss": 1.1831, "step": 2559 }, { "epoch": 243.8095238095238, "grad_norm": 3.355820655822754, "learning_rate": 4.795727136431784e-07, "loss": 1.1163, "step": 2560 }, { "epoch": 243.9047619047619, "grad_norm": 3.2066733837127686, "learning_rate": 4.7976011994003e-07, "loss": 1.1299, "step": 2561 }, { "epoch": 244.0, "grad_norm": 16.678447723388672, "learning_rate": 4.799475262368816e-07, "loss": 1.1288, "step": 2562 }, { "epoch": 244.0, "eval_accuracy": 0.7234919445485201, "eval_f1": 0.7806183115338883, "eval_loss": 0.5679145455360413, "eval_precision": 0.7214285714285714, "eval_recall": 0.8503886010362695, "eval_roc_auc": 0.7707867012089811, "eval_runtime": 1.6147, "eval_samples_per_second": 1652.969, "eval_steps_per_second": 3.716, "step": 2562 }, { "epoch": 244.0952380952381, "grad_norm": 3.1166558265686035, "learning_rate": 4.801349325337332e-07, "loss": 1.1614, "step": 2563 }, { "epoch": 244.1904761904762, "grad_norm": 4.723116874694824, "learning_rate": 4.803223388305847e-07, "loss": 1.1669, "step": 2564 }, { "epoch": 244.28571428571428, "grad_norm": 5.824684143066406, "learning_rate": 4.805097451274363e-07, "loss": 1.1639, "step": 2565 }, { "epoch": 244.38095238095238, "grad_norm": 4.820675849914551, "learning_rate": 4.806971514242879e-07, "loss": 1.1597, "step": 2566 }, { "epoch": 244.47619047619048, "grad_norm": 4.829911708831787, "learning_rate": 4.808845577211394e-07, "loss": 1.177, "step": 2567 }, { "epoch": 244.57142857142858, "grad_norm": 3.4412834644317627, "learning_rate": 4.81071964017991e-07, "loss": 1.1339, "step": 2568 }, { "epoch": 244.66666666666666, "grad_norm": 5.206437110900879, "learning_rate": 4.812593703148426e-07, "loss": 1.1264, "step": 2569 }, { "epoch": 244.76190476190476, "grad_norm": 7.612485408782959, "learning_rate": 4.814467766116942e-07, "loss": 1.1412, "step": 2570 }, { "epoch": 244.85714285714286, "grad_norm": 3.95589542388916, "learning_rate": 4.816341829085457e-07, "loss": 1.1521, "step": 2571 }, { "epoch": 244.95238095238096, "grad_norm": 5.139015197753906, "learning_rate": 4.818215892053973e-07, "loss": 1.1365, "step": 2572 }, { "epoch": 244.95238095238096, "eval_accuracy": 0.7216185837392282, "eval_f1": 0.7805022156573117, "eval_loss": 0.5677750706672668, "eval_precision": 0.7175448126018468, "eval_recall": 0.8555699481865285, "eval_roc_auc": 0.7708963730569948, "eval_runtime": 1.1166, "eval_samples_per_second": 2390.274, "eval_steps_per_second": 5.373, "step": 2572 }, { "epoch": 245.04761904761904, "grad_norm": 5.804911136627197, "learning_rate": 4.820089955022488e-07, "loss": 1.1618, "step": 2573 }, { "epoch": 245.14285714285714, "grad_norm": 8.898213386535645, "learning_rate": 4.821964017991005e-07, "loss": 1.1644, "step": 2574 }, { "epoch": 245.23809523809524, "grad_norm": 5.84812593460083, "learning_rate": 4.823838080959521e-07, "loss": 1.0921, "step": 2575 }, { "epoch": 245.33333333333334, "grad_norm": 7.604754447937012, "learning_rate": 4.825712143928036e-07, "loss": 1.1487, "step": 2576 }, { "epoch": 245.42857142857142, "grad_norm": 7.634527683258057, "learning_rate": 4.827586206896553e-07, "loss": 1.1865, "step": 2577 }, { "epoch": 245.52380952380952, "grad_norm": 4.072860240936279, "learning_rate": 4.829460269865068e-07, "loss": 1.1139, "step": 2578 }, { "epoch": 245.61904761904762, "grad_norm": 8.710189819335938, "learning_rate": 4.831334332833584e-07, "loss": 1.1172, "step": 2579 }, { "epoch": 245.71428571428572, "grad_norm": 4.168026924133301, "learning_rate": 4.833208395802099e-07, "loss": 1.1178, "step": 2580 }, { "epoch": 245.8095238095238, "grad_norm": 4.758199214935303, "learning_rate": 4.835082458770615e-07, "loss": 1.1392, "step": 2581 }, { "epoch": 245.9047619047619, "grad_norm": 4.961451053619385, "learning_rate": 4.83695652173913e-07, "loss": 1.1511, "step": 2582 }, { "epoch": 246.0, "grad_norm": 3.5863423347473145, "learning_rate": 4.838830584707647e-07, "loss": 1.1492, "step": 2583 }, { "epoch": 246.0, "eval_accuracy": 0.7219932559010865, "eval_f1": 0.7774445110977805, "eval_loss": 0.5673912167549133, "eval_precision": 0.7240223463687151, "eval_recall": 0.8393782383419689, "eval_roc_auc": 0.7707337363270006, "eval_runtime": 1.1796, "eval_samples_per_second": 2262.657, "eval_steps_per_second": 5.087, "step": 2583 }, { "epoch": 246.0952380952381, "grad_norm": 5.360644817352295, "learning_rate": 4.840704647676163e-07, "loss": 1.1396, "step": 2584 }, { "epoch": 246.1904761904762, "grad_norm": 5.5368523597717285, "learning_rate": 4.842578710644678e-07, "loss": 1.1432, "step": 2585 }, { "epoch": 246.28571428571428, "grad_norm": 7.85012674331665, "learning_rate": 4.844452773613194e-07, "loss": 1.142, "step": 2586 }, { "epoch": 246.38095238095238, "grad_norm": 3.8528225421905518, "learning_rate": 4.846326836581709e-07, "loss": 1.1475, "step": 2587 }, { "epoch": 246.47619047619048, "grad_norm": 4.584531307220459, "learning_rate": 4.848200899550225e-07, "loss": 1.1688, "step": 2588 }, { "epoch": 246.57142857142858, "grad_norm": 4.979501247406006, "learning_rate": 4.85007496251874e-07, "loss": 1.1626, "step": 2589 }, { "epoch": 246.66666666666666, "grad_norm": 11.004454612731934, "learning_rate": 4.851949025487257e-07, "loss": 1.1249, "step": 2590 }, { "epoch": 246.76190476190476, "grad_norm": 10.931334495544434, "learning_rate": 4.853823088455772e-07, "loss": 1.1176, "step": 2591 }, { "epoch": 246.85714285714286, "grad_norm": 6.858458995819092, "learning_rate": 4.855697151424288e-07, "loss": 1.1268, "step": 2592 }, { "epoch": 246.95238095238096, "grad_norm": 6.829026222229004, "learning_rate": 4.857571214392804e-07, "loss": 1.1773, "step": 2593 }, { "epoch": 246.95238095238096, "eval_accuracy": 0.7223679280629449, "eval_f1": 0.7797919762258544, "eval_loss": 0.5674595236778259, "eval_precision": 0.7204832509610104, "eval_recall": 0.8497409326424871, "eval_roc_auc": 0.7707849740932642, "eval_runtime": 1.2422, "eval_samples_per_second": 2148.687, "eval_steps_per_second": 4.83, "step": 2593 }, { "epoch": 247.04761904761904, "grad_norm": 4.348031997680664, "learning_rate": 4.859445277361319e-07, "loss": 1.1357, "step": 2594 }, { "epoch": 247.14285714285714, "grad_norm": 2.821397542953491, "learning_rate": 4.861319340329835e-07, "loss": 1.1486, "step": 2595 }, { "epoch": 247.23809523809524, "grad_norm": 4.46021032333374, "learning_rate": 4.86319340329835e-07, "loss": 1.1585, "step": 2596 }, { "epoch": 247.33333333333334, "grad_norm": 5.806177616119385, "learning_rate": 4.865067466266867e-07, "loss": 1.1376, "step": 2597 }, { "epoch": 247.42857142857142, "grad_norm": 6.550351619720459, "learning_rate": 4.866941529235382e-07, "loss": 1.1208, "step": 2598 }, { "epoch": 247.52380952380952, "grad_norm": 5.049726963043213, "learning_rate": 4.868815592203899e-07, "loss": 1.1931, "step": 2599 }, { "epoch": 247.61904761904762, "grad_norm": 12.2033052444458, "learning_rate": 4.870689655172414e-07, "loss": 1.1471, "step": 2600 }, { "epoch": 247.71428571428572, "grad_norm": 10.302976608276367, "learning_rate": 4.87256371814093e-07, "loss": 1.1419, "step": 2601 }, { "epoch": 247.8095238095238, "grad_norm": 7.384419918060303, "learning_rate": 4.874437781109446e-07, "loss": 1.1408, "step": 2602 }, { "epoch": 247.9047619047619, "grad_norm": 7.357702255249023, "learning_rate": 4.876311844077961e-07, "loss": 1.1146, "step": 2603 }, { "epoch": 248.0, "grad_norm": 14.828198432922363, "learning_rate": 4.878185907046478e-07, "loss": 1.1099, "step": 2604 }, { "epoch": 248.0, "eval_accuracy": 0.7193705507680779, "eval_f1": 0.7756813417190775, "eval_loss": 0.5678009986877441, "eval_precision": 0.7214484679665738, "eval_recall": 0.8387305699481865, "eval_roc_auc": 0.7703428324697754, "eval_runtime": 1.1966, "eval_samples_per_second": 2230.494, "eval_steps_per_second": 5.014, "step": 2604 }, { "epoch": 248.0952380952381, "grad_norm": 7.183742523193359, "learning_rate": 4.880059970014993e-07, "loss": 1.1776, "step": 2605 }, { "epoch": 248.1904761904762, "grad_norm": 8.25802230834961, "learning_rate": 4.881934032983509e-07, "loss": 1.1201, "step": 2606 }, { "epoch": 248.28571428571428, "grad_norm": 4.379749298095703, "learning_rate": 4.883808095952024e-07, "loss": 1.1509, "step": 2607 }, { "epoch": 248.38095238095238, "grad_norm": 4.609863758087158, "learning_rate": 4.88568215892054e-07, "loss": 1.1903, "step": 2608 }, { "epoch": 248.47619047619048, "grad_norm": 5.011764049530029, "learning_rate": 4.887556221889055e-07, "loss": 1.1881, "step": 2609 }, { "epoch": 248.57142857142858, "grad_norm": 4.677763938903809, "learning_rate": 4.889430284857571e-07, "loss": 1.1537, "step": 2610 }, { "epoch": 248.66666666666666, "grad_norm": 8.4342041015625, "learning_rate": 4.891304347826088e-07, "loss": 1.1329, "step": 2611 }, { "epoch": 248.76190476190476, "grad_norm": 6.544559478759766, "learning_rate": 4.893178410794603e-07, "loss": 1.1114, "step": 2612 }, { "epoch": 248.85714285714286, "grad_norm": 4.162692070007324, "learning_rate": 4.895052473763119e-07, "loss": 1.1318, "step": 2613 }, { "epoch": 248.95238095238096, "grad_norm": 3.0155041217803955, "learning_rate": 4.896926536731634e-07, "loss": 1.1124, "step": 2614 }, { "epoch": 248.95238095238096, "eval_accuracy": 0.7219932559010865, "eval_f1": 0.7798219584569733, "eval_loss": 0.5675429105758667, "eval_precision": 0.71960569550931, "eval_recall": 0.8510362694300518, "eval_roc_auc": 0.7711234887737478, "eval_runtime": 1.1373, "eval_samples_per_second": 2346.798, "eval_steps_per_second": 5.276, "step": 2614 }, { "epoch": 249.04761904761904, "grad_norm": 3.5584659576416016, "learning_rate": 4.89880059970015e-07, "loss": 1.1268, "step": 2615 }, { "epoch": 249.14285714285714, "grad_norm": 5.403181076049805, "learning_rate": 4.900674662668665e-07, "loss": 1.1221, "step": 2616 }, { "epoch": 249.23809523809524, "grad_norm": 5.95130729675293, "learning_rate": 4.902548725637181e-07, "loss": 1.1212, "step": 2617 }, { "epoch": 249.33333333333334, "grad_norm": 2.7318475246429443, "learning_rate": 4.904422788605697e-07, "loss": 1.1541, "step": 2618 }, { "epoch": 249.42857142857142, "grad_norm": 7.354856967926025, "learning_rate": 4.906296851574213e-07, "loss": 1.139, "step": 2619 }, { "epoch": 249.52380952380952, "grad_norm": 4.743566989898682, "learning_rate": 4.908170914542729e-07, "loss": 1.1343, "step": 2620 }, { "epoch": 249.61904761904762, "grad_norm": 3.644205093383789, "learning_rate": 4.910044977511244e-07, "loss": 1.1356, "step": 2621 }, { "epoch": 249.71428571428572, "grad_norm": 3.6012938022613525, "learning_rate": 4.911919040479761e-07, "loss": 1.1496, "step": 2622 }, { "epoch": 249.8095238095238, "grad_norm": 5.7909650802612305, "learning_rate": 4.913793103448276e-07, "loss": 1.1433, "step": 2623 }, { "epoch": 249.9047619047619, "grad_norm": 5.431772708892822, "learning_rate": 4.915667166416792e-07, "loss": 1.1625, "step": 2624 }, { "epoch": 250.0, "grad_norm": 4.164554595947266, "learning_rate": 4.917541229385308e-07, "loss": 1.1322, "step": 2625 }, { "epoch": 250.0, "eval_accuracy": 0.7219932559010865, "eval_f1": 0.7770432692307693, "eval_loss": 0.5667824149131775, "eval_precision": 0.7247757847533632, "eval_recall": 0.8374352331606217, "eval_roc_auc": 0.7719421416234887, "eval_runtime": 1.3328, "eval_samples_per_second": 2002.626, "eval_steps_per_second": 4.502, "step": 2625 }, { "epoch": 250.0952380952381, "grad_norm": 5.059668064117432, "learning_rate": 4.919415292353824e-07, "loss": 1.1411, "step": 2626 }, { "epoch": 250.1904761904762, "grad_norm": 4.866458892822266, "learning_rate": 4.92128935532234e-07, "loss": 1.1845, "step": 2627 }, { "epoch": 250.28571428571428, "grad_norm": 8.36328125, "learning_rate": 4.923163418290855e-07, "loss": 1.1561, "step": 2628 }, { "epoch": 250.38095238095238, "grad_norm": 2.8156042098999023, "learning_rate": 4.925037481259371e-07, "loss": 1.1703, "step": 2629 }, { "epoch": 250.47619047619048, "grad_norm": 4.026419162750244, "learning_rate": 4.926911544227886e-07, "loss": 1.1547, "step": 2630 }, { "epoch": 250.57142857142858, "grad_norm": 7.374605178833008, "learning_rate": 4.928785607196402e-07, "loss": 1.1162, "step": 2631 }, { "epoch": 250.66666666666666, "grad_norm": 4.192410945892334, "learning_rate": 4.930659670164918e-07, "loss": 1.1401, "step": 2632 }, { "epoch": 250.76190476190476, "grad_norm": 8.36020565032959, "learning_rate": 4.932533733133434e-07, "loss": 1.1354, "step": 2633 }, { "epoch": 250.85714285714286, "grad_norm": 4.311028003692627, "learning_rate": 4.934407796101949e-07, "loss": 1.1371, "step": 2634 }, { "epoch": 250.95238095238096, "grad_norm": 4.741408348083496, "learning_rate": 4.936281859070465e-07, "loss": 1.1553, "step": 2635 }, { "epoch": 250.95238095238096, "eval_accuracy": 0.7223679280629449, "eval_f1": 0.7796610169491526, "eval_loss": 0.5665171146392822, "eval_precision": 0.7207256734469488, "eval_recall": 0.8490932642487047, "eval_roc_auc": 0.7729004029936672, "eval_runtime": 1.2164, "eval_samples_per_second": 2194.164, "eval_steps_per_second": 4.933, "step": 2635 }, { "epoch": 251.04761904761904, "grad_norm": 3.4054064750671387, "learning_rate": 4.93815592203898e-07, "loss": 1.1093, "step": 2636 }, { "epoch": 251.14285714285714, "grad_norm": 4.24013614654541, "learning_rate": 4.940029985007496e-07, "loss": 1.0858, "step": 2637 }, { "epoch": 251.23809523809524, "grad_norm": 6.393743515014648, "learning_rate": 4.941904047976012e-07, "loss": 1.19, "step": 2638 }, { "epoch": 251.33333333333334, "grad_norm": 3.3348259925842285, "learning_rate": 4.943778110944528e-07, "loss": 1.1088, "step": 2639 }, { "epoch": 251.42857142857142, "grad_norm": 7.557923316955566, "learning_rate": 4.945652173913044e-07, "loss": 1.1138, "step": 2640 }, { "epoch": 251.52380952380952, "grad_norm": 5.54427433013916, "learning_rate": 4.947526236881559e-07, "loss": 1.1517, "step": 2641 }, { "epoch": 251.61904761904762, "grad_norm": 5.514889717102051, "learning_rate": 4.949400299850075e-07, "loss": 1.1478, "step": 2642 }, { "epoch": 251.71428571428572, "grad_norm": 3.9975523948669434, "learning_rate": 4.95127436281859e-07, "loss": 1.1301, "step": 2643 }, { "epoch": 251.8095238095238, "grad_norm": 3.9698591232299805, "learning_rate": 4.953148425787107e-07, "loss": 1.1489, "step": 2644 }, { "epoch": 251.9047619047619, "grad_norm": 9.93479061126709, "learning_rate": 4.955022488755622e-07, "loss": 1.1834, "step": 2645 }, { "epoch": 252.0, "grad_norm": 7.301760196685791, "learning_rate": 4.956896551724138e-07, "loss": 1.1597, "step": 2646 }, { "epoch": 252.0, "eval_accuracy": 0.7197452229299363, "eval_f1": 0.7740181268882175, "eval_loss": 0.5662791728973389, "eval_precision": 0.7253680634201586, "eval_recall": 0.8296632124352331, "eval_roc_auc": 0.7728767990788716, "eval_runtime": 1.159, "eval_samples_per_second": 2302.753, "eval_steps_per_second": 5.177, "step": 2646 }, { "epoch": 252.0952380952381, "grad_norm": 9.864036560058594, "learning_rate": 4.958770614692655e-07, "loss": 1.12, "step": 2647 }, { "epoch": 252.1904761904762, "grad_norm": 7.417358875274658, "learning_rate": 4.96064467766117e-07, "loss": 1.1745, "step": 2648 }, { "epoch": 252.28571428571428, "grad_norm": 9.244400024414062, "learning_rate": 4.962518740629686e-07, "loss": 1.1274, "step": 2649 }, { "epoch": 252.38095238095238, "grad_norm": 5.600161075592041, "learning_rate": 4.964392803598201e-07, "loss": 1.1308, "step": 2650 }, { "epoch": 252.47619047619048, "grad_norm": 3.25844144821167, "learning_rate": 4.966266866566717e-07, "loss": 1.1433, "step": 2651 }, { "epoch": 252.57142857142858, "grad_norm": 5.3738203048706055, "learning_rate": 4.968140929535232e-07, "loss": 1.1348, "step": 2652 }, { "epoch": 252.66666666666666, "grad_norm": 4.872201919555664, "learning_rate": 4.970014992503749e-07, "loss": 1.1805, "step": 2653 }, { "epoch": 252.76190476190476, "grad_norm": 3.1699211597442627, "learning_rate": 4.971889055472264e-07, "loss": 1.1138, "step": 2654 }, { "epoch": 252.85714285714286, "grad_norm": 8.699714660644531, "learning_rate": 4.97376311844078e-07, "loss": 1.1406, "step": 2655 }, { "epoch": 252.95238095238096, "grad_norm": 4.888248443603516, "learning_rate": 4.975637181409296e-07, "loss": 1.1397, "step": 2656 }, { "epoch": 252.95238095238096, "eval_accuracy": 0.7208692394155114, "eval_f1": 0.7792592592592592, "eval_loss": 0.5662895441055298, "eval_precision": 0.718186783178591, "eval_recall": 0.8516839378238342, "eval_roc_auc": 0.77301065054692, "eval_runtime": 1.2162, "eval_samples_per_second": 2194.61, "eval_steps_per_second": 4.934, "step": 2656 }, { "epoch": 253.04761904761904, "grad_norm": 6.35478401184082, "learning_rate": 4.977511244377811e-07, "loss": 1.1514, "step": 2657 }, { "epoch": 253.14285714285714, "grad_norm": 8.917704582214355, "learning_rate": 4.979385307346327e-07, "loss": 1.1433, "step": 2658 }, { "epoch": 253.23809523809524, "grad_norm": 4.451404571533203, "learning_rate": 4.981259370314842e-07, "loss": 1.1446, "step": 2659 }, { "epoch": 253.33333333333334, "grad_norm": 5.947729110717773, "learning_rate": 4.983133433283359e-07, "loss": 1.1874, "step": 2660 }, { "epoch": 253.42857142857142, "grad_norm": 4.357635498046875, "learning_rate": 4.985007496251874e-07, "loss": 1.1575, "step": 2661 }, { "epoch": 253.52380952380952, "grad_norm": 2.9355976581573486, "learning_rate": 4.98688155922039e-07, "loss": 1.1786, "step": 2662 }, { "epoch": 253.61904761904762, "grad_norm": 6.246455669403076, "learning_rate": 4.988755622188905e-07, "loss": 1.1795, "step": 2663 }, { "epoch": 253.71428571428572, "grad_norm": 3.440420150756836, "learning_rate": 4.990629685157421e-07, "loss": 1.1208, "step": 2664 }, { "epoch": 253.8095238095238, "grad_norm": 7.903586387634277, "learning_rate": 4.992503748125937e-07, "loss": 1.1363, "step": 2665 }, { "epoch": 253.9047619047619, "grad_norm": 8.214106559753418, "learning_rate": 4.994377811094453e-07, "loss": 1.1367, "step": 2666 }, { "epoch": 254.0, "grad_norm": 7.515315532684326, "learning_rate": 4.996251874062969e-07, "loss": 1.103, "step": 2667 }, { "epoch": 254.0, "eval_accuracy": 0.7197452229299363, "eval_f1": 0.7765830346475507, "eval_loss": 0.5663872957229614, "eval_precision": 0.720620842572062, "eval_recall": 0.8419689119170984, "eval_roc_auc": 0.7728983880253311, "eval_runtime": 1.2631, "eval_samples_per_second": 2112.992, "eval_steps_per_second": 4.75, "step": 2667 }, { "epoch": 254.0952380952381, "grad_norm": 4.3371782302856445, "learning_rate": 4.998125937031484e-07, "loss": 1.1568, "step": 2668 }, { "epoch": 254.1904761904762, "grad_norm": 5.310108184814453, "learning_rate": 5.000000000000001e-07, "loss": 1.1713, "step": 2669 }, { "epoch": 254.28571428571428, "grad_norm": 6.331097602844238, "learning_rate": 5.001874062968516e-07, "loss": 1.16, "step": 2670 }, { "epoch": 254.38095238095238, "grad_norm": 4.3251214027404785, "learning_rate": 5.003748125937032e-07, "loss": 1.118, "step": 2671 }, { "epoch": 254.47619047619048, "grad_norm": 5.350346088409424, "learning_rate": 5.005622188905547e-07, "loss": 1.1931, "step": 2672 }, { "epoch": 254.57142857142858, "grad_norm": 4.380564212799072, "learning_rate": 5.007496251874063e-07, "loss": 1.1534, "step": 2673 }, { "epoch": 254.66666666666666, "grad_norm": 4.470727443695068, "learning_rate": 5.00937031484258e-07, "loss": 1.172, "step": 2674 }, { "epoch": 254.76190476190476, "grad_norm": 5.436304092407227, "learning_rate": 5.011244377811095e-07, "loss": 1.1059, "step": 2675 }, { "epoch": 254.85714285714286, "grad_norm": 5.644359588623047, "learning_rate": 5.013118440779611e-07, "loss": 1.1502, "step": 2676 }, { "epoch": 254.95238095238096, "grad_norm": 4.652618408203125, "learning_rate": 5.014992503748126e-07, "loss": 1.1219, "step": 2677 }, { "epoch": 254.95238095238096, "eval_accuracy": 0.7216185837392282, "eval_f1": 0.7799822327509623, "eval_loss": 0.5664767622947693, "eval_precision": 0.7184942716857611, "eval_recall": 0.852979274611399, "eval_roc_auc": 0.7731445020149683, "eval_runtime": 1.3646, "eval_samples_per_second": 1955.908, "eval_steps_per_second": 4.397, "step": 2677 }, { "epoch": 255.04761904761904, "grad_norm": 6.856327056884766, "learning_rate": 5.016866566716642e-07, "loss": 1.1468, "step": 2678 }, { "epoch": 255.14285714285714, "grad_norm": 8.316398620605469, "learning_rate": 5.018740629685157e-07, "loss": 1.1719, "step": 2679 }, { "epoch": 255.23809523809524, "grad_norm": 11.568921089172363, "learning_rate": 5.020614692653673e-07, "loss": 1.1473, "step": 2680 }, { "epoch": 255.33333333333334, "grad_norm": 2.822263240814209, "learning_rate": 5.02248875562219e-07, "loss": 1.1532, "step": 2681 }, { "epoch": 255.42857142857142, "grad_norm": 7.216583728790283, "learning_rate": 5.024362818590705e-07, "loss": 1.122, "step": 2682 }, { "epoch": 255.52380952380952, "grad_norm": 6.852657318115234, "learning_rate": 5.026236881559221e-07, "loss": 1.0884, "step": 2683 }, { "epoch": 255.61904761904762, "grad_norm": 4.248050212860107, "learning_rate": 5.028110944527736e-07, "loss": 1.1541, "step": 2684 }, { "epoch": 255.71428571428572, "grad_norm": 4.815935134887695, "learning_rate": 5.029985007496252e-07, "loss": 1.1383, "step": 2685 }, { "epoch": 255.8095238095238, "grad_norm": 4.0007476806640625, "learning_rate": 5.031859070464767e-07, "loss": 1.1343, "step": 2686 }, { "epoch": 255.9047619047619, "grad_norm": 9.387462615966797, "learning_rate": 5.033733133433283e-07, "loss": 1.2121, "step": 2687 }, { "epoch": 256.0, "grad_norm": 5.414767265319824, "learning_rate": 5.035607196401799e-07, "loss": 1.157, "step": 2688 }, { "epoch": 256.0, "eval_accuracy": 0.7234919445485201, "eval_f1": 0.7775768535262206, "eval_loss": 0.5660356283187866, "eval_precision": 0.7271702367531003, "eval_recall": 0.8354922279792746, "eval_roc_auc": 0.7734519286125504, "eval_runtime": 1.1637, "eval_samples_per_second": 2293.636, "eval_steps_per_second": 5.156, "step": 2688 }, { "epoch": 256.0952380952381, "grad_norm": 6.968070030212402, "learning_rate": 5.037481259370315e-07, "loss": 1.1234, "step": 2689 }, { "epoch": 256.1904761904762, "grad_norm": 5.248779296875, "learning_rate": 5.03935532233883e-07, "loss": 1.1436, "step": 2690 }, { "epoch": 256.2857142857143, "grad_norm": 14.160701751708984, "learning_rate": 5.041229385307347e-07, "loss": 1.1447, "step": 2691 }, { "epoch": 256.3809523809524, "grad_norm": 4.778572082519531, "learning_rate": 5.043103448275863e-07, "loss": 1.1345, "step": 2692 }, { "epoch": 256.4761904761905, "grad_norm": 3.8514914512634277, "learning_rate": 5.044977511244378e-07, "loss": 1.1497, "step": 2693 }, { "epoch": 256.57142857142856, "grad_norm": 7.788860321044922, "learning_rate": 5.046851574212894e-07, "loss": 1.1367, "step": 2694 }, { "epoch": 256.6666666666667, "grad_norm": 6.130805015563965, "learning_rate": 5.04872563718141e-07, "loss": 1.173, "step": 2695 }, { "epoch": 256.76190476190476, "grad_norm": 6.780652046203613, "learning_rate": 5.050599700149926e-07, "loss": 1.1691, "step": 2696 }, { "epoch": 256.85714285714283, "grad_norm": 6.14133358001709, "learning_rate": 5.052473763118441e-07, "loss": 1.1414, "step": 2697 }, { "epoch": 256.95238095238096, "grad_norm": 4.445262908935547, "learning_rate": 5.054347826086957e-07, "loss": 1.189, "step": 2698 }, { "epoch": 256.95238095238096, "eval_accuracy": 0.7234919445485201, "eval_f1": 0.7799642218246869, "eval_loss": 0.5659570693969727, "eval_precision": 0.7226519337016575, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.7735716753022452, "eval_runtime": 1.2354, "eval_samples_per_second": 2160.474, "eval_steps_per_second": 4.857, "step": 2698 }, { "epoch": 257.04761904761904, "grad_norm": 8.566206932067871, "learning_rate": 5.056221889055472e-07, "loss": 1.107, "step": 2699 }, { "epoch": 257.14285714285717, "grad_norm": 3.2759463787078857, "learning_rate": 5.058095952023988e-07, "loss": 1.1409, "step": 2700 }, { "epoch": 257.23809523809524, "grad_norm": 4.49222993850708, "learning_rate": 5.059970014992504e-07, "loss": 1.1603, "step": 2701 }, { "epoch": 257.3333333333333, "grad_norm": 3.603001594543457, "learning_rate": 5.06184407796102e-07, "loss": 1.1433, "step": 2702 }, { "epoch": 257.42857142857144, "grad_norm": 5.98636531829834, "learning_rate": 5.063718140929536e-07, "loss": 1.1373, "step": 2703 }, { "epoch": 257.5238095238095, "grad_norm": 6.03758430480957, "learning_rate": 5.065592203898051e-07, "loss": 1.1238, "step": 2704 }, { "epoch": 257.6190476190476, "grad_norm": 6.095311641693115, "learning_rate": 5.067466266866567e-07, "loss": 1.1018, "step": 2705 }, { "epoch": 257.7142857142857, "grad_norm": 3.1190407276153564, "learning_rate": 5.069340329835082e-07, "loss": 1.1761, "step": 2706 }, { "epoch": 257.8095238095238, "grad_norm": 4.101867198944092, "learning_rate": 5.071214392803598e-07, "loss": 1.1582, "step": 2707 }, { "epoch": 257.9047619047619, "grad_norm": 4.426729679107666, "learning_rate": 5.073088455772113e-07, "loss": 1.1002, "step": 2708 }, { "epoch": 258.0, "grad_norm": 4.8217668533325195, "learning_rate": 5.07496251874063e-07, "loss": 1.1547, "step": 2709 }, { "epoch": 258.0, "eval_accuracy": 0.7261146496815286, "eval_f1": 0.7850632167009703, "eval_loss": 0.566510796546936, "eval_precision": 0.7189014539579968, "eval_recall": 0.8646373056994818, "eval_roc_auc": 0.7735877950489349, "eval_runtime": 1.1537, "eval_samples_per_second": 2313.354, "eval_steps_per_second": 5.2, "step": 2709 }, { "epoch": 258.0952380952381, "grad_norm": 11.279150009155273, "learning_rate": 5.076836581709146e-07, "loss": 1.1437, "step": 2710 }, { "epoch": 258.1904761904762, "grad_norm": 6.843417167663574, "learning_rate": 5.078710644677661e-07, "loss": 1.1344, "step": 2711 }, { "epoch": 258.2857142857143, "grad_norm": 4.734285354614258, "learning_rate": 5.080584707646177e-07, "loss": 1.1491, "step": 2712 }, { "epoch": 258.3809523809524, "grad_norm": 6.189227104187012, "learning_rate": 5.082458770614692e-07, "loss": 1.1561, "step": 2713 }, { "epoch": 258.4761904761905, "grad_norm": 5.562397003173828, "learning_rate": 5.084332833583209e-07, "loss": 1.1525, "step": 2714 }, { "epoch": 258.57142857142856, "grad_norm": 4.941099166870117, "learning_rate": 5.086206896551724e-07, "loss": 1.1536, "step": 2715 }, { "epoch": 258.6666666666667, "grad_norm": 6.65519905090332, "learning_rate": 5.088080959520241e-07, "loss": 1.1511, "step": 2716 }, { "epoch": 258.76190476190476, "grad_norm": 3.833465576171875, "learning_rate": 5.089955022488757e-07, "loss": 1.1595, "step": 2717 }, { "epoch": 258.85714285714283, "grad_norm": 2.8721892833709717, "learning_rate": 5.091829085457272e-07, "loss": 1.1216, "step": 2718 }, { "epoch": 258.95238095238096, "grad_norm": 7.314364433288574, "learning_rate": 5.093703148425788e-07, "loss": 1.1316, "step": 2719 }, { "epoch": 258.95238095238096, "eval_accuracy": 0.7268639940052454, "eval_f1": 0.7823230815168707, "eval_loss": 0.565944254398346, "eval_precision": 0.7257617728531855, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.773705814622913, "eval_runtime": 1.1601, "eval_samples_per_second": 2300.601, "eval_steps_per_second": 5.172, "step": 2719 }, { "epoch": 259.04761904761904, "grad_norm": 4.111525535583496, "learning_rate": 5.095577211394303e-07, "loss": 1.1183, "step": 2720 }, { "epoch": 259.14285714285717, "grad_norm": 4.4706130027771, "learning_rate": 5.097451274362819e-07, "loss": 1.1062, "step": 2721 }, { "epoch": 259.23809523809524, "grad_norm": 5.0127129554748535, "learning_rate": 5.099325337331334e-07, "loss": 1.1628, "step": 2722 }, { "epoch": 259.3333333333333, "grad_norm": 5.392455577850342, "learning_rate": 5.101199400299851e-07, "loss": 1.165, "step": 2723 }, { "epoch": 259.42857142857144, "grad_norm": 11.411964416503906, "learning_rate": 5.103073463268366e-07, "loss": 1.1307, "step": 2724 }, { "epoch": 259.5238095238095, "grad_norm": 9.55789852142334, "learning_rate": 5.104947526236882e-07, "loss": 1.1287, "step": 2725 }, { "epoch": 259.6190476190476, "grad_norm": 4.320809364318848, "learning_rate": 5.106821589205397e-07, "loss": 1.1133, "step": 2726 }, { "epoch": 259.7142857142857, "grad_norm": 6.221428871154785, "learning_rate": 5.108695652173913e-07, "loss": 1.1716, "step": 2727 }, { "epoch": 259.8095238095238, "grad_norm": 6.889163494110107, "learning_rate": 5.110569715142429e-07, "loss": 1.1539, "step": 2728 }, { "epoch": 259.9047619047619, "grad_norm": 7.771426200866699, "learning_rate": 5.112443778110944e-07, "loss": 1.1633, "step": 2729 }, { "epoch": 260.0, "grad_norm": 7.223512172698975, "learning_rate": 5.114317841079461e-07, "loss": 1.1894, "step": 2730 }, { "epoch": 260.0, "eval_accuracy": 0.7261146496815286, "eval_f1": 0.7823757070556713, "eval_loss": 0.5655758380889893, "eval_precision": 0.7239669421487603, "eval_recall": 0.8510362694300518, "eval_roc_auc": 0.7740702360391479, "eval_runtime": 1.2076, "eval_samples_per_second": 2210.143, "eval_steps_per_second": 4.968, "step": 2730 }, { "epoch": 260.0952380952381, "grad_norm": 4.196420669555664, "learning_rate": 5.116191904047976e-07, "loss": 1.1319, "step": 2731 }, { "epoch": 260.1904761904762, "grad_norm": 6.068887233734131, "learning_rate": 5.118065967016492e-07, "loss": 1.1364, "step": 2732 }, { "epoch": 260.2857142857143, "grad_norm": 5.328514099121094, "learning_rate": 5.119940029985007e-07, "loss": 1.1266, "step": 2733 }, { "epoch": 260.3809523809524, "grad_norm": 6.16312313079834, "learning_rate": 5.121814092953523e-07, "loss": 1.1274, "step": 2734 }, { "epoch": 260.4761904761905, "grad_norm": 11.435556411743164, "learning_rate": 5.123688155922038e-07, "loss": 1.2014, "step": 2735 }, { "epoch": 260.57142857142856, "grad_norm": 2.7801122665405273, "learning_rate": 5.125562218890555e-07, "loss": 1.1256, "step": 2736 }, { "epoch": 260.6666666666667, "grad_norm": 4.128719806671143, "learning_rate": 5.127436281859071e-07, "loss": 1.1606, "step": 2737 }, { "epoch": 260.76190476190476, "grad_norm": 6.106013298034668, "learning_rate": 5.129310344827586e-07, "loss": 1.1465, "step": 2738 }, { "epoch": 260.85714285714283, "grad_norm": 3.1265065670013428, "learning_rate": 5.131184407796103e-07, "loss": 1.1276, "step": 2739 }, { "epoch": 260.95238095238096, "grad_norm": 4.820446491241455, "learning_rate": 5.133058470764618e-07, "loss": 1.1364, "step": 2740 }, { "epoch": 260.95238095238096, "eval_accuracy": 0.7246159610340952, "eval_f1": 0.779742283488163, "eval_loss": 0.5656084418296814, "eval_precision": 0.7255995538204127, "eval_recall": 0.8426165803108808, "eval_roc_auc": 0.773786701208981, "eval_runtime": 1.1679, "eval_samples_per_second": 2285.247, "eval_steps_per_second": 5.137, "step": 2740 }, { "epoch": 261.04761904761904, "grad_norm": 6.337623596191406, "learning_rate": 5.134932533733134e-07, "loss": 1.1083, "step": 2741 }, { "epoch": 261.14285714285717, "grad_norm": 5.403284072875977, "learning_rate": 5.136806596701649e-07, "loss": 1.1444, "step": 2742 }, { "epoch": 261.23809523809524, "grad_norm": 6.919364929199219, "learning_rate": 5.138680659670165e-07, "loss": 1.1273, "step": 2743 }, { "epoch": 261.3333333333333, "grad_norm": 3.3351051807403564, "learning_rate": 5.140554722638682e-07, "loss": 1.1507, "step": 2744 }, { "epoch": 261.42857142857144, "grad_norm": 4.848644256591797, "learning_rate": 5.142428785607197e-07, "loss": 1.1443, "step": 2745 }, { "epoch": 261.5238095238095, "grad_norm": 7.278774261474609, "learning_rate": 5.144302848575713e-07, "loss": 1.1689, "step": 2746 }, { "epoch": 261.6190476190476, "grad_norm": 5.584431171417236, "learning_rate": 5.146176911544228e-07, "loss": 1.1741, "step": 2747 }, { "epoch": 261.7142857142857, "grad_norm": 5.8609843254089355, "learning_rate": 5.148050974512744e-07, "loss": 1.115, "step": 2748 }, { "epoch": 261.8095238095238, "grad_norm": 5.493621349334717, "learning_rate": 5.149925037481259e-07, "loss": 1.1479, "step": 2749 }, { "epoch": 261.9047619047619, "grad_norm": 8.412647247314453, "learning_rate": 5.151799100449776e-07, "loss": 1.1326, "step": 2750 }, { "epoch": 262.0, "grad_norm": 4.3439788818359375, "learning_rate": 5.153673163418291e-07, "loss": 1.1445, "step": 2751 }, { "epoch": 262.0, "eval_accuracy": 0.7246159610340952, "eval_f1": 0.7820930922027869, "eval_loss": 0.5658958554267883, "eval_precision": 0.7211591033351559, "eval_recall": 0.8542746113989638, "eval_roc_auc": 0.7734879101899828, "eval_runtime": 1.3142, "eval_samples_per_second": 2030.942, "eval_steps_per_second": 4.566, "step": 2751 }, { "epoch": 262.0952380952381, "grad_norm": 5.292582988739014, "learning_rate": 5.155547226386807e-07, "loss": 1.136, "step": 2752 }, { "epoch": 262.1904761904762, "grad_norm": 6.31008243560791, "learning_rate": 5.157421289355322e-07, "loss": 1.1214, "step": 2753 }, { "epoch": 262.2857142857143, "grad_norm": 6.329434871673584, "learning_rate": 5.159295352323838e-07, "loss": 1.1915, "step": 2754 }, { "epoch": 262.3809523809524, "grad_norm": 9.693968772888184, "learning_rate": 5.161169415292354e-07, "loss": 1.1609, "step": 2755 }, { "epoch": 262.4761904761905, "grad_norm": 5.809243679046631, "learning_rate": 5.163043478260869e-07, "loss": 1.1662, "step": 2756 }, { "epoch": 262.57142857142856, "grad_norm": 3.715272903442383, "learning_rate": 5.164917541229386e-07, "loss": 1.1101, "step": 2757 }, { "epoch": 262.6666666666667, "grad_norm": 3.8494436740875244, "learning_rate": 5.166791604197901e-07, "loss": 1.1257, "step": 2758 }, { "epoch": 262.76190476190476, "grad_norm": 4.125227928161621, "learning_rate": 5.168665667166417e-07, "loss": 1.1486, "step": 2759 }, { "epoch": 262.85714285714283, "grad_norm": 13.442261695861816, "learning_rate": 5.170539730134932e-07, "loss": 1.1654, "step": 2760 }, { "epoch": 262.95238095238096, "grad_norm": 5.7701497077941895, "learning_rate": 5.172413793103449e-07, "loss": 1.1293, "step": 2761 }, { "epoch": 262.95238095238096, "eval_accuracy": 0.7238666167103784, "eval_f1": 0.7779451642060862, "eval_loss": 0.5653892755508423, "eval_precision": 0.7273239436619718, "eval_recall": 0.836139896373057, "eval_roc_auc": 0.7735898100172712, "eval_runtime": 1.1528, "eval_samples_per_second": 2315.228, "eval_steps_per_second": 5.205, "step": 2761 }, { "epoch": 263.04761904761904, "grad_norm": 5.449621200561523, "learning_rate": 5.174287856071964e-07, "loss": 1.1372, "step": 2762 }, { "epoch": 263.14285714285717, "grad_norm": 11.872653007507324, "learning_rate": 5.17616191904048e-07, "loss": 1.1212, "step": 2763 }, { "epoch": 263.23809523809524, "grad_norm": 5.305274486541748, "learning_rate": 5.178035982008997e-07, "loss": 1.1336, "step": 2764 }, { "epoch": 263.3333333333333, "grad_norm": 8.194924354553223, "learning_rate": 5.179910044977512e-07, "loss": 1.1706, "step": 2765 }, { "epoch": 263.42857142857144, "grad_norm": 4.264476776123047, "learning_rate": 5.181784107946028e-07, "loss": 1.157, "step": 2766 }, { "epoch": 263.5238095238095, "grad_norm": 16.339935302734375, "learning_rate": 5.183658170914543e-07, "loss": 1.1812, "step": 2767 }, { "epoch": 263.6190476190476, "grad_norm": 6.533397197723389, "learning_rate": 5.185532233883059e-07, "loss": 1.1091, "step": 2768 }, { "epoch": 263.7142857142857, "grad_norm": 5.343795299530029, "learning_rate": 5.187406296851574e-07, "loss": 1.138, "step": 2769 }, { "epoch": 263.8095238095238, "grad_norm": 7.269284248352051, "learning_rate": 5.18928035982009e-07, "loss": 1.1334, "step": 2770 }, { "epoch": 263.9047619047619, "grad_norm": 6.141402721405029, "learning_rate": 5.191154422788607e-07, "loss": 1.1346, "step": 2771 }, { "epoch": 264.0, "grad_norm": 7.910759449005127, "learning_rate": 5.193028485757122e-07, "loss": 1.1253, "step": 2772 }, { "epoch": 264.0, "eval_accuracy": 0.7268639940052454, "eval_f1": 0.7816711590296496, "eval_loss": 0.5649216771125793, "eval_precision": 0.7270194986072424, "eval_recall": 0.8452072538860104, "eval_roc_auc": 0.7744035693724812, "eval_runtime": 1.1907, "eval_samples_per_second": 2241.481, "eval_steps_per_second": 5.039, "step": 2772 }, { "epoch": 264.0952380952381, "grad_norm": 5.423191070556641, "learning_rate": 5.194902548725638e-07, "loss": 1.1218, "step": 2773 }, { "epoch": 264.1904761904762, "grad_norm": 4.0316243171691895, "learning_rate": 5.196776611694153e-07, "loss": 1.0887, "step": 2774 }, { "epoch": 264.2857142857143, "grad_norm": 8.018619537353516, "learning_rate": 5.198650674662669e-07, "loss": 1.1859, "step": 2775 }, { "epoch": 264.3809523809524, "grad_norm": 8.210404396057129, "learning_rate": 5.200524737631184e-07, "loss": 1.0862, "step": 2776 }, { "epoch": 264.4761904761905, "grad_norm": 11.399775505065918, "learning_rate": 5.2023988005997e-07, "loss": 1.1147, "step": 2777 }, { "epoch": 264.57142857142856, "grad_norm": 11.730525016784668, "learning_rate": 5.204272863568216e-07, "loss": 1.1992, "step": 2778 }, { "epoch": 264.6666666666667, "grad_norm": 5.436350345611572, "learning_rate": 5.206146926536732e-07, "loss": 1.115, "step": 2779 }, { "epoch": 264.76190476190476, "grad_norm": 3.6730194091796875, "learning_rate": 5.208020989505247e-07, "loss": 1.1266, "step": 2780 }, { "epoch": 264.85714285714283, "grad_norm": 10.828516006469727, "learning_rate": 5.209895052473763e-07, "loss": 1.1695, "step": 2781 }, { "epoch": 264.95238095238096, "grad_norm": 6.543045520782471, "learning_rate": 5.211769115442279e-07, "loss": 1.1905, "step": 2782 }, { "epoch": 264.95238095238096, "eval_accuracy": 0.725365305357812, "eval_f1": 0.7809979085748432, "eval_loss": 0.5646792650222778, "eval_precision": 0.7249029395452025, "eval_recall": 0.8465025906735751, "eval_roc_auc": 0.7749300518134715, "eval_runtime": 1.2059, "eval_samples_per_second": 2213.365, "eval_steps_per_second": 4.976, "step": 2782 }, { "epoch": 265.04761904761904, "grad_norm": 9.305048942565918, "learning_rate": 5.213643178410794e-07, "loss": 1.1608, "step": 2783 }, { "epoch": 265.14285714285717, "grad_norm": 8.853373527526855, "learning_rate": 5.215517241379311e-07, "loss": 1.1278, "step": 2784 }, { "epoch": 265.23809523809524, "grad_norm": 4.973694801330566, "learning_rate": 5.217391304347826e-07, "loss": 1.1237, "step": 2785 }, { "epoch": 265.3333333333333, "grad_norm": 4.541196346282959, "learning_rate": 5.219265367316343e-07, "loss": 1.1741, "step": 2786 }, { "epoch": 265.42857142857144, "grad_norm": 4.542008399963379, "learning_rate": 5.221139430284858e-07, "loss": 1.1403, "step": 2787 }, { "epoch": 265.5238095238095, "grad_norm": 4.306549549102783, "learning_rate": 5.223013493253374e-07, "loss": 1.1615, "step": 2788 }, { "epoch": 265.6190476190476, "grad_norm": 11.334341049194336, "learning_rate": 5.22488755622189e-07, "loss": 1.1244, "step": 2789 }, { "epoch": 265.7142857142857, "grad_norm": 3.0584142208099365, "learning_rate": 5.226761619190405e-07, "loss": 1.1814, "step": 2790 }, { "epoch": 265.8095238095238, "grad_norm": 13.463911056518555, "learning_rate": 5.228635682158921e-07, "loss": 1.1346, "step": 2791 }, { "epoch": 265.9047619047619, "grad_norm": 5.293436527252197, "learning_rate": 5.230509745127437e-07, "loss": 1.1403, "step": 2792 }, { "epoch": 266.0, "grad_norm": 3.8362538814544678, "learning_rate": 5.232383808095953e-07, "loss": 1.1268, "step": 2793 }, { "epoch": 266.0, "eval_accuracy": 0.7249906331959536, "eval_f1": 0.781547619047619, "eval_loss": 0.5646971464157104, "eval_precision": 0.7230176211453745, "eval_recall": 0.8503886010362695, "eval_roc_auc": 0.7751358664363845, "eval_runtime": 1.1638, "eval_samples_per_second": 2293.433, "eval_steps_per_second": 5.156, "step": 2793 }, { "epoch": 266.0952380952381, "grad_norm": 11.430266380310059, "learning_rate": 5.234257871064468e-07, "loss": 1.1498, "step": 2794 }, { "epoch": 266.1904761904762, "grad_norm": 14.580435752868652, "learning_rate": 5.236131934032984e-07, "loss": 1.191, "step": 2795 }, { "epoch": 266.2857142857143, "grad_norm": 9.603107452392578, "learning_rate": 5.238005997001499e-07, "loss": 1.1362, "step": 2796 }, { "epoch": 266.3809523809524, "grad_norm": 11.882341384887695, "learning_rate": 5.239880059970015e-07, "loss": 1.1268, "step": 2797 }, { "epoch": 266.4761904761905, "grad_norm": 3.629544973373413, "learning_rate": 5.24175412293853e-07, "loss": 1.1628, "step": 2798 }, { "epoch": 266.57142857142856, "grad_norm": 13.007302284240723, "learning_rate": 5.243628185907047e-07, "loss": 1.1492, "step": 2799 }, { "epoch": 266.6666666666667, "grad_norm": 5.342100620269775, "learning_rate": 5.245502248875563e-07, "loss": 1.1439, "step": 2800 }, { "epoch": 266.76190476190476, "grad_norm": 8.716962814331055, "learning_rate": 5.247376311844078e-07, "loss": 1.1493, "step": 2801 }, { "epoch": 266.85714285714283, "grad_norm": 6.74729585647583, "learning_rate": 5.249250374812594e-07, "loss": 1.1258, "step": 2802 }, { "epoch": 266.95238095238096, "grad_norm": 9.423856735229492, "learning_rate": 5.251124437781109e-07, "loss": 1.1271, "step": 2803 }, { "epoch": 266.95238095238096, "eval_accuracy": 0.7208692394155114, "eval_f1": 0.774992449411054, "eval_loss": 0.5647932887077332, "eval_precision": 0.7260894170911149, "eval_recall": 0.8309585492227979, "eval_roc_auc": 0.7751534254461716, "eval_runtime": 1.1741, "eval_samples_per_second": 2273.142, "eval_steps_per_second": 5.11, "step": 2803 }, { "epoch": 267.04761904761904, "grad_norm": 7.002502918243408, "learning_rate": 5.252998500749625e-07, "loss": 1.1664, "step": 2804 }, { "epoch": 267.14285714285717, "grad_norm": 3.9827327728271484, "learning_rate": 5.25487256371814e-07, "loss": 1.1584, "step": 2805 }, { "epoch": 267.23809523809524, "grad_norm": 9.634724617004395, "learning_rate": 5.256746626686657e-07, "loss": 1.158, "step": 2806 }, { "epoch": 267.3333333333333, "grad_norm": 8.2416353225708, "learning_rate": 5.258620689655172e-07, "loss": 1.1193, "step": 2807 }, { "epoch": 267.42857142857144, "grad_norm": 6.562640190124512, "learning_rate": 5.260494752623688e-07, "loss": 1.0989, "step": 2808 }, { "epoch": 267.5238095238095, "grad_norm": 9.472210884094238, "learning_rate": 5.262368815592205e-07, "loss": 1.2015, "step": 2809 }, { "epoch": 267.6190476190476, "grad_norm": 6.735084533691406, "learning_rate": 5.26424287856072e-07, "loss": 1.149, "step": 2810 }, { "epoch": 267.7142857142857, "grad_norm": 5.654566287994385, "learning_rate": 5.266116941529236e-07, "loss": 1.1365, "step": 2811 }, { "epoch": 267.8095238095238, "grad_norm": 8.3509521484375, "learning_rate": 5.267991004497751e-07, "loss": 1.1366, "step": 2812 }, { "epoch": 267.9047619047619, "grad_norm": 11.307655334472656, "learning_rate": 5.269865067466268e-07, "loss": 1.1122, "step": 2813 }, { "epoch": 268.0, "grad_norm": 5.473557949066162, "learning_rate": 5.271739130434783e-07, "loss": 1.115, "step": 2814 }, { "epoch": 268.0, "eval_accuracy": 0.7238666167103784, "eval_f1": 0.7800656520441659, "eval_loss": 0.5653632879257202, "eval_precision": 0.7232982844493636, "eval_recall": 0.8465025906735751, "eval_roc_auc": 0.7745020149683361, "eval_runtime": 1.225, "eval_samples_per_second": 2178.849, "eval_steps_per_second": 4.898, "step": 2814 }, { "epoch": 268.0952380952381, "grad_norm": 7.1619391441345215, "learning_rate": 5.273613193403299e-07, "loss": 1.1266, "step": 2815 }, { "epoch": 268.1904761904762, "grad_norm": 6.1082329750061035, "learning_rate": 5.275487256371815e-07, "loss": 1.1865, "step": 2816 }, { "epoch": 268.2857142857143, "grad_norm": 5.075636863708496, "learning_rate": 5.27736131934033e-07, "loss": 1.1453, "step": 2817 }, { "epoch": 268.3809523809524, "grad_norm": 16.62962532043457, "learning_rate": 5.279235382308846e-07, "loss": 1.141, "step": 2818 }, { "epoch": 268.4761904761905, "grad_norm": 4.551577091217041, "learning_rate": 5.281109445277361e-07, "loss": 1.1515, "step": 2819 }, { "epoch": 268.57142857142856, "grad_norm": 7.195343494415283, "learning_rate": 5.282983508245878e-07, "loss": 1.1221, "step": 2820 }, { "epoch": 268.6666666666667, "grad_norm": 8.327423095703125, "learning_rate": 5.284857571214393e-07, "loss": 1.1538, "step": 2821 }, { "epoch": 268.76190476190476, "grad_norm": 4.540790557861328, "learning_rate": 5.286731634182909e-07, "loss": 1.1255, "step": 2822 }, { "epoch": 268.85714285714283, "grad_norm": 7.532181262969971, "learning_rate": 5.288605697151424e-07, "loss": 1.1624, "step": 2823 }, { "epoch": 268.95238095238096, "grad_norm": 7.560113906860352, "learning_rate": 5.29047976011994e-07, "loss": 1.1273, "step": 2824 }, { "epoch": 268.95238095238096, "eval_accuracy": 0.725365305357812, "eval_f1": 0.7820398453761522, "eval_loss": 0.565043568611145, "eval_precision": 0.7229246838922485, "eval_recall": 0.8516839378238342, "eval_roc_auc": 0.7749015544041451, "eval_runtime": 1.3209, "eval_samples_per_second": 2020.546, "eval_steps_per_second": 4.542, "step": 2824 }, { "epoch": 269.04761904761904, "grad_norm": 5.054904937744141, "learning_rate": 5.292353823088455e-07, "loss": 1.1444, "step": 2825 }, { "epoch": 269.14285714285717, "grad_norm": 3.899892807006836, "learning_rate": 5.294227886056971e-07, "loss": 1.1272, "step": 2826 }, { "epoch": 269.23809523809524, "grad_norm": 6.381415843963623, "learning_rate": 5.296101949025488e-07, "loss": 1.1562, "step": 2827 }, { "epoch": 269.3333333333333, "grad_norm": 3.797952175140381, "learning_rate": 5.297976011994003e-07, "loss": 1.166, "step": 2828 }, { "epoch": 269.42857142857144, "grad_norm": 6.653491020202637, "learning_rate": 5.299850074962519e-07, "loss": 1.102, "step": 2829 }, { "epoch": 269.5238095238095, "grad_norm": 6.340592384338379, "learning_rate": 5.301724137931034e-07, "loss": 1.1018, "step": 2830 }, { "epoch": 269.6190476190476, "grad_norm": 6.27230978012085, "learning_rate": 5.303598200899551e-07, "loss": 1.198, "step": 2831 }, { "epoch": 269.7142857142857, "grad_norm": 4.86538028717041, "learning_rate": 5.305472263868066e-07, "loss": 1.1142, "step": 2832 }, { "epoch": 269.8095238095238, "grad_norm": 5.868375301361084, "learning_rate": 5.307346326836582e-07, "loss": 1.1949, "step": 2833 }, { "epoch": 269.9047619047619, "grad_norm": 6.007962226867676, "learning_rate": 5.309220389805099e-07, "loss": 1.0948, "step": 2834 }, { "epoch": 270.0, "grad_norm": 5.161440372467041, "learning_rate": 5.311094452773614e-07, "loss": 1.1596, "step": 2835 }, { "epoch": 270.0, "eval_accuracy": 0.7261146496815286, "eval_f1": 0.7806780678067807, "eval_loss": 0.5640901327133179, "eval_precision": 0.7272219116825042, "eval_recall": 0.8426165803108808, "eval_roc_auc": 0.7755400115141048, "eval_runtime": 1.1751, "eval_samples_per_second": 2271.295, "eval_steps_per_second": 5.106, "step": 2835 }, { "epoch": 270.0952380952381, "grad_norm": 4.958430290222168, "learning_rate": 5.31296851574213e-07, "loss": 1.1289, "step": 2836 }, { "epoch": 270.1904761904762, "grad_norm": 6.797719955444336, "learning_rate": 5.314842578710645e-07, "loss": 1.1431, "step": 2837 }, { "epoch": 270.2857142857143, "grad_norm": 3.3344295024871826, "learning_rate": 5.316716641679161e-07, "loss": 1.1098, "step": 2838 }, { "epoch": 270.3809523809524, "grad_norm": 3.955629348754883, "learning_rate": 5.318590704647676e-07, "loss": 1.1829, "step": 2839 }, { "epoch": 270.4761904761905, "grad_norm": 5.1240620613098145, "learning_rate": 5.320464767616192e-07, "loss": 1.1741, "step": 2840 }, { "epoch": 270.57142857142856, "grad_norm": 4.2287492752075195, "learning_rate": 5.322338830584708e-07, "loss": 1.1609, "step": 2841 }, { "epoch": 270.6666666666667, "grad_norm": 5.582423210144043, "learning_rate": 5.324212893553224e-07, "loss": 1.1394, "step": 2842 }, { "epoch": 270.76190476190476, "grad_norm": 4.948960781097412, "learning_rate": 5.32608695652174e-07, "loss": 1.1455, "step": 2843 }, { "epoch": 270.85714285714283, "grad_norm": 8.221921920776367, "learning_rate": 5.327961019490255e-07, "loss": 1.1144, "step": 2844 }, { "epoch": 270.95238095238096, "grad_norm": 6.162173271179199, "learning_rate": 5.329835082458771e-07, "loss": 1.1182, "step": 2845 }, { "epoch": 270.95238095238096, "eval_accuracy": 0.7257399775196703, "eval_f1": 0.7795180722891566, "eval_loss": 0.5640262365341187, "eval_precision": 0.7286036036036037, "eval_recall": 0.8380829015544041, "eval_roc_auc": 0.7757751871042027, "eval_runtime": 1.1735, "eval_samples_per_second": 2274.469, "eval_steps_per_second": 5.113, "step": 2845 }, { "epoch": 271.04761904761904, "grad_norm": 6.911406993865967, "learning_rate": 5.331709145427286e-07, "loss": 1.1601, "step": 2846 }, { "epoch": 271.14285714285717, "grad_norm": 5.134574890136719, "learning_rate": 5.333583208395802e-07, "loss": 1.1173, "step": 2847 }, { "epoch": 271.23809523809524, "grad_norm": 3.4689102172851562, "learning_rate": 5.335457271364318e-07, "loss": 1.1417, "step": 2848 }, { "epoch": 271.3333333333333, "grad_norm": 4.0263447761535645, "learning_rate": 5.337331334332834e-07, "loss": 1.158, "step": 2849 }, { "epoch": 271.42857142857144, "grad_norm": 9.338512420654297, "learning_rate": 5.339205397301349e-07, "loss": 1.149, "step": 2850 }, { "epoch": 271.5238095238095, "grad_norm": 6.389646530151367, "learning_rate": 5.341079460269865e-07, "loss": 1.111, "step": 2851 }, { "epoch": 271.6190476190476, "grad_norm": 6.352331638336182, "learning_rate": 5.34295352323838e-07, "loss": 1.1442, "step": 2852 }, { "epoch": 271.7142857142857, "grad_norm": 6.263265132904053, "learning_rate": 5.344827586206897e-07, "loss": 1.1455, "step": 2853 }, { "epoch": 271.8095238095238, "grad_norm": 6.314106464385986, "learning_rate": 5.346701649175413e-07, "loss": 1.1446, "step": 2854 }, { "epoch": 271.9047619047619, "grad_norm": 6.959097385406494, "learning_rate": 5.348575712143928e-07, "loss": 1.1264, "step": 2855 }, { "epoch": 272.0, "grad_norm": 8.125739097595215, "learning_rate": 5.350449775112445e-07, "loss": 1.1855, "step": 2856 }, { "epoch": 272.0, "eval_accuracy": 0.7242412888722368, "eval_f1": 0.7789789789789789, "eval_loss": 0.5639445185661316, "eval_precision": 0.7262038073908175, "eval_recall": 0.8400259067357513, "eval_roc_auc": 0.775972941853771, "eval_runtime": 1.2494, "eval_samples_per_second": 2136.149, "eval_steps_per_second": 4.802, "step": 2856 }, { "epoch": 272.0952380952381, "grad_norm": 7.242920875549316, "learning_rate": 5.35232383808096e-07, "loss": 1.167, "step": 2857 }, { "epoch": 272.1904761904762, "grad_norm": 6.216794013977051, "learning_rate": 5.354197901049476e-07, "loss": 1.121, "step": 2858 }, { "epoch": 272.2857142857143, "grad_norm": 5.231599807739258, "learning_rate": 5.356071964017991e-07, "loss": 1.1327, "step": 2859 }, { "epoch": 272.3809523809524, "grad_norm": 8.307430267333984, "learning_rate": 5.357946026986507e-07, "loss": 1.1287, "step": 2860 }, { "epoch": 272.4761904761905, "grad_norm": 10.797221183776855, "learning_rate": 5.359820089955022e-07, "loss": 1.1367, "step": 2861 }, { "epoch": 272.57142857142856, "grad_norm": 4.599208354949951, "learning_rate": 5.361694152923539e-07, "loss": 1.12, "step": 2862 }, { "epoch": 272.6666666666667, "grad_norm": 4.301135063171387, "learning_rate": 5.363568215892055e-07, "loss": 1.121, "step": 2863 }, { "epoch": 272.76190476190476, "grad_norm": 3.4644644260406494, "learning_rate": 5.36544227886057e-07, "loss": 1.151, "step": 2864 }, { "epoch": 272.85714285714283, "grad_norm": 7.4475274085998535, "learning_rate": 5.367316341829086e-07, "loss": 1.1208, "step": 2865 }, { "epoch": 272.95238095238096, "grad_norm": 5.376792907714844, "learning_rate": 5.369190404797601e-07, "loss": 1.1535, "step": 2866 }, { "epoch": 272.95238095238096, "eval_accuracy": 0.7242412888722368, "eval_f1": 0.7804295942720764, "eval_loss": 0.5637869834899902, "eval_precision": 0.7234513274336283, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.776422855497985, "eval_runtime": 1.3836, "eval_samples_per_second": 1928.965, "eval_steps_per_second": 4.336, "step": 2866 }, { "epoch": 273.04761904761904, "grad_norm": 11.990949630737305, "learning_rate": 5.371064467766117e-07, "loss": 1.1783, "step": 2867 }, { "epoch": 273.14285714285717, "grad_norm": 8.884700775146484, "learning_rate": 5.372938530734632e-07, "loss": 1.1722, "step": 2868 }, { "epoch": 273.23809523809524, "grad_norm": 4.0039777755737305, "learning_rate": 5.374812593703149e-07, "loss": 1.0928, "step": 2869 }, { "epoch": 273.3333333333333, "grad_norm": 5.6337738037109375, "learning_rate": 5.376686656671665e-07, "loss": 1.1199, "step": 2870 }, { "epoch": 273.42857142857144, "grad_norm": 3.9249324798583984, "learning_rate": 5.37856071964018e-07, "loss": 1.1054, "step": 2871 }, { "epoch": 273.5238095238095, "grad_norm": 6.832759380340576, "learning_rate": 5.380434782608696e-07, "loss": 1.1588, "step": 2872 }, { "epoch": 273.6190476190476, "grad_norm": 3.857856035232544, "learning_rate": 5.382308845577211e-07, "loss": 1.1377, "step": 2873 }, { "epoch": 273.7142857142857, "grad_norm": 7.15529727935791, "learning_rate": 5.384182908545727e-07, "loss": 1.1736, "step": 2874 }, { "epoch": 273.8095238095238, "grad_norm": 6.371598243713379, "learning_rate": 5.386056971514242e-07, "loss": 1.1782, "step": 2875 }, { "epoch": 273.9047619047619, "grad_norm": 9.470749855041504, "learning_rate": 5.387931034482759e-07, "loss": 1.1703, "step": 2876 }, { "epoch": 274.0, "grad_norm": 5.563154697418213, "learning_rate": 5.389805097451274e-07, "loss": 1.1281, "step": 2877 }, { "epoch": 274.0, "eval_accuracy": 0.7238666167103784, "eval_f1": 0.7783458646616541, "eval_loss": 0.5636013150215149, "eval_precision": 0.7265581134194273, "eval_recall": 0.8380829015544041, "eval_roc_auc": 0.7764179620034543, "eval_runtime": 1.1257, "eval_samples_per_second": 2370.982, "eval_steps_per_second": 5.33, "step": 2877 }, { "epoch": 274.0952380952381, "grad_norm": 8.047298431396484, "learning_rate": 5.391679160419791e-07, "loss": 1.1498, "step": 2878 }, { "epoch": 274.1904761904762, "grad_norm": 4.311565399169922, "learning_rate": 5.393553223388307e-07, "loss": 1.1662, "step": 2879 }, { "epoch": 274.2857142857143, "grad_norm": 11.471311569213867, "learning_rate": 5.395427286356822e-07, "loss": 1.1045, "step": 2880 }, { "epoch": 274.3809523809524, "grad_norm": 10.548619270324707, "learning_rate": 5.397301349325338e-07, "loss": 1.1346, "step": 2881 }, { "epoch": 274.4761904761905, "grad_norm": 8.780191421508789, "learning_rate": 5.399175412293853e-07, "loss": 1.1225, "step": 2882 }, { "epoch": 274.57142857142856, "grad_norm": 3.8586528301239014, "learning_rate": 5.40104947526237e-07, "loss": 1.1465, "step": 2883 }, { "epoch": 274.6666666666667, "grad_norm": 5.38668966293335, "learning_rate": 5.402923538230885e-07, "loss": 1.1243, "step": 2884 }, { "epoch": 274.76190476190476, "grad_norm": 10.542987823486328, "learning_rate": 5.404797601199401e-07, "loss": 1.1485, "step": 2885 }, { "epoch": 274.85714285714283, "grad_norm": 5.546933650970459, "learning_rate": 5.406671664167916e-07, "loss": 1.1072, "step": 2886 }, { "epoch": 274.95238095238096, "grad_norm": 5.900209903717041, "learning_rate": 5.408545727136432e-07, "loss": 1.1339, "step": 2887 }, { "epoch": 274.95238095238096, "eval_accuracy": 0.7276133383289621, "eval_f1": 0.7839524517087667, "eval_loss": 0.5638872385025024, "eval_precision": 0.7243272926963207, "eval_recall": 0.8542746113989638, "eval_roc_auc": 0.7764245826137018, "eval_runtime": 1.2282, "eval_samples_per_second": 2173.041, "eval_steps_per_second": 4.885, "step": 2887 }, { "epoch": 275.04761904761904, "grad_norm": 3.780916452407837, "learning_rate": 5.410419790104947e-07, "loss": 1.1033, "step": 2888 }, { "epoch": 275.14285714285717, "grad_norm": 9.117225646972656, "learning_rate": 5.412293853073463e-07, "loss": 1.0961, "step": 2889 }, { "epoch": 275.23809523809524, "grad_norm": 6.246429920196533, "learning_rate": 5.41416791604198e-07, "loss": 1.1595, "step": 2890 }, { "epoch": 275.3333333333333, "grad_norm": 3.8772833347320557, "learning_rate": 5.416041979010495e-07, "loss": 1.1602, "step": 2891 }, { "epoch": 275.42857142857144, "grad_norm": 11.349824905395508, "learning_rate": 5.417916041979011e-07, "loss": 1.1747, "step": 2892 }, { "epoch": 275.5238095238095, "grad_norm": 7.2987542152404785, "learning_rate": 5.419790104947526e-07, "loss": 1.1192, "step": 2893 }, { "epoch": 275.6190476190476, "grad_norm": 5.3660783767700195, "learning_rate": 5.421664167916042e-07, "loss": 1.1368, "step": 2894 }, { "epoch": 275.7142857142857, "grad_norm": 7.8456010818481445, "learning_rate": 5.423538230884557e-07, "loss": 1.1483, "step": 2895 }, { "epoch": 275.8095238095238, "grad_norm": 3.772411823272705, "learning_rate": 5.425412293853073e-07, "loss": 1.1487, "step": 2896 }, { "epoch": 275.9047619047619, "grad_norm": 4.100811958312988, "learning_rate": 5.42728635682159e-07, "loss": 1.1666, "step": 2897 }, { "epoch": 276.0, "grad_norm": 12.531455039978027, "learning_rate": 5.429160419790105e-07, "loss": 1.1379, "step": 2898 }, { "epoch": 276.0, "eval_accuracy": 0.7249906331959536, "eval_f1": 0.7790487658037327, "eval_loss": 0.5640032887458801, "eval_precision": 0.7277840269966255, "eval_recall": 0.8380829015544041, "eval_roc_auc": 0.7759821531375934, "eval_runtime": 1.1919, "eval_samples_per_second": 2239.234, "eval_steps_per_second": 5.034, "step": 2898 }, { "epoch": 276.0952380952381, "grad_norm": 11.592368125915527, "learning_rate": 5.431034482758621e-07, "loss": 1.114, "step": 2899 }, { "epoch": 276.1904761904762, "grad_norm": 3.5621049404144287, "learning_rate": 5.432908545727136e-07, "loss": 1.1558, "step": 2900 }, { "epoch": 276.2857142857143, "grad_norm": 6.428314685821533, "learning_rate": 5.434782608695653e-07, "loss": 1.1089, "step": 2901 }, { "epoch": 276.3809523809524, "grad_norm": 8.273981094360352, "learning_rate": 5.436656671664168e-07, "loss": 1.1277, "step": 2902 }, { "epoch": 276.4761904761905, "grad_norm": 7.387972354888916, "learning_rate": 5.438530734632684e-07, "loss": 1.1845, "step": 2903 }, { "epoch": 276.57142857142856, "grad_norm": 4.1005706787109375, "learning_rate": 5.4404047976012e-07, "loss": 1.1568, "step": 2904 }, { "epoch": 276.6666666666667, "grad_norm": 4.488372802734375, "learning_rate": 5.442278860569716e-07, "loss": 1.1023, "step": 2905 }, { "epoch": 276.76190476190476, "grad_norm": 5.401954650878906, "learning_rate": 5.444152923538232e-07, "loss": 1.1355, "step": 2906 }, { "epoch": 276.85714285714283, "grad_norm": 7.288771629333496, "learning_rate": 5.446026986506747e-07, "loss": 1.1368, "step": 2907 }, { "epoch": 276.95238095238096, "grad_norm": 7.125945568084717, "learning_rate": 5.447901049475263e-07, "loss": 1.1767, "step": 2908 }, { "epoch": 276.95238095238096, "eval_accuracy": 0.7298613713001124, "eval_f1": 0.7866232613199171, "eval_loss": 0.564026951789856, "eval_precision": 0.7242506811989101, "eval_recall": 0.8607512953367875, "eval_roc_auc": 0.7765402993667242, "eval_runtime": 1.1584, "eval_samples_per_second": 2304.062, "eval_steps_per_second": 5.18, "step": 2908 }, { "epoch": 277.04761904761904, "grad_norm": 11.035928726196289, "learning_rate": 5.449775112443778e-07, "loss": 1.1625, "step": 2909 }, { "epoch": 277.14285714285717, "grad_norm": 4.437666893005371, "learning_rate": 5.451649175412294e-07, "loss": 1.1427, "step": 2910 }, { "epoch": 277.23809523809524, "grad_norm": 4.022305965423584, "learning_rate": 5.45352323838081e-07, "loss": 1.129, "step": 2911 }, { "epoch": 277.3333333333333, "grad_norm": 10.658130645751953, "learning_rate": 5.455397301349326e-07, "loss": 1.0895, "step": 2912 }, { "epoch": 277.42857142857144, "grad_norm": 5.271421909332275, "learning_rate": 5.457271364317841e-07, "loss": 1.1431, "step": 2913 }, { "epoch": 277.5238095238095, "grad_norm": 6.368401050567627, "learning_rate": 5.459145427286357e-07, "loss": 1.1147, "step": 2914 }, { "epoch": 277.6190476190476, "grad_norm": 3.81168532371521, "learning_rate": 5.461019490254873e-07, "loss": 1.1266, "step": 2915 }, { "epoch": 277.7142857142857, "grad_norm": 3.3784971237182617, "learning_rate": 5.462893553223388e-07, "loss": 1.1751, "step": 2916 }, { "epoch": 277.8095238095238, "grad_norm": 5.5185627937316895, "learning_rate": 5.464767616191904e-07, "loss": 1.1456, "step": 2917 }, { "epoch": 277.9047619047619, "grad_norm": 10.223770141601562, "learning_rate": 5.46664167916042e-07, "loss": 1.1173, "step": 2918 }, { "epoch": 278.0, "grad_norm": 5.04381799697876, "learning_rate": 5.468515742128936e-07, "loss": 1.1602, "step": 2919 }, { "epoch": 278.0, "eval_accuracy": 0.7234919445485201, "eval_f1": 0.7774427020506635, "eval_loss": 0.5633842945098877, "eval_precision": 0.7274266365688488, "eval_recall": 0.8348445595854922, "eval_roc_auc": 0.7765256188831319, "eval_runtime": 1.1165, "eval_samples_per_second": 2390.568, "eval_steps_per_second": 5.374, "step": 2919 }, { "epoch": 278.0952380952381, "grad_norm": 4.8481221199035645, "learning_rate": 5.470389805097451e-07, "loss": 1.1563, "step": 2920 }, { "epoch": 278.1904761904762, "grad_norm": 4.2458906173706055, "learning_rate": 5.472263868065967e-07, "loss": 1.1748, "step": 2921 }, { "epoch": 278.2857142857143, "grad_norm": 6.992273807525635, "learning_rate": 5.474137931034482e-07, "loss": 1.1342, "step": 2922 }, { "epoch": 278.3809523809524, "grad_norm": 8.638583183288574, "learning_rate": 5.476011994002999e-07, "loss": 1.1363, "step": 2923 }, { "epoch": 278.4761904761905, "grad_norm": 5.823126316070557, "learning_rate": 5.477886056971515e-07, "loss": 1.1124, "step": 2924 }, { "epoch": 278.57142857142856, "grad_norm": 8.983731269836426, "learning_rate": 5.47976011994003e-07, "loss": 1.1614, "step": 2925 }, { "epoch": 278.6666666666667, "grad_norm": 3.8761818408966064, "learning_rate": 5.481634182908547e-07, "loss": 1.1422, "step": 2926 }, { "epoch": 278.76190476190476, "grad_norm": 5.85255765914917, "learning_rate": 5.483508245877062e-07, "loss": 1.1216, "step": 2927 }, { "epoch": 278.85714285714283, "grad_norm": 5.081287384033203, "learning_rate": 5.485382308845578e-07, "loss": 1.1161, "step": 2928 }, { "epoch": 278.95238095238096, "grad_norm": 8.263365745544434, "learning_rate": 5.487256371814093e-07, "loss": 1.14, "step": 2929 }, { "epoch": 278.95238095238096, "eval_accuracy": 0.7298613713001124, "eval_f1": 0.7854805117524546, "eval_loss": 0.5636739730834961, "eval_precision": 0.7264722069345074, "eval_recall": 0.8549222797927462, "eval_roc_auc": 0.7763837075417386, "eval_runtime": 1.1999, "eval_samples_per_second": 2224.265, "eval_steps_per_second": 5.0, "step": 2929 }, { "epoch": 279.04761904761904, "grad_norm": 9.072875022888184, "learning_rate": 5.489130434782609e-07, "loss": 1.1527, "step": 2930 }, { "epoch": 279.14285714285717, "grad_norm": 6.613090991973877, "learning_rate": 5.491004497751124e-07, "loss": 1.1414, "step": 2931 }, { "epoch": 279.23809523809524, "grad_norm": 7.818994522094727, "learning_rate": 5.492878560719641e-07, "loss": 1.1396, "step": 2932 }, { "epoch": 279.3333333333333, "grad_norm": 5.4621124267578125, "learning_rate": 5.494752623688157e-07, "loss": 1.1323, "step": 2933 }, { "epoch": 279.42857142857144, "grad_norm": 11.873200416564941, "learning_rate": 5.496626686656672e-07, "loss": 1.1364, "step": 2934 }, { "epoch": 279.5238095238095, "grad_norm": 4.848647594451904, "learning_rate": 5.498500749625188e-07, "loss": 1.1477, "step": 2935 }, { "epoch": 279.6190476190476, "grad_norm": 9.585158348083496, "learning_rate": 5.500374812593703e-07, "loss": 1.0999, "step": 2936 }, { "epoch": 279.7142857142857, "grad_norm": 4.959415435791016, "learning_rate": 5.502248875562219e-07, "loss": 1.1543, "step": 2937 }, { "epoch": 279.8095238095238, "grad_norm": 3.9837114810943604, "learning_rate": 5.504122938530734e-07, "loss": 1.1479, "step": 2938 }, { "epoch": 279.9047619047619, "grad_norm": 5.842416763305664, "learning_rate": 5.505997001499251e-07, "loss": 1.116, "step": 2939 }, { "epoch": 280.0, "grad_norm": 5.95350980758667, "learning_rate": 5.507871064467766e-07, "loss": 1.1409, "step": 2940 }, { "epoch": 280.0, "eval_accuracy": 0.7234919445485201, "eval_f1": 0.7766343825665859, "eval_loss": 0.5636378526687622, "eval_precision": 0.7289772727272728, "eval_recall": 0.8309585492227979, "eval_roc_auc": 0.7759784110535406, "eval_runtime": 1.3077, "eval_samples_per_second": 2040.91, "eval_steps_per_second": 4.588, "step": 2940 }, { "epoch": 280.0952380952381, "grad_norm": 9.22157096862793, "learning_rate": 5.509745127436282e-07, "loss": 1.1084, "step": 2941 }, { "epoch": 280.1904761904762, "grad_norm": 4.287394046783447, "learning_rate": 5.511619190404798e-07, "loss": 1.1666, "step": 2942 }, { "epoch": 280.2857142857143, "grad_norm": 8.031574249267578, "learning_rate": 5.513493253373313e-07, "loss": 1.1422, "step": 2943 }, { "epoch": 280.3809523809524, "grad_norm": 6.339638710021973, "learning_rate": 5.515367316341829e-07, "loss": 1.1359, "step": 2944 }, { "epoch": 280.4761904761905, "grad_norm": 6.532580852508545, "learning_rate": 5.517241379310344e-07, "loss": 1.1241, "step": 2945 }, { "epoch": 280.57142857142856, "grad_norm": 4.235518455505371, "learning_rate": 5.519115442278861e-07, "loss": 1.1372, "step": 2946 }, { "epoch": 280.6666666666667, "grad_norm": 4.8458991050720215, "learning_rate": 5.520989505247376e-07, "loss": 1.1259, "step": 2947 }, { "epoch": 280.76190476190476, "grad_norm": 5.886928081512451, "learning_rate": 5.522863568215893e-07, "loss": 1.15, "step": 2948 }, { "epoch": 280.85714285714283, "grad_norm": 3.73628568649292, "learning_rate": 5.524737631184408e-07, "loss": 1.1483, "step": 2949 }, { "epoch": 280.95238095238096, "grad_norm": 4.2562255859375, "learning_rate": 5.526611694152924e-07, "loss": 1.1616, "step": 2950 }, { "epoch": 280.95238095238096, "eval_accuracy": 0.7261146496815286, "eval_f1": 0.7804145389005708, "eval_loss": 0.5636350512504578, "eval_precision": 0.7277310924369748, "eval_recall": 0.841321243523316, "eval_roc_auc": 0.77594156591825, "eval_runtime": 1.1488, "eval_samples_per_second": 2323.242, "eval_steps_per_second": 5.223, "step": 2950 }, { "epoch": 281.04761904761904, "grad_norm": 4.070923805236816, "learning_rate": 5.52848575712144e-07, "loss": 1.1298, "step": 2951 }, { "epoch": 281.14285714285717, "grad_norm": 3.9660825729370117, "learning_rate": 5.530359820089955e-07, "loss": 1.1391, "step": 2952 }, { "epoch": 281.23809523809524, "grad_norm": 7.847269058227539, "learning_rate": 5.532233883058472e-07, "loss": 1.1308, "step": 2953 }, { "epoch": 281.3333333333333, "grad_norm": 10.086249351501465, "learning_rate": 5.534107946026987e-07, "loss": 1.0882, "step": 2954 }, { "epoch": 281.42857142857144, "grad_norm": 7.166942596435547, "learning_rate": 5.535982008995503e-07, "loss": 1.1334, "step": 2955 }, { "epoch": 281.5238095238095, "grad_norm": 4.273452281951904, "learning_rate": 5.537856071964018e-07, "loss": 1.1654, "step": 2956 }, { "epoch": 281.6190476190476, "grad_norm": 5.848499298095703, "learning_rate": 5.539730134932534e-07, "loss": 1.146, "step": 2957 }, { "epoch": 281.7142857142857, "grad_norm": 3.5379650592803955, "learning_rate": 5.541604197901049e-07, "loss": 1.1066, "step": 2958 }, { "epoch": 281.8095238095238, "grad_norm": 4.1590046882629395, "learning_rate": 5.543478260869565e-07, "loss": 1.147, "step": 2959 }, { "epoch": 281.9047619047619, "grad_norm": 4.934486389160156, "learning_rate": 5.545352323838082e-07, "loss": 1.1444, "step": 2960 }, { "epoch": 282.0, "grad_norm": 9.666616439819336, "learning_rate": 5.547226386806597e-07, "loss": 1.1567, "step": 2961 }, { "epoch": 282.0, "eval_accuracy": 0.7287373548145373, "eval_f1": 0.7857988165680473, "eval_loss": 0.5639248490333557, "eval_precision": 0.7233115468409586, "eval_recall": 0.8601036269430051, "eval_roc_auc": 0.7761266551525619, "eval_runtime": 1.2174, "eval_samples_per_second": 2192.455, "eval_steps_per_second": 4.929, "step": 2961 }, { "epoch": 282.0952380952381, "grad_norm": 3.8604648113250732, "learning_rate": 5.549100449775113e-07, "loss": 1.1145, "step": 2962 }, { "epoch": 282.1904761904762, "grad_norm": 11.779272079467773, "learning_rate": 5.550974512743628e-07, "loss": 1.1494, "step": 2963 }, { "epoch": 282.2857142857143, "grad_norm": 9.37829875946045, "learning_rate": 5.552848575712144e-07, "loss": 1.1446, "step": 2964 }, { "epoch": 282.3809523809524, "grad_norm": 8.775849342346191, "learning_rate": 5.554722638680659e-07, "loss": 1.1425, "step": 2965 }, { "epoch": 282.4761904761905, "grad_norm": 3.7040064334869385, "learning_rate": 5.556596701649176e-07, "loss": 1.139, "step": 2966 }, { "epoch": 282.57142857142856, "grad_norm": 9.283699989318848, "learning_rate": 5.558470764617691e-07, "loss": 1.1133, "step": 2967 }, { "epoch": 282.6666666666667, "grad_norm": 4.366463661193848, "learning_rate": 5.560344827586207e-07, "loss": 1.1191, "step": 2968 }, { "epoch": 282.76190476190476, "grad_norm": 9.383438110351562, "learning_rate": 5.562218890554723e-07, "loss": 1.1277, "step": 2969 }, { "epoch": 282.85714285714283, "grad_norm": 10.565701484680176, "learning_rate": 5.564092953523238e-07, "loss": 1.155, "step": 2970 }, { "epoch": 282.95238095238096, "grad_norm": 4.4187397956848145, "learning_rate": 5.565967016491755e-07, "loss": 1.1471, "step": 2971 }, { "epoch": 282.95238095238096, "eval_accuracy": 0.7276133383289621, "eval_f1": 0.7812217875413783, "eval_loss": 0.563374936580658, "eval_precision": 0.7296233839235525, "eval_recall": 0.8406735751295337, "eval_roc_auc": 0.7761419113413933, "eval_runtime": 1.1967, "eval_samples_per_second": 2230.318, "eval_steps_per_second": 5.014, "step": 2971 }, { "epoch": 283.04761904761904, "grad_norm": 5.452326774597168, "learning_rate": 5.56784107946027e-07, "loss": 1.1872, "step": 2972 }, { "epoch": 283.14285714285717, "grad_norm": 11.2903413772583, "learning_rate": 5.569715142428787e-07, "loss": 1.1473, "step": 2973 }, { "epoch": 283.23809523809524, "grad_norm": 4.844279766082764, "learning_rate": 5.571589205397302e-07, "loss": 1.1261, "step": 2974 }, { "epoch": 283.3333333333333, "grad_norm": 5.5100321769714355, "learning_rate": 5.573463268365818e-07, "loss": 1.0917, "step": 2975 }, { "epoch": 283.42857142857144, "grad_norm": 6.915529251098633, "learning_rate": 5.575337331334333e-07, "loss": 1.1668, "step": 2976 }, { "epoch": 283.5238095238095, "grad_norm": 5.634005546569824, "learning_rate": 5.577211394302849e-07, "loss": 1.0749, "step": 2977 }, { "epoch": 283.6190476190476, "grad_norm": 10.577078819274902, "learning_rate": 5.579085457271365e-07, "loss": 1.1603, "step": 2978 }, { "epoch": 283.7142857142857, "grad_norm": 4.12070369720459, "learning_rate": 5.58095952023988e-07, "loss": 1.1099, "step": 2979 }, { "epoch": 283.8095238095238, "grad_norm": 5.871093273162842, "learning_rate": 5.582833583208397e-07, "loss": 1.1818, "step": 2980 }, { "epoch": 283.9047619047619, "grad_norm": 6.47518253326416, "learning_rate": 5.584707646176912e-07, "loss": 1.1224, "step": 2981 }, { "epoch": 284.0, "grad_norm": 6.158949375152588, "learning_rate": 5.586581709145428e-07, "loss": 1.1205, "step": 2982 }, { "epoch": 284.0, "eval_accuracy": 0.729486699138254, "eval_f1": 0.7837028160575195, "eval_loss": 0.5626216530799866, "eval_precision": 0.7290969899665551, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.7773434081750143, "eval_runtime": 1.2058, "eval_samples_per_second": 2213.511, "eval_steps_per_second": 4.976, "step": 2982 }, { "epoch": 284.0952380952381, "grad_norm": 5.4894118309021, "learning_rate": 5.588455772113943e-07, "loss": 1.1201, "step": 2983 }, { "epoch": 284.1904761904762, "grad_norm": 5.079287528991699, "learning_rate": 5.590329835082459e-07, "loss": 1.1071, "step": 2984 }, { "epoch": 284.2857142857143, "grad_norm": 7.196651458740234, "learning_rate": 5.592203898050974e-07, "loss": 1.1238, "step": 2985 }, { "epoch": 284.3809523809524, "grad_norm": 5.972583293914795, "learning_rate": 5.59407796101949e-07, "loss": 1.1456, "step": 2986 }, { "epoch": 284.4761904761905, "grad_norm": 4.000961780548096, "learning_rate": 5.595952023988007e-07, "loss": 1.1411, "step": 2987 }, { "epoch": 284.57142857142856, "grad_norm": 6.014395713806152, "learning_rate": 5.597826086956522e-07, "loss": 1.1582, "step": 2988 }, { "epoch": 284.6666666666667, "grad_norm": 4.829216003417969, "learning_rate": 5.599700149925038e-07, "loss": 1.147, "step": 2989 }, { "epoch": 284.76190476190476, "grad_norm": 4.213350772857666, "learning_rate": 5.601574212893553e-07, "loss": 1.1444, "step": 2990 }, { "epoch": 284.85714285714283, "grad_norm": 5.931327819824219, "learning_rate": 5.603448275862069e-07, "loss": 1.1281, "step": 2991 }, { "epoch": 284.95238095238096, "grad_norm": 4.410636901855469, "learning_rate": 5.605322338830584e-07, "loss": 1.105, "step": 2992 }, { "epoch": 284.95238095238096, "eval_accuracy": 0.7261146496815286, "eval_f1": 0.7797529376318169, "eval_loss": 0.5622423887252808, "eval_precision": 0.7290140845070423, "eval_recall": 0.8380829015544041, "eval_roc_auc": 0.7776154289004029, "eval_runtime": 1.1496, "eval_samples_per_second": 2321.654, "eval_steps_per_second": 5.219, "step": 2992 }, { "epoch": 285.04761904761904, "grad_norm": 3.58267879486084, "learning_rate": 5.607196401799101e-07, "loss": 1.1236, "step": 2993 }, { "epoch": 285.14285714285717, "grad_norm": 5.702927112579346, "learning_rate": 5.609070464767616e-07, "loss": 1.1681, "step": 2994 }, { "epoch": 285.23809523809524, "grad_norm": 5.016921043395996, "learning_rate": 5.610944527736132e-07, "loss": 1.1307, "step": 2995 }, { "epoch": 285.3333333333333, "grad_norm": 9.380377769470215, "learning_rate": 5.612818590704649e-07, "loss": 1.156, "step": 2996 }, { "epoch": 285.42857142857144, "grad_norm": 5.757893085479736, "learning_rate": 5.614692653673164e-07, "loss": 1.13, "step": 2997 }, { "epoch": 285.5238095238095, "grad_norm": 5.188559055328369, "learning_rate": 5.61656671664168e-07, "loss": 1.1178, "step": 2998 }, { "epoch": 285.6190476190476, "grad_norm": 6.849817276000977, "learning_rate": 5.618440779610195e-07, "loss": 1.1294, "step": 2999 }, { "epoch": 285.7142857142857, "grad_norm": 11.583452224731445, "learning_rate": 5.620314842578711e-07, "loss": 1.1146, "step": 3000 }, { "epoch": 285.8095238095238, "grad_norm": 8.750665664672852, "learning_rate": 5.622188905547227e-07, "loss": 1.1744, "step": 3001 }, { "epoch": 285.9047619047619, "grad_norm": 5.649927616119385, "learning_rate": 5.624062968515743e-07, "loss": 1.1339, "step": 3002 }, { "epoch": 286.0, "grad_norm": 4.693404674530029, "learning_rate": 5.625937031484258e-07, "loss": 1.1333, "step": 3003 }, { "epoch": 286.0, "eval_accuracy": 0.726489321843387, "eval_f1": 0.7803850782190133, "eval_loss": 0.5620488524436951, "eval_precision": 0.7286516853932584, "eval_recall": 0.8400259067357513, "eval_roc_auc": 0.7778422567645366, "eval_runtime": 1.1855, "eval_samples_per_second": 2251.445, "eval_steps_per_second": 5.061, "step": 3003 }, { "epoch": 286.0952380952381, "grad_norm": 3.090787887573242, "learning_rate": 5.627811094452774e-07, "loss": 1.1242, "step": 3004 }, { "epoch": 286.1904761904762, "grad_norm": 3.8368513584136963, "learning_rate": 5.62968515742129e-07, "loss": 1.08, "step": 3005 }, { "epoch": 286.2857142857143, "grad_norm": 9.99586296081543, "learning_rate": 5.631559220389805e-07, "loss": 1.1767, "step": 3006 }, { "epoch": 286.3809523809524, "grad_norm": 3.3602445125579834, "learning_rate": 5.633433283358321e-07, "loss": 1.1401, "step": 3007 }, { "epoch": 286.4761904761905, "grad_norm": 9.76583480834961, "learning_rate": 5.635307346326837e-07, "loss": 1.1104, "step": 3008 }, { "epoch": 286.57142857142856, "grad_norm": 8.486311912536621, "learning_rate": 5.637181409295353e-07, "loss": 1.166, "step": 3009 }, { "epoch": 286.6666666666667, "grad_norm": 5.658336639404297, "learning_rate": 5.639055472263868e-07, "loss": 1.1606, "step": 3010 }, { "epoch": 286.76190476190476, "grad_norm": 6.460410118103027, "learning_rate": 5.640929535232384e-07, "loss": 1.1226, "step": 3011 }, { "epoch": 286.85714285714283, "grad_norm": 5.95920991897583, "learning_rate": 5.642803598200899e-07, "loss": 1.1116, "step": 3012 }, { "epoch": 286.95238095238096, "grad_norm": 8.102746963500977, "learning_rate": 5.644677661169415e-07, "loss": 1.163, "step": 3013 }, { "epoch": 286.95238095238096, "eval_accuracy": 0.7268639940052454, "eval_f1": 0.7808836789900812, "eval_loss": 0.5618669986724854, "eval_precision": 0.7285473920358946, "eval_recall": 0.841321243523316, "eval_roc_auc": 0.7780023028209557, "eval_runtime": 1.3258, "eval_samples_per_second": 2013.07, "eval_steps_per_second": 4.525, "step": 3013 }, { "epoch": 287.04761904761904, "grad_norm": 3.968151330947876, "learning_rate": 5.64655172413793e-07, "loss": 1.1229, "step": 3014 }, { "epoch": 287.14285714285717, "grad_norm": 4.948294639587402, "learning_rate": 5.648425787106447e-07, "loss": 1.1114, "step": 3015 }, { "epoch": 287.23809523809524, "grad_norm": 8.754914283752441, "learning_rate": 5.650299850074963e-07, "loss": 1.1497, "step": 3016 }, { "epoch": 287.3333333333333, "grad_norm": 4.025392532348633, "learning_rate": 5.652173913043478e-07, "loss": 1.1172, "step": 3017 }, { "epoch": 287.42857142857144, "grad_norm": 11.300790786743164, "learning_rate": 5.654047976011995e-07, "loss": 1.1366, "step": 3018 }, { "epoch": 287.5238095238095, "grad_norm": 7.936151027679443, "learning_rate": 5.65592203898051e-07, "loss": 1.1396, "step": 3019 }, { "epoch": 287.6190476190476, "grad_norm": 8.417755126953125, "learning_rate": 5.657796101949026e-07, "loss": 1.1312, "step": 3020 }, { "epoch": 287.7142857142857, "grad_norm": 8.80263614654541, "learning_rate": 5.659670164917541e-07, "loss": 1.071, "step": 3021 }, { "epoch": 287.8095238095238, "grad_norm": 6.077959060668945, "learning_rate": 5.661544227886058e-07, "loss": 1.1265, "step": 3022 }, { "epoch": 287.9047619047619, "grad_norm": 7.415537357330322, "learning_rate": 5.663418290854574e-07, "loss": 1.1485, "step": 3023 }, { "epoch": 288.0, "grad_norm": 11.430869102478027, "learning_rate": 5.665292353823089e-07, "loss": 1.2005, "step": 3024 }, { "epoch": 288.0, "eval_accuracy": 0.7279880104908205, "eval_f1": 0.7821128451380552, "eval_loss": 0.5615506172180176, "eval_precision": 0.7287472035794184, "eval_recall": 0.8439119170984456, "eval_roc_auc": 0.7781177317213587, "eval_runtime": 1.186, "eval_samples_per_second": 2250.509, "eval_steps_per_second": 5.059, "step": 3024 }, { "epoch": 288.0952380952381, "grad_norm": 5.465104579925537, "learning_rate": 5.667166416791605e-07, "loss": 1.1319, "step": 3025 }, { "epoch": 288.1904761904762, "grad_norm": 8.500192642211914, "learning_rate": 5.66904047976012e-07, "loss": 1.1595, "step": 3026 }, { "epoch": 288.2857142857143, "grad_norm": 8.348478317260742, "learning_rate": 5.670914542728636e-07, "loss": 1.109, "step": 3027 }, { "epoch": 288.3809523809524, "grad_norm": 10.712888717651367, "learning_rate": 5.672788605697151e-07, "loss": 1.156, "step": 3028 }, { "epoch": 288.4761904761905, "grad_norm": 4.471301078796387, "learning_rate": 5.674662668665668e-07, "loss": 1.1707, "step": 3029 }, { "epoch": 288.57142857142856, "grad_norm": 5.227294921875, "learning_rate": 5.676536731634183e-07, "loss": 1.1179, "step": 3030 }, { "epoch": 288.6666666666667, "grad_norm": 7.055893421173096, "learning_rate": 5.678410794602699e-07, "loss": 1.1221, "step": 3031 }, { "epoch": 288.76190476190476, "grad_norm": 14.00888729095459, "learning_rate": 5.680284857571215e-07, "loss": 1.1227, "step": 3032 }, { "epoch": 288.85714285714283, "grad_norm": 13.468306541442871, "learning_rate": 5.68215892053973e-07, "loss": 1.1306, "step": 3033 }, { "epoch": 288.95238095238096, "grad_norm": 3.5932788848876953, "learning_rate": 5.684032983508246e-07, "loss": 1.1477, "step": 3034 }, { "epoch": 288.95238095238096, "eval_accuracy": 0.7261146496815286, "eval_f1": 0.7801503759398496, "eval_loss": 0.5614120960235596, "eval_precision": 0.7282425603593486, "eval_recall": 0.8400259067357513, "eval_roc_auc": 0.7784884858952217, "eval_runtime": 1.1599, "eval_samples_per_second": 2301.009, "eval_steps_per_second": 5.173, "step": 3034 }, { "epoch": 289.04761904761904, "grad_norm": 5.253028392791748, "learning_rate": 5.685907046476761e-07, "loss": 1.1977, "step": 3035 }, { "epoch": 289.14285714285717, "grad_norm": 7.134814739227295, "learning_rate": 5.687781109445278e-07, "loss": 1.1047, "step": 3036 }, { "epoch": 289.23809523809524, "grad_norm": 6.438389778137207, "learning_rate": 5.689655172413793e-07, "loss": 1.1218, "step": 3037 }, { "epoch": 289.3333333333333, "grad_norm": 5.677706718444824, "learning_rate": 5.691529235382309e-07, "loss": 1.1593, "step": 3038 }, { "epoch": 289.42857142857144, "grad_norm": 5.320911884307861, "learning_rate": 5.693403298350824e-07, "loss": 1.1046, "step": 3039 }, { "epoch": 289.5238095238095, "grad_norm": 6.542576313018799, "learning_rate": 5.695277361319341e-07, "loss": 1.1368, "step": 3040 }, { "epoch": 289.6190476190476, "grad_norm": 9.399479866027832, "learning_rate": 5.697151424287857e-07, "loss": 1.1047, "step": 3041 }, { "epoch": 289.7142857142857, "grad_norm": 7.715976238250732, "learning_rate": 5.699025487256372e-07, "loss": 1.1477, "step": 3042 }, { "epoch": 289.8095238095238, "grad_norm": 7.544461727142334, "learning_rate": 5.700899550224889e-07, "loss": 1.1429, "step": 3043 }, { "epoch": 289.9047619047619, "grad_norm": 11.521993637084961, "learning_rate": 5.702773613193404e-07, "loss": 1.1868, "step": 3044 }, { "epoch": 290.0, "grad_norm": 4.851450443267822, "learning_rate": 5.70464767616192e-07, "loss": 1.1391, "step": 3045 }, { "epoch": 290.0, "eval_accuracy": 0.7261146496815286, "eval_f1": 0.7798855766335441, "eval_loss": 0.5616016983985901, "eval_precision": 0.7287563308947664, "eval_recall": 0.8387305699481865, "eval_roc_auc": 0.7783494530800231, "eval_runtime": 1.1982, "eval_samples_per_second": 2227.525, "eval_steps_per_second": 5.008, "step": 3045 }, { "epoch": 290.0952380952381, "grad_norm": 5.691206455230713, "learning_rate": 5.706521739130435e-07, "loss": 1.1532, "step": 3046 }, { "epoch": 290.1904761904762, "grad_norm": 8.848892211914062, "learning_rate": 5.708395802098951e-07, "loss": 1.1201, "step": 3047 }, { "epoch": 290.2857142857143, "grad_norm": 4.961413860321045, "learning_rate": 5.710269865067466e-07, "loss": 1.1733, "step": 3048 }, { "epoch": 290.3809523809524, "grad_norm": 4.45950984954834, "learning_rate": 5.712143928035982e-07, "loss": 1.1741, "step": 3049 }, { "epoch": 290.4761904761905, "grad_norm": 3.6861424446105957, "learning_rate": 5.714017991004499e-07, "loss": 1.1357, "step": 3050 }, { "epoch": 290.57142857142856, "grad_norm": 4.187479019165039, "learning_rate": 5.715892053973014e-07, "loss": 1.1444, "step": 3051 }, { "epoch": 290.6666666666667, "grad_norm": 5.746031761169434, "learning_rate": 5.71776611694153e-07, "loss": 1.134, "step": 3052 }, { "epoch": 290.76190476190476, "grad_norm": 5.604963779449463, "learning_rate": 5.719640179910045e-07, "loss": 1.1561, "step": 3053 }, { "epoch": 290.85714285714283, "grad_norm": 5.199735641479492, "learning_rate": 5.721514242878561e-07, "loss": 1.1173, "step": 3054 }, { "epoch": 290.95238095238096, "grad_norm": 4.521169662475586, "learning_rate": 5.723388305847076e-07, "loss": 1.0963, "step": 3055 }, { "epoch": 290.95238095238096, "eval_accuracy": 0.7298613713001124, "eval_f1": 0.7832882476705741, "eval_loss": 0.5621520280838013, "eval_precision": 0.730790802019069, "eval_recall": 0.8439119170984456, "eval_roc_auc": 0.7781056419113415, "eval_runtime": 1.3525, "eval_samples_per_second": 1973.311, "eval_steps_per_second": 4.436, "step": 3055 }, { "epoch": 291.04761904761904, "grad_norm": 4.686976432800293, "learning_rate": 5.725262368815592e-07, "loss": 1.1488, "step": 3056 }, { "epoch": 291.14285714285717, "grad_norm": 4.308133125305176, "learning_rate": 5.727136431784108e-07, "loss": 1.088, "step": 3057 }, { "epoch": 291.23809523809524, "grad_norm": 9.826374053955078, "learning_rate": 5.729010494752624e-07, "loss": 1.1471, "step": 3058 }, { "epoch": 291.3333333333333, "grad_norm": 5.6400980949401855, "learning_rate": 5.73088455772114e-07, "loss": 1.1539, "step": 3059 }, { "epoch": 291.42857142857144, "grad_norm": 4.731324672698975, "learning_rate": 5.732758620689655e-07, "loss": 1.1177, "step": 3060 }, { "epoch": 291.5238095238095, "grad_norm": 7.6967644691467285, "learning_rate": 5.734632683658171e-07, "loss": 1.1073, "step": 3061 }, { "epoch": 291.6190476190476, "grad_norm": 3.6323401927948, "learning_rate": 5.736506746626686e-07, "loss": 1.182, "step": 3062 }, { "epoch": 291.7142857142857, "grad_norm": 4.018256664276123, "learning_rate": 5.738380809595203e-07, "loss": 1.1464, "step": 3063 }, { "epoch": 291.8095238095238, "grad_norm": 4.508745193481445, "learning_rate": 5.740254872563718e-07, "loss": 1.1568, "step": 3064 }, { "epoch": 291.9047619047619, "grad_norm": 6.37939453125, "learning_rate": 5.742128935532235e-07, "loss": 1.1368, "step": 3065 }, { "epoch": 292.0, "grad_norm": 3.512845039367676, "learning_rate": 5.74400299850075e-07, "loss": 1.1478, "step": 3066 }, { "epoch": 292.0, "eval_accuracy": 0.7287373548145373, "eval_f1": 0.7827130852340937, "eval_loss": 0.5623800158500671, "eval_precision": 0.7293064876957495, "eval_recall": 0.844559585492228, "eval_roc_auc": 0.7777728842832469, "eval_runtime": 1.1205, "eval_samples_per_second": 2381.874, "eval_steps_per_second": 5.355, "step": 3066 }, { "epoch": 292.0952380952381, "grad_norm": 10.951603889465332, "learning_rate": 5.745877061469266e-07, "loss": 1.161, "step": 3067 }, { "epoch": 292.1904761904762, "grad_norm": 13.926806449890137, "learning_rate": 5.747751124437782e-07, "loss": 1.165, "step": 3068 }, { "epoch": 292.2857142857143, "grad_norm": 3.765690565109253, "learning_rate": 5.749625187406297e-07, "loss": 1.1571, "step": 3069 }, { "epoch": 292.3809523809524, "grad_norm": 8.859353065490723, "learning_rate": 5.751499250374813e-07, "loss": 1.1286, "step": 3070 }, { "epoch": 292.4761904761905, "grad_norm": 6.424526214599609, "learning_rate": 5.753373313343329e-07, "loss": 1.1304, "step": 3071 }, { "epoch": 292.57142857142856, "grad_norm": 10.029023170471191, "learning_rate": 5.755247376311845e-07, "loss": 1.0804, "step": 3072 }, { "epoch": 292.6666666666667, "grad_norm": 3.650876045227051, "learning_rate": 5.75712143928036e-07, "loss": 1.1577, "step": 3073 }, { "epoch": 292.76190476190476, "grad_norm": 6.428129196166992, "learning_rate": 5.758995502248876e-07, "loss": 1.137, "step": 3074 }, { "epoch": 292.85714285714283, "grad_norm": 7.669662952423096, "learning_rate": 5.760869565217391e-07, "loss": 1.1369, "step": 3075 }, { "epoch": 292.95238095238096, "grad_norm": 7.731568813323975, "learning_rate": 5.762743628185907e-07, "loss": 1.1473, "step": 3076 }, { "epoch": 292.95238095238096, "eval_accuracy": 0.7306107156238292, "eval_f1": 0.7856929955290611, "eval_loss": 0.5623287558555603, "eval_precision": 0.7277747101049145, "eval_recall": 0.8536269430051814, "eval_roc_auc": 0.7782962003454232, "eval_runtime": 1.2639, "eval_samples_per_second": 2111.777, "eval_steps_per_second": 4.747, "step": 3076 }, { "epoch": 293.04761904761904, "grad_norm": 8.484759330749512, "learning_rate": 5.764617691154423e-07, "loss": 1.1324, "step": 3077 }, { "epoch": 293.14285714285717, "grad_norm": 9.59839916229248, "learning_rate": 5.766491754122939e-07, "loss": 1.1433, "step": 3078 }, { "epoch": 293.23809523809524, "grad_norm": 6.049525737762451, "learning_rate": 5.768365817091455e-07, "loss": 1.1578, "step": 3079 }, { "epoch": 293.3333333333333, "grad_norm": 6.255111217498779, "learning_rate": 5.77023988005997e-07, "loss": 1.1264, "step": 3080 }, { "epoch": 293.42857142857144, "grad_norm": 4.266199111938477, "learning_rate": 5.772113943028486e-07, "loss": 1.0949, "step": 3081 }, { "epoch": 293.5238095238095, "grad_norm": 8.692477226257324, "learning_rate": 5.773988005997001e-07, "loss": 1.1688, "step": 3082 }, { "epoch": 293.6190476190476, "grad_norm": 4.867812633514404, "learning_rate": 5.775862068965517e-07, "loss": 1.1337, "step": 3083 }, { "epoch": 293.7142857142857, "grad_norm": 4.503742218017578, "learning_rate": 5.777736131934032e-07, "loss": 1.1476, "step": 3084 }, { "epoch": 293.8095238095238, "grad_norm": 10.558121681213379, "learning_rate": 5.779610194902549e-07, "loss": 1.1134, "step": 3085 }, { "epoch": 293.9047619047619, "grad_norm": 11.817536354064941, "learning_rate": 5.781484257871065e-07, "loss": 1.1337, "step": 3086 }, { "epoch": 294.0, "grad_norm": 8.518043518066406, "learning_rate": 5.78335832083958e-07, "loss": 1.1348, "step": 3087 }, { "epoch": 294.0, "eval_accuracy": 0.7249906331959536, "eval_f1": 0.7779794313369631, "eval_loss": 0.5618783235549927, "eval_precision": 0.7298524404086265, "eval_recall": 0.832901554404145, "eval_roc_auc": 0.7786315486470926, "eval_runtime": 1.1578, "eval_samples_per_second": 2305.293, "eval_steps_per_second": 5.182, "step": 3087 }, { "epoch": 294.0952380952381, "grad_norm": 5.27886962890625, "learning_rate": 5.785232383808097e-07, "loss": 1.1217, "step": 3088 }, { "epoch": 294.1904761904762, "grad_norm": 3.7267980575561523, "learning_rate": 5.787106446776612e-07, "loss": 1.1421, "step": 3089 }, { "epoch": 294.2857142857143, "grad_norm": 5.075734615325928, "learning_rate": 5.788980509745128e-07, "loss": 1.1312, "step": 3090 }, { "epoch": 294.3809523809524, "grad_norm": 11.15335464477539, "learning_rate": 5.790854572713643e-07, "loss": 1.1175, "step": 3091 }, { "epoch": 294.4761904761905, "grad_norm": 8.802132606506348, "learning_rate": 5.79272863568216e-07, "loss": 1.1491, "step": 3092 }, { "epoch": 294.57142857142856, "grad_norm": 6.129073619842529, "learning_rate": 5.794602698650675e-07, "loss": 1.0718, "step": 3093 }, { "epoch": 294.6666666666667, "grad_norm": 5.770191192626953, "learning_rate": 5.796476761619191e-07, "loss": 1.1447, "step": 3094 }, { "epoch": 294.76190476190476, "grad_norm": 5.671845436096191, "learning_rate": 5.798350824587707e-07, "loss": 1.1716, "step": 3095 }, { "epoch": 294.85714285714283, "grad_norm": 15.143241882324219, "learning_rate": 5.800224887556222e-07, "loss": 1.1416, "step": 3096 }, { "epoch": 294.95238095238096, "grad_norm": 15.241469383239746, "learning_rate": 5.802098950524738e-07, "loss": 1.1278, "step": 3097 }, { "epoch": 294.95238095238096, "eval_accuracy": 0.732484076433121, "eval_f1": 0.7898763978811065, "eval_loss": 0.5626599788665771, "eval_precision": 0.7238403451995685, "eval_recall": 0.8691709844559585, "eval_roc_auc": 0.7789355210132412, "eval_runtime": 1.1853, "eval_samples_per_second": 2251.769, "eval_steps_per_second": 5.062, "step": 3097 }, { "epoch": 295.04761904761904, "grad_norm": 14.951590538024902, "learning_rate": 5.803973013493253e-07, "loss": 1.1323, "step": 3098 }, { "epoch": 295.14285714285717, "grad_norm": 15.76667308807373, "learning_rate": 5.80584707646177e-07, "loss": 1.1233, "step": 3099 }, { "epoch": 295.23809523809524, "grad_norm": 10.656340599060059, "learning_rate": 5.807721139430285e-07, "loss": 1.1226, "step": 3100 }, { "epoch": 295.3333333333333, "grad_norm": 7.614379405975342, "learning_rate": 5.809595202398801e-07, "loss": 1.1035, "step": 3101 }, { "epoch": 295.42857142857144, "grad_norm": 5.129384994506836, "learning_rate": 5.811469265367316e-07, "loss": 1.1489, "step": 3102 }, { "epoch": 295.5238095238095, "grad_norm": 9.847796440124512, "learning_rate": 5.813343328335832e-07, "loss": 1.1712, "step": 3103 }, { "epoch": 295.6190476190476, "grad_norm": 5.071105003356934, "learning_rate": 5.815217391304348e-07, "loss": 1.1127, "step": 3104 }, { "epoch": 295.7142857142857, "grad_norm": 7.032314777374268, "learning_rate": 5.817091454272863e-07, "loss": 1.1381, "step": 3105 }, { "epoch": 295.8095238095238, "grad_norm": 21.643314361572266, "learning_rate": 5.81896551724138e-07, "loss": 1.1751, "step": 3106 }, { "epoch": 295.9047619047619, "grad_norm": 16.88426399230957, "learning_rate": 5.820839580209895e-07, "loss": 1.1669, "step": 3107 }, { "epoch": 296.0, "grad_norm": 14.358475685119629, "learning_rate": 5.822713643178411e-07, "loss": 1.1254, "step": 3108 }, { "epoch": 296.0, "eval_accuracy": 0.7234919445485201, "eval_f1": 0.7743119266055046, "eval_loss": 0.5619354844093323, "eval_precision": 0.7334878331402086, "eval_recall": 0.8199481865284974, "eval_roc_auc": 0.7782605066206102, "eval_runtime": 1.162, "eval_samples_per_second": 2296.94, "eval_steps_per_second": 5.164, "step": 3108 }, { "epoch": 296.0952380952381, "grad_norm": 5.653419017791748, "learning_rate": 5.824587706146926e-07, "loss": 1.0971, "step": 3109 }, { "epoch": 296.1904761904762, "grad_norm": 11.617242813110352, "learning_rate": 5.826461769115443e-07, "loss": 1.1437, "step": 3110 }, { "epoch": 296.2857142857143, "grad_norm": 15.939972877502441, "learning_rate": 5.828335832083958e-07, "loss": 1.1009, "step": 3111 }, { "epoch": 296.3809523809524, "grad_norm": 15.541411399841309, "learning_rate": 5.830209895052474e-07, "loss": 1.1665, "step": 3112 }, { "epoch": 296.4761904761905, "grad_norm": 4.3177056312561035, "learning_rate": 5.832083958020991e-07, "loss": 1.1738, "step": 3113 }, { "epoch": 296.57142857142856, "grad_norm": 9.515015602111816, "learning_rate": 5.833958020989506e-07, "loss": 1.1184, "step": 3114 }, { "epoch": 296.6666666666667, "grad_norm": 19.697540283203125, "learning_rate": 5.835832083958022e-07, "loss": 1.2232, "step": 3115 }, { "epoch": 296.76190476190476, "grad_norm": 3.511566638946533, "learning_rate": 5.837706146926537e-07, "loss": 1.1091, "step": 3116 }, { "epoch": 296.85714285714283, "grad_norm": 6.693878650665283, "learning_rate": 5.839580209895053e-07, "loss": 1.0979, "step": 3117 }, { "epoch": 296.95238095238096, "grad_norm": 8.09457778930664, "learning_rate": 5.841454272863568e-07, "loss": 1.1329, "step": 3118 }, { "epoch": 296.95238095238096, "eval_accuracy": 0.7298613713001124, "eval_f1": 0.7861168792643133, "eval_loss": 0.5612280964851379, "eval_precision": 0.7252326217843459, "eval_recall": 0.8581606217616581, "eval_roc_auc": 0.7798862982153137, "eval_runtime": 1.1797, "eval_samples_per_second": 2262.461, "eval_steps_per_second": 5.086, "step": 3118 }, { "epoch": 297.04761904761904, "grad_norm": 7.095411777496338, "learning_rate": 5.843328335832084e-07, "loss": 1.1483, "step": 3119 }, { "epoch": 297.14285714285717, "grad_norm": 10.408041954040527, "learning_rate": 5.8452023988006e-07, "loss": 1.1275, "step": 3120 }, { "epoch": 297.23809523809524, "grad_norm": 5.680013179779053, "learning_rate": 5.847076461769116e-07, "loss": 1.1233, "step": 3121 }, { "epoch": 297.3333333333333, "grad_norm": 5.734152317047119, "learning_rate": 5.848950524737632e-07, "loss": 1.1404, "step": 3122 }, { "epoch": 297.42857142857144, "grad_norm": 6.248361110687256, "learning_rate": 5.850824587706147e-07, "loss": 1.177, "step": 3123 }, { "epoch": 297.5238095238095, "grad_norm": 8.506665229797363, "learning_rate": 5.852698650674663e-07, "loss": 1.1269, "step": 3124 }, { "epoch": 297.6190476190476, "grad_norm": 4.191070079803467, "learning_rate": 5.854572713643178e-07, "loss": 1.1508, "step": 3125 }, { "epoch": 297.7142857142857, "grad_norm": 12.647905349731445, "learning_rate": 5.856446776611694e-07, "loss": 1.0912, "step": 3126 }, { "epoch": 297.8095238095238, "grad_norm": 5.739502429962158, "learning_rate": 5.85832083958021e-07, "loss": 1.1881, "step": 3127 }, { "epoch": 297.9047619047619, "grad_norm": 9.85162353515625, "learning_rate": 5.860194902548726e-07, "loss": 1.1236, "step": 3128 }, { "epoch": 298.0, "grad_norm": 15.419437408447266, "learning_rate": 5.862068965517241e-07, "loss": 1.0916, "step": 3129 }, { "epoch": 298.0, "eval_accuracy": 0.7257399775196703, "eval_f1": 0.7765567765567766, "eval_loss": 0.5605422854423523, "eval_precision": 0.7344110854503464, "eval_recall": 0.8238341968911918, "eval_roc_auc": 0.780507484168106, "eval_runtime": 1.2568, "eval_samples_per_second": 2123.632, "eval_steps_per_second": 4.774, "step": 3129 }, { "epoch": 298.0952380952381, "grad_norm": 3.829314947128296, "learning_rate": 5.863943028485757e-07, "loss": 1.1854, "step": 3130 }, { "epoch": 298.1904761904762, "grad_norm": 6.155840873718262, "learning_rate": 5.865817091454273e-07, "loss": 1.1357, "step": 3131 }, { "epoch": 298.2857142857143, "grad_norm": 11.116759300231934, "learning_rate": 5.867691154422788e-07, "loss": 1.0738, "step": 3132 }, { "epoch": 298.3809523809524, "grad_norm": 8.681174278259277, "learning_rate": 5.869565217391305e-07, "loss": 1.1234, "step": 3133 }, { "epoch": 298.4761904761905, "grad_norm": 4.055141448974609, "learning_rate": 5.87143928035982e-07, "loss": 1.1191, "step": 3134 }, { "epoch": 298.57142857142856, "grad_norm": 5.928338527679443, "learning_rate": 5.873313343328337e-07, "loss": 1.1314, "step": 3135 }, { "epoch": 298.6666666666667, "grad_norm": 6.639645576477051, "learning_rate": 5.875187406296852e-07, "loss": 1.152, "step": 3136 }, { "epoch": 298.76190476190476, "grad_norm": 5.529913902282715, "learning_rate": 5.877061469265368e-07, "loss": 1.0963, "step": 3137 }, { "epoch": 298.85714285714283, "grad_norm": 18.109577178955078, "learning_rate": 5.878935532233883e-07, "loss": 1.1149, "step": 3138 }, { "epoch": 298.95238095238096, "grad_norm": 7.9466872215271, "learning_rate": 5.880809595202399e-07, "loss": 1.1287, "step": 3139 }, { "epoch": 298.95238095238096, "eval_accuracy": 0.7302360434619708, "eval_f1": 0.787359716479622, "eval_loss": 0.560848593711853, "eval_precision": 0.7236699239956569, "eval_recall": 0.8633419689119171, "eval_roc_auc": 0.7808442717328729, "eval_runtime": 1.1353, "eval_samples_per_second": 2350.957, "eval_steps_per_second": 5.285, "step": 3139 }, { "epoch": 299.04761904761904, "grad_norm": 6.386238098144531, "learning_rate": 5.882683658170915e-07, "loss": 1.0765, "step": 3140 }, { "epoch": 299.14285714285717, "grad_norm": 6.555985927581787, "learning_rate": 5.884557721139431e-07, "loss": 1.14, "step": 3141 }, { "epoch": 299.23809523809524, "grad_norm": 6.371258735656738, "learning_rate": 5.886431784107947e-07, "loss": 1.1133, "step": 3142 }, { "epoch": 299.3333333333333, "grad_norm": 3.4509472846984863, "learning_rate": 5.888305847076462e-07, "loss": 1.1415, "step": 3143 }, { "epoch": 299.42857142857144, "grad_norm": 5.557456016540527, "learning_rate": 5.890179910044978e-07, "loss": 1.0862, "step": 3144 }, { "epoch": 299.5238095238095, "grad_norm": 10.220240592956543, "learning_rate": 5.892053973013493e-07, "loss": 1.1722, "step": 3145 }, { "epoch": 299.6190476190476, "grad_norm": 4.97195291519165, "learning_rate": 5.893928035982009e-07, "loss": 1.1453, "step": 3146 }, { "epoch": 299.7142857142857, "grad_norm": 4.511751174926758, "learning_rate": 5.895802098950524e-07, "loss": 1.1307, "step": 3147 }, { "epoch": 299.8095238095238, "grad_norm": 10.958107948303223, "learning_rate": 5.897676161919041e-07, "loss": 1.0468, "step": 3148 }, { "epoch": 299.9047619047619, "grad_norm": 5.010954856872559, "learning_rate": 5.899550224887557e-07, "loss": 1.1802, "step": 3149 }, { "epoch": 300.0, "grad_norm": 7.055208683013916, "learning_rate": 5.901424287856072e-07, "loss": 1.2058, "step": 3150 }, { "epoch": 300.0, "eval_accuracy": 0.7283626826526789, "eval_f1": 0.7820859633303276, "eval_loss": 0.5605196952819824, "eval_precision": 0.7296690970274817, "eval_recall": 0.8426165803108808, "eval_roc_auc": 0.7803629821531376, "eval_runtime": 1.2717, "eval_samples_per_second": 2098.823, "eval_steps_per_second": 4.718, "step": 3150 }, { "epoch": 300.0952380952381, "grad_norm": 5.877634525299072, "learning_rate": 5.903298350824588e-07, "loss": 1.151, "step": 3151 }, { "epoch": 300.1904761904762, "grad_norm": 10.211005210876465, "learning_rate": 5.905172413793103e-07, "loss": 1.1311, "step": 3152 }, { "epoch": 300.2857142857143, "grad_norm": 4.744696140289307, "learning_rate": 5.907046476761619e-07, "loss": 1.1364, "step": 3153 }, { "epoch": 300.3809523809524, "grad_norm": 8.057610511779785, "learning_rate": 5.908920539730134e-07, "loss": 1.1331, "step": 3154 }, { "epoch": 300.4761904761905, "grad_norm": 4.582040786743164, "learning_rate": 5.910794602698651e-07, "loss": 1.0779, "step": 3155 }, { "epoch": 300.57142857142856, "grad_norm": 5.584441184997559, "learning_rate": 5.912668665667166e-07, "loss": 1.1095, "step": 3156 }, { "epoch": 300.6666666666667, "grad_norm": 5.46194314956665, "learning_rate": 5.914542728635682e-07, "loss": 1.1512, "step": 3157 }, { "epoch": 300.76190476190476, "grad_norm": 5.296677112579346, "learning_rate": 5.916416791604199e-07, "loss": 1.1492, "step": 3158 }, { "epoch": 300.85714285714283, "grad_norm": 4.495828151702881, "learning_rate": 5.918290854572714e-07, "loss": 1.1227, "step": 3159 }, { "epoch": 300.95238095238096, "grad_norm": 9.015779495239258, "learning_rate": 5.92016491754123e-07, "loss": 1.1717, "step": 3160 }, { "epoch": 300.95238095238096, "eval_accuracy": 0.7291120269763957, "eval_f1": 0.7841146610928635, "eval_loss": 0.5596321225166321, "eval_precision": 0.7274238227146814, "eval_recall": 0.8503886010362695, "eval_roc_auc": 0.7817251007484168, "eval_runtime": 1.1674, "eval_samples_per_second": 2286.256, "eval_steps_per_second": 5.14, "step": 3160 }, { "epoch": 301.04761904761904, "grad_norm": 6.904094696044922, "learning_rate": 5.922038980509745e-07, "loss": 1.0834, "step": 3161 }, { "epoch": 301.14285714285717, "grad_norm": 8.023409843444824, "learning_rate": 5.923913043478262e-07, "loss": 1.1702, "step": 3162 }, { "epoch": 301.23809523809524, "grad_norm": 6.607522487640381, "learning_rate": 5.925787106446777e-07, "loss": 1.1145, "step": 3163 }, { "epoch": 301.3333333333333, "grad_norm": 6.27717399597168, "learning_rate": 5.927661169415293e-07, "loss": 1.1473, "step": 3164 }, { "epoch": 301.42857142857144, "grad_norm": 13.253412246704102, "learning_rate": 5.929535232383808e-07, "loss": 1.1361, "step": 3165 }, { "epoch": 301.5238095238095, "grad_norm": 6.1444807052612305, "learning_rate": 5.931409295352324e-07, "loss": 1.1103, "step": 3166 }, { "epoch": 301.6190476190476, "grad_norm": 4.564396381378174, "learning_rate": 5.93328335832084e-07, "loss": 1.1062, "step": 3167 }, { "epoch": 301.7142857142857, "grad_norm": 14.411751747131348, "learning_rate": 5.935157421289355e-07, "loss": 1.1852, "step": 3168 }, { "epoch": 301.8095238095238, "grad_norm": 6.965888977050781, "learning_rate": 5.937031484257872e-07, "loss": 1.1626, "step": 3169 }, { "epoch": 301.9047619047619, "grad_norm": 10.08370304107666, "learning_rate": 5.938905547226387e-07, "loss": 1.1333, "step": 3170 }, { "epoch": 302.0, "grad_norm": 6.450741291046143, "learning_rate": 5.940779610194903e-07, "loss": 1.1493, "step": 3171 }, { "epoch": 302.0, "eval_accuracy": 0.7234919445485201, "eval_f1": 0.7759562841530054, "eval_loss": 0.5597299933433533, "eval_precision": 0.7302857142857143, "eval_recall": 0.8277202072538861, "eval_roc_auc": 0.7810618883131837, "eval_runtime": 1.2015, "eval_samples_per_second": 2221.468, "eval_steps_per_second": 4.994, "step": 3171 }, { "epoch": 302.0952380952381, "grad_norm": 6.088359832763672, "learning_rate": 5.942653673163418e-07, "loss": 1.1616, "step": 3172 }, { "epoch": 302.1904761904762, "grad_norm": 8.05422306060791, "learning_rate": 5.944527736131934e-07, "loss": 1.1455, "step": 3173 }, { "epoch": 302.2857142857143, "grad_norm": 16.03872299194336, "learning_rate": 5.946401799100449e-07, "loss": 1.1452, "step": 3174 }, { "epoch": 302.3809523809524, "grad_norm": 8.73220443725586, "learning_rate": 5.948275862068965e-07, "loss": 1.1488, "step": 3175 }, { "epoch": 302.4761904761905, "grad_norm": 10.711005210876465, "learning_rate": 5.950149925037482e-07, "loss": 1.1715, "step": 3176 }, { "epoch": 302.57142857142856, "grad_norm": 12.020049095153809, "learning_rate": 5.952023988005997e-07, "loss": 1.1498, "step": 3177 }, { "epoch": 302.6666666666667, "grad_norm": 7.707366943359375, "learning_rate": 5.953898050974513e-07, "loss": 1.1374, "step": 3178 }, { "epoch": 302.76190476190476, "grad_norm": 6.902293682098389, "learning_rate": 5.955772113943028e-07, "loss": 1.1004, "step": 3179 }, { "epoch": 302.85714285714283, "grad_norm": 9.250264167785645, "learning_rate": 5.957646176911545e-07, "loss": 1.093, "step": 3180 }, { "epoch": 302.95238095238096, "grad_norm": 5.168294429779053, "learning_rate": 5.95952023988006e-07, "loss": 1.1286, "step": 3181 }, { "epoch": 302.95238095238096, "eval_accuracy": 0.7328587485949793, "eval_f1": 0.789861479516652, "eval_loss": 0.5604127645492554, "eval_precision": 0.7247160627366144, "eval_recall": 0.8678756476683938, "eval_roc_auc": 0.7815261945883708, "eval_runtime": 1.174, "eval_samples_per_second": 2273.494, "eval_steps_per_second": 5.111, "step": 3181 }, { "epoch": 303.04761904761904, "grad_norm": 6.529985427856445, "learning_rate": 5.961394302848576e-07, "loss": 1.122, "step": 3182 }, { "epoch": 303.14285714285717, "grad_norm": 10.856659889221191, "learning_rate": 5.963268365817093e-07, "loss": 1.1605, "step": 3183 }, { "epoch": 303.23809523809524, "grad_norm": 27.985136032104492, "learning_rate": 5.965142428785608e-07, "loss": 1.1823, "step": 3184 }, { "epoch": 303.3333333333333, "grad_norm": 11.863192558288574, "learning_rate": 5.967016491754124e-07, "loss": 1.0658, "step": 3185 }, { "epoch": 303.42857142857144, "grad_norm": 11.549949645996094, "learning_rate": 5.968890554722639e-07, "loss": 1.1329, "step": 3186 }, { "epoch": 303.5238095238095, "grad_norm": 27.801267623901367, "learning_rate": 5.970764617691155e-07, "loss": 1.187, "step": 3187 }, { "epoch": 303.6190476190476, "grad_norm": 12.57270622253418, "learning_rate": 5.97263868065967e-07, "loss": 1.1489, "step": 3188 }, { "epoch": 303.7142857142857, "grad_norm": 4.093427658081055, "learning_rate": 5.974512743628187e-07, "loss": 1.0962, "step": 3189 }, { "epoch": 303.8095238095238, "grad_norm": 4.537065029144287, "learning_rate": 5.976386806596702e-07, "loss": 1.1486, "step": 3190 }, { "epoch": 303.9047619047619, "grad_norm": 5.669375896453857, "learning_rate": 5.978260869565218e-07, "loss": 1.1027, "step": 3191 }, { "epoch": 304.0, "grad_norm": 5.12407922744751, "learning_rate": 5.980134932533733e-07, "loss": 1.1195, "step": 3192 }, { "epoch": 304.0, "eval_accuracy": 0.7276133383289621, "eval_f1": 0.778150747635032, "eval_loss": 0.5602059364318848, "eval_precision": 0.7357184073860358, "eval_recall": 0.8257772020725389, "eval_roc_auc": 0.7804870466321243, "eval_runtime": 1.1956, "eval_samples_per_second": 2232.4, "eval_steps_per_second": 5.019, "step": 3192 }, { "epoch": 304.0952380952381, "grad_norm": 4.144861698150635, "learning_rate": 5.982008995502249e-07, "loss": 1.1244, "step": 3193 }, { "epoch": 304.1904761904762, "grad_norm": 4.079629421234131, "learning_rate": 5.983883058470765e-07, "loss": 1.1458, "step": 3194 }, { "epoch": 304.2857142857143, "grad_norm": 18.062515258789062, "learning_rate": 5.98575712143928e-07, "loss": 1.1121, "step": 3195 }, { "epoch": 304.3809523809524, "grad_norm": 8.738750457763672, "learning_rate": 5.987631184407797e-07, "loss": 1.1344, "step": 3196 }, { "epoch": 304.4761904761905, "grad_norm": 7.128112316131592, "learning_rate": 5.989505247376312e-07, "loss": 1.1471, "step": 3197 }, { "epoch": 304.57142857142856, "grad_norm": 6.939492225646973, "learning_rate": 5.991379310344828e-07, "loss": 1.1812, "step": 3198 }, { "epoch": 304.6666666666667, "grad_norm": 4.064919948577881, "learning_rate": 5.993253373313343e-07, "loss": 1.1323, "step": 3199 }, { "epoch": 304.76190476190476, "grad_norm": 12.939764022827148, "learning_rate": 5.995127436281859e-07, "loss": 1.1151, "step": 3200 }, { "epoch": 304.85714285714283, "grad_norm": 4.833240509033203, "learning_rate": 5.997001499250374e-07, "loss": 1.1176, "step": 3201 }, { "epoch": 304.95238095238096, "grad_norm": 7.5120673179626465, "learning_rate": 5.998875562218891e-07, "loss": 1.1406, "step": 3202 }, { "epoch": 304.95238095238096, "eval_accuracy": 0.7336080929186961, "eval_f1": 0.7900797165633304, "eval_loss": 0.5607642531394958, "eval_precision": 0.7259902333152469, "eval_recall": 0.866580310880829, "eval_roc_auc": 0.7805192861255037, "eval_runtime": 1.3776, "eval_samples_per_second": 1937.392, "eval_steps_per_second": 4.355, "step": 3202 }, { "epoch": 305.04761904761904, "grad_norm": 12.696020126342773, "learning_rate": 6.000749625187407e-07, "loss": 1.1232, "step": 3203 }, { "epoch": 305.14285714285717, "grad_norm": 17.43992042541504, "learning_rate": 6.002623688155922e-07, "loss": 1.1179, "step": 3204 }, { "epoch": 305.23809523809524, "grad_norm": 5.639843940734863, "learning_rate": 6.004497751124439e-07, "loss": 1.0978, "step": 3205 }, { "epoch": 305.3333333333333, "grad_norm": 25.095577239990234, "learning_rate": 6.006371814092954e-07, "loss": 1.1403, "step": 3206 }, { "epoch": 305.42857142857144, "grad_norm": 10.123703956604004, "learning_rate": 6.00824587706147e-07, "loss": 1.102, "step": 3207 }, { "epoch": 305.5238095238095, "grad_norm": 6.3739824295043945, "learning_rate": 6.010119940029985e-07, "loss": 1.1449, "step": 3208 }, { "epoch": 305.6190476190476, "grad_norm": 6.56973123550415, "learning_rate": 6.011994002998501e-07, "loss": 1.1216, "step": 3209 }, { "epoch": 305.7142857142857, "grad_norm": 7.129731178283691, "learning_rate": 6.013868065967018e-07, "loss": 1.1629, "step": 3210 }, { "epoch": 305.8095238095238, "grad_norm": 3.856766700744629, "learning_rate": 6.015742128935533e-07, "loss": 1.1298, "step": 3211 }, { "epoch": 305.9047619047619, "grad_norm": 11.901754379272461, "learning_rate": 6.017616191904049e-07, "loss": 1.1581, "step": 3212 }, { "epoch": 306.0, "grad_norm": 6.4798054695129395, "learning_rate": 6.019490254872564e-07, "loss": 1.1326, "step": 3213 }, { "epoch": 306.0, "eval_accuracy": 0.7279880104908205, "eval_f1": 0.7779816513761468, "eval_loss": 0.5608794689178467, "eval_precision": 0.7369640787949016, "eval_recall": 0.8238341968911918, "eval_roc_auc": 0.7791991940126656, "eval_runtime": 1.1268, "eval_samples_per_second": 2368.694, "eval_steps_per_second": 5.325, "step": 3213 }, { "epoch": 306.0952380952381, "grad_norm": 11.762500762939453, "learning_rate": 6.02136431784108e-07, "loss": 1.1089, "step": 3214 }, { "epoch": 306.1904761904762, "grad_norm": 6.889684200286865, "learning_rate": 6.023238380809595e-07, "loss": 1.1317, "step": 3215 }, { "epoch": 306.2857142857143, "grad_norm": 8.107551574707031, "learning_rate": 6.025112443778111e-07, "loss": 1.1479, "step": 3216 }, { "epoch": 306.3809523809524, "grad_norm": 13.955403327941895, "learning_rate": 6.026986506746627e-07, "loss": 1.1003, "step": 3217 }, { "epoch": 306.4761904761905, "grad_norm": 5.956514358520508, "learning_rate": 6.028860569715143e-07, "loss": 1.1478, "step": 3218 }, { "epoch": 306.57142857142856, "grad_norm": 6.5395917892456055, "learning_rate": 6.030734632683658e-07, "loss": 1.1259, "step": 3219 }, { "epoch": 306.6666666666667, "grad_norm": 13.178661346435547, "learning_rate": 6.032608695652174e-07, "loss": 1.1703, "step": 3220 }, { "epoch": 306.76190476190476, "grad_norm": 8.879888534545898, "learning_rate": 6.03448275862069e-07, "loss": 1.1012, "step": 3221 }, { "epoch": 306.85714285714283, "grad_norm": 8.496637344360352, "learning_rate": 6.036356821589205e-07, "loss": 1.1188, "step": 3222 }, { "epoch": 306.95238095238096, "grad_norm": 12.254331588745117, "learning_rate": 6.038230884557721e-07, "loss": 1.1522, "step": 3223 }, { "epoch": 306.95238095238096, "eval_accuracy": 0.7328587485949793, "eval_f1": 0.7889908256880734, "eval_loss": 0.5601345896720886, "eval_precision": 0.7264305177111716, "eval_recall": 0.8633419689119171, "eval_roc_auc": 0.7810305123776626, "eval_runtime": 1.1948, "eval_samples_per_second": 2233.805, "eval_steps_per_second": 5.022, "step": 3223 }, { "epoch": 307.04761904761904, "grad_norm": 6.876726150512695, "learning_rate": 6.040104947526237e-07, "loss": 1.1352, "step": 3224 }, { "epoch": 307.14285714285717, "grad_norm": 6.194924354553223, "learning_rate": 6.041979010494753e-07, "loss": 1.1511, "step": 3225 }, { "epoch": 307.23809523809524, "grad_norm": 9.343500137329102, "learning_rate": 6.043853073463268e-07, "loss": 1.0997, "step": 3226 }, { "epoch": 307.3333333333333, "grad_norm": 3.359732151031494, "learning_rate": 6.045727136431785e-07, "loss": 1.1152, "step": 3227 }, { "epoch": 307.42857142857144, "grad_norm": 4.9511542320251465, "learning_rate": 6.0476011994003e-07, "loss": 1.1046, "step": 3228 }, { "epoch": 307.5238095238095, "grad_norm": 10.57309627532959, "learning_rate": 6.049475262368816e-07, "loss": 1.1745, "step": 3229 }, { "epoch": 307.6190476190476, "grad_norm": 5.695428848266602, "learning_rate": 6.051349325337332e-07, "loss": 1.1544, "step": 3230 }, { "epoch": 307.7142857142857, "grad_norm": 7.539062023162842, "learning_rate": 6.053223388305848e-07, "loss": 1.1166, "step": 3231 }, { "epoch": 307.8095238095238, "grad_norm": 5.299892425537109, "learning_rate": 6.055097451274364e-07, "loss": 1.144, "step": 3232 }, { "epoch": 307.9047619047619, "grad_norm": 9.088667869567871, "learning_rate": 6.056971514242879e-07, "loss": 1.1555, "step": 3233 }, { "epoch": 308.0, "grad_norm": 8.569293022155762, "learning_rate": 6.058845577211395e-07, "loss": 1.1036, "step": 3234 }, { "epoch": 308.0, "eval_accuracy": 0.7317347321094043, "eval_f1": 0.785242951409718, "eval_loss": 0.5596063137054443, "eval_precision": 0.7312849162011174, "eval_recall": 0.8477979274611399, "eval_roc_auc": 0.7812645365572826, "eval_runtime": 1.2018, "eval_samples_per_second": 2220.77, "eval_steps_per_second": 4.992, "step": 3234 }, { "epoch": 308.0952380952381, "grad_norm": 4.837582111358643, "learning_rate": 6.06071964017991e-07, "loss": 1.1471, "step": 3235 }, { "epoch": 308.1904761904762, "grad_norm": 4.442275524139404, "learning_rate": 6.062593703148426e-07, "loss": 1.1361, "step": 3236 }, { "epoch": 308.2857142857143, "grad_norm": 11.222310066223145, "learning_rate": 6.064467766116941e-07, "loss": 1.1192, "step": 3237 }, { "epoch": 308.3809523809524, "grad_norm": 9.199163436889648, "learning_rate": 6.066341829085458e-07, "loss": 1.1205, "step": 3238 }, { "epoch": 308.4761904761905, "grad_norm": 18.25674057006836, "learning_rate": 6.068215892053974e-07, "loss": 1.0867, "step": 3239 }, { "epoch": 308.57142857142856, "grad_norm": 5.885474681854248, "learning_rate": 6.070089955022489e-07, "loss": 1.1149, "step": 3240 }, { "epoch": 308.6666666666667, "grad_norm": 4.786909580230713, "learning_rate": 6.071964017991005e-07, "loss": 1.1215, "step": 3241 }, { "epoch": 308.76190476190476, "grad_norm": 9.002260208129883, "learning_rate": 6.07383808095952e-07, "loss": 1.1238, "step": 3242 }, { "epoch": 308.85714285714283, "grad_norm": 6.030719757080078, "learning_rate": 6.075712143928036e-07, "loss": 1.1225, "step": 3243 }, { "epoch": 308.95238095238096, "grad_norm": 7.590175151824951, "learning_rate": 6.077586206896551e-07, "loss": 1.1342, "step": 3244 }, { "epoch": 308.95238095238096, "eval_accuracy": 0.7276133383289621, "eval_f1": 0.7798970632758099, "eval_loss": 0.5592300295829773, "eval_precision": 0.7322342239909039, "eval_recall": 0.8341968911917098, "eval_roc_auc": 0.7817251007484167, "eval_runtime": 1.1711, "eval_samples_per_second": 2278.968, "eval_steps_per_second": 5.123, "step": 3244 }, { "epoch": 309.04761904761904, "grad_norm": 4.260223865509033, "learning_rate": 6.079460269865068e-07, "loss": 1.1425, "step": 3245 }, { "epoch": 309.14285714285717, "grad_norm": 8.455692291259766, "learning_rate": 6.081334332833583e-07, "loss": 1.0991, "step": 3246 }, { "epoch": 309.23809523809524, "grad_norm": 6.05549955368042, "learning_rate": 6.083208395802099e-07, "loss": 1.1541, "step": 3247 }, { "epoch": 309.3333333333333, "grad_norm": 5.0326995849609375, "learning_rate": 6.085082458770615e-07, "loss": 1.1085, "step": 3248 }, { "epoch": 309.42857142857144, "grad_norm": 4.912335395812988, "learning_rate": 6.08695652173913e-07, "loss": 1.1493, "step": 3249 }, { "epoch": 309.5238095238095, "grad_norm": 10.788124084472656, "learning_rate": 6.088830584707647e-07, "loss": 1.1418, "step": 3250 }, { "epoch": 309.6190476190476, "grad_norm": 5.081390380859375, "learning_rate": 6.090704647676162e-07, "loss": 1.1463, "step": 3251 }, { "epoch": 309.7142857142857, "grad_norm": 5.906666278839111, "learning_rate": 6.092578710644679e-07, "loss": 1.1376, "step": 3252 }, { "epoch": 309.8095238095238, "grad_norm": 10.533183097839355, "learning_rate": 6.094452773613194e-07, "loss": 1.1166, "step": 3253 }, { "epoch": 309.9047619047619, "grad_norm": 9.069061279296875, "learning_rate": 6.09632683658171e-07, "loss": 1.1509, "step": 3254 }, { "epoch": 310.0, "grad_norm": 6.89107084274292, "learning_rate": 6.098200899550225e-07, "loss": 1.1169, "step": 3255 }, { "epoch": 310.0, "eval_accuracy": 0.729486699138254, "eval_f1": 0.7844776119402985, "eval_loss": 0.5588894486427307, "eval_precision": 0.7275747508305648, "eval_recall": 0.8510362694300518, "eval_roc_auc": 0.7822990788716178, "eval_runtime": 1.168, "eval_samples_per_second": 2285.1, "eval_steps_per_second": 5.137, "step": 3255 }, { "epoch": 310.0952380952381, "grad_norm": 8.010951042175293, "learning_rate": 6.100074962518741e-07, "loss": 1.1057, "step": 3256 }, { "epoch": 310.1904761904762, "grad_norm": 4.460325717926025, "learning_rate": 6.101949025487257e-07, "loss": 1.141, "step": 3257 }, { "epoch": 310.2857142857143, "grad_norm": 12.403966903686523, "learning_rate": 6.103823088455772e-07, "loss": 1.12, "step": 3258 }, { "epoch": 310.3809523809524, "grad_norm": 4.519659996032715, "learning_rate": 6.105697151424289e-07, "loss": 1.1342, "step": 3259 }, { "epoch": 310.4761904761905, "grad_norm": 9.70462703704834, "learning_rate": 6.107571214392804e-07, "loss": 1.1626, "step": 3260 }, { "epoch": 310.57142857142856, "grad_norm": 6.432512283325195, "learning_rate": 6.10944527736132e-07, "loss": 1.1871, "step": 3261 }, { "epoch": 310.6666666666667, "grad_norm": 5.8990254402160645, "learning_rate": 6.111319340329835e-07, "loss": 1.1867, "step": 3262 }, { "epoch": 310.76190476190476, "grad_norm": 11.098644256591797, "learning_rate": 6.113193403298351e-07, "loss": 1.1311, "step": 3263 }, { "epoch": 310.85714285714283, "grad_norm": 13.900815963745117, "learning_rate": 6.115067466266866e-07, "loss": 1.081, "step": 3264 }, { "epoch": 310.95238095238096, "grad_norm": 4.705795764923096, "learning_rate": 6.116941529235382e-07, "loss": 1.137, "step": 3265 }, { "epoch": 310.95238095238096, "eval_accuracy": 0.726489321843387, "eval_f1": 0.7802528597230584, "eval_loss": 0.5585808753967285, "eval_precision": 0.7289088863892014, "eval_recall": 0.8393782383419689, "eval_roc_auc": 0.7824746689694876, "eval_runtime": 1.5471, "eval_samples_per_second": 1725.177, "eval_steps_per_second": 3.878, "step": 3265 }, { "epoch": 311.04761904761904, "grad_norm": 4.762983798980713, "learning_rate": 6.118815592203899e-07, "loss": 1.1286, "step": 3266 }, { "epoch": 311.14285714285717, "grad_norm": 4.509739398956299, "learning_rate": 6.120689655172414e-07, "loss": 1.0949, "step": 3267 }, { "epoch": 311.23809523809524, "grad_norm": 8.220304489135742, "learning_rate": 6.12256371814093e-07, "loss": 1.1219, "step": 3268 }, { "epoch": 311.3333333333333, "grad_norm": 4.448649883270264, "learning_rate": 6.124437781109445e-07, "loss": 1.1405, "step": 3269 }, { "epoch": 311.42857142857144, "grad_norm": 4.820764064788818, "learning_rate": 6.126311844077961e-07, "loss": 1.1275, "step": 3270 }, { "epoch": 311.5238095238095, "grad_norm": 5.165571212768555, "learning_rate": 6.128185907046476e-07, "loss": 1.0832, "step": 3271 }, { "epoch": 311.6190476190476, "grad_norm": 5.083733081817627, "learning_rate": 6.130059970014993e-07, "loss": 1.1247, "step": 3272 }, { "epoch": 311.7142857142857, "grad_norm": 6.846983909606934, "learning_rate": 6.131934032983508e-07, "loss": 1.1273, "step": 3273 }, { "epoch": 311.8095238095238, "grad_norm": 14.690434455871582, "learning_rate": 6.133808095952024e-07, "loss": 1.1779, "step": 3274 }, { "epoch": 311.9047619047619, "grad_norm": 4.378728866577148, "learning_rate": 6.135682158920541e-07, "loss": 1.1281, "step": 3275 }, { "epoch": 312.0, "grad_norm": 5.9867658615112305, "learning_rate": 6.137556221889056e-07, "loss": 1.1285, "step": 3276 }, { "epoch": 312.0, "eval_accuracy": 0.725365305357812, "eval_f1": 0.7787503773015394, "eval_loss": 0.5584920644760132, "eval_precision": 0.7292255511588468, "eval_recall": 0.8354922279792746, "eval_roc_auc": 0.7823529073114566, "eval_runtime": 1.2509, "eval_samples_per_second": 2133.686, "eval_steps_per_second": 4.797, "step": 3276 }, { "epoch": 312.0952380952381, "grad_norm": 6.125533580780029, "learning_rate": 6.139430284857572e-07, "loss": 1.1553, "step": 3277 }, { "epoch": 312.1904761904762, "grad_norm": 11.429598808288574, "learning_rate": 6.141304347826087e-07, "loss": 1.1018, "step": 3278 }, { "epoch": 312.2857142857143, "grad_norm": 13.404115676879883, "learning_rate": 6.143178410794603e-07, "loss": 1.1201, "step": 3279 }, { "epoch": 312.3809523809524, "grad_norm": 6.066341400146484, "learning_rate": 6.145052473763119e-07, "loss": 1.1588, "step": 3280 }, { "epoch": 312.4761904761905, "grad_norm": 5.60275411605835, "learning_rate": 6.146926536731635e-07, "loss": 1.1359, "step": 3281 }, { "epoch": 312.57142857142856, "grad_norm": 2.7833516597747803, "learning_rate": 6.14880059970015e-07, "loss": 1.1629, "step": 3282 }, { "epoch": 312.6666666666667, "grad_norm": 14.339527130126953, "learning_rate": 6.150674662668666e-07, "loss": 1.1026, "step": 3283 }, { "epoch": 312.76190476190476, "grad_norm": 10.421635627746582, "learning_rate": 6.152548725637182e-07, "loss": 1.1388, "step": 3284 }, { "epoch": 312.85714285714283, "grad_norm": 6.532027244567871, "learning_rate": 6.154422788605697e-07, "loss": 1.1199, "step": 3285 }, { "epoch": 312.95238095238096, "grad_norm": 10.787590980529785, "learning_rate": 6.156296851574213e-07, "loss": 1.1507, "step": 3286 }, { "epoch": 312.95238095238096, "eval_accuracy": 0.7261146496815286, "eval_f1": 0.777879064114251, "eval_loss": 0.5586705207824707, "eval_precision": 0.7326846021751574, "eval_recall": 0.8290155440414507, "eval_roc_auc": 0.7821442141623489, "eval_runtime": 1.214, "eval_samples_per_second": 2198.608, "eval_steps_per_second": 4.943, "step": 3286 }, { "epoch": 313.04761904761904, "grad_norm": 15.449256896972656, "learning_rate": 6.158170914542729e-07, "loss": 1.1215, "step": 3287 }, { "epoch": 313.14285714285717, "grad_norm": 9.633466720581055, "learning_rate": 6.160044977511245e-07, "loss": 1.1711, "step": 3288 }, { "epoch": 313.23809523809524, "grad_norm": 5.2529191970825195, "learning_rate": 6.16191904047976e-07, "loss": 1.1441, "step": 3289 }, { "epoch": 313.3333333333333, "grad_norm": 4.596034526824951, "learning_rate": 6.163793103448276e-07, "loss": 1.1165, "step": 3290 }, { "epoch": 313.42857142857144, "grad_norm": 4.310565948486328, "learning_rate": 6.165667166416791e-07, "loss": 1.1557, "step": 3291 }, { "epoch": 313.5238095238095, "grad_norm": 5.154080867767334, "learning_rate": 6.167541229385307e-07, "loss": 1.1244, "step": 3292 }, { "epoch": 313.6190476190476, "grad_norm": 6.325362205505371, "learning_rate": 6.169415292353823e-07, "loss": 1.0947, "step": 3293 }, { "epoch": 313.7142857142857, "grad_norm": 10.20145320892334, "learning_rate": 6.171289355322339e-07, "loss": 1.1397, "step": 3294 }, { "epoch": 313.8095238095238, "grad_norm": 5.5285844802856445, "learning_rate": 6.173163418290855e-07, "loss": 1.182, "step": 3295 }, { "epoch": 313.9047619047619, "grad_norm": 16.499744415283203, "learning_rate": 6.17503748125937e-07, "loss": 1.1293, "step": 3296 }, { "epoch": 314.0, "grad_norm": 12.139077186584473, "learning_rate": 6.176911544227887e-07, "loss": 1.1397, "step": 3297 }, { "epoch": 314.0, "eval_accuracy": 0.7272386661671038, "eval_f1": 0.7799274486094316, "eval_loss": 0.5583422183990479, "eval_precision": 0.7312925170068028, "eval_recall": 0.8354922279792746, "eval_roc_auc": 0.7827049510650548, "eval_runtime": 1.1746, "eval_samples_per_second": 2272.288, "eval_steps_per_second": 5.108, "step": 3297 }, { "epoch": 314.0952380952381, "grad_norm": 7.897889614105225, "learning_rate": 6.178785607196402e-07, "loss": 1.1594, "step": 3298 }, { "epoch": 314.1904761904762, "grad_norm": 6.885706901550293, "learning_rate": 6.180659670164918e-07, "loss": 1.1127, "step": 3299 }, { "epoch": 314.2857142857143, "grad_norm": 4.971257209777832, "learning_rate": 6.182533733133433e-07, "loss": 1.1407, "step": 3300 }, { "epoch": 314.3809523809524, "grad_norm": 7.690533638000488, "learning_rate": 6.18440779610195e-07, "loss": 1.1584, "step": 3301 }, { "epoch": 314.4761904761905, "grad_norm": 5.1898274421691895, "learning_rate": 6.186281859070466e-07, "loss": 1.1201, "step": 3302 }, { "epoch": 314.57142857142856, "grad_norm": 3.992206573486328, "learning_rate": 6.188155922038981e-07, "loss": 1.1321, "step": 3303 }, { "epoch": 314.6666666666667, "grad_norm": 7.562021255493164, "learning_rate": 6.190029985007497e-07, "loss": 1.0823, "step": 3304 }, { "epoch": 314.76190476190476, "grad_norm": 9.269352912902832, "learning_rate": 6.191904047976012e-07, "loss": 1.1033, "step": 3305 }, { "epoch": 314.85714285714283, "grad_norm": 11.719595909118652, "learning_rate": 6.193778110944528e-07, "loss": 1.1579, "step": 3306 }, { "epoch": 314.95238095238096, "grad_norm": 5.1851372718811035, "learning_rate": 6.195652173913043e-07, "loss": 1.1304, "step": 3307 }, { "epoch": 314.95238095238096, "eval_accuracy": 0.732484076433121, "eval_f1": 0.7872467222884386, "eval_loss": 0.5586660504341125, "eval_precision": 0.7290286975717439, "eval_recall": 0.8555699481865285, "eval_roc_auc": 0.7826174438687391, "eval_runtime": 1.1578, "eval_samples_per_second": 2305.332, "eval_steps_per_second": 5.182, "step": 3307 }, { "epoch": 315.04761904761904, "grad_norm": 10.430843353271484, "learning_rate": 6.19752623688156e-07, "loss": 1.0927, "step": 3308 }, { "epoch": 315.14285714285717, "grad_norm": 3.1264262199401855, "learning_rate": 6.199400299850076e-07, "loss": 1.1322, "step": 3309 }, { "epoch": 315.23809523809524, "grad_norm": 3.9145965576171875, "learning_rate": 6.201274362818591e-07, "loss": 1.138, "step": 3310 }, { "epoch": 315.3333333333333, "grad_norm": 3.780506134033203, "learning_rate": 6.203148425787107e-07, "loss": 1.146, "step": 3311 }, { "epoch": 315.42857142857144, "grad_norm": 2.8768436908721924, "learning_rate": 6.205022488755622e-07, "loss": 1.1304, "step": 3312 }, { "epoch": 315.5238095238095, "grad_norm": 4.919506549835205, "learning_rate": 6.206896551724138e-07, "loss": 1.1322, "step": 3313 }, { "epoch": 315.6190476190476, "grad_norm": 7.0171709060668945, "learning_rate": 6.208770614692653e-07, "loss": 1.0799, "step": 3314 }, { "epoch": 315.7142857142857, "grad_norm": 8.8346586227417, "learning_rate": 6.21064467766117e-07, "loss": 1.1269, "step": 3315 }, { "epoch": 315.8095238095238, "grad_norm": 11.773017883300781, "learning_rate": 6.212518740629685e-07, "loss": 1.1579, "step": 3316 }, { "epoch": 315.9047619047619, "grad_norm": 4.387534141540527, "learning_rate": 6.214392803598201e-07, "loss": 1.1322, "step": 3317 }, { "epoch": 316.0, "grad_norm": 4.816924571990967, "learning_rate": 6.216266866566716e-07, "loss": 1.1106, "step": 3318 }, { "epoch": 316.0, "eval_accuracy": 0.7279880104908205, "eval_f1": 0.7806646525679758, "eval_loss": 0.5587853193283081, "eval_precision": 0.7315968289920725, "eval_recall": 0.8367875647668394, "eval_roc_auc": 0.7817772020725389, "eval_runtime": 1.3043, "eval_samples_per_second": 2046.246, "eval_steps_per_second": 4.6, "step": 3318 }, { "epoch": 316.0952380952381, "grad_norm": 5.566372871398926, "learning_rate": 6.218140929535232e-07, "loss": 1.1183, "step": 3319 }, { "epoch": 316.1904761904762, "grad_norm": 6.3373942375183105, "learning_rate": 6.220014992503749e-07, "loss": 1.0724, "step": 3320 }, { "epoch": 316.2857142857143, "grad_norm": 7.2872233390808105, "learning_rate": 6.221889055472264e-07, "loss": 1.1399, "step": 3321 }, { "epoch": 316.3809523809524, "grad_norm": 7.598955154418945, "learning_rate": 6.223763118440781e-07, "loss": 1.1447, "step": 3322 }, { "epoch": 316.4761904761905, "grad_norm": 7.6095991134643555, "learning_rate": 6.225637181409296e-07, "loss": 1.1293, "step": 3323 }, { "epoch": 316.57142857142856, "grad_norm": 6.845755577087402, "learning_rate": 6.227511244377812e-07, "loss": 1.1348, "step": 3324 }, { "epoch": 316.6666666666667, "grad_norm": 3.707514762878418, "learning_rate": 6.229385307346327e-07, "loss": 1.125, "step": 3325 }, { "epoch": 316.76190476190476, "grad_norm": 8.177051544189453, "learning_rate": 6.231259370314843e-07, "loss": 1.1184, "step": 3326 }, { "epoch": 316.85714285714283, "grad_norm": 9.464016914367676, "learning_rate": 6.233133433283358e-07, "loss": 1.0847, "step": 3327 }, { "epoch": 316.95238095238096, "grad_norm": 9.221125602722168, "learning_rate": 6.235007496251874e-07, "loss": 1.1479, "step": 3328 }, { "epoch": 316.95238095238096, "eval_accuracy": 0.7272386661671038, "eval_f1": 0.7809867629362214, "eval_loss": 0.5587996244430542, "eval_precision": 0.7292134831460674, "eval_recall": 0.8406735751295337, "eval_roc_auc": 0.7818117443868737, "eval_runtime": 1.1582, "eval_samples_per_second": 2304.423, "eval_steps_per_second": 5.18, "step": 3328 }, { "epoch": 317.04761904761904, "grad_norm": 4.902645111083984, "learning_rate": 6.236881559220391e-07, "loss": 1.1764, "step": 3329 }, { "epoch": 317.14285714285717, "grad_norm": 4.456246852874756, "learning_rate": 6.238755622188906e-07, "loss": 1.112, "step": 3330 }, { "epoch": 317.23809523809524, "grad_norm": 11.598137855529785, "learning_rate": 6.240629685157422e-07, "loss": 1.0803, "step": 3331 }, { "epoch": 317.3333333333333, "grad_norm": 5.1275529861450195, "learning_rate": 6.242503748125937e-07, "loss": 1.1288, "step": 3332 }, { "epoch": 317.42857142857144, "grad_norm": 11.168167114257812, "learning_rate": 6.244377811094453e-07, "loss": 1.1478, "step": 3333 }, { "epoch": 317.5238095238095, "grad_norm": 4.918898105621338, "learning_rate": 6.246251874062968e-07, "loss": 1.1507, "step": 3334 }, { "epoch": 317.6190476190476, "grad_norm": 11.758259773254395, "learning_rate": 6.248125937031484e-07, "loss": 1.1648, "step": 3335 }, { "epoch": 317.7142857142857, "grad_norm": 3.0417706966400146, "learning_rate": 6.25e-07, "loss": 1.1398, "step": 3336 }, { "epoch": 317.8095238095238, "grad_norm": 9.044265747070312, "learning_rate": 6.251874062968516e-07, "loss": 1.128, "step": 3337 }, { "epoch": 317.9047619047619, "grad_norm": 6.247106075286865, "learning_rate": 6.253748125937032e-07, "loss": 1.13, "step": 3338 }, { "epoch": 318.0, "grad_norm": 11.326921463012695, "learning_rate": 6.255622188905547e-07, "loss": 1.0818, "step": 3339 }, { "epoch": 318.0, "eval_accuracy": 0.7279880104908205, "eval_f1": 0.7795992714025501, "eval_loss": 0.5579902529716492, "eval_precision": 0.7337142857142858, "eval_recall": 0.8316062176165803, "eval_roc_auc": 0.7827248128957974, "eval_runtime": 1.1928, "eval_samples_per_second": 2237.596, "eval_steps_per_second": 5.03, "step": 3339 }, { "epoch": 318.0952380952381, "grad_norm": 8.263168334960938, "learning_rate": 6.257496251874063e-07, "loss": 1.1696, "step": 3340 }, { "epoch": 318.1904761904762, "grad_norm": 5.750796794891357, "learning_rate": 6.259370314842578e-07, "loss": 1.1072, "step": 3341 }, { "epoch": 318.2857142857143, "grad_norm": 3.7694246768951416, "learning_rate": 6.261244377811095e-07, "loss": 1.1296, "step": 3342 }, { "epoch": 318.3809523809524, "grad_norm": 3.5072813034057617, "learning_rate": 6.26311844077961e-07, "loss": 1.1201, "step": 3343 }, { "epoch": 318.4761904761905, "grad_norm": 4.239100933074951, "learning_rate": 6.264992503748126e-07, "loss": 1.1158, "step": 3344 }, { "epoch": 318.57142857142856, "grad_norm": 9.425958633422852, "learning_rate": 6.266866566716643e-07, "loss": 1.0952, "step": 3345 }, { "epoch": 318.6666666666667, "grad_norm": 5.783169269561768, "learning_rate": 6.268740629685158e-07, "loss": 1.1676, "step": 3346 }, { "epoch": 318.76190476190476, "grad_norm": 4.611095428466797, "learning_rate": 6.270614692653674e-07, "loss": 1.0811, "step": 3347 }, { "epoch": 318.85714285714283, "grad_norm": 10.244481086730957, "learning_rate": 6.272488755622189e-07, "loss": 1.0756, "step": 3348 }, { "epoch": 318.95238095238096, "grad_norm": 15.405163764953613, "learning_rate": 6.274362818590705e-07, "loss": 1.161, "step": 3349 }, { "epoch": 318.95238095238096, "eval_accuracy": 0.7328587485949793, "eval_f1": 0.7859501651155809, "eval_loss": 0.5577879548072815, "eval_precision": 0.7325125909345271, "eval_recall": 0.8477979274611399, "eval_roc_auc": 0.783233448474381, "eval_runtime": 1.2591, "eval_samples_per_second": 2119.693, "eval_steps_per_second": 4.765, "step": 3349 }, { "epoch": 319.04761904761904, "grad_norm": 6.132430076599121, "learning_rate": 6.276236881559221e-07, "loss": 1.176, "step": 3350 }, { "epoch": 319.14285714285717, "grad_norm": 6.559388160705566, "learning_rate": 6.278110944527737e-07, "loss": 1.158, "step": 3351 }, { "epoch": 319.23809523809524, "grad_norm": 7.797235488891602, "learning_rate": 6.279985007496252e-07, "loss": 1.1378, "step": 3352 }, { "epoch": 319.3333333333333, "grad_norm": 6.024532794952393, "learning_rate": 6.281859070464768e-07, "loss": 1.1724, "step": 3353 }, { "epoch": 319.42857142857144, "grad_norm": 6.553297996520996, "learning_rate": 6.283733133433283e-07, "loss": 1.1502, "step": 3354 }, { "epoch": 319.5238095238095, "grad_norm": 5.962327480316162, "learning_rate": 6.285607196401799e-07, "loss": 1.1233, "step": 3355 }, { "epoch": 319.6190476190476, "grad_norm": 5.1203837394714355, "learning_rate": 6.287481259370315e-07, "loss": 1.1058, "step": 3356 }, { "epoch": 319.7142857142857, "grad_norm": 5.673627853393555, "learning_rate": 6.289355322338831e-07, "loss": 1.1309, "step": 3357 }, { "epoch": 319.8095238095238, "grad_norm": 6.080491065979004, "learning_rate": 6.291229385307347e-07, "loss": 1.1293, "step": 3358 }, { "epoch": 319.9047619047619, "grad_norm": 10.415179252624512, "learning_rate": 6.293103448275862e-07, "loss": 1.1229, "step": 3359 }, { "epoch": 320.0, "grad_norm": 6.787259578704834, "learning_rate": 6.294977511244378e-07, "loss": 1.092, "step": 3360 }, { "epoch": 320.0, "eval_accuracy": 0.7321094042712627, "eval_f1": 0.7858640311470501, "eval_loss": 0.5576049089431763, "eval_precision": 0.7309192200557103, "eval_recall": 0.8497409326424871, "eval_roc_auc": 0.7837668393782383, "eval_runtime": 1.3345, "eval_samples_per_second": 2000.012, "eval_steps_per_second": 4.496, "step": 3360 }, { "epoch": 320.0952380952381, "grad_norm": 10.310934066772461, "learning_rate": 6.296851574212893e-07, "loss": 1.1127, "step": 3361 }, { "epoch": 320.1904761904762, "grad_norm": 10.253336906433105, "learning_rate": 6.298725637181409e-07, "loss": 1.1832, "step": 3362 }, { "epoch": 320.2857142857143, "grad_norm": 7.179525852203369, "learning_rate": 6.300599700149924e-07, "loss": 1.111, "step": 3363 }, { "epoch": 320.3809523809524, "grad_norm": 6.53006649017334, "learning_rate": 6.302473763118441e-07, "loss": 1.1105, "step": 3364 }, { "epoch": 320.4761904761905, "grad_norm": 7.752871513366699, "learning_rate": 6.304347826086957e-07, "loss": 1.1257, "step": 3365 }, { "epoch": 320.57142857142856, "grad_norm": 9.54736042022705, "learning_rate": 6.306221889055472e-07, "loss": 1.1221, "step": 3366 }, { "epoch": 320.6666666666667, "grad_norm": 12.933712005615234, "learning_rate": 6.308095952023989e-07, "loss": 1.1537, "step": 3367 }, { "epoch": 320.76190476190476, "grad_norm": 14.127771377563477, "learning_rate": 6.309970014992504e-07, "loss": 1.131, "step": 3368 }, { "epoch": 320.85714285714283, "grad_norm": 10.824623107910156, "learning_rate": 6.31184407796102e-07, "loss": 1.1564, "step": 3369 }, { "epoch": 320.95238095238096, "grad_norm": 7.595580577850342, "learning_rate": 6.313718140929535e-07, "loss": 1.114, "step": 3370 }, { "epoch": 320.95238095238096, "eval_accuracy": 0.7328587485949793, "eval_f1": 0.7854348480288895, "eval_loss": 0.5570670962333679, "eval_precision": 0.7335581787521079, "eval_recall": 0.8452072538860104, "eval_roc_auc": 0.7843957973517559, "eval_runtime": 1.1426, "eval_samples_per_second": 2335.936, "eval_steps_per_second": 5.251, "step": 3370 }, { "epoch": 321.04761904761904, "grad_norm": 4.967525005340576, "learning_rate": 6.315592203898052e-07, "loss": 1.1011, "step": 3371 }, { "epoch": 321.14285714285717, "grad_norm": 6.865474224090576, "learning_rate": 6.317466266866568e-07, "loss": 1.1482, "step": 3372 }, { "epoch": 321.23809523809524, "grad_norm": 5.231659412384033, "learning_rate": 6.319340329835083e-07, "loss": 1.1513, "step": 3373 }, { "epoch": 321.3333333333333, "grad_norm": 8.249855995178223, "learning_rate": 6.321214392803599e-07, "loss": 1.1368, "step": 3374 }, { "epoch": 321.42857142857144, "grad_norm": 5.477447032928467, "learning_rate": 6.323088455772114e-07, "loss": 1.1297, "step": 3375 }, { "epoch": 321.5238095238095, "grad_norm": 7.059687614440918, "learning_rate": 6.32496251874063e-07, "loss": 1.089, "step": 3376 }, { "epoch": 321.6190476190476, "grad_norm": 7.401088714599609, "learning_rate": 6.326836581709145e-07, "loss": 1.1318, "step": 3377 }, { "epoch": 321.7142857142857, "grad_norm": 7.332993984222412, "learning_rate": 6.328710644677662e-07, "loss": 1.1427, "step": 3378 }, { "epoch": 321.8095238095238, "grad_norm": 5.104832649230957, "learning_rate": 6.330584707646177e-07, "loss": 1.129, "step": 3379 }, { "epoch": 321.9047619047619, "grad_norm": 5.148171424865723, "learning_rate": 6.332458770614693e-07, "loss": 1.1143, "step": 3380 }, { "epoch": 322.0, "grad_norm": 12.024181365966797, "learning_rate": 6.334332833583208e-07, "loss": 1.1546, "step": 3381 }, { "epoch": 322.0, "eval_accuracy": 0.729486699138254, "eval_f1": 0.7810794420861128, "eval_loss": 0.5574794411659241, "eval_precision": 0.734321550741163, "eval_recall": 0.8341968911917098, "eval_roc_auc": 0.7835354058721935, "eval_runtime": 1.1773, "eval_samples_per_second": 2266.969, "eval_steps_per_second": 5.096, "step": 3381 }, { "epoch": 322.0952380952381, "grad_norm": 8.503469467163086, "learning_rate": 6.336206896551724e-07, "loss": 1.1533, "step": 3382 }, { "epoch": 322.1904761904762, "grad_norm": 13.482050895690918, "learning_rate": 6.33808095952024e-07, "loss": 1.0889, "step": 3383 }, { "epoch": 322.2857142857143, "grad_norm": 3.6773922443389893, "learning_rate": 6.339955022488755e-07, "loss": 1.1106, "step": 3384 }, { "epoch": 322.3809523809524, "grad_norm": 9.904181480407715, "learning_rate": 6.341829085457272e-07, "loss": 1.1195, "step": 3385 }, { "epoch": 322.4761904761905, "grad_norm": 11.262557029724121, "learning_rate": 6.343703148425787e-07, "loss": 1.0854, "step": 3386 }, { "epoch": 322.57142857142856, "grad_norm": 8.886178016662598, "learning_rate": 6.345577211394303e-07, "loss": 1.1319, "step": 3387 }, { "epoch": 322.6666666666667, "grad_norm": 10.291092872619629, "learning_rate": 6.347451274362818e-07, "loss": 1.1216, "step": 3388 }, { "epoch": 322.76190476190476, "grad_norm": 8.690281867980957, "learning_rate": 6.349325337331335e-07, "loss": 1.1387, "step": 3389 }, { "epoch": 322.85714285714283, "grad_norm": 5.215178966522217, "learning_rate": 6.35119940029985e-07, "loss": 1.1351, "step": 3390 }, { "epoch": 322.95238095238096, "grad_norm": 3.838747501373291, "learning_rate": 6.353073463268366e-07, "loss": 1.1343, "step": 3391 }, { "epoch": 322.95238095238096, "eval_accuracy": 0.729486699138254, "eval_f1": 0.782922429344558, "eval_loss": 0.5572158694267273, "eval_precision": 0.7306397306397306, "eval_recall": 0.8432642487046632, "eval_roc_auc": 0.7839916522740358, "eval_runtime": 1.2145, "eval_samples_per_second": 2197.652, "eval_steps_per_second": 4.94, "step": 3391 }, { "epoch": 323.04761904761904, "grad_norm": 5.711175441741943, "learning_rate": 6.354947526236883e-07, "loss": 1.1285, "step": 3392 }, { "epoch": 323.14285714285717, "grad_norm": 7.926575183868408, "learning_rate": 6.356821589205398e-07, "loss": 1.1537, "step": 3393 }, { "epoch": 323.23809523809524, "grad_norm": 8.844377517700195, "learning_rate": 6.358695652173914e-07, "loss": 1.1409, "step": 3394 }, { "epoch": 323.3333333333333, "grad_norm": 13.982290267944336, "learning_rate": 6.360569715142429e-07, "loss": 1.1452, "step": 3395 }, { "epoch": 323.42857142857144, "grad_norm": 6.307914733886719, "learning_rate": 6.362443778110945e-07, "loss": 1.0981, "step": 3396 }, { "epoch": 323.5238095238095, "grad_norm": 3.3946173191070557, "learning_rate": 6.36431784107946e-07, "loss": 1.0955, "step": 3397 }, { "epoch": 323.6190476190476, "grad_norm": 13.919370651245117, "learning_rate": 6.366191904047976e-07, "loss": 1.1081, "step": 3398 }, { "epoch": 323.7142857142857, "grad_norm": 9.541321754455566, "learning_rate": 6.368065967016493e-07, "loss": 1.159, "step": 3399 }, { "epoch": 323.8095238095238, "grad_norm": 7.30950927734375, "learning_rate": 6.369940029985008e-07, "loss": 1.1319, "step": 3400 }, { "epoch": 323.9047619047619, "grad_norm": 4.72685432434082, "learning_rate": 6.371814092953524e-07, "loss": 1.1306, "step": 3401 }, { "epoch": 324.0, "grad_norm": 7.5063090324401855, "learning_rate": 6.373688155922039e-07, "loss": 1.1321, "step": 3402 }, { "epoch": 324.0, "eval_accuracy": 0.7306107156238292, "eval_f1": 0.7824508320726172, "eval_loss": 0.5564565658569336, "eval_precision": 0.7342419080068143, "eval_recall": 0.8374352331606217, "eval_roc_auc": 0.7845097869890616, "eval_runtime": 1.2683, "eval_samples_per_second": 2104.47, "eval_steps_per_second": 4.731, "step": 3402 }, { "epoch": 324.0952380952381, "grad_norm": 11.140090942382812, "learning_rate": 6.375562218890555e-07, "loss": 1.1572, "step": 3403 }, { "epoch": 324.1904761904762, "grad_norm": 4.548201560974121, "learning_rate": 6.37743628185907e-07, "loss": 1.1466, "step": 3404 }, { "epoch": 324.2857142857143, "grad_norm": 10.859220504760742, "learning_rate": 6.379310344827587e-07, "loss": 1.0967, "step": 3405 }, { "epoch": 324.3809523809524, "grad_norm": 6.48252534866333, "learning_rate": 6.381184407796102e-07, "loss": 1.1105, "step": 3406 }, { "epoch": 324.4761904761905, "grad_norm": 5.144731044769287, "learning_rate": 6.383058470764618e-07, "loss": 1.1343, "step": 3407 }, { "epoch": 324.57142857142856, "grad_norm": 8.819598197937012, "learning_rate": 6.384932533733134e-07, "loss": 1.1031, "step": 3408 }, { "epoch": 324.6666666666667, "grad_norm": 3.8726017475128174, "learning_rate": 6.386806596701649e-07, "loss": 1.1425, "step": 3409 }, { "epoch": 324.76190476190476, "grad_norm": 9.518330574035645, "learning_rate": 6.388680659670165e-07, "loss": 1.0801, "step": 3410 }, { "epoch": 324.85714285714283, "grad_norm": 10.46760368347168, "learning_rate": 6.39055472263868e-07, "loss": 1.1716, "step": 3411 }, { "epoch": 324.95238095238096, "grad_norm": 6.5744524002075195, "learning_rate": 6.392428785607197e-07, "loss": 1.1494, "step": 3412 }, { "epoch": 324.95238095238096, "eval_accuracy": 0.7287373548145373, "eval_f1": 0.781664656212304, "eval_loss": 0.5566065311431885, "eval_precision": 0.7313769751693002, "eval_recall": 0.8393782383419689, "eval_roc_auc": 0.7841865284974092, "eval_runtime": 1.1265, "eval_samples_per_second": 2369.223, "eval_steps_per_second": 5.326, "step": 3412 }, { "epoch": 325.04761904761904, "grad_norm": 5.198469161987305, "learning_rate": 6.394302848575712e-07, "loss": 1.1435, "step": 3413 }, { "epoch": 325.14285714285717, "grad_norm": 7.31497049331665, "learning_rate": 6.396176911544229e-07, "loss": 1.1152, "step": 3414 }, { "epoch": 325.23809523809524, "grad_norm": 3.6078991889953613, "learning_rate": 6.398050974512744e-07, "loss": 1.1313, "step": 3415 }, { "epoch": 325.3333333333333, "grad_norm": 15.786949157714844, "learning_rate": 6.39992503748126e-07, "loss": 1.1381, "step": 3416 }, { "epoch": 325.42857142857144, "grad_norm": 7.494317054748535, "learning_rate": 6.401799100449776e-07, "loss": 1.1206, "step": 3417 }, { "epoch": 325.5238095238095, "grad_norm": 6.337263584136963, "learning_rate": 6.403673163418291e-07, "loss": 1.1413, "step": 3418 }, { "epoch": 325.6190476190476, "grad_norm": 9.863685607910156, "learning_rate": 6.405547226386808e-07, "loss": 1.1607, "step": 3419 }, { "epoch": 325.7142857142857, "grad_norm": 5.73906946182251, "learning_rate": 6.407421289355323e-07, "loss": 1.091, "step": 3420 }, { "epoch": 325.8095238095238, "grad_norm": 5.083852291107178, "learning_rate": 6.409295352323839e-07, "loss": 1.1101, "step": 3421 }, { "epoch": 325.9047619047619, "grad_norm": 5.438633441925049, "learning_rate": 6.411169415292354e-07, "loss": 1.0855, "step": 3422 }, { "epoch": 326.0, "grad_norm": 17.124401092529297, "learning_rate": 6.41304347826087e-07, "loss": 1.1609, "step": 3423 }, { "epoch": 326.0, "eval_accuracy": 0.7309853877856876, "eval_f1": 0.7850299401197605, "eval_loss": 0.5572195649147034, "eval_precision": 0.7299554565701559, "eval_recall": 0.8490932642487047, "eval_roc_auc": 0.7838831318364998, "eval_runtime": 1.1314, "eval_samples_per_second": 2359.053, "eval_steps_per_second": 5.303, "step": 3423 }, { "epoch": 326.0952380952381, "grad_norm": 5.153423309326172, "learning_rate": 6.414917541229385e-07, "loss": 1.0732, "step": 3424 }, { "epoch": 326.1904761904762, "grad_norm": 5.137463092803955, "learning_rate": 6.416791604197901e-07, "loss": 1.1078, "step": 3425 }, { "epoch": 326.2857142857143, "grad_norm": 6.32768440246582, "learning_rate": 6.418665667166418e-07, "loss": 1.159, "step": 3426 }, { "epoch": 326.3809523809524, "grad_norm": 6.702006816864014, "learning_rate": 6.420539730134933e-07, "loss": 1.1379, "step": 3427 }, { "epoch": 326.4761904761905, "grad_norm": 5.366683483123779, "learning_rate": 6.422413793103449e-07, "loss": 1.1531, "step": 3428 }, { "epoch": 326.57142857142856, "grad_norm": 6.240462779998779, "learning_rate": 6.424287856071964e-07, "loss": 1.1293, "step": 3429 }, { "epoch": 326.6666666666667, "grad_norm": 6.877244472503662, "learning_rate": 6.42616191904048e-07, "loss": 1.1515, "step": 3430 }, { "epoch": 326.76190476190476, "grad_norm": 5.66732120513916, "learning_rate": 6.428035982008995e-07, "loss": 1.1392, "step": 3431 }, { "epoch": 326.85714285714283, "grad_norm": 4.892309188842773, "learning_rate": 6.429910044977511e-07, "loss": 1.0918, "step": 3432 }, { "epoch": 326.95238095238096, "grad_norm": 7.027184963226318, "learning_rate": 6.431784107946027e-07, "loss": 1.164, "step": 3433 }, { "epoch": 326.95238095238096, "eval_accuracy": 0.732484076433121, "eval_f1": 0.7851985559566786, "eval_loss": 0.5570408701896667, "eval_precision": 0.7331460674157303, "eval_recall": 0.8452072538860104, "eval_roc_auc": 0.7840287852619459, "eval_runtime": 1.1261, "eval_samples_per_second": 2370.097, "eval_steps_per_second": 5.328, "step": 3433 }, { "epoch": 327.04761904761904, "grad_norm": 7.328666687011719, "learning_rate": 6.433658170914543e-07, "loss": 1.1519, "step": 3434 }, { "epoch": 327.14285714285717, "grad_norm": 8.158414840698242, "learning_rate": 6.435532233883059e-07, "loss": 1.1626, "step": 3435 }, { "epoch": 327.23809523809524, "grad_norm": 7.522820949554443, "learning_rate": 6.437406296851574e-07, "loss": 1.1562, "step": 3436 }, { "epoch": 327.3333333333333, "grad_norm": 5.471183776855469, "learning_rate": 6.439280359820091e-07, "loss": 1.1115, "step": 3437 }, { "epoch": 327.42857142857144, "grad_norm": 13.796281814575195, "learning_rate": 6.441154422788606e-07, "loss": 1.0954, "step": 3438 }, { "epoch": 327.5238095238095, "grad_norm": 9.086668014526367, "learning_rate": 6.443028485757122e-07, "loss": 1.1177, "step": 3439 }, { "epoch": 327.6190476190476, "grad_norm": 6.380118370056152, "learning_rate": 6.444902548725638e-07, "loss": 1.164, "step": 3440 }, { "epoch": 327.7142857142857, "grad_norm": 10.345011711120605, "learning_rate": 6.446776611694154e-07, "loss": 1.1433, "step": 3441 }, { "epoch": 327.8095238095238, "grad_norm": 4.250133991241455, "learning_rate": 6.448650674662669e-07, "loss": 1.1391, "step": 3442 }, { "epoch": 327.9047619047619, "grad_norm": 7.399500370025635, "learning_rate": 6.450524737631185e-07, "loss": 1.1499, "step": 3443 }, { "epoch": 328.0, "grad_norm": 5.87099027633667, "learning_rate": 6.4523988005997e-07, "loss": 1.1016, "step": 3444 }, { "epoch": 328.0, "eval_accuracy": 0.7343574372424129, "eval_f1": 0.7850863898150955, "eval_loss": 0.5564801692962646, "eval_precision": 0.7378917378917379, "eval_recall": 0.8387305699481865, "eval_roc_auc": 0.784690846286701, "eval_runtime": 1.1322, "eval_samples_per_second": 2357.292, "eval_steps_per_second": 5.299, "step": 3444 }, { "epoch": 328.0952380952381, "grad_norm": 5.727064609527588, "learning_rate": 6.454272863568216e-07, "loss": 1.1292, "step": 3445 }, { "epoch": 328.1904761904762, "grad_norm": 4.016811370849609, "learning_rate": 6.456146926536732e-07, "loss": 1.1085, "step": 3446 }, { "epoch": 328.2857142857143, "grad_norm": 6.0340352058410645, "learning_rate": 6.458020989505248e-07, "loss": 1.0875, "step": 3447 }, { "epoch": 328.3809523809524, "grad_norm": 4.349868297576904, "learning_rate": 6.459895052473764e-07, "loss": 1.1237, "step": 3448 }, { "epoch": 328.4761904761905, "grad_norm": 9.41535472869873, "learning_rate": 6.461769115442279e-07, "loss": 1.1675, "step": 3449 }, { "epoch": 328.57142857142856, "grad_norm": 10.782812118530273, "learning_rate": 6.463643178410795e-07, "loss": 1.1496, "step": 3450 }, { "epoch": 328.6666666666667, "grad_norm": 4.990645408630371, "learning_rate": 6.46551724137931e-07, "loss": 1.1289, "step": 3451 }, { "epoch": 328.76190476190476, "grad_norm": 5.7273850440979, "learning_rate": 6.467391304347826e-07, "loss": 1.1303, "step": 3452 }, { "epoch": 328.85714285714283, "grad_norm": 3.955021858215332, "learning_rate": 6.469265367316341e-07, "loss": 1.1059, "step": 3453 }, { "epoch": 328.95238095238096, "grad_norm": 6.905742645263672, "learning_rate": 6.471139430284858e-07, "loss": 1.1573, "step": 3454 }, { "epoch": 328.95238095238096, "eval_accuracy": 0.7351067815661296, "eval_f1": 0.7885133113969488, "eval_loss": 0.5563375949859619, "eval_precision": 0.7326292384658143, "eval_recall": 0.8536269430051814, "eval_roc_auc": 0.7854332181922854, "eval_runtime": 1.1084, "eval_samples_per_second": 2408.01, "eval_steps_per_second": 5.413, "step": 3454 }, { "epoch": 329.04761904761904, "grad_norm": 14.890825271606445, "learning_rate": 6.473013493253374e-07, "loss": 1.1772, "step": 3455 }, { "epoch": 329.14285714285717, "grad_norm": 11.016300201416016, "learning_rate": 6.474887556221889e-07, "loss": 1.0925, "step": 3456 }, { "epoch": 329.23809523809524, "grad_norm": 4.1203227043151855, "learning_rate": 6.476761619190405e-07, "loss": 1.093, "step": 3457 }, { "epoch": 329.3333333333333, "grad_norm": 5.667579174041748, "learning_rate": 6.47863568215892e-07, "loss": 1.1552, "step": 3458 }, { "epoch": 329.42857142857144, "grad_norm": 9.598355293273926, "learning_rate": 6.480509745127437e-07, "loss": 1.1022, "step": 3459 }, { "epoch": 329.5238095238095, "grad_norm": 15.727906227111816, "learning_rate": 6.482383808095952e-07, "loss": 1.1737, "step": 3460 }, { "epoch": 329.6190476190476, "grad_norm": 5.195409774780273, "learning_rate": 6.484257871064468e-07, "loss": 1.1394, "step": 3461 }, { "epoch": 329.7142857142857, "grad_norm": 8.702081680297852, "learning_rate": 6.486131934032985e-07, "loss": 1.1333, "step": 3462 }, { "epoch": 329.8095238095238, "grad_norm": 5.054378986358643, "learning_rate": 6.4880059970015e-07, "loss": 1.1154, "step": 3463 }, { "epoch": 329.9047619047619, "grad_norm": 5.856356143951416, "learning_rate": 6.489880059970016e-07, "loss": 1.1472, "step": 3464 }, { "epoch": 330.0, "grad_norm": 6.509719371795654, "learning_rate": 6.491754122938531e-07, "loss": 1.1341, "step": 3465 }, { "epoch": 330.0, "eval_accuracy": 0.7336080929186961, "eval_f1": 0.784480145498636, "eval_loss": 0.5562961101531982, "eval_precision": 0.7373219373219373, "eval_recall": 0.8380829015544041, "eval_roc_auc": 0.7849631548647092, "eval_runtime": 1.1231, "eval_samples_per_second": 2376.361, "eval_steps_per_second": 5.342, "step": 3465 }, { "epoch": 330.0952380952381, "grad_norm": 8.819149017333984, "learning_rate": 6.493628185907047e-07, "loss": 1.1193, "step": 3466 }, { "epoch": 330.1904761904762, "grad_norm": 5.841251373291016, "learning_rate": 6.495502248875562e-07, "loss": 1.1278, "step": 3467 }, { "epoch": 330.2857142857143, "grad_norm": 7.35856294631958, "learning_rate": 6.497376311844079e-07, "loss": 1.1653, "step": 3468 }, { "epoch": 330.3809523809524, "grad_norm": 10.730827331542969, "learning_rate": 6.499250374812594e-07, "loss": 1.098, "step": 3469 }, { "epoch": 330.4761904761905, "grad_norm": 13.916901588439941, "learning_rate": 6.50112443778111e-07, "loss": 1.1301, "step": 3470 }, { "epoch": 330.57142857142856, "grad_norm": 12.171894073486328, "learning_rate": 6.502998500749626e-07, "loss": 1.1151, "step": 3471 }, { "epoch": 330.6666666666667, "grad_norm": 4.601052284240723, "learning_rate": 6.504872563718141e-07, "loss": 1.1187, "step": 3472 }, { "epoch": 330.76190476190476, "grad_norm": 6.772599220275879, "learning_rate": 6.506746626686657e-07, "loss": 1.1054, "step": 3473 }, { "epoch": 330.85714285714283, "grad_norm": 15.050695419311523, "learning_rate": 6.508620689655172e-07, "loss": 1.12, "step": 3474 }, { "epoch": 330.95238095238096, "grad_norm": 4.695240020751953, "learning_rate": 6.510494752623689e-07, "loss": 1.1394, "step": 3475 }, { "epoch": 330.95238095238096, "eval_accuracy": 0.735481453727988, "eval_f1": 0.7876052948255114, "eval_loss": 0.5557202100753784, "eval_precision": 0.7353932584269663, "eval_recall": 0.8477979274611399, "eval_roc_auc": 0.7858235463442717, "eval_runtime": 1.1291, "eval_samples_per_second": 2363.921, "eval_steps_per_second": 5.314, "step": 3475 }, { "epoch": 331.04761904761904, "grad_norm": 8.036359786987305, "learning_rate": 6.512368815592204e-07, "loss": 1.1233, "step": 3476 }, { "epoch": 331.14285714285717, "grad_norm": 7.4813995361328125, "learning_rate": 6.51424287856072e-07, "loss": 1.1522, "step": 3477 }, { "epoch": 331.23809523809524, "grad_norm": 6.448642253875732, "learning_rate": 6.516116941529235e-07, "loss": 1.1023, "step": 3478 }, { "epoch": 331.3333333333333, "grad_norm": 5.55362606048584, "learning_rate": 6.517991004497751e-07, "loss": 1.1096, "step": 3479 }, { "epoch": 331.42857142857144, "grad_norm": 4.546058177947998, "learning_rate": 6.519865067466266e-07, "loss": 1.137, "step": 3480 }, { "epoch": 331.5238095238095, "grad_norm": 4.071013450622559, "learning_rate": 6.521739130434782e-07, "loss": 1.147, "step": 3481 }, { "epoch": 331.6190476190476, "grad_norm": 5.781118392944336, "learning_rate": 6.523613193403299e-07, "loss": 1.1107, "step": 3482 }, { "epoch": 331.7142857142857, "grad_norm": 5.479564189910889, "learning_rate": 6.525487256371814e-07, "loss": 1.141, "step": 3483 }, { "epoch": 331.8095238095238, "grad_norm": 4.575967311859131, "learning_rate": 6.527361319340331e-07, "loss": 1.1133, "step": 3484 }, { "epoch": 331.9047619047619, "grad_norm": 6.548676490783691, "learning_rate": 6.529235382308846e-07, "loss": 1.0983, "step": 3485 }, { "epoch": 332.0, "grad_norm": 15.194136619567871, "learning_rate": 6.531109445277362e-07, "loss": 1.1231, "step": 3486 }, { "epoch": 332.0, "eval_accuracy": 0.7366054702135632, "eval_f1": 0.7865168539325843, "eval_loss": 0.5558101534843445, "eval_precision": 0.740423098913665, "eval_recall": 0.8387305699481865, "eval_roc_auc": 0.7858981001727117, "eval_runtime": 1.1449, "eval_samples_per_second": 2331.269, "eval_steps_per_second": 5.241, "step": 3486 }, { "epoch": 332.0952380952381, "grad_norm": 5.636200904846191, "learning_rate": 6.532983508245877e-07, "loss": 1.1396, "step": 3487 }, { "epoch": 332.1904761904762, "grad_norm": 13.208602905273438, "learning_rate": 6.534857571214393e-07, "loss": 1.1486, "step": 3488 }, { "epoch": 332.2857142857143, "grad_norm": 8.23987865447998, "learning_rate": 6.53673163418291e-07, "loss": 1.0966, "step": 3489 }, { "epoch": 332.3809523809524, "grad_norm": 6.295897006988525, "learning_rate": 6.538605697151425e-07, "loss": 1.1478, "step": 3490 }, { "epoch": 332.4761904761905, "grad_norm": 4.879843711853027, "learning_rate": 6.540479760119941e-07, "loss": 1.0825, "step": 3491 }, { "epoch": 332.57142857142856, "grad_norm": 2.976145029067993, "learning_rate": 6.542353823088456e-07, "loss": 1.1143, "step": 3492 }, { "epoch": 332.6666666666667, "grad_norm": 15.933645248413086, "learning_rate": 6.544227886056972e-07, "loss": 1.1543, "step": 3493 }, { "epoch": 332.76190476190476, "grad_norm": 4.494801998138428, "learning_rate": 6.546101949025487e-07, "loss": 1.089, "step": 3494 }, { "epoch": 332.85714285714283, "grad_norm": 5.822324275970459, "learning_rate": 6.547976011994003e-07, "loss": 1.1265, "step": 3495 }, { "epoch": 332.95238095238096, "grad_norm": 8.572771072387695, "learning_rate": 6.549850074962519e-07, "loss": 1.125, "step": 3496 }, { "epoch": 332.95238095238096, "eval_accuracy": 0.7373548145372799, "eval_f1": 0.7905587092919032, "eval_loss": 0.5562298893928528, "eval_precision": 0.7337770382695508, "eval_recall": 0.8568652849740933, "eval_roc_auc": 0.7858097294185377, "eval_runtime": 1.1423, "eval_samples_per_second": 2336.425, "eval_steps_per_second": 5.252, "step": 3496 }, { "epoch": 333.04761904761904, "grad_norm": 11.127102851867676, "learning_rate": 6.551724137931035e-07, "loss": 1.2003, "step": 3497 }, { "epoch": 333.14285714285717, "grad_norm": 5.642254829406738, "learning_rate": 6.553598200899551e-07, "loss": 1.0894, "step": 3498 }, { "epoch": 333.23809523809524, "grad_norm": 6.612725257873535, "learning_rate": 6.555472263868066e-07, "loss": 1.1221, "step": 3499 }, { "epoch": 333.3333333333333, "grad_norm": 17.96694564819336, "learning_rate": 6.557346326836582e-07, "loss": 1.1726, "step": 3500 }, { "epoch": 333.42857142857144, "grad_norm": 3.2642133235931396, "learning_rate": 6.559220389805097e-07, "loss": 1.1022, "step": 3501 }, { "epoch": 333.5238095238095, "grad_norm": 6.946366786956787, "learning_rate": 6.561094452773613e-07, "loss": 1.1372, "step": 3502 }, { "epoch": 333.6190476190476, "grad_norm": 6.836911201477051, "learning_rate": 6.562968515742129e-07, "loss": 1.0981, "step": 3503 }, { "epoch": 333.7142857142857, "grad_norm": 6.185560703277588, "learning_rate": 6.564842578710645e-07, "loss": 1.1081, "step": 3504 }, { "epoch": 333.8095238095238, "grad_norm": 11.273193359375, "learning_rate": 6.56671664167916e-07, "loss": 1.121, "step": 3505 }, { "epoch": 333.9047619047619, "grad_norm": 5.383264064788818, "learning_rate": 6.568590704647676e-07, "loss": 1.1328, "step": 3506 }, { "epoch": 334.0, "grad_norm": 8.786903381347656, "learning_rate": 6.570464767616193e-07, "loss": 1.1291, "step": 3507 }, { "epoch": 334.0, "eval_accuracy": 0.7347321094042712, "eval_f1": 0.7858439201451906, "eval_loss": 0.5563549399375916, "eval_precision": 0.7372304199772985, "eval_recall": 0.841321243523316, "eval_roc_auc": 0.784830742659758, "eval_runtime": 1.1532, "eval_samples_per_second": 2314.338, "eval_steps_per_second": 5.203, "step": 3507 }, { "epoch": 334.0952380952381, "grad_norm": 8.342055320739746, "learning_rate": 6.572338830584708e-07, "loss": 1.1618, "step": 3508 }, { "epoch": 334.1904761904762, "grad_norm": 5.02683162689209, "learning_rate": 6.574212893553224e-07, "loss": 1.1333, "step": 3509 }, { "epoch": 334.2857142857143, "grad_norm": 12.340161323547363, "learning_rate": 6.57608695652174e-07, "loss": 1.1035, "step": 3510 }, { "epoch": 334.3809523809524, "grad_norm": 4.038184642791748, "learning_rate": 6.577961019490256e-07, "loss": 1.1365, "step": 3511 }, { "epoch": 334.4761904761905, "grad_norm": 5.561829566955566, "learning_rate": 6.579835082458771e-07, "loss": 1.0896, "step": 3512 }, { "epoch": 334.57142857142856, "grad_norm": 6.000752925872803, "learning_rate": 6.581709145427287e-07, "loss": 1.108, "step": 3513 }, { "epoch": 334.6666666666667, "grad_norm": 7.689496994018555, "learning_rate": 6.583583208395802e-07, "loss": 1.1406, "step": 3514 }, { "epoch": 334.76190476190476, "grad_norm": 8.802385330200195, "learning_rate": 6.585457271364318e-07, "loss": 1.1199, "step": 3515 }, { "epoch": 334.85714285714283, "grad_norm": 5.754329681396484, "learning_rate": 6.587331334332834e-07, "loss": 1.1222, "step": 3516 }, { "epoch": 334.95238095238096, "grad_norm": 5.224997043609619, "learning_rate": 6.58920539730135e-07, "loss": 1.0855, "step": 3517 }, { "epoch": 334.95238095238096, "eval_accuracy": 0.7388535031847133, "eval_f1": 0.792621243677477, "eval_loss": 0.5562059283256531, "eval_precision": 0.7330764997248211, "eval_recall": 0.8626943005181347, "eval_roc_auc": 0.7856744386873922, "eval_runtime": 1.1406, "eval_samples_per_second": 2339.928, "eval_steps_per_second": 5.26, "step": 3517 }, { "epoch": 335.04761904761904, "grad_norm": 9.480507850646973, "learning_rate": 6.591079460269866e-07, "loss": 1.146, "step": 3518 }, { "epoch": 335.14285714285717, "grad_norm": 9.278295516967773, "learning_rate": 6.592953523238381e-07, "loss": 1.1548, "step": 3519 }, { "epoch": 335.23809523809524, "grad_norm": 3.4852707386016846, "learning_rate": 6.594827586206897e-07, "loss": 1.1137, "step": 3520 }, { "epoch": 335.3333333333333, "grad_norm": 16.312044143676758, "learning_rate": 6.596701649175412e-07, "loss": 1.0929, "step": 3521 }, { "epoch": 335.42857142857144, "grad_norm": 10.9599027633667, "learning_rate": 6.598575712143928e-07, "loss": 1.1378, "step": 3522 }, { "epoch": 335.5238095238095, "grad_norm": 4.634957313537598, "learning_rate": 6.600449775112443e-07, "loss": 1.1423, "step": 3523 }, { "epoch": 335.6190476190476, "grad_norm": 5.783304214477539, "learning_rate": 6.60232383808096e-07, "loss": 1.103, "step": 3524 }, { "epoch": 335.7142857142857, "grad_norm": 14.14034652709961, "learning_rate": 6.604197901049476e-07, "loss": 1.1274, "step": 3525 }, { "epoch": 335.8095238095238, "grad_norm": 7.4911789894104, "learning_rate": 6.606071964017991e-07, "loss": 1.1116, "step": 3526 }, { "epoch": 335.9047619047619, "grad_norm": 7.270888328552246, "learning_rate": 6.607946026986507e-07, "loss": 1.1475, "step": 3527 }, { "epoch": 336.0, "grad_norm": 10.573918342590332, "learning_rate": 6.609820089955022e-07, "loss": 1.1337, "step": 3528 }, { "epoch": 336.0, "eval_accuracy": 0.7309853877856876, "eval_f1": 0.780562347188264, "eval_loss": 0.5544623732566833, "eval_precision": 0.7390046296296297, "eval_recall": 0.8270725388601037, "eval_roc_auc": 0.7872259643062751, "eval_runtime": 1.1055, "eval_samples_per_second": 2414.265, "eval_steps_per_second": 5.427, "step": 3528 }, { "epoch": 336.0952380952381, "grad_norm": 14.302084922790527, "learning_rate": 6.611694152923539e-07, "loss": 1.1146, "step": 3529 }, { "epoch": 336.1904761904762, "grad_norm": 7.01728630065918, "learning_rate": 6.613568215892054e-07, "loss": 1.1553, "step": 3530 }, { "epoch": 336.2857142857143, "grad_norm": 8.606803894042969, "learning_rate": 6.61544227886057e-07, "loss": 1.0986, "step": 3531 }, { "epoch": 336.3809523809524, "grad_norm": 5.691741466522217, "learning_rate": 6.617316341829086e-07, "loss": 1.1194, "step": 3532 }, { "epoch": 336.4761904761905, "grad_norm": 5.615129470825195, "learning_rate": 6.619190404797602e-07, "loss": 1.0961, "step": 3533 }, { "epoch": 336.57142857142856, "grad_norm": 7.926955223083496, "learning_rate": 6.621064467766118e-07, "loss": 1.1396, "step": 3534 }, { "epoch": 336.6666666666667, "grad_norm": 3.7907376289367676, "learning_rate": 6.622938530734633e-07, "loss": 1.0845, "step": 3535 }, { "epoch": 336.76190476190476, "grad_norm": 9.664169311523438, "learning_rate": 6.624812593703149e-07, "loss": 1.2008, "step": 3536 }, { "epoch": 336.85714285714283, "grad_norm": 5.354334831237793, "learning_rate": 6.626686656671664e-07, "loss": 1.1075, "step": 3537 }, { "epoch": 336.95238095238096, "grad_norm": 11.554585456848145, "learning_rate": 6.628560719640181e-07, "loss": 1.1256, "step": 3538 }, { "epoch": 336.95238095238096, "eval_accuracy": 0.7418508804795804, "eval_f1": 0.7940209267563527, "eval_loss": 0.5547006726264954, "eval_precision": 0.7373681288173237, "eval_recall": 0.8601036269430051, "eval_roc_auc": 0.7875762809441567, "eval_runtime": 1.1087, "eval_samples_per_second": 2407.42, "eval_steps_per_second": 5.412, "step": 3538 }, { "epoch": 337.04761904761904, "grad_norm": 3.42289137840271, "learning_rate": 6.630434782608696e-07, "loss": 1.0927, "step": 3539 }, { "epoch": 337.14285714285717, "grad_norm": 12.857282638549805, "learning_rate": 6.632308845577212e-07, "loss": 1.128, "step": 3540 }, { "epoch": 337.23809523809524, "grad_norm": 16.507972717285156, "learning_rate": 6.634182908545727e-07, "loss": 1.1832, "step": 3541 }, { "epoch": 337.3333333333333, "grad_norm": 5.046204090118408, "learning_rate": 6.636056971514243e-07, "loss": 1.0964, "step": 3542 }, { "epoch": 337.42857142857144, "grad_norm": 7.036007404327393, "learning_rate": 6.637931034482759e-07, "loss": 1.1487, "step": 3543 }, { "epoch": 337.5238095238095, "grad_norm": 7.588108539581299, "learning_rate": 6.639805097451274e-07, "loss": 1.1212, "step": 3544 }, { "epoch": 337.6190476190476, "grad_norm": 12.190917015075684, "learning_rate": 6.641679160419791e-07, "loss": 1.103, "step": 3545 }, { "epoch": 337.7142857142857, "grad_norm": 6.134925365447998, "learning_rate": 6.643553223388306e-07, "loss": 1.0935, "step": 3546 }, { "epoch": 337.8095238095238, "grad_norm": 24.103914260864258, "learning_rate": 6.645427286356822e-07, "loss": 1.1485, "step": 3547 }, { "epoch": 337.9047619047619, "grad_norm": 12.370758056640625, "learning_rate": 6.647301349325337e-07, "loss": 1.1255, "step": 3548 }, { "epoch": 338.0, "grad_norm": 9.194156646728516, "learning_rate": 6.649175412293853e-07, "loss": 1.1161, "step": 3549 }, { "epoch": 338.0, "eval_accuracy": 0.7351067815661296, "eval_f1": 0.7858224780369585, "eval_loss": 0.5546090006828308, "eval_precision": 0.7381900967558338, "eval_recall": 0.8400259067357513, "eval_roc_auc": 0.7870918249856074, "eval_runtime": 1.1211, "eval_samples_per_second": 2380.612, "eval_steps_per_second": 5.352, "step": 3549 }, { "epoch": 338.0952380952381, "grad_norm": 12.278610229492188, "learning_rate": 6.651049475262368e-07, "loss": 1.131, "step": 3550 }, { "epoch": 338.1904761904762, "grad_norm": 5.793935775756836, "learning_rate": 6.652923538230885e-07, "loss": 1.1346, "step": 3551 }, { "epoch": 338.2857142857143, "grad_norm": 7.647890567779541, "learning_rate": 6.654797601199401e-07, "loss": 1.1406, "step": 3552 }, { "epoch": 338.3809523809524, "grad_norm": 15.229239463806152, "learning_rate": 6.656671664167916e-07, "loss": 1.1486, "step": 3553 }, { "epoch": 338.4761904761905, "grad_norm": 8.464210510253906, "learning_rate": 6.658545727136433e-07, "loss": 1.1245, "step": 3554 }, { "epoch": 338.57142857142856, "grad_norm": 10.347994804382324, "learning_rate": 6.660419790104948e-07, "loss": 1.1417, "step": 3555 }, { "epoch": 338.6666666666667, "grad_norm": 5.064682483673096, "learning_rate": 6.662293853073464e-07, "loss": 1.0578, "step": 3556 }, { "epoch": 338.76190476190476, "grad_norm": 8.694058418273926, "learning_rate": 6.664167916041979e-07, "loss": 1.1247, "step": 3557 }, { "epoch": 338.85714285714283, "grad_norm": 10.907423973083496, "learning_rate": 6.666041979010495e-07, "loss": 1.1499, "step": 3558 }, { "epoch": 338.95238095238096, "grad_norm": 6.98323917388916, "learning_rate": 6.667916041979011e-07, "loss": 1.0894, "step": 3559 }, { "epoch": 338.95238095238096, "eval_accuracy": 0.7369801423754215, "eval_f1": 0.7874015748031497, "eval_loss": 0.5539066791534424, "eval_precision": 0.7394766780432309, "eval_recall": 0.8419689119170984, "eval_roc_auc": 0.7878603914795625, "eval_runtime": 1.1949, "eval_samples_per_second": 2233.618, "eval_steps_per_second": 5.021, "step": 3559 }, { "epoch": 339.04761904761904, "grad_norm": 6.979728698730469, "learning_rate": 6.669790104947527e-07, "loss": 1.1115, "step": 3560 }, { "epoch": 339.14285714285717, "grad_norm": 4.081621170043945, "learning_rate": 6.671664167916043e-07, "loss": 1.1271, "step": 3561 }, { "epoch": 339.23809523809524, "grad_norm": 13.691429138183594, "learning_rate": 6.673538230884558e-07, "loss": 1.1255, "step": 3562 }, { "epoch": 339.3333333333333, "grad_norm": 11.593656539916992, "learning_rate": 6.675412293853074e-07, "loss": 1.1027, "step": 3563 }, { "epoch": 339.42857142857144, "grad_norm": 6.386806488037109, "learning_rate": 6.677286356821589e-07, "loss": 1.0918, "step": 3564 }, { "epoch": 339.5238095238095, "grad_norm": 3.813530445098877, "learning_rate": 6.679160419790105e-07, "loss": 1.1615, "step": 3565 }, { "epoch": 339.6190476190476, "grad_norm": 5.865270137786865, "learning_rate": 6.681034482758621e-07, "loss": 1.1287, "step": 3566 }, { "epoch": 339.7142857142857, "grad_norm": 5.257119178771973, "learning_rate": 6.682908545727137e-07, "loss": 1.1735, "step": 3567 }, { "epoch": 339.8095238095238, "grad_norm": 13.466306686401367, "learning_rate": 6.684782608695652e-07, "loss": 1.1191, "step": 3568 }, { "epoch": 339.9047619047619, "grad_norm": 20.444292068481445, "learning_rate": 6.686656671664168e-07, "loss": 1.1079, "step": 3569 }, { "epoch": 340.0, "grad_norm": 6.399350643157959, "learning_rate": 6.688530734632684e-07, "loss": 1.1475, "step": 3570 }, { "epoch": 340.0, "eval_accuracy": 0.7321094042712627, "eval_f1": 0.781010719754977, "eval_loss": 0.5544794201850891, "eval_precision": 0.7408483439860546, "eval_recall": 0.8257772020725389, "eval_roc_auc": 0.7868966609096142, "eval_runtime": 1.1362, "eval_samples_per_second": 2349.049, "eval_steps_per_second": 5.281, "step": 3570 }, { "epoch": 340.0952380952381, "grad_norm": 17.85746192932129, "learning_rate": 6.690404797601199e-07, "loss": 1.0988, "step": 3571 }, { "epoch": 340.1904761904762, "grad_norm": 12.25684928894043, "learning_rate": 6.692278860569715e-07, "loss": 1.1331, "step": 3572 }, { "epoch": 340.2857142857143, "grad_norm": 8.942669868469238, "learning_rate": 6.694152923538231e-07, "loss": 1.1431, "step": 3573 }, { "epoch": 340.3809523809524, "grad_norm": 6.340849876403809, "learning_rate": 6.696026986506747e-07, "loss": 1.0891, "step": 3574 }, { "epoch": 340.4761904761905, "grad_norm": 5.250206470489502, "learning_rate": 6.697901049475262e-07, "loss": 1.1616, "step": 3575 }, { "epoch": 340.57142857142856, "grad_norm": 6.328611850738525, "learning_rate": 6.699775112443779e-07, "loss": 1.1095, "step": 3576 }, { "epoch": 340.6666666666667, "grad_norm": 9.236186981201172, "learning_rate": 6.701649175412294e-07, "loss": 1.0791, "step": 3577 }, { "epoch": 340.76190476190476, "grad_norm": 11.165750503540039, "learning_rate": 6.70352323838081e-07, "loss": 1.1567, "step": 3578 }, { "epoch": 340.85714285714283, "grad_norm": 8.74531364440918, "learning_rate": 6.705397301349326e-07, "loss": 1.108, "step": 3579 }, { "epoch": 340.95238095238096, "grad_norm": 8.133259773254395, "learning_rate": 6.707271364317842e-07, "loss": 1.1202, "step": 3580 }, { "epoch": 340.95238095238096, "eval_accuracy": 0.7339827650805545, "eval_f1": 0.786273329319687, "eval_loss": 0.5549090504646301, "eval_precision": 0.734533183352081, "eval_recall": 0.8458549222797928, "eval_roc_auc": 0.7867956246401843, "eval_runtime": 1.1413, "eval_samples_per_second": 2338.49, "eval_steps_per_second": 5.257, "step": 3580 }, { "epoch": 341.04761904761904, "grad_norm": 12.086969375610352, "learning_rate": 6.709145427286358e-07, "loss": 1.1356, "step": 3581 }, { "epoch": 341.14285714285717, "grad_norm": 9.834478378295898, "learning_rate": 6.711019490254873e-07, "loss": 1.1098, "step": 3582 }, { "epoch": 341.23809523809524, "grad_norm": 8.839277267456055, "learning_rate": 6.712893553223389e-07, "loss": 1.1024, "step": 3583 }, { "epoch": 341.3333333333333, "grad_norm": 12.858485221862793, "learning_rate": 6.714767616191904e-07, "loss": 1.0935, "step": 3584 }, { "epoch": 341.42857142857144, "grad_norm": 21.6302490234375, "learning_rate": 6.71664167916042e-07, "loss": 1.1222, "step": 3585 }, { "epoch": 341.5238095238095, "grad_norm": 23.848730087280273, "learning_rate": 6.718515742128935e-07, "loss": 1.1004, "step": 3586 }, { "epoch": 341.6190476190476, "grad_norm": 13.283079147338867, "learning_rate": 6.720389805097452e-07, "loss": 1.1245, "step": 3587 }, { "epoch": 341.7142857142857, "grad_norm": 15.620110511779785, "learning_rate": 6.722263868065968e-07, "loss": 1.1212, "step": 3588 }, { "epoch": 341.8095238095238, "grad_norm": 4.007311820983887, "learning_rate": 6.724137931034483e-07, "loss": 1.1378, "step": 3589 }, { "epoch": 341.9047619047619, "grad_norm": 6.650176525115967, "learning_rate": 6.726011994002999e-07, "loss": 1.15, "step": 3590 }, { "epoch": 342.0, "grad_norm": 12.86173152923584, "learning_rate": 6.727886056971514e-07, "loss": 1.1555, "step": 3591 }, { "epoch": 342.0, "eval_accuracy": 0.738478831022855, "eval_f1": 0.7922619047619047, "eval_loss": 0.5554071068763733, "eval_precision": 0.7329295154185022, "eval_recall": 0.8620466321243523, "eval_roc_auc": 0.7867576280944157, "eval_runtime": 1.1368, "eval_samples_per_second": 2347.863, "eval_steps_per_second": 5.278, "step": 3591 }, { "epoch": 342.0952380952381, "grad_norm": 17.999942779541016, "learning_rate": 6.72976011994003e-07, "loss": 1.1372, "step": 3592 }, { "epoch": 342.1904761904762, "grad_norm": 11.610297203063965, "learning_rate": 6.731634182908545e-07, "loss": 1.0924, "step": 3593 }, { "epoch": 342.2857142857143, "grad_norm": 5.744530200958252, "learning_rate": 6.733508245877062e-07, "loss": 1.1296, "step": 3594 }, { "epoch": 342.3809523809524, "grad_norm": 4.968858242034912, "learning_rate": 6.735382308845577e-07, "loss": 1.1053, "step": 3595 }, { "epoch": 342.4761904761905, "grad_norm": 5.93127965927124, "learning_rate": 6.737256371814093e-07, "loss": 1.118, "step": 3596 }, { "epoch": 342.57142857142856, "grad_norm": 5.766799449920654, "learning_rate": 6.739130434782609e-07, "loss": 1.1342, "step": 3597 }, { "epoch": 342.6666666666667, "grad_norm": 9.84838581085205, "learning_rate": 6.741004497751124e-07, "loss": 1.1653, "step": 3598 }, { "epoch": 342.76190476190476, "grad_norm": 10.662291526794434, "learning_rate": 6.742878560719641e-07, "loss": 1.1093, "step": 3599 }, { "epoch": 342.85714285714283, "grad_norm": 4.422891139984131, "learning_rate": 6.744752623688156e-07, "loss": 1.1379, "step": 3600 }, { "epoch": 342.95238095238096, "grad_norm": 11.94595718383789, "learning_rate": 6.746626686656673e-07, "loss": 1.0888, "step": 3601 }, { "epoch": 342.95238095238096, "eval_accuracy": 0.732484076433121, "eval_f1": 0.7808471454880295, "eval_loss": 0.5546474456787109, "eval_precision": 0.7421236872812136, "eval_recall": 0.8238341968911918, "eval_roc_auc": 0.7872820955670696, "eval_runtime": 1.1459, "eval_samples_per_second": 2329.114, "eval_steps_per_second": 5.236, "step": 3601 }, { "epoch": 343.04761904761904, "grad_norm": 15.905829429626465, "learning_rate": 6.748500749625188e-07, "loss": 1.1039, "step": 3602 }, { "epoch": 343.14285714285717, "grad_norm": 16.905872344970703, "learning_rate": 6.750374812593704e-07, "loss": 1.0695, "step": 3603 }, { "epoch": 343.23809523809524, "grad_norm": 4.414461612701416, "learning_rate": 6.752248875562219e-07, "loss": 1.14, "step": 3604 }, { "epoch": 343.3333333333333, "grad_norm": 4.829150676727295, "learning_rate": 6.754122938530735e-07, "loss": 1.1487, "step": 3605 }, { "epoch": 343.42857142857144, "grad_norm": 7.950586318969727, "learning_rate": 6.755997001499251e-07, "loss": 1.0979, "step": 3606 }, { "epoch": 343.5238095238095, "grad_norm": 4.235756874084473, "learning_rate": 6.757871064467766e-07, "loss": 1.0673, "step": 3607 }, { "epoch": 343.6190476190476, "grad_norm": 9.007696151733398, "learning_rate": 6.759745127436283e-07, "loss": 1.1128, "step": 3608 }, { "epoch": 343.7142857142857, "grad_norm": 6.327154159545898, "learning_rate": 6.761619190404798e-07, "loss": 1.1134, "step": 3609 }, { "epoch": 343.8095238095238, "grad_norm": 8.22981071472168, "learning_rate": 6.763493253373314e-07, "loss": 1.1625, "step": 3610 }, { "epoch": 343.9047619047619, "grad_norm": 6.529686450958252, "learning_rate": 6.765367316341829e-07, "loss": 1.1357, "step": 3611 }, { "epoch": 344.0, "grad_norm": 9.798868179321289, "learning_rate": 6.767241379310345e-07, "loss": 1.1103, "step": 3612 }, { "epoch": 344.0, "eval_accuracy": 0.7347321094042712, "eval_f1": 0.7828220858895706, "eval_loss": 0.5543044805526733, "eval_precision": 0.7435897435897436, "eval_recall": 0.8264248704663213, "eval_roc_auc": 0.7875420264824409, "eval_runtime": 1.1258, "eval_samples_per_second": 2370.761, "eval_steps_per_second": 5.33, "step": 3612 }, { "epoch": 344.0952380952381, "grad_norm": 6.461381912231445, "learning_rate": 6.76911544227886e-07, "loss": 1.1304, "step": 3613 }, { "epoch": 344.1904761904762, "grad_norm": 6.234090328216553, "learning_rate": 6.770989505247376e-07, "loss": 1.1294, "step": 3614 }, { "epoch": 344.2857142857143, "grad_norm": 7.588049411773682, "learning_rate": 6.772863568215893e-07, "loss": 1.0847, "step": 3615 }, { "epoch": 344.3809523809524, "grad_norm": 18.067716598510742, "learning_rate": 6.774737631184408e-07, "loss": 1.1469, "step": 3616 }, { "epoch": 344.4761904761905, "grad_norm": 9.160544395446777, "learning_rate": 6.776611694152924e-07, "loss": 1.1314, "step": 3617 }, { "epoch": 344.57142857142856, "grad_norm": 12.811017990112305, "learning_rate": 6.778485757121439e-07, "loss": 1.1341, "step": 3618 }, { "epoch": 344.6666666666667, "grad_norm": 3.5876166820526123, "learning_rate": 6.780359820089955e-07, "loss": 1.1237, "step": 3619 }, { "epoch": 344.76190476190476, "grad_norm": 8.06754207611084, "learning_rate": 6.78223388305847e-07, "loss": 1.1162, "step": 3620 }, { "epoch": 344.85714285714283, "grad_norm": 10.948036193847656, "learning_rate": 6.784107946026987e-07, "loss": 1.1154, "step": 3621 }, { "epoch": 344.95238095238096, "grad_norm": 5.318874835968018, "learning_rate": 6.785982008995502e-07, "loss": 1.1006, "step": 3622 }, { "epoch": 344.95238095238096, "eval_accuracy": 0.7388535031847133, "eval_f1": 0.7920023873470606, "eval_loss": 0.554513156414032, "eval_precision": 0.7343663530713891, "eval_recall": 0.8594559585492227, "eval_roc_auc": 0.7877369027058146, "eval_runtime": 1.1197, "eval_samples_per_second": 2383.667, "eval_steps_per_second": 5.359, "step": 3622 }, { "epoch": 345.04761904761904, "grad_norm": 12.399185180664062, "learning_rate": 6.787856071964018e-07, "loss": 1.0919, "step": 3623 }, { "epoch": 345.14285714285717, "grad_norm": 20.321130752563477, "learning_rate": 6.789730134932535e-07, "loss": 1.1898, "step": 3624 }, { "epoch": 345.23809523809524, "grad_norm": 14.606483459472656, "learning_rate": 6.79160419790105e-07, "loss": 1.12, "step": 3625 }, { "epoch": 345.3333333333333, "grad_norm": 13.469183921813965, "learning_rate": 6.793478260869566e-07, "loss": 1.1608, "step": 3626 }, { "epoch": 345.42857142857144, "grad_norm": 8.02731990814209, "learning_rate": 6.795352323838081e-07, "loss": 1.1208, "step": 3627 }, { "epoch": 345.5238095238095, "grad_norm": 7.375977993011475, "learning_rate": 6.797226386806598e-07, "loss": 1.1089, "step": 3628 }, { "epoch": 345.6190476190476, "grad_norm": 9.051599502563477, "learning_rate": 6.799100449775113e-07, "loss": 1.0553, "step": 3629 }, { "epoch": 345.7142857142857, "grad_norm": 9.57223892211914, "learning_rate": 6.800974512743629e-07, "loss": 1.0911, "step": 3630 }, { "epoch": 345.8095238095238, "grad_norm": 6.9309916496276855, "learning_rate": 6.802848575712144e-07, "loss": 1.1275, "step": 3631 }, { "epoch": 345.9047619047619, "grad_norm": 14.309074401855469, "learning_rate": 6.80472263868066e-07, "loss": 1.1293, "step": 3632 }, { "epoch": 346.0, "grad_norm": 4.155423641204834, "learning_rate": 6.806596701649176e-07, "loss": 1.0834, "step": 3633 }, { "epoch": 346.0, "eval_accuracy": 0.735481453727988, "eval_f1": 0.7876052948255114, "eval_loss": 0.5538960099220276, "eval_precision": 0.7353932584269663, "eval_recall": 0.8477979274611399, "eval_roc_auc": 0.7879467472654001, "eval_runtime": 1.1295, "eval_samples_per_second": 2362.961, "eval_steps_per_second": 5.312, "step": 3633 }, { "epoch": 346.0952380952381, "grad_norm": 11.9675931930542, "learning_rate": 6.808470764617691e-07, "loss": 1.1013, "step": 3634 }, { "epoch": 346.1904761904762, "grad_norm": 9.897497177124023, "learning_rate": 6.810344827586208e-07, "loss": 1.1574, "step": 3635 }, { "epoch": 346.2857142857143, "grad_norm": 13.217548370361328, "learning_rate": 6.812218890554723e-07, "loss": 1.1247, "step": 3636 }, { "epoch": 346.3809523809524, "grad_norm": 10.568078994750977, "learning_rate": 6.814092953523239e-07, "loss": 1.1355, "step": 3637 }, { "epoch": 346.4761904761905, "grad_norm": 9.215120315551758, "learning_rate": 6.815967016491754e-07, "loss": 1.1323, "step": 3638 }, { "epoch": 346.57142857142856, "grad_norm": 7.759387493133545, "learning_rate": 6.81784107946027e-07, "loss": 1.1929, "step": 3639 }, { "epoch": 346.6666666666667, "grad_norm": 6.659882068634033, "learning_rate": 6.819715142428785e-07, "loss": 1.086, "step": 3640 }, { "epoch": 346.76190476190476, "grad_norm": 19.305633544921875, "learning_rate": 6.821589205397301e-07, "loss": 1.078, "step": 3641 }, { "epoch": 346.85714285714283, "grad_norm": 6.204228401184082, "learning_rate": 6.823463268365818e-07, "loss": 1.1404, "step": 3642 }, { "epoch": 346.95238095238096, "grad_norm": 7.152632713317871, "learning_rate": 6.825337331334333e-07, "loss": 1.1183, "step": 3643 }, { "epoch": 346.95238095238096, "eval_accuracy": 0.7362307980517048, "eval_f1": 0.7858880778588808, "eval_loss": 0.5532867908477783, "eval_precision": 0.7408256880733946, "eval_recall": 0.8367875647668394, "eval_roc_auc": 0.7884700633275763, "eval_runtime": 1.1328, "eval_samples_per_second": 2356.206, "eval_steps_per_second": 5.297, "step": 3643 }, { "epoch": 347.04761904761904, "grad_norm": 22.47641372680664, "learning_rate": 6.827211394302849e-07, "loss": 1.0886, "step": 3644 }, { "epoch": 347.14285714285717, "grad_norm": 20.005538940429688, "learning_rate": 6.829085457271364e-07, "loss": 1.1646, "step": 3645 }, { "epoch": 347.23809523809524, "grad_norm": 8.337130546569824, "learning_rate": 6.830959520239881e-07, "loss": 1.0712, "step": 3646 }, { "epoch": 347.3333333333333, "grad_norm": 15.485507011413574, "learning_rate": 6.832833583208396e-07, "loss": 1.1199, "step": 3647 }, { "epoch": 347.42857142857144, "grad_norm": 17.480194091796875, "learning_rate": 6.834707646176912e-07, "loss": 1.1515, "step": 3648 }, { "epoch": 347.5238095238095, "grad_norm": 9.325078964233398, "learning_rate": 6.836581709145429e-07, "loss": 1.1247, "step": 3649 }, { "epoch": 347.6190476190476, "grad_norm": 7.568593502044678, "learning_rate": 6.838455772113944e-07, "loss": 1.1163, "step": 3650 }, { "epoch": 347.7142857142857, "grad_norm": 8.339293479919434, "learning_rate": 6.84032983508246e-07, "loss": 1.1626, "step": 3651 }, { "epoch": 347.8095238095238, "grad_norm": 11.512581825256348, "learning_rate": 6.842203898050975e-07, "loss": 1.1538, "step": 3652 }, { "epoch": 347.9047619047619, "grad_norm": 6.716801166534424, "learning_rate": 6.844077961019491e-07, "loss": 1.1551, "step": 3653 }, { "epoch": 348.0, "grad_norm": 10.907341957092285, "learning_rate": 6.845952023988006e-07, "loss": 1.0937, "step": 3654 }, { "epoch": 348.0, "eval_accuracy": 0.7343574372424129, "eval_f1": 0.7840389887298203, "eval_loss": 0.5535434484481812, "eval_precision": 0.7400805060379528, "eval_recall": 0.8335492227979274, "eval_roc_auc": 0.7883514680483592, "eval_runtime": 1.1299, "eval_samples_per_second": 2362.186, "eval_steps_per_second": 5.31, "step": 3654 }, { "epoch": 348.0952380952381, "grad_norm": 10.432865142822266, "learning_rate": 6.847826086956522e-07, "loss": 1.1648, "step": 3655 }, { "epoch": 348.1904761904762, "grad_norm": 9.500462532043457, "learning_rate": 6.849700149925038e-07, "loss": 1.0846, "step": 3656 }, { "epoch": 348.2857142857143, "grad_norm": 6.565241813659668, "learning_rate": 6.851574212893554e-07, "loss": 1.096, "step": 3657 }, { "epoch": 348.3809523809524, "grad_norm": 6.4096221923828125, "learning_rate": 6.853448275862069e-07, "loss": 1.1165, "step": 3658 }, { "epoch": 348.4761904761905, "grad_norm": 5.140103816986084, "learning_rate": 6.855322338830585e-07, "loss": 1.0618, "step": 3659 }, { "epoch": 348.57142857142856, "grad_norm": 5.91832160949707, "learning_rate": 6.857196401799101e-07, "loss": 1.1687, "step": 3660 }, { "epoch": 348.6666666666667, "grad_norm": 5.8545379638671875, "learning_rate": 6.859070464767616e-07, "loss": 1.1406, "step": 3661 }, { "epoch": 348.76190476190476, "grad_norm": 14.378029823303223, "learning_rate": 6.860944527736132e-07, "loss": 1.141, "step": 3662 }, { "epoch": 348.85714285714283, "grad_norm": 7.654487133026123, "learning_rate": 6.862818590704648e-07, "loss": 1.1031, "step": 3663 }, { "epoch": 348.95238095238096, "grad_norm": 13.655937194824219, "learning_rate": 6.864692653673164e-07, "loss": 1.1128, "step": 3664 }, { "epoch": 348.95238095238096, "eval_accuracy": 0.738478831022855, "eval_f1": 0.7913927077106994, "eval_loss": 0.5536009669303894, "eval_precision": 0.7347391786903441, "eval_recall": 0.8575129533678757, "eval_roc_auc": 0.7887458261370177, "eval_runtime": 1.1331, "eval_samples_per_second": 2355.514, "eval_steps_per_second": 5.295, "step": 3664 }, { "epoch": 349.04761904761904, "grad_norm": 16.01888084411621, "learning_rate": 6.866566716641679e-07, "loss": 1.1669, "step": 3665 }, { "epoch": 349.14285714285717, "grad_norm": 7.082530975341797, "learning_rate": 6.868440779610195e-07, "loss": 1.0966, "step": 3666 }, { "epoch": 349.23809523809524, "grad_norm": 5.911025524139404, "learning_rate": 6.87031484257871e-07, "loss": 1.0857, "step": 3667 }, { "epoch": 349.3333333333333, "grad_norm": 3.9199273586273193, "learning_rate": 6.872188905547226e-07, "loss": 1.1304, "step": 3668 }, { "epoch": 349.42857142857144, "grad_norm": 8.267579078674316, "learning_rate": 6.874062968515743e-07, "loss": 1.1276, "step": 3669 }, { "epoch": 349.5238095238095, "grad_norm": 11.254096984863281, "learning_rate": 6.875937031484258e-07, "loss": 1.1101, "step": 3670 }, { "epoch": 349.6190476190476, "grad_norm": 12.040755271911621, "learning_rate": 6.877811094452775e-07, "loss": 1.0977, "step": 3671 }, { "epoch": 349.7142857142857, "grad_norm": 8.342375755310059, "learning_rate": 6.87968515742129e-07, "loss": 1.1664, "step": 3672 }, { "epoch": 349.8095238095238, "grad_norm": 6.742152214050293, "learning_rate": 6.881559220389806e-07, "loss": 1.0981, "step": 3673 }, { "epoch": 349.9047619047619, "grad_norm": 5.508918285369873, "learning_rate": 6.883433283358321e-07, "loss": 1.1162, "step": 3674 }, { "epoch": 350.0, "grad_norm": 6.856598377227783, "learning_rate": 6.885307346326837e-07, "loss": 1.149, "step": 3675 }, { "epoch": 350.0, "eval_accuracy": 0.7358561258898464, "eval_f1": 0.7887323943661971, "eval_loss": 0.553325891494751, "eval_precision": 0.7339654210819855, "eval_recall": 0.8523316062176166, "eval_roc_auc": 0.7887636729994243, "eval_runtime": 1.1307, "eval_samples_per_second": 2360.581, "eval_steps_per_second": 5.307, "step": 3675 }, { "epoch": 350.0952380952381, "grad_norm": 7.425978183746338, "learning_rate": 6.887181409295352e-07, "loss": 1.1165, "step": 3676 }, { "epoch": 350.1904761904762, "grad_norm": 11.71827220916748, "learning_rate": 6.889055472263869e-07, "loss": 1.1188, "step": 3677 }, { "epoch": 350.2857142857143, "grad_norm": 12.371844291687012, "learning_rate": 6.890929535232385e-07, "loss": 1.102, "step": 3678 }, { "epoch": 350.3809523809524, "grad_norm": 13.872154235839844, "learning_rate": 6.8928035982009e-07, "loss": 1.1576, "step": 3679 }, { "epoch": 350.4761904761905, "grad_norm": 15.714462280273438, "learning_rate": 6.894677661169416e-07, "loss": 1.0988, "step": 3680 }, { "epoch": 350.57142857142856, "grad_norm": 10.854438781738281, "learning_rate": 6.896551724137931e-07, "loss": 1.1247, "step": 3681 }, { "epoch": 350.6666666666667, "grad_norm": 4.670620918273926, "learning_rate": 6.898425787106447e-07, "loss": 1.1434, "step": 3682 }, { "epoch": 350.76190476190476, "grad_norm": 6.489286422729492, "learning_rate": 6.900299850074962e-07, "loss": 1.1209, "step": 3683 }, { "epoch": 350.85714285714283, "grad_norm": 5.2290358543396, "learning_rate": 6.902173913043479e-07, "loss": 1.1203, "step": 3684 }, { "epoch": 350.95238095238096, "grad_norm": 16.88265037536621, "learning_rate": 6.904047976011994e-07, "loss": 1.1177, "step": 3685 }, { "epoch": 350.95238095238096, "eval_accuracy": 0.732484076433121, "eval_f1": 0.7811158798283262, "eval_loss": 0.5532238483428955, "eval_precision": 0.7415599534342259, "eval_recall": 0.8251295336787565, "eval_roc_auc": 0.7884602763385147, "eval_runtime": 1.1127, "eval_samples_per_second": 2398.725, "eval_steps_per_second": 5.392, "step": 3685 }, { "epoch": 351.04761904761904, "grad_norm": 5.788422107696533, "learning_rate": 6.90592203898051e-07, "loss": 1.0999, "step": 3686 }, { "epoch": 351.14285714285717, "grad_norm": 15.770282745361328, "learning_rate": 6.907796101949026e-07, "loss": 1.1257, "step": 3687 }, { "epoch": 351.23809523809524, "grad_norm": 4.182605266571045, "learning_rate": 6.909670164917541e-07, "loss": 1.1125, "step": 3688 }, { "epoch": 351.3333333333333, "grad_norm": 7.102372169494629, "learning_rate": 6.911544227886057e-07, "loss": 1.1161, "step": 3689 }, { "epoch": 351.42857142857144, "grad_norm": 7.176307201385498, "learning_rate": 6.913418290854572e-07, "loss": 1.144, "step": 3690 }, { "epoch": 351.5238095238095, "grad_norm": 7.356261253356934, "learning_rate": 6.915292353823089e-07, "loss": 1.1582, "step": 3691 }, { "epoch": 351.6190476190476, "grad_norm": 7.973656177520752, "learning_rate": 6.917166416791604e-07, "loss": 1.1195, "step": 3692 }, { "epoch": 351.7142857142857, "grad_norm": 6.8437018394470215, "learning_rate": 6.91904047976012e-07, "loss": 1.1236, "step": 3693 }, { "epoch": 351.8095238095238, "grad_norm": 6.552664279937744, "learning_rate": 6.920914542728636e-07, "loss": 1.1384, "step": 3694 }, { "epoch": 351.9047619047619, "grad_norm": 8.255615234375, "learning_rate": 6.922788605697152e-07, "loss": 1.1148, "step": 3695 }, { "epoch": 352.0, "grad_norm": 8.523663520812988, "learning_rate": 6.924662668665668e-07, "loss": 1.107, "step": 3696 }, { "epoch": 352.0, "eval_accuracy": 0.7298613713001124, "eval_f1": 0.7791730474732006, "eval_loss": 0.5536131262779236, "eval_precision": 0.7391051714119697, "eval_recall": 0.8238341968911918, "eval_roc_auc": 0.7881154289004031, "eval_runtime": 1.1313, "eval_samples_per_second": 2359.162, "eval_steps_per_second": 5.303, "step": 3696 }, { "epoch": 352.0952380952381, "grad_norm": 6.894546031951904, "learning_rate": 6.926536731634183e-07, "loss": 1.1187, "step": 3697 }, { "epoch": 352.1904761904762, "grad_norm": 12.019125938415527, "learning_rate": 6.9284107946027e-07, "loss": 1.1303, "step": 3698 }, { "epoch": 352.2857142857143, "grad_norm": 6.342089653015137, "learning_rate": 6.930284857571215e-07, "loss": 1.1312, "step": 3699 }, { "epoch": 352.3809523809524, "grad_norm": 5.598708152770996, "learning_rate": 6.932158920539731e-07, "loss": 1.1504, "step": 3700 }, { "epoch": 352.4761904761905, "grad_norm": 9.867156982421875, "learning_rate": 6.934032983508246e-07, "loss": 1.127, "step": 3701 }, { "epoch": 352.57142857142856, "grad_norm": 3.7137577533721924, "learning_rate": 6.935907046476762e-07, "loss": 1.0934, "step": 3702 }, { "epoch": 352.6666666666667, "grad_norm": 5.500702857971191, "learning_rate": 6.937781109445277e-07, "loss": 1.1383, "step": 3703 }, { "epoch": 352.76190476190476, "grad_norm": 7.868194103240967, "learning_rate": 6.939655172413793e-07, "loss": 1.0965, "step": 3704 }, { "epoch": 352.85714285714283, "grad_norm": 8.515414237976074, "learning_rate": 6.94152923538231e-07, "loss": 1.1162, "step": 3705 }, { "epoch": 352.95238095238096, "grad_norm": 6.582380771636963, "learning_rate": 6.943403298350825e-07, "loss": 1.1019, "step": 3706 }, { "epoch": 352.95238095238096, "eval_accuracy": 0.7362307980517048, "eval_f1": 0.7908496732026143, "eval_loss": 0.5542948842048645, "eval_precision": 0.7305159165751921, "eval_recall": 0.8620466321243523, "eval_roc_auc": 0.7884173862982153, "eval_runtime": 1.1274, "eval_samples_per_second": 2367.475, "eval_steps_per_second": 5.322, "step": 3706 }, { "epoch": 353.04761904761904, "grad_norm": 6.112668991088867, "learning_rate": 6.945277361319341e-07, "loss": 1.1165, "step": 3707 }, { "epoch": 353.14285714285717, "grad_norm": 15.119726181030273, "learning_rate": 6.947151424287856e-07, "loss": 1.1496, "step": 3708 }, { "epoch": 353.23809523809524, "grad_norm": 11.367457389831543, "learning_rate": 6.949025487256372e-07, "loss": 1.1206, "step": 3709 }, { "epoch": 353.3333333333333, "grad_norm": 6.386621952056885, "learning_rate": 6.950899550224887e-07, "loss": 1.1265, "step": 3710 }, { "epoch": 353.42857142857144, "grad_norm": 9.647610664367676, "learning_rate": 6.952773613193403e-07, "loss": 1.1095, "step": 3711 }, { "epoch": 353.5238095238095, "grad_norm": 10.258861541748047, "learning_rate": 6.95464767616192e-07, "loss": 1.0473, "step": 3712 }, { "epoch": 353.6190476190476, "grad_norm": 7.602921485900879, "learning_rate": 6.956521739130435e-07, "loss": 1.0915, "step": 3713 }, { "epoch": 353.7142857142857, "grad_norm": 10.235103607177734, "learning_rate": 6.958395802098951e-07, "loss": 1.1615, "step": 3714 }, { "epoch": 353.8095238095238, "grad_norm": 18.607894897460938, "learning_rate": 6.960269865067466e-07, "loss": 1.1101, "step": 3715 }, { "epoch": 353.9047619047619, "grad_norm": 5.014978408813477, "learning_rate": 6.962143928035983e-07, "loss": 1.1291, "step": 3716 }, { "epoch": 354.0, "grad_norm": 4.992445945739746, "learning_rate": 6.964017991004498e-07, "loss": 1.1391, "step": 3717 }, { "epoch": 354.0, "eval_accuracy": 0.7317347321094043, "eval_f1": 0.7800982800982801, "eval_loss": 0.5533689260482788, "eval_precision": 0.741822429906542, "eval_recall": 0.822538860103627, "eval_roc_auc": 0.7882397812320091, "eval_runtime": 1.1277, "eval_samples_per_second": 2366.68, "eval_steps_per_second": 5.32, "step": 3717 }, { "epoch": 354.0952380952381, "grad_norm": 3.7810394763946533, "learning_rate": 6.965892053973014e-07, "loss": 1.1567, "step": 3718 }, { "epoch": 354.1904761904762, "grad_norm": 15.194648742675781, "learning_rate": 6.96776611694153e-07, "loss": 1.1036, "step": 3719 }, { "epoch": 354.2857142857143, "grad_norm": 18.574241638183594, "learning_rate": 6.969640179910046e-07, "loss": 1.047, "step": 3720 }, { "epoch": 354.3809523809524, "grad_norm": 6.198830604553223, "learning_rate": 6.971514242878561e-07, "loss": 1.1338, "step": 3721 }, { "epoch": 354.4761904761905, "grad_norm": 6.480565547943115, "learning_rate": 6.973388305847077e-07, "loss": 1.0989, "step": 3722 }, { "epoch": 354.57142857142856, "grad_norm": 7.632514953613281, "learning_rate": 6.975262368815593e-07, "loss": 1.0984, "step": 3723 }, { "epoch": 354.6666666666667, "grad_norm": 9.1426362991333, "learning_rate": 6.977136431784108e-07, "loss": 1.1029, "step": 3724 }, { "epoch": 354.76190476190476, "grad_norm": 6.421456336975098, "learning_rate": 6.979010494752624e-07, "loss": 1.1504, "step": 3725 }, { "epoch": 354.85714285714283, "grad_norm": 15.901803970336914, "learning_rate": 6.98088455772114e-07, "loss": 1.1431, "step": 3726 }, { "epoch": 354.95238095238096, "grad_norm": 6.8048014640808105, "learning_rate": 6.982758620689656e-07, "loss": 1.1198, "step": 3727 }, { "epoch": 354.95238095238096, "eval_accuracy": 0.7362307980517048, "eval_f1": 0.7880794701986755, "eval_loss": 0.5533745884895325, "eval_precision": 0.7362204724409449, "eval_recall": 0.8477979274611399, "eval_roc_auc": 0.7882170408750719, "eval_runtime": 1.4072, "eval_samples_per_second": 1896.614, "eval_steps_per_second": 4.264, "step": 3727 }, { "epoch": 355.04761904761904, "grad_norm": 7.465842247009277, "learning_rate": 6.984632683658171e-07, "loss": 1.1824, "step": 3728 }, { "epoch": 355.14285714285717, "grad_norm": 6.699007034301758, "learning_rate": 6.986506746626687e-07, "loss": 1.1086, "step": 3729 }, { "epoch": 355.23809523809524, "grad_norm": 5.756167411804199, "learning_rate": 6.988380809595202e-07, "loss": 1.0881, "step": 3730 }, { "epoch": 355.3333333333333, "grad_norm": 11.109375953674316, "learning_rate": 6.990254872563718e-07, "loss": 1.0663, "step": 3731 }, { "epoch": 355.42857142857144, "grad_norm": 6.402453899383545, "learning_rate": 6.992128935532234e-07, "loss": 1.1266, "step": 3732 }, { "epoch": 355.5238095238095, "grad_norm": 5.234026908874512, "learning_rate": 6.99400299850075e-07, "loss": 1.089, "step": 3733 }, { "epoch": 355.6190476190476, "grad_norm": 7.56530237197876, "learning_rate": 6.995877061469266e-07, "loss": 1.1751, "step": 3734 }, { "epoch": 355.7142857142857, "grad_norm": 4.21050500869751, "learning_rate": 6.997751124437781e-07, "loss": 1.1163, "step": 3735 }, { "epoch": 355.8095238095238, "grad_norm": 8.78940200805664, "learning_rate": 6.999625187406297e-07, "loss": 1.0849, "step": 3736 }, { "epoch": 355.9047619047619, "grad_norm": 10.318490028381348, "learning_rate": 7.001499250374812e-07, "loss": 1.1286, "step": 3737 }, { "epoch": 356.0, "grad_norm": 7.933201313018799, "learning_rate": 7.003373313343329e-07, "loss": 1.1523, "step": 3738 }, { "epoch": 356.0, "eval_accuracy": 0.7388535031847133, "eval_f1": 0.7896166616359794, "eval_loss": 0.5528306365013123, "eval_precision": 0.7394007914075749, "eval_recall": 0.8471502590673575, "eval_roc_auc": 0.7888730569948186, "eval_runtime": 1.1105, "eval_samples_per_second": 2403.431, "eval_steps_per_second": 5.403, "step": 3738 }, { "epoch": 356.0952380952381, "grad_norm": 6.1067070960998535, "learning_rate": 7.005247376311844e-07, "loss": 1.1268, "step": 3739 }, { "epoch": 356.1904761904762, "grad_norm": 10.120503425598145, "learning_rate": 7.00712143928036e-07, "loss": 1.1142, "step": 3740 }, { "epoch": 356.2857142857143, "grad_norm": 4.93829870223999, "learning_rate": 7.008995502248877e-07, "loss": 1.0877, "step": 3741 }, { "epoch": 356.3809523809524, "grad_norm": 10.851411819458008, "learning_rate": 7.010869565217392e-07, "loss": 1.1183, "step": 3742 }, { "epoch": 356.4761904761905, "grad_norm": 12.106622695922852, "learning_rate": 7.012743628185908e-07, "loss": 1.1104, "step": 3743 }, { "epoch": 356.57142857142856, "grad_norm": 7.220052719116211, "learning_rate": 7.014617691154423e-07, "loss": 1.1312, "step": 3744 }, { "epoch": 356.6666666666667, "grad_norm": 5.6121015548706055, "learning_rate": 7.016491754122939e-07, "loss": 1.1463, "step": 3745 }, { "epoch": 356.76190476190476, "grad_norm": 4.8981146812438965, "learning_rate": 7.018365817091454e-07, "loss": 1.1288, "step": 3746 }, { "epoch": 356.85714285714283, "grad_norm": 5.686845779418945, "learning_rate": 7.020239880059971e-07, "loss": 1.115, "step": 3747 }, { "epoch": 356.95238095238096, "grad_norm": 4.765906810760498, "learning_rate": 7.022113943028487e-07, "loss": 1.1609, "step": 3748 }, { "epoch": 356.95238095238096, "eval_accuracy": 0.735481453727988, "eval_f1": 0.7838334353949785, "eval_loss": 0.5525299310684204, "eval_precision": 0.743321718931475, "eval_recall": 0.8290155440414507, "eval_roc_auc": 0.7889815774323548, "eval_runtime": 1.1157, "eval_samples_per_second": 2392.192, "eval_steps_per_second": 5.378, "step": 3748 }, { "epoch": 357.04761904761904, "grad_norm": 11.675453186035156, "learning_rate": 7.023988005997002e-07, "loss": 1.0912, "step": 3749 }, { "epoch": 357.14285714285717, "grad_norm": 5.98292875289917, "learning_rate": 7.025862068965518e-07, "loss": 1.1203, "step": 3750 }, { "epoch": 357.23809523809524, "grad_norm": 4.8235554695129395, "learning_rate": 7.027736131934033e-07, "loss": 1.1239, "step": 3751 }, { "epoch": 357.3333333333333, "grad_norm": 7.256765842437744, "learning_rate": 7.029610194902549e-07, "loss": 1.1065, "step": 3752 }, { "epoch": 357.42857142857144, "grad_norm": 9.69433879852295, "learning_rate": 7.031484257871064e-07, "loss": 1.157, "step": 3753 }, { "epoch": 357.5238095238095, "grad_norm": 6.654829025268555, "learning_rate": 7.033358320839581e-07, "loss": 1.1326, "step": 3754 }, { "epoch": 357.6190476190476, "grad_norm": 6.713947772979736, "learning_rate": 7.035232383808096e-07, "loss": 1.0654, "step": 3755 }, { "epoch": 357.7142857142857, "grad_norm": 11.10183334350586, "learning_rate": 7.037106446776612e-07, "loss": 1.1352, "step": 3756 }, { "epoch": 357.8095238095238, "grad_norm": 6.993966102600098, "learning_rate": 7.038980509745127e-07, "loss": 1.1249, "step": 3757 }, { "epoch": 357.9047619047619, "grad_norm": 11.608559608459473, "learning_rate": 7.040854572713643e-07, "loss": 1.0958, "step": 3758 }, { "epoch": 358.0, "grad_norm": 12.34938907623291, "learning_rate": 7.042728635682159e-07, "loss": 1.1323, "step": 3759 }, { "epoch": 358.0, "eval_accuracy": 0.7392281753465717, "eval_f1": 0.7896009673518742, "eval_loss": 0.5525399446487427, "eval_precision": 0.7403628117913832, "eval_recall": 0.8458549222797928, "eval_roc_auc": 0.7890290731145653, "eval_runtime": 1.1201, "eval_samples_per_second": 2382.802, "eval_steps_per_second": 5.357, "step": 3759 }, { "epoch": 358.0952380952381, "grad_norm": 17.054271697998047, "learning_rate": 7.044602698650674e-07, "loss": 1.0948, "step": 3760 }, { "epoch": 358.1904761904762, "grad_norm": 17.62253761291504, "learning_rate": 7.046476761619191e-07, "loss": 1.1152, "step": 3761 }, { "epoch": 358.2857142857143, "grad_norm": 7.233613014221191, "learning_rate": 7.048350824587706e-07, "loss": 1.0497, "step": 3762 }, { "epoch": 358.3809523809524, "grad_norm": 19.385459899902344, "learning_rate": 7.050224887556223e-07, "loss": 1.1506, "step": 3763 }, { "epoch": 358.4761904761905, "grad_norm": 10.535709381103516, "learning_rate": 7.052098950524738e-07, "loss": 1.1232, "step": 3764 }, { "epoch": 358.57142857142856, "grad_norm": 11.408620834350586, "learning_rate": 7.053973013493254e-07, "loss": 1.1645, "step": 3765 }, { "epoch": 358.6666666666667, "grad_norm": 17.90571403503418, "learning_rate": 7.05584707646177e-07, "loss": 1.113, "step": 3766 }, { "epoch": 358.76190476190476, "grad_norm": 5.536513805389404, "learning_rate": 7.057721139430285e-07, "loss": 1.1213, "step": 3767 }, { "epoch": 358.85714285714283, "grad_norm": 8.541691780090332, "learning_rate": 7.059595202398802e-07, "loss": 1.1596, "step": 3768 }, { "epoch": 358.95238095238096, "grad_norm": 7.569893836975098, "learning_rate": 7.061469265367317e-07, "loss": 1.1625, "step": 3769 }, { "epoch": 358.95238095238096, "eval_accuracy": 0.7369801423754215, "eval_f1": 0.7846625766871166, "eval_loss": 0.5524272322654724, "eval_precision": 0.7453379953379954, "eval_recall": 0.8283678756476683, "eval_roc_auc": 0.7891099597006334, "eval_runtime": 1.123, "eval_samples_per_second": 2376.639, "eval_steps_per_second": 5.343, "step": 3769 }, { "epoch": 359.04761904761904, "grad_norm": 6.949024200439453, "learning_rate": 7.063343328335833e-07, "loss": 1.1192, "step": 3770 }, { "epoch": 359.14285714285717, "grad_norm": 5.126923084259033, "learning_rate": 7.065217391304348e-07, "loss": 1.1206, "step": 3771 }, { "epoch": 359.23809523809524, "grad_norm": 8.701910972595215, "learning_rate": 7.067091454272864e-07, "loss": 1.1306, "step": 3772 }, { "epoch": 359.3333333333333, "grad_norm": 15.019597053527832, "learning_rate": 7.068965517241379e-07, "loss": 1.1335, "step": 3773 }, { "epoch": 359.42857142857144, "grad_norm": 11.442471504211426, "learning_rate": 7.070839580209895e-07, "loss": 1.121, "step": 3774 }, { "epoch": 359.5238095238095, "grad_norm": 7.127864837646484, "learning_rate": 7.072713643178412e-07, "loss": 1.1044, "step": 3775 }, { "epoch": 359.6190476190476, "grad_norm": 13.844635963439941, "learning_rate": 7.074587706146927e-07, "loss": 1.1022, "step": 3776 }, { "epoch": 359.7142857142857, "grad_norm": 8.636882781982422, "learning_rate": 7.076461769115443e-07, "loss": 1.1134, "step": 3777 }, { "epoch": 359.8095238095238, "grad_norm": 18.998241424560547, "learning_rate": 7.078335832083958e-07, "loss": 1.121, "step": 3778 }, { "epoch": 359.9047619047619, "grad_norm": 15.878497123718262, "learning_rate": 7.080209895052474e-07, "loss": 1.1422, "step": 3779 }, { "epoch": 360.0, "grad_norm": 4.503548622131348, "learning_rate": 7.082083958020989e-07, "loss": 1.1042, "step": 3780 }, { "epoch": 360.0, "eval_accuracy": 0.741476208317722, "eval_f1": 0.7927927927927928, "eval_loss": 0.5527340769767761, "eval_precision": 0.7390817469204927, "eval_recall": 0.8549222797927462, "eval_roc_auc": 0.7891531375935521, "eval_runtime": 1.1228, "eval_samples_per_second": 2377.189, "eval_steps_per_second": 5.344, "step": 3780 }, { "epoch": 360.0952380952381, "grad_norm": 18.075942993164062, "learning_rate": 7.083958020989505e-07, "loss": 1.1838, "step": 3781 }, { "epoch": 360.1904761904762, "grad_norm": 5.373623371124268, "learning_rate": 7.085832083958021e-07, "loss": 1.1442, "step": 3782 }, { "epoch": 360.2857142857143, "grad_norm": 4.5523681640625, "learning_rate": 7.087706146926537e-07, "loss": 1.1005, "step": 3783 }, { "epoch": 360.3809523809524, "grad_norm": 10.179972648620605, "learning_rate": 7.089580209895052e-07, "loss": 1.1009, "step": 3784 }, { "epoch": 360.4761904761905, "grad_norm": 9.946412086486816, "learning_rate": 7.091454272863568e-07, "loss": 1.1263, "step": 3785 }, { "epoch": 360.57142857142856, "grad_norm": 6.647387981414795, "learning_rate": 7.093328335832085e-07, "loss": 1.133, "step": 3786 }, { "epoch": 360.6666666666667, "grad_norm": 6.924274921417236, "learning_rate": 7.0952023988006e-07, "loss": 1.1009, "step": 3787 }, { "epoch": 360.76190476190476, "grad_norm": 6.803415298461914, "learning_rate": 7.097076461769116e-07, "loss": 1.0819, "step": 3788 }, { "epoch": 360.85714285714283, "grad_norm": 8.356952667236328, "learning_rate": 7.098950524737632e-07, "loss": 1.1109, "step": 3789 }, { "epoch": 360.95238095238096, "grad_norm": 7.981040000915527, "learning_rate": 7.100824587706148e-07, "loss": 1.1094, "step": 3790 }, { "epoch": 360.95238095238096, "eval_accuracy": 0.7377294866991383, "eval_f1": 0.7872340425531915, "eval_loss": 0.5522446632385254, "eval_precision": 0.7416953035509737, "eval_recall": 0.8387305699481865, "eval_roc_auc": 0.7897343120322395, "eval_runtime": 1.1203, "eval_samples_per_second": 2382.456, "eval_steps_per_second": 5.356, "step": 3790 }, { "epoch": 361.04761904761904, "grad_norm": 10.223261833190918, "learning_rate": 7.102698650674663e-07, "loss": 1.0982, "step": 3791 }, { "epoch": 361.14285714285717, "grad_norm": 4.465803623199463, "learning_rate": 7.104572713643179e-07, "loss": 1.0778, "step": 3792 }, { "epoch": 361.23809523809524, "grad_norm": 8.075662612915039, "learning_rate": 7.106446776611694e-07, "loss": 1.1723, "step": 3793 }, { "epoch": 361.3333333333333, "grad_norm": 10.181119918823242, "learning_rate": 7.10832083958021e-07, "loss": 1.1023, "step": 3794 }, { "epoch": 361.42857142857144, "grad_norm": 6.193162441253662, "learning_rate": 7.110194902548726e-07, "loss": 1.123, "step": 3795 }, { "epoch": 361.5238095238095, "grad_norm": 3.2716386318206787, "learning_rate": 7.112068965517242e-07, "loss": 1.1325, "step": 3796 }, { "epoch": 361.6190476190476, "grad_norm": 6.4619832038879395, "learning_rate": 7.113943028485758e-07, "loss": 1.1629, "step": 3797 }, { "epoch": 361.7142857142857, "grad_norm": 6.093001365661621, "learning_rate": 7.115817091454273e-07, "loss": 1.092, "step": 3798 }, { "epoch": 361.8095238095238, "grad_norm": 14.338656425476074, "learning_rate": 7.117691154422789e-07, "loss": 1.1473, "step": 3799 }, { "epoch": 361.9047619047619, "grad_norm": 11.990729331970215, "learning_rate": 7.119565217391304e-07, "loss": 1.1122, "step": 3800 }, { "epoch": 362.0, "grad_norm": 4.780630111694336, "learning_rate": 7.12143928035982e-07, "loss": 1.0641, "step": 3801 }, { "epoch": 362.0, "eval_accuracy": 0.7373548145372799, "eval_f1": 0.7872534142640364, "eval_loss": 0.5522888898849487, "eval_precision": 0.7407195888063963, "eval_recall": 0.8400259067357513, "eval_roc_auc": 0.7897772020725388, "eval_runtime": 1.1352, "eval_samples_per_second": 2351.214, "eval_steps_per_second": 5.286, "step": 3801 }, { "epoch": 362.0952380952381, "grad_norm": 9.136756896972656, "learning_rate": 7.123313343328335e-07, "loss": 1.0569, "step": 3802 }, { "epoch": 362.1904761904762, "grad_norm": 17.731273651123047, "learning_rate": 7.125187406296852e-07, "loss": 1.1098, "step": 3803 }, { "epoch": 362.2857142857143, "grad_norm": 5.175982475280762, "learning_rate": 7.127061469265368e-07, "loss": 1.0944, "step": 3804 }, { "epoch": 362.3809523809524, "grad_norm": 10.310859680175781, "learning_rate": 7.128935532233883e-07, "loss": 1.0819, "step": 3805 }, { "epoch": 362.4761904761905, "grad_norm": 5.00278377532959, "learning_rate": 7.130809595202399e-07, "loss": 1.1317, "step": 3806 }, { "epoch": 362.57142857142856, "grad_norm": 9.666382789611816, "learning_rate": 7.132683658170914e-07, "loss": 1.1537, "step": 3807 }, { "epoch": 362.6666666666667, "grad_norm": 5.540799617767334, "learning_rate": 7.134557721139431e-07, "loss": 1.1462, "step": 3808 }, { "epoch": 362.76190476190476, "grad_norm": 9.392330169677734, "learning_rate": 7.136431784107946e-07, "loss": 1.1592, "step": 3809 }, { "epoch": 362.85714285714283, "grad_norm": 10.634110450744629, "learning_rate": 7.138305847076462e-07, "loss": 1.1258, "step": 3810 }, { "epoch": 362.95238095238096, "grad_norm": 6.859245300292969, "learning_rate": 7.140179910044979e-07, "loss": 1.1117, "step": 3811 }, { "epoch": 362.95238095238096, "eval_accuracy": 0.741476208317722, "eval_f1": 0.7915407854984894, "eval_loss": 0.5519759654998779, "eval_precision": 0.7417893544733862, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.7899614277489926, "eval_runtime": 1.1356, "eval_samples_per_second": 2350.256, "eval_steps_per_second": 5.283, "step": 3811 }, { "epoch": 363.04761904761904, "grad_norm": 10.381455421447754, "learning_rate": 7.142053973013494e-07, "loss": 1.0988, "step": 3812 }, { "epoch": 363.14285714285717, "grad_norm": 9.13387680053711, "learning_rate": 7.14392803598201e-07, "loss": 1.1698, "step": 3813 }, { "epoch": 363.23809523809524, "grad_norm": 12.999701499938965, "learning_rate": 7.145802098950525e-07, "loss": 1.1325, "step": 3814 }, { "epoch": 363.3333333333333, "grad_norm": 7.6583781242370605, "learning_rate": 7.147676161919041e-07, "loss": 1.1042, "step": 3815 }, { "epoch": 363.42857142857144, "grad_norm": 17.781267166137695, "learning_rate": 7.149550224887556e-07, "loss": 1.1218, "step": 3816 }, { "epoch": 363.5238095238095, "grad_norm": 7.954900741577148, "learning_rate": 7.151424287856073e-07, "loss": 1.1074, "step": 3817 }, { "epoch": 363.6190476190476, "grad_norm": 9.188843727111816, "learning_rate": 7.153298350824588e-07, "loss": 1.0778, "step": 3818 }, { "epoch": 363.7142857142857, "grad_norm": 10.288064002990723, "learning_rate": 7.155172413793104e-07, "loss": 1.1257, "step": 3819 }, { "epoch": 363.8095238095238, "grad_norm": 5.548375606536865, "learning_rate": 7.15704647676162e-07, "loss": 1.1675, "step": 3820 }, { "epoch": 363.9047619047619, "grad_norm": 9.2520170211792, "learning_rate": 7.158920539730135e-07, "loss": 1.0806, "step": 3821 }, { "epoch": 364.0, "grad_norm": 12.628405570983887, "learning_rate": 7.160794602698651e-07, "loss": 1.0937, "step": 3822 }, { "epoch": 364.0, "eval_accuracy": 0.7381041588609967, "eval_f1": 0.7883742052679382, "eval_loss": 0.552045464515686, "eval_precision": 0.740193291642979, "eval_recall": 0.8432642487046632, "eval_roc_auc": 0.7898848589522165, "eval_runtime": 1.1396, "eval_samples_per_second": 2342.127, "eval_steps_per_second": 5.265, "step": 3822 }, { "epoch": 364.0952380952381, "grad_norm": 7.857677936553955, "learning_rate": 7.162668665667166e-07, "loss": 1.1142, "step": 3823 }, { "epoch": 364.1904761904762, "grad_norm": 6.276848316192627, "learning_rate": 7.164542728635683e-07, "loss": 1.1138, "step": 3824 }, { "epoch": 364.2857142857143, "grad_norm": 11.035969734191895, "learning_rate": 7.166416791604198e-07, "loss": 1.1548, "step": 3825 }, { "epoch": 364.3809523809524, "grad_norm": 9.335686683654785, "learning_rate": 7.168290854572714e-07, "loss": 1.0909, "step": 3826 }, { "epoch": 364.4761904761905, "grad_norm": 6.2040324211120605, "learning_rate": 7.170164917541229e-07, "loss": 1.0907, "step": 3827 }, { "epoch": 364.57142857142856, "grad_norm": 7.269261360168457, "learning_rate": 7.172038980509745e-07, "loss": 1.0912, "step": 3828 }, { "epoch": 364.6666666666667, "grad_norm": 3.7643415927886963, "learning_rate": 7.17391304347826e-07, "loss": 1.1181, "step": 3829 }, { "epoch": 364.76190476190476, "grad_norm": 5.428904056549072, "learning_rate": 7.175787106446777e-07, "loss": 1.1256, "step": 3830 }, { "epoch": 364.85714285714283, "grad_norm": 11.517545700073242, "learning_rate": 7.177661169415293e-07, "loss": 1.0952, "step": 3831 }, { "epoch": 364.95238095238096, "grad_norm": 9.582964897155762, "learning_rate": 7.179535232383808e-07, "loss": 1.1242, "step": 3832 }, { "epoch": 364.95238095238096, "eval_accuracy": 0.7358561258898464, "eval_f1": 0.7831436481082744, "eval_loss": 0.5519841313362122, "eval_precision": 0.7457527826596367, "eval_recall": 0.8244818652849741, "eval_roc_auc": 0.7898721934369602, "eval_runtime": 1.1201, "eval_samples_per_second": 2382.779, "eval_steps_per_second": 5.357, "step": 3832 }, { "epoch": 365.04761904761904, "grad_norm": 5.688808441162109, "learning_rate": 7.181409295352325e-07, "loss": 1.1182, "step": 3833 }, { "epoch": 365.14285714285717, "grad_norm": 8.307438850402832, "learning_rate": 7.18328335832084e-07, "loss": 1.0878, "step": 3834 }, { "epoch": 365.23809523809524, "grad_norm": 7.110036373138428, "learning_rate": 7.185157421289356e-07, "loss": 1.1538, "step": 3835 }, { "epoch": 365.3333333333333, "grad_norm": 6.807857990264893, "learning_rate": 7.187031484257871e-07, "loss": 1.1287, "step": 3836 }, { "epoch": 365.42857142857144, "grad_norm": 8.348471641540527, "learning_rate": 7.188905547226387e-07, "loss": 1.0766, "step": 3837 }, { "epoch": 365.5238095238095, "grad_norm": 6.609596252441406, "learning_rate": 7.190779610194904e-07, "loss": 1.1401, "step": 3838 }, { "epoch": 365.6190476190476, "grad_norm": 8.325094223022461, "learning_rate": 7.192653673163419e-07, "loss": 1.121, "step": 3839 }, { "epoch": 365.7142857142857, "grad_norm": 5.06738805770874, "learning_rate": 7.194527736131935e-07, "loss": 1.0984, "step": 3840 }, { "epoch": 365.8095238095238, "grad_norm": 8.403528213500977, "learning_rate": 7.19640179910045e-07, "loss": 1.103, "step": 3841 }, { "epoch": 365.9047619047619, "grad_norm": 11.857973098754883, "learning_rate": 7.198275862068966e-07, "loss": 1.147, "step": 3842 }, { "epoch": 366.0, "grad_norm": 9.966644287109375, "learning_rate": 7.200149925037481e-07, "loss": 1.1497, "step": 3843 }, { "epoch": 366.0, "eval_accuracy": 0.7418508804795804, "eval_f1": 0.7934032983508246, "eval_loss": 0.5516533255577087, "eval_precision": 0.7386934673366834, "eval_recall": 0.8568652849740933, "eval_roc_auc": 0.7909896373056996, "eval_runtime": 1.1908, "eval_samples_per_second": 2241.381, "eval_steps_per_second": 5.039, "step": 3843 }, { "epoch": 366.0952380952381, "grad_norm": 5.4201459884643555, "learning_rate": 7.202023988005998e-07, "loss": 1.1294, "step": 3844 }, { "epoch": 366.1904761904762, "grad_norm": 6.1927876472473145, "learning_rate": 7.203898050974513e-07, "loss": 1.0901, "step": 3845 }, { "epoch": 366.2857142857143, "grad_norm": 15.932611465454102, "learning_rate": 7.205772113943029e-07, "loss": 1.1438, "step": 3846 }, { "epoch": 366.3809523809524, "grad_norm": 7.462088108062744, "learning_rate": 7.207646176911544e-07, "loss": 1.1717, "step": 3847 }, { "epoch": 366.4761904761905, "grad_norm": 9.691133499145508, "learning_rate": 7.20952023988006e-07, "loss": 1.1742, "step": 3848 }, { "epoch": 366.57142857142856, "grad_norm": 6.052723407745361, "learning_rate": 7.211394302848576e-07, "loss": 1.1219, "step": 3849 }, { "epoch": 366.6666666666667, "grad_norm": 8.719459533691406, "learning_rate": 7.213268365817091e-07, "loss": 1.0821, "step": 3850 }, { "epoch": 366.76190476190476, "grad_norm": 5.203175067901611, "learning_rate": 7.215142428785608e-07, "loss": 1.1109, "step": 3851 }, { "epoch": 366.85714285714283, "grad_norm": 11.557815551757812, "learning_rate": 7.217016491754123e-07, "loss": 1.1294, "step": 3852 }, { "epoch": 366.95238095238096, "grad_norm": 7.829832077026367, "learning_rate": 7.218890554722639e-07, "loss": 1.1205, "step": 3853 }, { "epoch": 366.95238095238096, "eval_accuracy": 0.7407268639940052, "eval_f1": 0.7896656534954407, "eval_loss": 0.5511266589164734, "eval_precision": 0.7439862542955327, "eval_recall": 0.841321243523316, "eval_roc_auc": 0.7909928036845135, "eval_runtime": 1.1232, "eval_samples_per_second": 2376.163, "eval_steps_per_second": 5.342, "step": 3853 }, { "epoch": 367.04761904761904, "grad_norm": 7.368420124053955, "learning_rate": 7.220764617691154e-07, "loss": 1.103, "step": 3854 }, { "epoch": 367.14285714285717, "grad_norm": 7.851127624511719, "learning_rate": 7.22263868065967e-07, "loss": 1.1219, "step": 3855 }, { "epoch": 367.23809523809524, "grad_norm": 5.373974323272705, "learning_rate": 7.224512743628187e-07, "loss": 1.096, "step": 3856 }, { "epoch": 367.3333333333333, "grad_norm": 7.843284606933594, "learning_rate": 7.226386806596702e-07, "loss": 1.1196, "step": 3857 }, { "epoch": 367.42857142857144, "grad_norm": 12.174184799194336, "learning_rate": 7.228260869565219e-07, "loss": 1.0599, "step": 3858 }, { "epoch": 367.5238095238095, "grad_norm": 8.143187522888184, "learning_rate": 7.230134932533734e-07, "loss": 1.1317, "step": 3859 }, { "epoch": 367.6190476190476, "grad_norm": 6.931474208831787, "learning_rate": 7.23200899550225e-07, "loss": 1.1271, "step": 3860 }, { "epoch": 367.7142857142857, "grad_norm": 5.616233825683594, "learning_rate": 7.233883058470765e-07, "loss": 1.1407, "step": 3861 }, { "epoch": 367.8095238095238, "grad_norm": 9.154975891113281, "learning_rate": 7.235757121439281e-07, "loss": 1.1845, "step": 3862 }, { "epoch": 367.9047619047619, "grad_norm": 14.478100776672363, "learning_rate": 7.237631184407796e-07, "loss": 1.0533, "step": 3863 }, { "epoch": 368.0, "grad_norm": 10.775654792785645, "learning_rate": 7.239505247376312e-07, "loss": 1.1425, "step": 3864 }, { "epoch": 368.0, "eval_accuracy": 0.7347321094042712, "eval_f1": 0.7820197044334976, "eval_loss": 0.5520932674407959, "eval_precision": 0.7453051643192489, "eval_recall": 0.822538860103627, "eval_roc_auc": 0.7896453655728267, "eval_runtime": 1.1296, "eval_samples_per_second": 2362.879, "eval_steps_per_second": 5.312, "step": 3864 }, { "epoch": 368.0952380952381, "grad_norm": 13.183884620666504, "learning_rate": 7.241379310344829e-07, "loss": 1.1199, "step": 3865 }, { "epoch": 368.1904761904762, "grad_norm": 9.267333030700684, "learning_rate": 7.243253373313344e-07, "loss": 1.085, "step": 3866 }, { "epoch": 368.2857142857143, "grad_norm": 10.788604736328125, "learning_rate": 7.24512743628186e-07, "loss": 1.1305, "step": 3867 }, { "epoch": 368.3809523809524, "grad_norm": 7.998301029205322, "learning_rate": 7.247001499250375e-07, "loss": 1.1015, "step": 3868 }, { "epoch": 368.4761904761905, "grad_norm": 6.51740837097168, "learning_rate": 7.248875562218891e-07, "loss": 1.1298, "step": 3869 }, { "epoch": 368.57142857142856, "grad_norm": 7.447991847991943, "learning_rate": 7.250749625187406e-07, "loss": 1.1301, "step": 3870 }, { "epoch": 368.6666666666667, "grad_norm": 7.863955974578857, "learning_rate": 7.252623688155922e-07, "loss": 1.0831, "step": 3871 }, { "epoch": 368.76190476190476, "grad_norm": 10.221150398254395, "learning_rate": 7.254497751124438e-07, "loss": 1.0993, "step": 3872 }, { "epoch": 368.85714285714283, "grad_norm": 14.498579978942871, "learning_rate": 7.256371814092954e-07, "loss": 1.1215, "step": 3873 }, { "epoch": 368.95238095238096, "grad_norm": 9.742353439331055, "learning_rate": 7.25824587706147e-07, "loss": 1.1466, "step": 3874 }, { "epoch": 368.95238095238096, "eval_accuracy": 0.7429748969651555, "eval_f1": 0.794241151769646, "eval_loss": 0.5526332855224609, "eval_precision": 0.7396648044692737, "eval_recall": 0.8575129533678757, "eval_roc_auc": 0.789238341968912, "eval_runtime": 1.1202, "eval_samples_per_second": 2382.668, "eval_steps_per_second": 5.356, "step": 3874 }, { "epoch": 369.04761904761904, "grad_norm": 5.24822998046875, "learning_rate": 7.260119940029985e-07, "loss": 1.1057, "step": 3875 }, { "epoch": 369.14285714285717, "grad_norm": 6.557324409484863, "learning_rate": 7.261994002998501e-07, "loss": 1.1194, "step": 3876 }, { "epoch": 369.23809523809524, "grad_norm": 10.006333351135254, "learning_rate": 7.263868065967016e-07, "loss": 1.0907, "step": 3877 }, { "epoch": 369.3333333333333, "grad_norm": 15.545181274414062, "learning_rate": 7.265742128935533e-07, "loss": 1.1058, "step": 3878 }, { "epoch": 369.42857142857144, "grad_norm": 12.693238258361816, "learning_rate": 7.267616191904048e-07, "loss": 1.1443, "step": 3879 }, { "epoch": 369.5238095238095, "grad_norm": 16.02750015258789, "learning_rate": 7.269490254872564e-07, "loss": 1.099, "step": 3880 }, { "epoch": 369.6190476190476, "grad_norm": 14.570963859558105, "learning_rate": 7.27136431784108e-07, "loss": 1.1059, "step": 3881 }, { "epoch": 369.7142857142857, "grad_norm": 17.04131317138672, "learning_rate": 7.273238380809596e-07, "loss": 1.0468, "step": 3882 }, { "epoch": 369.8095238095238, "grad_norm": 7.516332626342773, "learning_rate": 7.275112443778112e-07, "loss": 1.1619, "step": 3883 }, { "epoch": 369.9047619047619, "grad_norm": 7.201144218444824, "learning_rate": 7.276986506746627e-07, "loss": 1.1273, "step": 3884 }, { "epoch": 370.0, "grad_norm": 5.620227336883545, "learning_rate": 7.278860569715143e-07, "loss": 1.0963, "step": 3885 }, { "epoch": 370.0, "eval_accuracy": 0.7388535031847133, "eval_f1": 0.7882102704345184, "eval_loss": 0.5518689751625061, "eval_precision": 0.7424155695477962, "eval_recall": 0.8400259067357513, "eval_roc_auc": 0.789812320092113, "eval_runtime": 1.1569, "eval_samples_per_second": 2306.989, "eval_steps_per_second": 5.186, "step": 3885 }, { "epoch": 370.0952380952381, "grad_norm": 6.75601863861084, "learning_rate": 7.280734632683659e-07, "loss": 1.1112, "step": 3886 }, { "epoch": 370.1904761904762, "grad_norm": 5.5591020584106445, "learning_rate": 7.282608695652175e-07, "loss": 1.0847, "step": 3887 }, { "epoch": 370.2857142857143, "grad_norm": 9.115145683288574, "learning_rate": 7.28448275862069e-07, "loss": 1.1349, "step": 3888 }, { "epoch": 370.3809523809524, "grad_norm": 5.112926959991455, "learning_rate": 7.286356821589206e-07, "loss": 1.1822, "step": 3889 }, { "epoch": 370.4761904761905, "grad_norm": 6.981180191040039, "learning_rate": 7.288230884557721e-07, "loss": 1.1226, "step": 3890 }, { "epoch": 370.57142857142856, "grad_norm": 14.1383695602417, "learning_rate": 7.290104947526237e-07, "loss": 1.1291, "step": 3891 }, { "epoch": 370.6666666666667, "grad_norm": 16.603281021118164, "learning_rate": 7.291979010494752e-07, "loss": 1.0763, "step": 3892 }, { "epoch": 370.76190476190476, "grad_norm": 6.563464641571045, "learning_rate": 7.293853073463269e-07, "loss": 1.1482, "step": 3893 }, { "epoch": 370.85714285714283, "grad_norm": 13.651692390441895, "learning_rate": 7.295727136431785e-07, "loss": 1.104, "step": 3894 }, { "epoch": 370.95238095238096, "grad_norm": 5.060080528259277, "learning_rate": 7.2976011994003e-07, "loss": 1.1399, "step": 3895 }, { "epoch": 370.95238095238096, "eval_accuracy": 0.7381041588609967, "eval_f1": 0.7893943959023803, "eval_loss": 0.5518258810043335, "eval_precision": 0.7380281690140845, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.7901482440990213, "eval_runtime": 1.1343, "eval_samples_per_second": 2352.985, "eval_steps_per_second": 5.29, "step": 3895 }, { "epoch": 371.04761904761904, "grad_norm": 6.809076309204102, "learning_rate": 7.299475262368816e-07, "loss": 1.1489, "step": 3896 }, { "epoch": 371.14285714285717, "grad_norm": 6.76948356628418, "learning_rate": 7.301349325337331e-07, "loss": 1.0803, "step": 3897 }, { "epoch": 371.23809523809524, "grad_norm": 5.986545562744141, "learning_rate": 7.303223388305847e-07, "loss": 1.1069, "step": 3898 }, { "epoch": 371.3333333333333, "grad_norm": 11.000617980957031, "learning_rate": 7.305097451274362e-07, "loss": 1.1055, "step": 3899 }, { "epoch": 371.42857142857144, "grad_norm": 8.57867431640625, "learning_rate": 7.306971514242879e-07, "loss": 1.096, "step": 3900 }, { "epoch": 371.5238095238095, "grad_norm": 9.18748950958252, "learning_rate": 7.308845577211395e-07, "loss": 1.1128, "step": 3901 }, { "epoch": 371.6190476190476, "grad_norm": 10.393960952758789, "learning_rate": 7.31071964017991e-07, "loss": 1.1361, "step": 3902 }, { "epoch": 371.7142857142857, "grad_norm": 10.242207527160645, "learning_rate": 7.312593703148427e-07, "loss": 1.1515, "step": 3903 }, { "epoch": 371.8095238095238, "grad_norm": 6.255610466003418, "learning_rate": 7.314467766116942e-07, "loss": 1.0784, "step": 3904 }, { "epoch": 371.9047619047619, "grad_norm": 8.978156089782715, "learning_rate": 7.316341829085458e-07, "loss": 1.158, "step": 3905 }, { "epoch": 372.0, "grad_norm": 11.08545970916748, "learning_rate": 7.318215892053973e-07, "loss": 1.0757, "step": 3906 }, { "epoch": 372.0, "eval_accuracy": 0.7336080929186961, "eval_f1": 0.7808936825885978, "eval_loss": 0.5521113276481628, "eval_precision": 0.7448559670781894, "eval_recall": 0.8205958549222798, "eval_roc_auc": 0.7896076568796776, "eval_runtime": 1.1181, "eval_samples_per_second": 2387.001, "eval_steps_per_second": 5.366, "step": 3906 }, { "epoch": 372.0952380952381, "grad_norm": 5.756834030151367, "learning_rate": 7.32008995502249e-07, "loss": 1.1166, "step": 3907 }, { "epoch": 372.1904761904762, "grad_norm": 6.470827102661133, "learning_rate": 7.321964017991005e-07, "loss": 1.1019, "step": 3908 }, { "epoch": 372.2857142857143, "grad_norm": 9.52391242980957, "learning_rate": 7.323838080959521e-07, "loss": 1.1114, "step": 3909 }, { "epoch": 372.3809523809524, "grad_norm": 17.73433494567871, "learning_rate": 7.325712143928037e-07, "loss": 1.1026, "step": 3910 }, { "epoch": 372.4761904761905, "grad_norm": 6.577383041381836, "learning_rate": 7.327586206896552e-07, "loss": 1.0955, "step": 3911 }, { "epoch": 372.57142857142856, "grad_norm": 8.080799102783203, "learning_rate": 7.329460269865068e-07, "loss": 1.1049, "step": 3912 }, { "epoch": 372.6666666666667, "grad_norm": 16.410451889038086, "learning_rate": 7.331334332833583e-07, "loss": 1.1505, "step": 3913 }, { "epoch": 372.76190476190476, "grad_norm": 11.473193168640137, "learning_rate": 7.3332083958021e-07, "loss": 1.1487, "step": 3914 }, { "epoch": 372.85714285714283, "grad_norm": 6.292839050292969, "learning_rate": 7.335082458770615e-07, "loss": 1.0933, "step": 3915 }, { "epoch": 372.95238095238096, "grad_norm": 11.717289924621582, "learning_rate": 7.336956521739131e-07, "loss": 1.1058, "step": 3916 }, { "epoch": 372.95238095238096, "eval_accuracy": 0.7403521918321468, "eval_f1": 0.7896813353566009, "eval_loss": 0.5519756078720093, "eval_precision": 0.7430039977155911, "eval_recall": 0.8426165803108808, "eval_roc_auc": 0.7897651122625216, "eval_runtime": 1.1323, "eval_samples_per_second": 2357.052, "eval_steps_per_second": 5.299, "step": 3916 }, { "epoch": 373.04761904761904, "grad_norm": 9.770853996276855, "learning_rate": 7.338830584707646e-07, "loss": 1.1188, "step": 3917 }, { "epoch": 373.14285714285717, "grad_norm": 13.120342254638672, "learning_rate": 7.340704647676162e-07, "loss": 1.104, "step": 3918 }, { "epoch": 373.23809523809524, "grad_norm": 5.9968461990356445, "learning_rate": 7.342578710644677e-07, "loss": 1.1355, "step": 3919 }, { "epoch": 373.3333333333333, "grad_norm": 6.1175127029418945, "learning_rate": 7.344452773613193e-07, "loss": 1.1082, "step": 3920 }, { "epoch": 373.42857142857144, "grad_norm": 11.14828109741211, "learning_rate": 7.34632683658171e-07, "loss": 1.1189, "step": 3921 }, { "epoch": 373.5238095238095, "grad_norm": 7.950910568237305, "learning_rate": 7.348200899550225e-07, "loss": 1.1224, "step": 3922 }, { "epoch": 373.6190476190476, "grad_norm": 6.772651672363281, "learning_rate": 7.350074962518741e-07, "loss": 1.1048, "step": 3923 }, { "epoch": 373.7142857142857, "grad_norm": 8.026163101196289, "learning_rate": 7.351949025487256e-07, "loss": 1.1246, "step": 3924 }, { "epoch": 373.8095238095238, "grad_norm": 5.476925849914551, "learning_rate": 7.353823088455773e-07, "loss": 1.0943, "step": 3925 }, { "epoch": 373.9047619047619, "grad_norm": 10.513056755065918, "learning_rate": 7.355697151424288e-07, "loss": 1.1546, "step": 3926 }, { "epoch": 374.0, "grad_norm": 6.3184051513671875, "learning_rate": 7.357571214392804e-07, "loss": 1.0826, "step": 3927 }, { "epoch": 374.0, "eval_accuracy": 0.738478831022855, "eval_f1": 0.7883565797453002, "eval_loss": 0.5509572625160217, "eval_precision": 0.7411630558722919, "eval_recall": 0.8419689119170984, "eval_roc_auc": 0.7910938399539436, "eval_runtime": 1.128, "eval_samples_per_second": 2366.157, "eval_steps_per_second": 5.319, "step": 3927 }, { "epoch": 374.0952380952381, "grad_norm": 11.123753547668457, "learning_rate": 7.359445277361321e-07, "loss": 1.1463, "step": 3928 }, { "epoch": 374.1904761904762, "grad_norm": 6.112797737121582, "learning_rate": 7.361319340329836e-07, "loss": 1.0887, "step": 3929 }, { "epoch": 374.2857142857143, "grad_norm": 4.011114597320557, "learning_rate": 7.363193403298352e-07, "loss": 1.1321, "step": 3930 }, { "epoch": 374.3809523809524, "grad_norm": 6.350623607635498, "learning_rate": 7.365067466266867e-07, "loss": 1.1348, "step": 3931 }, { "epoch": 374.4761904761905, "grad_norm": 9.323946952819824, "learning_rate": 7.366941529235383e-07, "loss": 1.0847, "step": 3932 }, { "epoch": 374.57142857142856, "grad_norm": 5.713606834411621, "learning_rate": 7.368815592203898e-07, "loss": 1.1499, "step": 3933 }, { "epoch": 374.6666666666667, "grad_norm": 9.828307151794434, "learning_rate": 7.370689655172414e-07, "loss": 1.1031, "step": 3934 }, { "epoch": 374.76190476190476, "grad_norm": 12.144657135009766, "learning_rate": 7.37256371814093e-07, "loss": 1.1269, "step": 3935 }, { "epoch": 374.85714285714283, "grad_norm": 7.657914161682129, "learning_rate": 7.374437781109446e-07, "loss": 1.1093, "step": 3936 }, { "epoch": 374.95238095238096, "grad_norm": 9.174505233764648, "learning_rate": 7.376311844077962e-07, "loss": 1.0883, "step": 3937 }, { "epoch": 374.95238095238096, "eval_accuracy": 0.7418508804795804, "eval_f1": 0.7916540671303296, "eval_loss": 0.5501177310943604, "eval_precision": 0.742484401588202, "eval_recall": 0.8477979274611399, "eval_roc_auc": 0.7922084052964882, "eval_runtime": 1.1426, "eval_samples_per_second": 2335.804, "eval_steps_per_second": 5.251, "step": 3937 }, { "epoch": 375.04761904761904, "grad_norm": 11.815903663635254, "learning_rate": 7.378185907046477e-07, "loss": 1.1139, "step": 3938 }, { "epoch": 375.14285714285717, "grad_norm": 7.327791690826416, "learning_rate": 7.380059970014993e-07, "loss": 1.1217, "step": 3939 }, { "epoch": 375.23809523809524, "grad_norm": 8.6361083984375, "learning_rate": 7.381934032983508e-07, "loss": 1.1498, "step": 3940 }, { "epoch": 375.3333333333333, "grad_norm": 9.45090389251709, "learning_rate": 7.383808095952024e-07, "loss": 1.1283, "step": 3941 }, { "epoch": 375.42857142857144, "grad_norm": 8.466257095336914, "learning_rate": 7.38568215892054e-07, "loss": 1.1063, "step": 3942 }, { "epoch": 375.5238095238095, "grad_norm": 5.573893070220947, "learning_rate": 7.387556221889056e-07, "loss": 1.0902, "step": 3943 }, { "epoch": 375.6190476190476, "grad_norm": 5.684384346008301, "learning_rate": 7.389430284857571e-07, "loss": 1.1213, "step": 3944 }, { "epoch": 375.7142857142857, "grad_norm": 4.658603191375732, "learning_rate": 7.391304347826087e-07, "loss": 1.1028, "step": 3945 }, { "epoch": 375.8095238095238, "grad_norm": 8.962539672851562, "learning_rate": 7.393178410794602e-07, "loss": 1.1042, "step": 3946 }, { "epoch": 375.9047619047619, "grad_norm": 7.740326881408691, "learning_rate": 7.395052473763118e-07, "loss": 1.1129, "step": 3947 }, { "epoch": 376.0, "grad_norm": 6.349332332611084, "learning_rate": 7.396926536731635e-07, "loss": 1.09, "step": 3948 }, { "epoch": 376.0, "eval_accuracy": 0.7418508804795804, "eval_f1": 0.792156862745098, "eval_loss": 0.5506932735443115, "eval_precision": 0.7413890457368718, "eval_recall": 0.8503886010362695, "eval_roc_auc": 0.7915322394933794, "eval_runtime": 1.1262, "eval_samples_per_second": 2369.989, "eval_steps_per_second": 5.328, "step": 3948 }, { "epoch": 376.0952380952381, "grad_norm": 5.533696174621582, "learning_rate": 7.39880059970015e-07, "loss": 1.1238, "step": 3949 }, { "epoch": 376.1904761904762, "grad_norm": 9.323712348937988, "learning_rate": 7.400674662668667e-07, "loss": 1.1281, "step": 3950 }, { "epoch": 376.2857142857143, "grad_norm": 13.140124320983887, "learning_rate": 7.402548725637182e-07, "loss": 1.0889, "step": 3951 }, { "epoch": 376.3809523809524, "grad_norm": 5.626778602600098, "learning_rate": 7.404422788605698e-07, "loss": 1.1441, "step": 3952 }, { "epoch": 376.4761904761905, "grad_norm": 10.561690330505371, "learning_rate": 7.406296851574213e-07, "loss": 1.0828, "step": 3953 }, { "epoch": 376.57142857142856, "grad_norm": 5.1880621910095215, "learning_rate": 7.408170914542729e-07, "loss": 1.1144, "step": 3954 }, { "epoch": 376.6666666666667, "grad_norm": 10.816292762756348, "learning_rate": 7.410044977511245e-07, "loss": 1.1355, "step": 3955 }, { "epoch": 376.76190476190476, "grad_norm": 11.029444694519043, "learning_rate": 7.411919040479761e-07, "loss": 1.1215, "step": 3956 }, { "epoch": 376.85714285714283, "grad_norm": 5.887256145477295, "learning_rate": 7.413793103448277e-07, "loss": 1.1361, "step": 3957 }, { "epoch": 376.95238095238096, "grad_norm": 8.500890731811523, "learning_rate": 7.415667166416792e-07, "loss": 1.1084, "step": 3958 }, { "epoch": 376.95238095238096, "eval_accuracy": 0.7422255526414387, "eval_f1": 0.7920193470374849, "eval_loss": 0.5507376790046692, "eval_precision": 0.7426303854875284, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.7916801957397812, "eval_runtime": 1.134, "eval_samples_per_second": 2353.674, "eval_steps_per_second": 5.291, "step": 3958 }, { "epoch": 377.04761904761904, "grad_norm": 7.278326988220215, "learning_rate": 7.417541229385308e-07, "loss": 1.1019, "step": 3959 }, { "epoch": 377.14285714285717, "grad_norm": 6.423912525177002, "learning_rate": 7.419415292353823e-07, "loss": 1.1087, "step": 3960 }, { "epoch": 377.23809523809524, "grad_norm": 11.042447090148926, "learning_rate": 7.421289355322339e-07, "loss": 1.1223, "step": 3961 }, { "epoch": 377.3333333333333, "grad_norm": 14.736066818237305, "learning_rate": 7.423163418290854e-07, "loss": 1.099, "step": 3962 }, { "epoch": 377.42857142857144, "grad_norm": 8.444828987121582, "learning_rate": 7.425037481259371e-07, "loss": 1.1219, "step": 3963 }, { "epoch": 377.5238095238095, "grad_norm": 8.200831413269043, "learning_rate": 7.426911544227887e-07, "loss": 1.1237, "step": 3964 }, { "epoch": 377.6190476190476, "grad_norm": 12.050019264221191, "learning_rate": 7.428785607196402e-07, "loss": 1.0873, "step": 3965 }, { "epoch": 377.7142857142857, "grad_norm": 6.6788177490234375, "learning_rate": 7.430659670164918e-07, "loss": 1.108, "step": 3966 }, { "epoch": 377.8095238095238, "grad_norm": 10.5377779006958, "learning_rate": 7.432533733133433e-07, "loss": 1.1078, "step": 3967 }, { "epoch": 377.9047619047619, "grad_norm": 7.445323944091797, "learning_rate": 7.434407796101949e-07, "loss": 1.1545, "step": 3968 }, { "epoch": 378.0, "grad_norm": 9.764788627624512, "learning_rate": 7.436281859070464e-07, "loss": 1.1151, "step": 3969 }, { "epoch": 378.0, "eval_accuracy": 0.7392281753465717, "eval_f1": 0.7878048780487805, "eval_loss": 0.5502727627754211, "eval_precision": 0.7442396313364056, "eval_recall": 0.8367875647668394, "eval_roc_auc": 0.7921421991940126, "eval_runtime": 1.1304, "eval_samples_per_second": 2361.214, "eval_steps_per_second": 5.308, "step": 3969 }, { "epoch": 378.0952380952381, "grad_norm": 14.01278305053711, "learning_rate": 7.438155922038981e-07, "loss": 1.1209, "step": 3970 }, { "epoch": 378.1904761904762, "grad_norm": 5.303046226501465, "learning_rate": 7.440029985007496e-07, "loss": 1.1224, "step": 3971 }, { "epoch": 378.2857142857143, "grad_norm": 6.776137351989746, "learning_rate": 7.441904047976012e-07, "loss": 1.0761, "step": 3972 }, { "epoch": 378.3809523809524, "grad_norm": 5.707252025604248, "learning_rate": 7.443778110944529e-07, "loss": 1.1331, "step": 3973 }, { "epoch": 378.4761904761905, "grad_norm": 15.999380111694336, "learning_rate": 7.445652173913044e-07, "loss": 1.0887, "step": 3974 }, { "epoch": 378.57142857142856, "grad_norm": 11.217973709106445, "learning_rate": 7.44752623688156e-07, "loss": 1.1119, "step": 3975 }, { "epoch": 378.6666666666667, "grad_norm": 4.672736167907715, "learning_rate": 7.449400299850075e-07, "loss": 1.1536, "step": 3976 }, { "epoch": 378.76190476190476, "grad_norm": 12.972167015075684, "learning_rate": 7.451274362818592e-07, "loss": 1.1258, "step": 3977 }, { "epoch": 378.85714285714283, "grad_norm": 5.687925338745117, "learning_rate": 7.453148425787107e-07, "loss": 1.0886, "step": 3978 }, { "epoch": 378.95238095238096, "grad_norm": 6.426712989807129, "learning_rate": 7.455022488755623e-07, "loss": 1.0961, "step": 3979 }, { "epoch": 378.95238095238096, "eval_accuracy": 0.7399775196702885, "eval_f1": 0.7904589371980676, "eval_loss": 0.5504778623580933, "eval_precision": 0.7403846153846154, "eval_recall": 0.8477979274611399, "eval_roc_auc": 0.7919576856649396, "eval_runtime": 1.1251, "eval_samples_per_second": 2372.226, "eval_steps_per_second": 5.333, "step": 3979 }, { "epoch": 379.04761904761904, "grad_norm": 11.194128036499023, "learning_rate": 7.456896551724138e-07, "loss": 1.0735, "step": 3980 }, { "epoch": 379.14285714285717, "grad_norm": 8.991393089294434, "learning_rate": 7.458770614692654e-07, "loss": 1.1403, "step": 3981 }, { "epoch": 379.23809523809524, "grad_norm": 9.715619087219238, "learning_rate": 7.46064467766117e-07, "loss": 1.1153, "step": 3982 }, { "epoch": 379.3333333333333, "grad_norm": 11.953075408935547, "learning_rate": 7.462518740629685e-07, "loss": 1.1342, "step": 3983 }, { "epoch": 379.42857142857144, "grad_norm": 6.715610027313232, "learning_rate": 7.464392803598202e-07, "loss": 1.1291, "step": 3984 }, { "epoch": 379.5238095238095, "grad_norm": 7.38981294631958, "learning_rate": 7.466266866566717e-07, "loss": 1.0811, "step": 3985 }, { "epoch": 379.6190476190476, "grad_norm": 7.233059406280518, "learning_rate": 7.468140929535233e-07, "loss": 1.0664, "step": 3986 }, { "epoch": 379.7142857142857, "grad_norm": 5.6119561195373535, "learning_rate": 7.470014992503748e-07, "loss": 1.1032, "step": 3987 }, { "epoch": 379.8095238095238, "grad_norm": 12.48303508758545, "learning_rate": 7.471889055472264e-07, "loss": 1.0959, "step": 3988 }, { "epoch": 379.9047619047619, "grad_norm": 6.563333988189697, "learning_rate": 7.473763118440779e-07, "loss": 1.1254, "step": 3989 }, { "epoch": 380.0, "grad_norm": 8.098648071289062, "learning_rate": 7.475637181409295e-07, "loss": 1.136, "step": 3990 }, { "epoch": 380.0, "eval_accuracy": 0.7381041588609967, "eval_f1": 0.7887579329102448, "eval_loss": 0.5500577688217163, "eval_precision": 0.7393767705382436, "eval_recall": 0.8452072538860104, "eval_roc_auc": 0.7920089234312033, "eval_runtime": 1.1123, "eval_samples_per_second": 2399.447, "eval_steps_per_second": 5.394, "step": 3990 }, { "epoch": 380.0952380952381, "grad_norm": 6.370793342590332, "learning_rate": 7.477511244377812e-07, "loss": 1.107, "step": 3991 }, { "epoch": 380.1904761904762, "grad_norm": 5.635412693023682, "learning_rate": 7.479385307346327e-07, "loss": 1.098, "step": 3992 }, { "epoch": 380.2857142857143, "grad_norm": 9.10843563079834, "learning_rate": 7.481259370314843e-07, "loss": 1.1219, "step": 3993 }, { "epoch": 380.3809523809524, "grad_norm": 8.240547180175781, "learning_rate": 7.483133433283358e-07, "loss": 1.1008, "step": 3994 }, { "epoch": 380.4761904761905, "grad_norm": 8.950239181518555, "learning_rate": 7.485007496251875e-07, "loss": 1.1599, "step": 3995 }, { "epoch": 380.57142857142856, "grad_norm": 5.558511257171631, "learning_rate": 7.48688155922039e-07, "loss": 1.1444, "step": 3996 }, { "epoch": 380.6666666666667, "grad_norm": 6.004672050476074, "learning_rate": 7.488755622188906e-07, "loss": 1.1108, "step": 3997 }, { "epoch": 380.76190476190476, "grad_norm": 6.5706963539123535, "learning_rate": 7.490629685157422e-07, "loss": 1.1451, "step": 3998 }, { "epoch": 380.85714285714283, "grad_norm": 4.188346862792969, "learning_rate": 7.492503748125938e-07, "loss": 1.1113, "step": 3999 }, { "epoch": 380.95238095238096, "grad_norm": 17.704849243164062, "learning_rate": 7.494377811094454e-07, "loss": 1.0749, "step": 4000 }, { "epoch": 380.95238095238096, "eval_accuracy": 0.7358561258898464, "eval_f1": 0.7808517252098228, "eval_loss": 0.5495111346244812, "eval_precision": 0.7507471607890018, "eval_recall": 0.8134715025906736, "eval_roc_auc": 0.7926370178468625, "eval_runtime": 1.1209, "eval_samples_per_second": 2381.206, "eval_steps_per_second": 5.353, "step": 4000 }, { "epoch": 381.04761904761904, "grad_norm": 6.359234809875488, "learning_rate": 7.496251874062969e-07, "loss": 1.0951, "step": 4001 }, { "epoch": 381.14285714285717, "grad_norm": 15.279829025268555, "learning_rate": 7.498125937031485e-07, "loss": 1.122, "step": 4002 }, { "epoch": 381.23809523809524, "grad_norm": 4.818670749664307, "learning_rate": 7.5e-07, "loss": 1.1443, "step": 4003 }, { "epoch": 381.3333333333333, "grad_norm": 5.658208847045898, "learning_rate": 7.501874062968516e-07, "loss": 1.1217, "step": 4004 }, { "epoch": 381.42857142857144, "grad_norm": 14.737176895141602, "learning_rate": 7.503748125937032e-07, "loss": 1.1419, "step": 4005 }, { "epoch": 381.5238095238095, "grad_norm": 5.5258965492248535, "learning_rate": 7.505622188905548e-07, "loss": 1.1137, "step": 4006 }, { "epoch": 381.6190476190476, "grad_norm": 9.513168334960938, "learning_rate": 7.507496251874063e-07, "loss": 1.1272, "step": 4007 }, { "epoch": 381.7142857142857, "grad_norm": 10.469417572021484, "learning_rate": 7.509370314842579e-07, "loss": 1.0777, "step": 4008 }, { "epoch": 381.8095238095238, "grad_norm": 8.181276321411133, "learning_rate": 7.511244377811095e-07, "loss": 1.0905, "step": 4009 }, { "epoch": 381.9047619047619, "grad_norm": 6.987445831298828, "learning_rate": 7.51311844077961e-07, "loss": 1.0617, "step": 4010 }, { "epoch": 382.0, "grad_norm": 21.09636116027832, "learning_rate": 7.514992503748126e-07, "loss": 1.1038, "step": 4011 }, { "epoch": 382.0, "eval_accuracy": 0.7452229299363057, "eval_f1": 0.7980997624703088, "eval_loss": 0.5505495667457581, "eval_precision": 0.7368421052631579, "eval_recall": 0.8704663212435233, "eval_roc_auc": 0.7933411053540588, "eval_runtime": 1.1316, "eval_samples_per_second": 2358.552, "eval_steps_per_second": 5.302, "step": 4011 }, { "epoch": 382.0952380952381, "grad_norm": Infinity, "learning_rate": 7.514992503748126e-07, "loss": 1.1442, "step": 4012 }, { "epoch": 382.1904761904762, "grad_norm": 13.094733238220215, "learning_rate": 7.516866566716642e-07, "loss": 1.0918, "step": 4013 }, { "epoch": 382.2857142857143, "grad_norm": 23.418006896972656, "learning_rate": 7.518740629685158e-07, "loss": 1.2109, "step": 4014 }, { "epoch": 382.3809523809524, "grad_norm": 15.063214302062988, "learning_rate": 7.520614692653673e-07, "loss": 1.1248, "step": 4015 }, { "epoch": 382.4761904761905, "grad_norm": 10.05310344696045, "learning_rate": 7.522488755622189e-07, "loss": 1.0842, "step": 4016 }, { "epoch": 382.57142857142856, "grad_norm": 4.695971965789795, "learning_rate": 7.524362818590704e-07, "loss": 1.1224, "step": 4017 }, { "epoch": 382.6666666666667, "grad_norm": 9.64921760559082, "learning_rate": 7.526236881559221e-07, "loss": 1.0769, "step": 4018 }, { "epoch": 382.76190476190476, "grad_norm": 10.011499404907227, "learning_rate": 7.528110944527737e-07, "loss": 1.0853, "step": 4019 }, { "epoch": 382.85714285714283, "grad_norm": 6.553365230560303, "learning_rate": 7.529985007496252e-07, "loss": 1.1405, "step": 4020 }, { "epoch": 382.95238095238096, "grad_norm": 13.643135070800781, "learning_rate": 7.531859070464769e-07, "loss": 1.1103, "step": 4021 }, { "epoch": 382.95238095238096, "eval_accuracy": 0.7366054702135632, "eval_f1": 0.7828236021007106, "eval_loss": 0.549675464630127, "eval_precision": 0.748375664500886, "eval_recall": 0.8205958549222798, "eval_roc_auc": 0.7923963730569947, "eval_runtime": 1.1358, "eval_samples_per_second": 2349.91, "eval_steps_per_second": 5.283, "step": 4021 }, { "epoch": 383.04761904761904, "grad_norm": 6.7152018547058105, "learning_rate": 7.533733133433284e-07, "loss": 1.182, "step": 4022 }, { "epoch": 383.14285714285717, "grad_norm": 4.382837772369385, "learning_rate": 7.5356071964018e-07, "loss": 1.1322, "step": 4023 }, { "epoch": 383.23809523809524, "grad_norm": 11.175541877746582, "learning_rate": 7.537481259370315e-07, "loss": 1.0832, "step": 4024 }, { "epoch": 383.3333333333333, "grad_norm": 13.932941436767578, "learning_rate": 7.539355322338831e-07, "loss": 1.1331, "step": 4025 }, { "epoch": 383.42857142857144, "grad_norm": 6.4109015464782715, "learning_rate": 7.541229385307346e-07, "loss": 1.1014, "step": 4026 }, { "epoch": 383.5238095238095, "grad_norm": 4.632279872894287, "learning_rate": 7.543103448275863e-07, "loss": 1.1187, "step": 4027 }, { "epoch": 383.6190476190476, "grad_norm": 7.028567790985107, "learning_rate": 7.544977511244379e-07, "loss": 1.0876, "step": 4028 }, { "epoch": 383.7142857142857, "grad_norm": 13.858692169189453, "learning_rate": 7.546851574212894e-07, "loss": 1.1304, "step": 4029 }, { "epoch": 383.8095238095238, "grad_norm": 20.205524444580078, "learning_rate": 7.54872563718141e-07, "loss": 1.1553, "step": 4030 }, { "epoch": 383.9047619047619, "grad_norm": 9.382784843444824, "learning_rate": 7.550599700149925e-07, "loss": 1.1131, "step": 4031 }, { "epoch": 384.0, "grad_norm": 4.756422996520996, "learning_rate": 7.552473763118441e-07, "loss": 1.1189, "step": 4032 }, { "epoch": 384.0, "eval_accuracy": 0.7377294866991383, "eval_f1": 0.7863247863247863, "eval_loss": 0.5493919849395752, "eval_precision": 0.74364896073903, "eval_recall": 0.8341968911917098, "eval_roc_auc": 0.7924628670120898, "eval_runtime": 1.1398, "eval_samples_per_second": 2341.643, "eval_steps_per_second": 5.264, "step": 4032 }, { "epoch": 384.0952380952381, "grad_norm": 5.865123271942139, "learning_rate": 7.554347826086956e-07, "loss": 1.1313, "step": 4033 }, { "epoch": 384.1904761904762, "grad_norm": 5.505659580230713, "learning_rate": 7.556221889055473e-07, "loss": 1.1503, "step": 4034 }, { "epoch": 384.2857142857143, "grad_norm": 7.910295009613037, "learning_rate": 7.558095952023988e-07, "loss": 1.1272, "step": 4035 }, { "epoch": 384.3809523809524, "grad_norm": 16.181732177734375, "learning_rate": 7.559970014992504e-07, "loss": 1.112, "step": 4036 }, { "epoch": 384.4761904761905, "grad_norm": 21.702638626098633, "learning_rate": 7.56184407796102e-07, "loss": 1.0762, "step": 4037 }, { "epoch": 384.57142857142856, "grad_norm": 15.46338939666748, "learning_rate": 7.563718140929535e-07, "loss": 1.1196, "step": 4038 }, { "epoch": 384.6666666666667, "grad_norm": 5.850086212158203, "learning_rate": 7.565592203898051e-07, "loss": 1.1457, "step": 4039 }, { "epoch": 384.76190476190476, "grad_norm": 10.61465835571289, "learning_rate": 7.567466266866566e-07, "loss": 1.0985, "step": 4040 }, { "epoch": 384.85714285714283, "grad_norm": 9.136896133422852, "learning_rate": 7.569340329835083e-07, "loss": 1.0866, "step": 4041 }, { "epoch": 384.95238095238096, "grad_norm": 10.092803955078125, "learning_rate": 7.571214392803598e-07, "loss": 1.1282, "step": 4042 }, { "epoch": 384.95238095238096, "eval_accuracy": 0.7426002248032971, "eval_f1": 0.7913756453082296, "eval_loss": 0.5494597554206848, "eval_precision": 0.7449971412235563, "eval_recall": 0.8439119170984456, "eval_roc_auc": 0.7925371329879102, "eval_runtime": 1.1433, "eval_samples_per_second": 2334.451, "eval_steps_per_second": 5.248, "step": 4042 }, { "epoch": 385.04761904761904, "grad_norm": 6.845860481262207, "learning_rate": 7.573088455772114e-07, "loss": 1.1061, "step": 4043 }, { "epoch": 385.14285714285717, "grad_norm": 14.234851837158203, "learning_rate": 7.57496251874063e-07, "loss": 1.1518, "step": 4044 }, { "epoch": 385.23809523809524, "grad_norm": 12.365203857421875, "learning_rate": 7.576836581709146e-07, "loss": 1.0977, "step": 4045 }, { "epoch": 385.3333333333333, "grad_norm": 13.45079231262207, "learning_rate": 7.578710644677662e-07, "loss": 1.1092, "step": 4046 }, { "epoch": 385.42857142857144, "grad_norm": 12.979645729064941, "learning_rate": 7.580584707646177e-07, "loss": 1.1114, "step": 4047 }, { "epoch": 385.5238095238095, "grad_norm": 12.906009674072266, "learning_rate": 7.582458770614694e-07, "loss": 1.1234, "step": 4048 }, { "epoch": 385.6190476190476, "grad_norm": 23.5379695892334, "learning_rate": 7.584332833583209e-07, "loss": 1.0786, "step": 4049 }, { "epoch": 385.7142857142857, "grad_norm": 9.682621002197266, "learning_rate": 7.586206896551725e-07, "loss": 1.1018, "step": 4050 }, { "epoch": 385.8095238095238, "grad_norm": 7.639532566070557, "learning_rate": 7.58808095952024e-07, "loss": 1.1342, "step": 4051 }, { "epoch": 385.9047619047619, "grad_norm": 8.08156967163086, "learning_rate": 7.589955022488756e-07, "loss": 1.1269, "step": 4052 }, { "epoch": 386.0, "grad_norm": 5.800262928009033, "learning_rate": 7.591829085457271e-07, "loss": 1.0908, "step": 4053 }, { "epoch": 386.0, "eval_accuracy": 0.7426002248032971, "eval_f1": 0.7941264608930176, "eval_loss": 0.5496708750724792, "eval_precision": 0.7389849414389291, "eval_recall": 0.8581606217616581, "eval_roc_auc": 0.7928500287852621, "eval_runtime": 1.1583, "eval_samples_per_second": 2304.314, "eval_steps_per_second": 5.18, "step": 4053 }, { "epoch": 386.0952380952381, "grad_norm": 7.999423980712891, "learning_rate": 7.593703148425787e-07, "loss": 1.1201, "step": 4054 }, { "epoch": 386.1904761904762, "grad_norm": 8.465465545654297, "learning_rate": 7.595577211394304e-07, "loss": 1.1421, "step": 4055 }, { "epoch": 386.2857142857143, "grad_norm": 7.850752353668213, "learning_rate": 7.597451274362819e-07, "loss": 1.0854, "step": 4056 }, { "epoch": 386.3809523809524, "grad_norm": 4.966638565063477, "learning_rate": 7.599325337331335e-07, "loss": 1.132, "step": 4057 }, { "epoch": 386.4761904761905, "grad_norm": 6.809133529663086, "learning_rate": 7.60119940029985e-07, "loss": 1.096, "step": 4058 }, { "epoch": 386.57142857142856, "grad_norm": 5.391140460968018, "learning_rate": 7.603073463268366e-07, "loss": 1.1183, "step": 4059 }, { "epoch": 386.6666666666667, "grad_norm": 8.04463005065918, "learning_rate": 7.604947526236881e-07, "loss": 1.1102, "step": 4060 }, { "epoch": 386.76190476190476, "grad_norm": 5.56711483001709, "learning_rate": 7.606821589205397e-07, "loss": 1.1237, "step": 4061 }, { "epoch": 386.85714285714283, "grad_norm": 11.3066987991333, "learning_rate": 7.608695652173913e-07, "loss": 1.1338, "step": 4062 }, { "epoch": 386.95238095238096, "grad_norm": 14.796104431152344, "learning_rate": 7.610569715142429e-07, "loss": 1.0928, "step": 4063 }, { "epoch": 386.95238095238096, "eval_accuracy": 0.7366054702135632, "eval_f1": 0.782554902567275, "eval_loss": 0.5493862628936768, "eval_precision": 0.7489638839550029, "eval_recall": 0.819300518134715, "eval_roc_auc": 0.792762809441566, "eval_runtime": 1.1154, "eval_samples_per_second": 2392.873, "eval_steps_per_second": 5.379, "step": 4063 }, { "epoch": 387.04761904761904, "grad_norm": 15.751404762268066, "learning_rate": 7.612443778110945e-07, "loss": 1.0897, "step": 4064 }, { "epoch": 387.14285714285717, "grad_norm": 4.9107985496521, "learning_rate": 7.61431784107946e-07, "loss": 1.1318, "step": 4065 }, { "epoch": 387.23809523809524, "grad_norm": 9.686596870422363, "learning_rate": 7.616191904047977e-07, "loss": 1.0834, "step": 4066 }, { "epoch": 387.3333333333333, "grad_norm": 12.998448371887207, "learning_rate": 7.618065967016492e-07, "loss": 1.1354, "step": 4067 }, { "epoch": 387.42857142857144, "grad_norm": 6.6138153076171875, "learning_rate": 7.619940029985008e-07, "loss": 1.0676, "step": 4068 }, { "epoch": 387.5238095238095, "grad_norm": 12.226428985595703, "learning_rate": 7.621814092953524e-07, "loss": 1.1026, "step": 4069 }, { "epoch": 387.6190476190476, "grad_norm": 15.720524787902832, "learning_rate": 7.62368815592204e-07, "loss": 1.0988, "step": 4070 }, { "epoch": 387.7142857142857, "grad_norm": 22.66543197631836, "learning_rate": 7.625562218890555e-07, "loss": 1.1942, "step": 4071 }, { "epoch": 387.8095238095238, "grad_norm": 9.127182960510254, "learning_rate": 7.627436281859071e-07, "loss": 1.1264, "step": 4072 }, { "epoch": 387.9047619047619, "grad_norm": 6.516150951385498, "learning_rate": 7.629310344827587e-07, "loss": 1.1258, "step": 4073 }, { "epoch": 388.0, "grad_norm": 8.39951229095459, "learning_rate": 7.631184407796102e-07, "loss": 1.1152, "step": 4074 }, { "epoch": 388.0, "eval_accuracy": 0.7392281753465717, "eval_f1": 0.7861094038106945, "eval_loss": 0.5489965081214905, "eval_precision": 0.747953216374269, "eval_recall": 0.8283678756476683, "eval_roc_auc": 0.7930201496833621, "eval_runtime": 1.1205, "eval_samples_per_second": 2381.989, "eval_steps_per_second": 5.355, "step": 4074 }, { "epoch": 388.0952380952381, "grad_norm": 17.57448387145996, "learning_rate": 7.633058470764619e-07, "loss": 1.1187, "step": 4075 }, { "epoch": 388.1904761904762, "grad_norm": 9.762043952941895, "learning_rate": 7.634932533733134e-07, "loss": 1.075, "step": 4076 }, { "epoch": 388.2857142857143, "grad_norm": 8.012761116027832, "learning_rate": 7.63680659670165e-07, "loss": 1.1115, "step": 4077 }, { "epoch": 388.3809523809524, "grad_norm": 7.436084747314453, "learning_rate": 7.638680659670165e-07, "loss": 1.1101, "step": 4078 }, { "epoch": 388.4761904761905, "grad_norm": 11.756044387817383, "learning_rate": 7.640554722638681e-07, "loss": 1.1194, "step": 4079 }, { "epoch": 388.57142857142856, "grad_norm": 6.643121719360352, "learning_rate": 7.642428785607196e-07, "loss": 1.0684, "step": 4080 }, { "epoch": 388.6666666666667, "grad_norm": 7.203343868255615, "learning_rate": 7.644302848575712e-07, "loss": 1.0863, "step": 4081 }, { "epoch": 388.76190476190476, "grad_norm": 20.424135208129883, "learning_rate": 7.646176911544229e-07, "loss": 1.1182, "step": 4082 }, { "epoch": 388.85714285714283, "grad_norm": 24.31191062927246, "learning_rate": 7.648050974512744e-07, "loss": 1.15, "step": 4083 }, { "epoch": 388.95238095238096, "grad_norm": 14.023303031921387, "learning_rate": 7.64992503748126e-07, "loss": 1.145, "step": 4084 }, { "epoch": 388.95238095238096, "eval_accuracy": 0.744473585612589, "eval_f1": 0.7951951951951952, "eval_loss": 0.5493708848953247, "eval_precision": 0.7413213885778276, "eval_recall": 0.8575129533678757, "eval_roc_auc": 0.7932363270005757, "eval_runtime": 1.1349, "eval_samples_per_second": 2351.699, "eval_steps_per_second": 5.287, "step": 4084 }, { "epoch": 389.04761904761904, "grad_norm": 9.623614311218262, "learning_rate": 7.651799100449775e-07, "loss": 1.1326, "step": 4085 }, { "epoch": 389.14285714285717, "grad_norm": 8.110107421875, "learning_rate": 7.653673163418291e-07, "loss": 1.079, "step": 4086 }, { "epoch": 389.23809523809524, "grad_norm": 6.075845241546631, "learning_rate": 7.655547226386806e-07, "loss": 1.0716, "step": 4087 }, { "epoch": 389.3333333333333, "grad_norm": 9.143749237060547, "learning_rate": 7.657421289355323e-07, "loss": 1.0784, "step": 4088 }, { "epoch": 389.42857142857144, "grad_norm": 16.901330947875977, "learning_rate": 7.659295352323838e-07, "loss": 1.1384, "step": 4089 }, { "epoch": 389.5238095238095, "grad_norm": 24.563400268554688, "learning_rate": 7.661169415292354e-07, "loss": 1.0713, "step": 4090 }, { "epoch": 389.6190476190476, "grad_norm": 14.202386856079102, "learning_rate": 7.663043478260871e-07, "loss": 1.1105, "step": 4091 }, { "epoch": 389.7142857142857, "grad_norm": 4.999241828918457, "learning_rate": 7.664917541229386e-07, "loss": 1.1261, "step": 4092 }, { "epoch": 389.8095238095238, "grad_norm": 8.44494342803955, "learning_rate": 7.666791604197902e-07, "loss": 1.1392, "step": 4093 }, { "epoch": 389.9047619047619, "grad_norm": 11.428085327148438, "learning_rate": 7.668665667166417e-07, "loss": 1.1268, "step": 4094 }, { "epoch": 390.0, "grad_norm": 8.754639625549316, "learning_rate": 7.670539730134933e-07, "loss": 1.166, "step": 4095 }, { "epoch": 390.0, "eval_accuracy": 0.7396028475084301, "eval_f1": 0.7871362940275651, "eval_loss": 0.5485778450965881, "eval_precision": 0.7466589192330041, "eval_recall": 0.8322538860103627, "eval_roc_auc": 0.7935293609671848, "eval_runtime": 1.1308, "eval_samples_per_second": 2360.28, "eval_steps_per_second": 5.306, "step": 4095 }, { "epoch": 390.0952380952381, "grad_norm": 6.919116020202637, "learning_rate": 7.672413793103449e-07, "loss": 1.0923, "step": 4096 }, { "epoch": 390.1904761904762, "grad_norm": 9.43787670135498, "learning_rate": 7.674287856071965e-07, "loss": 1.1345, "step": 4097 }, { "epoch": 390.2857142857143, "grad_norm": 5.613615989685059, "learning_rate": 7.67616191904048e-07, "loss": 1.0736, "step": 4098 }, { "epoch": 390.3809523809524, "grad_norm": 9.838118553161621, "learning_rate": 7.678035982008996e-07, "loss": 1.1262, "step": 4099 }, { "epoch": 390.4761904761905, "grad_norm": 22.736331939697266, "learning_rate": 7.679910044977512e-07, "loss": 1.1702, "step": 4100 }, { "epoch": 390.57142857142856, "grad_norm": 11.852968215942383, "learning_rate": 7.681784107946027e-07, "loss": 1.1134, "step": 4101 }, { "epoch": 390.6666666666667, "grad_norm": 5.733023166656494, "learning_rate": 7.683658170914543e-07, "loss": 1.094, "step": 4102 }, { "epoch": 390.76190476190476, "grad_norm": 8.226594924926758, "learning_rate": 7.685532233883059e-07, "loss": 1.1004, "step": 4103 }, { "epoch": 390.85714285714283, "grad_norm": 8.334565162658691, "learning_rate": 7.687406296851575e-07, "loss": 1.1379, "step": 4104 }, { "epoch": 390.95238095238096, "grad_norm": 7.8795976638793945, "learning_rate": 7.68928035982009e-07, "loss": 1.0851, "step": 4105 }, { "epoch": 390.95238095238096, "eval_accuracy": 0.7403521918321468, "eval_f1": 0.7841793833696667, "eval_loss": 0.5487409234046936, "eval_precision": 0.755248950209958, "eval_recall": 0.8154145077720207, "eval_roc_auc": 0.7933934945308002, "eval_runtime": 1.1417, "eval_samples_per_second": 2337.756, "eval_steps_per_second": 5.255, "step": 4105 }, { "epoch": 391.04761904761904, "grad_norm": 24.32604217529297, "learning_rate": 7.691154422788606e-07, "loss": 1.0997, "step": 4106 }, { "epoch": 391.14285714285717, "grad_norm": 21.518884658813477, "learning_rate": 7.693028485757121e-07, "loss": 1.1185, "step": 4107 }, { "epoch": 391.23809523809524, "grad_norm": 12.176133155822754, "learning_rate": 7.694902548725637e-07, "loss": 1.1042, "step": 4108 }, { "epoch": 391.3333333333333, "grad_norm": 5.470086574554443, "learning_rate": 7.696776611694153e-07, "loss": 1.0876, "step": 4109 }, { "epoch": 391.42857142857144, "grad_norm": 8.903295516967773, "learning_rate": 7.698650674662669e-07, "loss": 1.119, "step": 4110 }, { "epoch": 391.5238095238095, "grad_norm": 6.113744258880615, "learning_rate": 7.700524737631185e-07, "loss": 1.1801, "step": 4111 }, { "epoch": 391.6190476190476, "grad_norm": 6.159307479858398, "learning_rate": 7.7023988005997e-07, "loss": 1.1089, "step": 4112 }, { "epoch": 391.7142857142857, "grad_norm": 16.8972110748291, "learning_rate": 7.704272863568217e-07, "loss": 1.1127, "step": 4113 }, { "epoch": 391.8095238095238, "grad_norm": 12.682228088378906, "learning_rate": 7.706146926536732e-07, "loss": 1.1253, "step": 4114 }, { "epoch": 391.9047619047619, "grad_norm": 12.118672370910645, "learning_rate": 7.708020989505248e-07, "loss": 1.083, "step": 4115 }, { "epoch": 392.0, "grad_norm": 6.608816146850586, "learning_rate": 7.709895052473763e-07, "loss": 1.0857, "step": 4116 }, { "epoch": 392.0, "eval_accuracy": 0.7392281753465717, "eval_f1": 0.7851851851851852, "eval_loss": 0.5484685301780701, "eval_precision": 0.75, "eval_recall": 0.8238341968911918, "eval_roc_auc": 0.7933667242371906, "eval_runtime": 1.1364, "eval_samples_per_second": 2348.638, "eval_steps_per_second": 5.28, "step": 4116 }, { "epoch": 392.0952380952381, "grad_norm": 7.915292739868164, "learning_rate": 7.71176911544228e-07, "loss": 1.1346, "step": 4117 }, { "epoch": 392.1904761904762, "grad_norm": 20.730438232421875, "learning_rate": 7.713643178410796e-07, "loss": 1.0698, "step": 4118 }, { "epoch": 392.2857142857143, "grad_norm": 9.250012397766113, "learning_rate": 7.715517241379311e-07, "loss": 1.1287, "step": 4119 }, { "epoch": 392.3809523809524, "grad_norm": 10.762714385986328, "learning_rate": 7.717391304347827e-07, "loss": 1.0905, "step": 4120 }, { "epoch": 392.4761904761905, "grad_norm": 24.7636775970459, "learning_rate": 7.719265367316342e-07, "loss": 1.1358, "step": 4121 }, { "epoch": 392.57142857142856, "grad_norm": 7.282887935638428, "learning_rate": 7.721139430284858e-07, "loss": 1.0824, "step": 4122 }, { "epoch": 392.6666666666667, "grad_norm": 3.8522496223449707, "learning_rate": 7.723013493253373e-07, "loss": 1.1061, "step": 4123 }, { "epoch": 392.76190476190476, "grad_norm": 5.173220634460449, "learning_rate": 7.72488755622189e-07, "loss": 1.0511, "step": 4124 }, { "epoch": 392.85714285714283, "grad_norm": 7.5248918533325195, "learning_rate": 7.726761619190405e-07, "loss": 1.0936, "step": 4125 }, { "epoch": 392.95238095238096, "grad_norm": 7.896293640136719, "learning_rate": 7.728635682158921e-07, "loss": 1.0633, "step": 4126 }, { "epoch": 392.95238095238096, "eval_accuracy": 0.744473585612589, "eval_f1": 0.7947019867549668, "eval_loss": 0.5479307770729065, "eval_precision": 0.7424071991001124, "eval_recall": 0.8549222797927462, "eval_roc_auc": 0.7944922279792745, "eval_runtime": 1.1524, "eval_samples_per_second": 2316.088, "eval_steps_per_second": 5.207, "step": 4126 }, { "epoch": 393.04761904761904, "grad_norm": 14.139813423156738, "learning_rate": 7.730509745127437e-07, "loss": 1.1619, "step": 4127 }, { "epoch": 393.14285714285717, "grad_norm": 9.308013916015625, "learning_rate": 7.732383808095952e-07, "loss": 1.1096, "step": 4128 }, { "epoch": 393.23809523809524, "grad_norm": 5.986069679260254, "learning_rate": 7.734257871064468e-07, "loss": 1.1388, "step": 4129 }, { "epoch": 393.3333333333333, "grad_norm": 12.865239143371582, "learning_rate": 7.736131934032983e-07, "loss": 1.1361, "step": 4130 }, { "epoch": 393.42857142857144, "grad_norm": 8.613828659057617, "learning_rate": 7.7380059970015e-07, "loss": 1.0821, "step": 4131 }, { "epoch": 393.5238095238095, "grad_norm": 11.044524192810059, "learning_rate": 7.739880059970015e-07, "loss": 1.1162, "step": 4132 }, { "epoch": 393.6190476190476, "grad_norm": 5.985708236694336, "learning_rate": 7.741754122938531e-07, "loss": 1.1143, "step": 4133 }, { "epoch": 393.7142857142857, "grad_norm": 4.828469753265381, "learning_rate": 7.743628185907046e-07, "loss": 1.0967, "step": 4134 }, { "epoch": 393.8095238095238, "grad_norm": 9.598358154296875, "learning_rate": 7.745502248875562e-07, "loss": 1.1205, "step": 4135 }, { "epoch": 393.9047619047619, "grad_norm": 6.418941497802734, "learning_rate": 7.747376311844079e-07, "loss": 1.1402, "step": 4136 }, { "epoch": 394.0, "grad_norm": 14.198280334472656, "learning_rate": 7.749250374812594e-07, "loss": 1.1459, "step": 4137 }, { "epoch": 394.0, "eval_accuracy": 0.7440989134507306, "eval_f1": 0.7920852359208523, "eval_loss": 0.5479085445404053, "eval_precision": 0.7472716829408386, "eval_recall": 0.8426165803108808, "eval_roc_auc": 0.7939928036845136, "eval_runtime": 1.1257, "eval_samples_per_second": 2370.9, "eval_steps_per_second": 5.33, "step": 4137 }, { "epoch": 394.0952380952381, "grad_norm": 7.425207614898682, "learning_rate": 7.751124437781111e-07, "loss": 1.0908, "step": 4138 }, { "epoch": 394.1904761904762, "grad_norm": 7.119057655334473, "learning_rate": 7.752998500749626e-07, "loss": 1.0966, "step": 4139 }, { "epoch": 394.2857142857143, "grad_norm": 10.171456336975098, "learning_rate": 7.754872563718142e-07, "loss": 1.1169, "step": 4140 }, { "epoch": 394.3809523809524, "grad_norm": 8.68658447265625, "learning_rate": 7.756746626686657e-07, "loss": 1.145, "step": 4141 }, { "epoch": 394.4761904761905, "grad_norm": 6.64039421081543, "learning_rate": 7.758620689655173e-07, "loss": 1.127, "step": 4142 }, { "epoch": 394.57142857142856, "grad_norm": 18.38219451904297, "learning_rate": 7.760494752623688e-07, "loss": 1.1243, "step": 4143 }, { "epoch": 394.6666666666667, "grad_norm": 12.123787879943848, "learning_rate": 7.762368815592204e-07, "loss": 1.1077, "step": 4144 }, { "epoch": 394.76190476190476, "grad_norm": 18.44883918762207, "learning_rate": 7.764242878560721e-07, "loss": 1.0949, "step": 4145 }, { "epoch": 394.85714285714283, "grad_norm": 6.585038661956787, "learning_rate": 7.766116941529236e-07, "loss": 1.1109, "step": 4146 }, { "epoch": 394.95238095238096, "grad_norm": 8.92080020904541, "learning_rate": 7.767991004497752e-07, "loss": 1.0813, "step": 4147 }, { "epoch": 394.95238095238096, "eval_accuracy": 0.7429748969651555, "eval_f1": 0.7914893617021277, "eval_loss": 0.5480291843414307, "eval_precision": 0.7457044673539519, "eval_recall": 0.8432642487046632, "eval_roc_auc": 0.7937679907887162, "eval_runtime": 1.1425, "eval_samples_per_second": 2336.026, "eval_steps_per_second": 5.251, "step": 4147 }, { "epoch": 395.04761904761904, "grad_norm": 25.43185043334961, "learning_rate": 7.769865067466267e-07, "loss": 1.1358, "step": 4148 }, { "epoch": 395.14285714285717, "grad_norm": 14.379817008972168, "learning_rate": 7.771739130434783e-07, "loss": 1.1052, "step": 4149 }, { "epoch": 395.23809523809524, "grad_norm": 10.084162712097168, "learning_rate": 7.773613193403298e-07, "loss": 1.1024, "step": 4150 }, { "epoch": 395.3333333333333, "grad_norm": 11.979368209838867, "learning_rate": 7.775487256371814e-07, "loss": 1.1147, "step": 4151 }, { "epoch": 395.42857142857144, "grad_norm": 12.168645858764648, "learning_rate": 7.77736131934033e-07, "loss": 1.1247, "step": 4152 }, { "epoch": 395.5238095238095, "grad_norm": 9.172712326049805, "learning_rate": 7.779235382308846e-07, "loss": 1.1047, "step": 4153 }, { "epoch": 395.6190476190476, "grad_norm": 7.719291687011719, "learning_rate": 7.781109445277362e-07, "loss": 1.1277, "step": 4154 }, { "epoch": 395.7142857142857, "grad_norm": 7.303157806396484, "learning_rate": 7.782983508245877e-07, "loss": 1.0759, "step": 4155 }, { "epoch": 395.8095238095238, "grad_norm": 17.161909103393555, "learning_rate": 7.784857571214393e-07, "loss": 1.0705, "step": 4156 }, { "epoch": 395.9047619047619, "grad_norm": 6.004673957824707, "learning_rate": 7.786731634182908e-07, "loss": 1.1455, "step": 4157 }, { "epoch": 396.0, "grad_norm": 8.70738697052002, "learning_rate": 7.788605697151425e-07, "loss": 1.1343, "step": 4158 }, { "epoch": 396.0, "eval_accuracy": 0.7411015361558636, "eval_f1": 0.7878415719987719, "eval_loss": 0.5477091670036316, "eval_precision": 0.7489784004670169, "eval_recall": 0.8309585492227979, "eval_roc_auc": 0.7940800230282095, "eval_runtime": 1.1309, "eval_samples_per_second": 2360.138, "eval_steps_per_second": 5.306, "step": 4158 }, { "epoch": 396.0952380952381, "grad_norm": 7.129392147064209, "learning_rate": 7.79047976011994e-07, "loss": 1.1285, "step": 4159 }, { "epoch": 396.1904761904762, "grad_norm": 7.756862163543701, "learning_rate": 7.792353823088456e-07, "loss": 1.1352, "step": 4160 }, { "epoch": 396.2857142857143, "grad_norm": 7.004748344421387, "learning_rate": 7.794227886056972e-07, "loss": 1.0852, "step": 4161 }, { "epoch": 396.3809523809524, "grad_norm": 8.161051750183105, "learning_rate": 7.796101949025488e-07, "loss": 1.0557, "step": 4162 }, { "epoch": 396.4761904761905, "grad_norm": 12.619900703430176, "learning_rate": 7.797976011994004e-07, "loss": 1.0826, "step": 4163 }, { "epoch": 396.57142857142856, "grad_norm": 19.46121597290039, "learning_rate": 7.799850074962519e-07, "loss": 1.139, "step": 4164 }, { "epoch": 396.6666666666667, "grad_norm": 6.766005516052246, "learning_rate": 7.801724137931035e-07, "loss": 1.0997, "step": 4165 }, { "epoch": 396.76190476190476, "grad_norm": 5.906949520111084, "learning_rate": 7.803598200899551e-07, "loss": 1.0896, "step": 4166 }, { "epoch": 396.85714285714283, "grad_norm": 7.1702165603637695, "learning_rate": 7.805472263868067e-07, "loss": 1.1346, "step": 4167 }, { "epoch": 396.95238095238096, "grad_norm": 10.30286979675293, "learning_rate": 7.807346326836582e-07, "loss": 1.0979, "step": 4168 }, { "epoch": 396.95238095238096, "eval_accuracy": 0.7399775196702885, "eval_f1": 0.7868550368550369, "eval_loss": 0.5477451682090759, "eval_precision": 0.7482476635514018, "eval_recall": 0.8296632124352331, "eval_roc_auc": 0.7941706966033392, "eval_runtime": 1.1281, "eval_samples_per_second": 2365.986, "eval_steps_per_second": 5.319, "step": 4168 }, { "epoch": 397.04761904761904, "grad_norm": 19.294652938842773, "learning_rate": 7.809220389805098e-07, "loss": 1.1442, "step": 4169 }, { "epoch": 397.14285714285717, "grad_norm": 6.437745094299316, "learning_rate": 7.811094452773613e-07, "loss": 1.127, "step": 4170 }, { "epoch": 397.23809523809524, "grad_norm": 11.683369636535645, "learning_rate": 7.81296851574213e-07, "loss": 1.0871, "step": 4171 }, { "epoch": 397.3333333333333, "grad_norm": 13.600419998168945, "learning_rate": 7.814842578710645e-07, "loss": 1.1089, "step": 4172 }, { "epoch": 397.42857142857144, "grad_norm": 8.484980583190918, "learning_rate": 7.816716641679161e-07, "loss": 1.1061, "step": 4173 }, { "epoch": 397.5238095238095, "grad_norm": 6.674485206604004, "learning_rate": 7.818590704647676e-07, "loss": 1.1898, "step": 4174 }, { "epoch": 397.6190476190476, "grad_norm": 7.062256336212158, "learning_rate": 7.820464767616192e-07, "loss": 1.0903, "step": 4175 }, { "epoch": 397.7142857142857, "grad_norm": 6.853922367095947, "learning_rate": 7.822338830584709e-07, "loss": 1.0625, "step": 4176 }, { "epoch": 397.8095238095238, "grad_norm": 12.509185791015625, "learning_rate": 7.824212893553223e-07, "loss": 1.1676, "step": 4177 }, { "epoch": 397.9047619047619, "grad_norm": 7.87623929977417, "learning_rate": 7.82608695652174e-07, "loss": 1.1061, "step": 4178 }, { "epoch": 398.0, "grad_norm": 7.967573642730713, "learning_rate": 7.827961019490254e-07, "loss": 1.1038, "step": 4179 }, { "epoch": 398.0, "eval_accuracy": 0.7422255526414387, "eval_f1": 0.7892156862745098, "eval_loss": 0.547892689704895, "eval_precision": 0.7488372093023256, "eval_recall": 0.8341968911917098, "eval_roc_auc": 0.7940877950489349, "eval_runtime": 1.1298, "eval_samples_per_second": 2362.435, "eval_steps_per_second": 5.311, "step": 4179 }, { "epoch": 398.0952380952381, "grad_norm": 7.890987396240234, "learning_rate": 7.829835082458771e-07, "loss": 1.1294, "step": 4180 }, { "epoch": 398.1904761904762, "grad_norm": 6.791111946105957, "learning_rate": 7.831709145427287e-07, "loss": 1.1611, "step": 4181 }, { "epoch": 398.2857142857143, "grad_norm": 18.181640625, "learning_rate": 7.833583208395802e-07, "loss": 1.1183, "step": 4182 }, { "epoch": 398.3809523809524, "grad_norm": 11.313365936279297, "learning_rate": 7.835457271364319e-07, "loss": 1.1054, "step": 4183 }, { "epoch": 398.4761904761905, "grad_norm": 9.285812377929688, "learning_rate": 7.837331334332834e-07, "loss": 1.1179, "step": 4184 }, { "epoch": 398.57142857142856, "grad_norm": 15.523584365844727, "learning_rate": 7.83920539730135e-07, "loss": 1.0533, "step": 4185 }, { "epoch": 398.6666666666667, "grad_norm": 10.66666030883789, "learning_rate": 7.841079460269865e-07, "loss": 1.1006, "step": 4186 }, { "epoch": 398.76190476190476, "grad_norm": 5.271409511566162, "learning_rate": 7.842953523238382e-07, "loss": 1.1081, "step": 4187 }, { "epoch": 398.85714285714283, "grad_norm": 7.439558029174805, "learning_rate": 7.844827586206896e-07, "loss": 1.0676, "step": 4188 }, { "epoch": 398.95238095238096, "grad_norm": 6.201678276062012, "learning_rate": 7.846701649175413e-07, "loss": 1.1458, "step": 4189 }, { "epoch": 398.95238095238096, "eval_accuracy": 0.7448482577744474, "eval_f1": 0.7965342097400657, "eval_loss": 0.5484153032302856, "eval_precision": 0.7393233499722685, "eval_recall": 0.8633419689119171, "eval_roc_auc": 0.794457973517559, "eval_runtime": 1.1373, "eval_samples_per_second": 2346.777, "eval_steps_per_second": 5.276, "step": 4189 }, { "epoch": 399.04761904761904, "grad_norm": 5.896444320678711, "learning_rate": 7.84857571214393e-07, "loss": 1.0803, "step": 4190 }, { "epoch": 399.14285714285717, "grad_norm": 19.47102165222168, "learning_rate": 7.850449775112444e-07, "loss": 1.1401, "step": 4191 }, { "epoch": 399.23809523809524, "grad_norm": 14.513976097106934, "learning_rate": 7.852323838080961e-07, "loss": 1.1022, "step": 4192 }, { "epoch": 399.3333333333333, "grad_norm": 19.547645568847656, "learning_rate": 7.854197901049475e-07, "loss": 1.1015, "step": 4193 }, { "epoch": 399.42857142857144, "grad_norm": 6.47927713394165, "learning_rate": 7.856071964017992e-07, "loss": 1.0758, "step": 4194 }, { "epoch": 399.5238095238095, "grad_norm": 11.972545623779297, "learning_rate": 7.857946026986506e-07, "loss": 1.1458, "step": 4195 }, { "epoch": 399.6190476190476, "grad_norm": 6.737449645996094, "learning_rate": 7.859820089955023e-07, "loss": 1.1259, "step": 4196 }, { "epoch": 399.7142857142857, "grad_norm": 4.990560531616211, "learning_rate": 7.86169415292354e-07, "loss": 1.1201, "step": 4197 }, { "epoch": 399.8095238095238, "grad_norm": 14.076717376708984, "learning_rate": 7.863568215892054e-07, "loss": 1.1131, "step": 4198 }, { "epoch": 399.9047619047619, "grad_norm": 13.812651634216309, "learning_rate": 7.865442278860571e-07, "loss": 1.0948, "step": 4199 }, { "epoch": 400.0, "grad_norm": 11.414024353027344, "learning_rate": 7.867316341829085e-07, "loss": 1.1089, "step": 4200 }, { "epoch": 400.0, "eval_accuracy": 0.7418508804795804, "eval_f1": 0.7879347491535857, "eval_loss": 0.547219455242157, "eval_precision": 0.750733137829912, "eval_recall": 0.8290155440414507, "eval_roc_auc": 0.7948862982153139, "eval_runtime": 1.1302, "eval_samples_per_second": 2361.519, "eval_steps_per_second": 5.309, "step": 4200 }, { "epoch": 400.0952380952381, "grad_norm": 5.199063777923584, "learning_rate": 7.869190404797602e-07, "loss": 1.1062, "step": 4201 }, { "epoch": 400.1904761904762, "grad_norm": 7.194316387176514, "learning_rate": 7.871064467766116e-07, "loss": 1.108, "step": 4202 }, { "epoch": 400.2857142857143, "grad_norm": 15.215425491333008, "learning_rate": 7.872938530734633e-07, "loss": 1.0974, "step": 4203 }, { "epoch": 400.3809523809524, "grad_norm": 6.132933616638184, "learning_rate": 7.874812593703149e-07, "loss": 1.1083, "step": 4204 }, { "epoch": 400.4761904761905, "grad_norm": 7.98865270614624, "learning_rate": 7.876686656671665e-07, "loss": 1.0969, "step": 4205 }, { "epoch": 400.57142857142856, "grad_norm": 21.348203659057617, "learning_rate": 7.87856071964018e-07, "loss": 1.1038, "step": 4206 }, { "epoch": 400.6666666666667, "grad_norm": 21.51778793334961, "learning_rate": 7.880434782608696e-07, "loss": 1.1275, "step": 4207 }, { "epoch": 400.76190476190476, "grad_norm": 4.526026248931885, "learning_rate": 7.882308845577213e-07, "loss": 1.1037, "step": 4208 }, { "epoch": 400.85714285714283, "grad_norm": 11.087535858154297, "learning_rate": 7.884182908545727e-07, "loss": 1.1356, "step": 4209 }, { "epoch": 400.95238095238096, "grad_norm": 11.458879470825195, "learning_rate": 7.886056971514244e-07, "loss": 1.1229, "step": 4210 }, { "epoch": 400.95238095238096, "eval_accuracy": 0.7388535031847133, "eval_f1": 0.7829336655247586, "eval_loss": 0.5476679801940918, "eval_precision": 0.7540491901619676, "eval_recall": 0.814119170984456, "eval_roc_auc": 0.7943388025331031, "eval_runtime": 1.1127, "eval_samples_per_second": 2398.776, "eval_steps_per_second": 5.393, "step": 4210 }, { "epoch": 401.04761904761904, "grad_norm": 9.395782470703125, "learning_rate": 7.88793103448276e-07, "loss": 1.1361, "step": 4211 }, { "epoch": 401.14285714285717, "grad_norm": 22.677854537963867, "learning_rate": 7.889805097451275e-07, "loss": 1.1372, "step": 4212 }, { "epoch": 401.23809523809524, "grad_norm": 15.018343925476074, "learning_rate": 7.891679160419791e-07, "loss": 1.0888, "step": 4213 }, { "epoch": 401.3333333333333, "grad_norm": 5.283858776092529, "learning_rate": 7.893553223388306e-07, "loss": 1.1102, "step": 4214 }, { "epoch": 401.42857142857144, "grad_norm": 9.966854095458984, "learning_rate": 7.895427286356822e-07, "loss": 1.0927, "step": 4215 }, { "epoch": 401.5238095238095, "grad_norm": 5.786252498626709, "learning_rate": 7.897301349325337e-07, "loss": 1.1023, "step": 4216 }, { "epoch": 401.6190476190476, "grad_norm": 5.132158279418945, "learning_rate": 7.899175412293854e-07, "loss": 1.1067, "step": 4217 }, { "epoch": 401.7142857142857, "grad_norm": 7.377092361450195, "learning_rate": 7.90104947526237e-07, "loss": 1.0825, "step": 4218 }, { "epoch": 401.8095238095238, "grad_norm": 13.699070930480957, "learning_rate": 7.902923538230885e-07, "loss": 1.1531, "step": 4219 }, { "epoch": 401.9047619047619, "grad_norm": 12.673673629760742, "learning_rate": 7.904797601199401e-07, "loss": 1.0908, "step": 4220 }, { "epoch": 402.0, "grad_norm": 9.16825008392334, "learning_rate": 7.906671664167916e-07, "loss": 1.1487, "step": 4221 }, { "epoch": 402.0, "eval_accuracy": 0.747470962907456, "eval_f1": 0.7962515114873036, "eval_loss": 0.5470756888389587, "eval_precision": 0.7465986394557823, "eval_recall": 0.852979274611399, "eval_roc_auc": 0.7952337363270007, "eval_runtime": 1.1274, "eval_samples_per_second": 2367.488, "eval_steps_per_second": 5.322, "step": 4221 }, { "epoch": 402.0952380952381, "grad_norm": 11.666342735290527, "learning_rate": 7.908545727136432e-07, "loss": 1.115, "step": 4222 }, { "epoch": 402.1904761904762, "grad_norm": 7.8304033279418945, "learning_rate": 7.910419790104947e-07, "loss": 1.0772, "step": 4223 }, { "epoch": 402.2857142857143, "grad_norm": 8.050092697143555, "learning_rate": 7.912293853073463e-07, "loss": 1.1208, "step": 4224 }, { "epoch": 402.3809523809524, "grad_norm": 7.431957721710205, "learning_rate": 7.91416791604198e-07, "loss": 1.139, "step": 4225 }, { "epoch": 402.4761904761905, "grad_norm": 7.459015369415283, "learning_rate": 7.916041979010495e-07, "loss": 1.1413, "step": 4226 }, { "epoch": 402.57142857142856, "grad_norm": 8.06148624420166, "learning_rate": 7.917916041979011e-07, "loss": 1.0962, "step": 4227 }, { "epoch": 402.6666666666667, "grad_norm": 6.986510753631592, "learning_rate": 7.919790104947527e-07, "loss": 1.097, "step": 4228 }, { "epoch": 402.76190476190476, "grad_norm": 7.007997035980225, "learning_rate": 7.921664167916042e-07, "loss": 1.1002, "step": 4229 }, { "epoch": 402.85714285714283, "grad_norm": 6.520593166351318, "learning_rate": 7.923538230884558e-07, "loss": 1.1174, "step": 4230 }, { "epoch": 402.95238095238096, "grad_norm": 6.7104105949401855, "learning_rate": 7.925412293853074e-07, "loss": 1.0596, "step": 4231 }, { "epoch": 402.95238095238096, "eval_accuracy": 0.7463469464218808, "eval_f1": 0.7957767722473604, "eval_loss": 0.5472564697265625, "eval_precision": 0.7447769621682665, "eval_recall": 0.8542746113989638, "eval_roc_auc": 0.7951096718480137, "eval_runtime": 1.1411, "eval_samples_per_second": 2339.006, "eval_steps_per_second": 5.258, "step": 4231 }, { "epoch": 403.04761904761904, "grad_norm": 16.685976028442383, "learning_rate": 7.92728635682159e-07, "loss": 1.0923, "step": 4232 }, { "epoch": 403.14285714285717, "grad_norm": 7.973769187927246, "learning_rate": 7.929160419790105e-07, "loss": 1.0894, "step": 4233 }, { "epoch": 403.23809523809524, "grad_norm": 16.108617782592773, "learning_rate": 7.931034482758622e-07, "loss": 1.0602, "step": 4234 }, { "epoch": 403.3333333333333, "grad_norm": 12.90723991394043, "learning_rate": 7.932908545727137e-07, "loss": 1.1349, "step": 4235 }, { "epoch": 403.42857142857144, "grad_norm": 16.323835372924805, "learning_rate": 7.934782608695653e-07, "loss": 1.1329, "step": 4236 }, { "epoch": 403.5238095238095, "grad_norm": 4.875527858734131, "learning_rate": 7.936656671664168e-07, "loss": 1.0819, "step": 4237 }, { "epoch": 403.6190476190476, "grad_norm": 6.662182331085205, "learning_rate": 7.938530734632684e-07, "loss": 1.0866, "step": 4238 }, { "epoch": 403.7142857142857, "grad_norm": 8.983207702636719, "learning_rate": 7.940404797601201e-07, "loss": 1.0948, "step": 4239 }, { "epoch": 403.8095238095238, "grad_norm": 8.16940689086914, "learning_rate": 7.942278860569715e-07, "loss": 1.1415, "step": 4240 }, { "epoch": 403.9047619047619, "grad_norm": 7.25838565826416, "learning_rate": 7.944152923538232e-07, "loss": 1.1255, "step": 4241 }, { "epoch": 404.0, "grad_norm": 8.486621856689453, "learning_rate": 7.946026986506746e-07, "loss": 1.1461, "step": 4242 }, { "epoch": 404.0, "eval_accuracy": 0.7392281753465717, "eval_f1": 0.7835820895522388, "eval_loss": 0.5473288893699646, "eval_precision": 0.7535885167464115, "eval_recall": 0.8160621761658031, "eval_roc_auc": 0.7946073690270582, "eval_runtime": 1.1341, "eval_samples_per_second": 2353.457, "eval_steps_per_second": 5.291, "step": 4242 }, { "epoch": 404.0952380952381, "grad_norm": 10.87182331085205, "learning_rate": 7.947901049475263e-07, "loss": 1.1152, "step": 4243 }, { "epoch": 404.1904761904762, "grad_norm": 11.432137489318848, "learning_rate": 7.949775112443778e-07, "loss": 1.0998, "step": 4244 }, { "epoch": 404.2857142857143, "grad_norm": 5.492263317108154, "learning_rate": 7.951649175412294e-07, "loss": 1.1035, "step": 4245 }, { "epoch": 404.3809523809524, "grad_norm": 13.131122589111328, "learning_rate": 7.953523238380811e-07, "loss": 1.1386, "step": 4246 }, { "epoch": 404.4761904761905, "grad_norm": 6.534399509429932, "learning_rate": 7.955397301349325e-07, "loss": 1.1258, "step": 4247 }, { "epoch": 404.57142857142856, "grad_norm": 9.083715438842773, "learning_rate": 7.957271364317842e-07, "loss": 1.126, "step": 4248 }, { "epoch": 404.6666666666667, "grad_norm": 5.49713659286499, "learning_rate": 7.959145427286356e-07, "loss": 1.0675, "step": 4249 }, { "epoch": 404.76190476190476, "grad_norm": 14.592151641845703, "learning_rate": 7.961019490254873e-07, "loss": 1.0971, "step": 4250 }, { "epoch": 404.85714285714283, "grad_norm": 16.717880249023438, "learning_rate": 7.962893553223388e-07, "loss": 1.1313, "step": 4251 }, { "epoch": 404.95238095238096, "grad_norm": 8.905058860778809, "learning_rate": 7.964767616191904e-07, "loss": 1.1062, "step": 4252 }, { "epoch": 404.95238095238096, "eval_accuracy": 0.7437242412888723, "eval_f1": 0.7926015767131595, "eval_loss": 0.5475287437438965, "eval_precision": 0.7451539338654504, "eval_recall": 0.8465025906735751, "eval_roc_auc": 0.7942599309153713, "eval_runtime": 1.1444, "eval_samples_per_second": 2332.262, "eval_steps_per_second": 5.243, "step": 4252 }, { "epoch": 405.04761904761904, "grad_norm": 9.620675086975098, "learning_rate": 7.966641679160421e-07, "loss": 1.1391, "step": 4253 }, { "epoch": 405.14285714285717, "grad_norm": 8.57576847076416, "learning_rate": 7.968515742128936e-07, "loss": 1.1224, "step": 4254 }, { "epoch": 405.23809523809524, "grad_norm": 6.662265300750732, "learning_rate": 7.970389805097452e-07, "loss": 1.0391, "step": 4255 }, { "epoch": 405.3333333333333, "grad_norm": 11.818034172058105, "learning_rate": 7.972263868065967e-07, "loss": 1.0941, "step": 4256 }, { "epoch": 405.42857142857144, "grad_norm": 5.025628566741943, "learning_rate": 7.974137931034484e-07, "loss": 1.1243, "step": 4257 }, { "epoch": 405.5238095238095, "grad_norm": 10.131697654724121, "learning_rate": 7.976011994002998e-07, "loss": 1.0803, "step": 4258 }, { "epoch": 405.6190476190476, "grad_norm": 6.461528778076172, "learning_rate": 7.977886056971515e-07, "loss": 1.1283, "step": 4259 }, { "epoch": 405.7142857142857, "grad_norm": 10.693177223205566, "learning_rate": 7.979760119940032e-07, "loss": 1.0796, "step": 4260 }, { "epoch": 405.8095238095238, "grad_norm": 8.95005989074707, "learning_rate": 7.981634182908546e-07, "loss": 1.1086, "step": 4261 }, { "epoch": 405.9047619047619, "grad_norm": 10.91205883026123, "learning_rate": 7.983508245877063e-07, "loss": 1.1541, "step": 4262 }, { "epoch": 406.0, "grad_norm": 6.45164680480957, "learning_rate": 7.985382308845577e-07, "loss": 1.1022, "step": 4263 }, { "epoch": 406.0, "eval_accuracy": 0.7455976020981641, "eval_f1": 0.7938050409960522, "eval_loss": 0.5474799275398254, "eval_precision": 0.747284162378502, "eval_recall": 0.8465025906735751, "eval_roc_auc": 0.7942668393782384, "eval_runtime": 1.148, "eval_samples_per_second": 2324.828, "eval_steps_per_second": 5.226, "step": 4263 }, { "epoch": 406.0952380952381, "grad_norm": 14.972555160522461, "learning_rate": 7.987256371814094e-07, "loss": 1.1507, "step": 4264 }, { "epoch": 406.1904761904762, "grad_norm": 16.497968673706055, "learning_rate": 7.989130434782608e-07, "loss": 1.1423, "step": 4265 }, { "epoch": 406.2857142857143, "grad_norm": 11.75539493560791, "learning_rate": 7.991004497751125e-07, "loss": 1.123, "step": 4266 }, { "epoch": 406.3809523809524, "grad_norm": 5.47906494140625, "learning_rate": 7.992878560719641e-07, "loss": 1.1137, "step": 4267 }, { "epoch": 406.4761904761905, "grad_norm": 9.136211395263672, "learning_rate": 7.994752623688156e-07, "loss": 1.1199, "step": 4268 }, { "epoch": 406.57142857142856, "grad_norm": 7.102169036865234, "learning_rate": 7.996626686656673e-07, "loss": 1.0876, "step": 4269 }, { "epoch": 406.6666666666667, "grad_norm": 16.571800231933594, "learning_rate": 7.998500749625187e-07, "loss": 1.0672, "step": 4270 }, { "epoch": 406.76190476190476, "grad_norm": 8.28835678100586, "learning_rate": 8.000374812593704e-07, "loss": 1.1015, "step": 4271 }, { "epoch": 406.85714285714283, "grad_norm": 10.805572509765625, "learning_rate": 8.002248875562218e-07, "loss": 1.0577, "step": 4272 }, { "epoch": 406.95238095238096, "grad_norm": 6.612346649169922, "learning_rate": 8.004122938530735e-07, "loss": 1.0792, "step": 4273 }, { "epoch": 406.95238095238096, "eval_accuracy": 0.7448482577744474, "eval_f1": 0.7915518824609734, "eval_loss": 0.5470107793807983, "eval_precision": 0.7504352872896112, "eval_recall": 0.8374352331606217, "eval_roc_auc": 0.7948126079447323, "eval_runtime": 1.1468, "eval_samples_per_second": 2327.419, "eval_steps_per_second": 5.232, "step": 4273 }, { "epoch": 407.04761904761904, "grad_norm": 7.979783535003662, "learning_rate": 8.005997001499251e-07, "loss": 1.106, "step": 4274 }, { "epoch": 407.14285714285717, "grad_norm": 5.28836727142334, "learning_rate": 8.007871064467767e-07, "loss": 1.0894, "step": 4275 }, { "epoch": 407.23809523809524, "grad_norm": 20.07858657836914, "learning_rate": 8.009745127436282e-07, "loss": 1.1641, "step": 4276 }, { "epoch": 407.3333333333333, "grad_norm": 7.941175937652588, "learning_rate": 8.011619190404798e-07, "loss": 1.0691, "step": 4277 }, { "epoch": 407.42857142857144, "grad_norm": 10.082222938537598, "learning_rate": 8.013493253373315e-07, "loss": 1.0746, "step": 4278 }, { "epoch": 407.5238095238095, "grad_norm": 16.410083770751953, "learning_rate": 8.015367316341829e-07, "loss": 1.1093, "step": 4279 }, { "epoch": 407.6190476190476, "grad_norm": 7.074474811553955, "learning_rate": 8.017241379310346e-07, "loss": 1.1178, "step": 4280 }, { "epoch": 407.7142857142857, "grad_norm": 4.897011756896973, "learning_rate": 8.019115442278862e-07, "loss": 1.1151, "step": 4281 }, { "epoch": 407.8095238095238, "grad_norm": 6.866818428039551, "learning_rate": 8.020989505247377e-07, "loss": 1.094, "step": 4282 }, { "epoch": 407.9047619047619, "grad_norm": 11.228714942932129, "learning_rate": 8.022863568215893e-07, "loss": 1.1042, "step": 4283 }, { "epoch": 408.0, "grad_norm": 18.426788330078125, "learning_rate": 8.024737631184408e-07, "loss": 1.1237, "step": 4284 }, { "epoch": 408.0, "eval_accuracy": 0.7470962907455976, "eval_f1": 0.7943953700883338, "eval_loss": 0.5461246371269226, "eval_precision": 0.7498562392179413, "eval_recall": 0.844559585492228, "eval_roc_auc": 0.7961093839953942, "eval_runtime": 1.1489, "eval_samples_per_second": 2323.052, "eval_steps_per_second": 5.222, "step": 4284 }, { "epoch": 408.0952380952381, "grad_norm": 9.016111373901367, "learning_rate": 8.026611694152924e-07, "loss": 1.0989, "step": 4285 }, { "epoch": 408.1904761904762, "grad_norm": 8.912129402160645, "learning_rate": 8.028485757121439e-07, "loss": 1.1514, "step": 4286 }, { "epoch": 408.2857142857143, "grad_norm": 8.221604347229004, "learning_rate": 8.030359820089955e-07, "loss": 1.0928, "step": 4287 }, { "epoch": 408.3809523809524, "grad_norm": 12.56308650970459, "learning_rate": 8.032233883058472e-07, "loss": 1.1431, "step": 4288 }, { "epoch": 408.4761904761905, "grad_norm": 11.391637802124023, "learning_rate": 8.034107946026987e-07, "loss": 1.0881, "step": 4289 }, { "epoch": 408.57142857142856, "grad_norm": 9.031922340393066, "learning_rate": 8.035982008995503e-07, "loss": 1.0857, "step": 4290 }, { "epoch": 408.6666666666667, "grad_norm": 16.605451583862305, "learning_rate": 8.037856071964018e-07, "loss": 1.0703, "step": 4291 }, { "epoch": 408.76190476190476, "grad_norm": 7.212552547454834, "learning_rate": 8.039730134932534e-07, "loss": 1.1281, "step": 4292 }, { "epoch": 408.85714285714283, "grad_norm": 6.889387607574463, "learning_rate": 8.041604197901049e-07, "loss": 1.0902, "step": 4293 }, { "epoch": 408.95238095238096, "grad_norm": 16.509017944335938, "learning_rate": 8.043478260869565e-07, "loss": 1.1056, "step": 4294 }, { "epoch": 408.95238095238096, "eval_accuracy": 0.7459722742600224, "eval_f1": 0.796028880866426, "eval_loss": 0.5461532473564148, "eval_precision": 0.7432584269662922, "eval_recall": 0.8568652849740933, "eval_roc_auc": 0.7970866436384572, "eval_runtime": 1.1196, "eval_samples_per_second": 2383.949, "eval_steps_per_second": 5.359, "step": 4294 }, { "epoch": 409.04761904761904, "grad_norm": 13.46784782409668, "learning_rate": 8.045352323838082e-07, "loss": 1.0993, "step": 4295 }, { "epoch": 409.14285714285717, "grad_norm": 9.369694709777832, "learning_rate": 8.047226386806596e-07, "loss": 1.0908, "step": 4296 }, { "epoch": 409.23809523809524, "grad_norm": 10.482138633728027, "learning_rate": 8.049100449775113e-07, "loss": 1.0897, "step": 4297 }, { "epoch": 409.3333333333333, "grad_norm": 4.546848773956299, "learning_rate": 8.050974512743629e-07, "loss": 1.1082, "step": 4298 }, { "epoch": 409.42857142857144, "grad_norm": 8.517866134643555, "learning_rate": 8.052848575712144e-07, "loss": 1.1483, "step": 4299 }, { "epoch": 409.5238095238095, "grad_norm": 7.181638717651367, "learning_rate": 8.05472263868066e-07, "loss": 1.0889, "step": 4300 }, { "epoch": 409.6190476190476, "grad_norm": 7.7330403327941895, "learning_rate": 8.056596701649176e-07, "loss": 1.1655, "step": 4301 }, { "epoch": 409.7142857142857, "grad_norm": 14.187042236328125, "learning_rate": 8.058470764617692e-07, "loss": 1.1368, "step": 4302 }, { "epoch": 409.8095238095238, "grad_norm": 16.997217178344727, "learning_rate": 8.060344827586207e-07, "loss": 1.0779, "step": 4303 }, { "epoch": 409.9047619047619, "grad_norm": 15.993823051452637, "learning_rate": 8.062218890554724e-07, "loss": 1.1321, "step": 4304 }, { "epoch": 410.0, "grad_norm": 7.269989013671875, "learning_rate": 8.064092953523238e-07, "loss": 1.1147, "step": 4305 }, { "epoch": 410.0, "eval_accuracy": 0.738478831022855, "eval_f1": 0.7837670384138785, "eval_loss": 0.545691728591919, "eval_precision": 0.751187648456057, "eval_recall": 0.819300518134715, "eval_roc_auc": 0.7966246401842256, "eval_runtime": 1.1265, "eval_samples_per_second": 2369.19, "eval_steps_per_second": 5.326, "step": 4305 }, { "epoch": 410.0952380952381, "grad_norm": 14.141743659973145, "learning_rate": 8.065967016491755e-07, "loss": 1.0654, "step": 4306 }, { "epoch": 410.1904761904762, "grad_norm": 16.728107452392578, "learning_rate": 8.06784107946027e-07, "loss": 1.0393, "step": 4307 }, { "epoch": 410.2857142857143, "grad_norm": 13.319567680358887, "learning_rate": 8.069715142428786e-07, "loss": 1.1547, "step": 4308 }, { "epoch": 410.3809523809524, "grad_norm": 12.622332572937012, "learning_rate": 8.071589205397303e-07, "loss": 1.0834, "step": 4309 }, { "epoch": 410.4761904761905, "grad_norm": 15.735387802124023, "learning_rate": 8.073463268365817e-07, "loss": 1.1014, "step": 4310 }, { "epoch": 410.57142857142856, "grad_norm": 16.525028228759766, "learning_rate": 8.075337331334334e-07, "loss": 1.0989, "step": 4311 }, { "epoch": 410.6666666666667, "grad_norm": 17.96881866455078, "learning_rate": 8.077211394302848e-07, "loss": 1.1008, "step": 4312 }, { "epoch": 410.76190476190476, "grad_norm": 6.438241958618164, "learning_rate": 8.079085457271365e-07, "loss": 1.1317, "step": 4313 }, { "epoch": 410.85714285714283, "grad_norm": 5.570372581481934, "learning_rate": 8.080959520239879e-07, "loss": 1.1357, "step": 4314 }, { "epoch": 410.95238095238096, "grad_norm": 11.380789756774902, "learning_rate": 8.082833583208396e-07, "loss": 1.0883, "step": 4315 }, { "epoch": 410.95238095238096, "eval_accuracy": 0.7381041588609967, "eval_f1": 0.7823108066023046, "eval_loss": 0.5462737679481506, "eval_precision": 0.7534493101379725, "eval_recall": 0.8134715025906736, "eval_roc_auc": 0.7960748416810592, "eval_runtime": 1.1341, "eval_samples_per_second": 2353.328, "eval_steps_per_second": 5.29, "step": 4315 }, { "epoch": 411.04761904761904, "grad_norm": 8.623936653137207, "learning_rate": 8.084707646176913e-07, "loss": 1.1474, "step": 4316 }, { "epoch": 411.14285714285717, "grad_norm": 19.895549774169922, "learning_rate": 8.086581709145427e-07, "loss": 1.1781, "step": 4317 }, { "epoch": 411.23809523809524, "grad_norm": 5.159582614898682, "learning_rate": 8.088455772113944e-07, "loss": 1.1296, "step": 4318 }, { "epoch": 411.3333333333333, "grad_norm": 23.74869155883789, "learning_rate": 8.090329835082458e-07, "loss": 1.1161, "step": 4319 }, { "epoch": 411.42857142857144, "grad_norm": 5.957592964172363, "learning_rate": 8.092203898050975e-07, "loss": 1.1283, "step": 4320 }, { "epoch": 411.5238095238095, "grad_norm": 21.57670021057129, "learning_rate": 8.09407796101949e-07, "loss": 1.0866, "step": 4321 }, { "epoch": 411.6190476190476, "grad_norm": 9.695438385009766, "learning_rate": 8.095952023988006e-07, "loss": 1.0912, "step": 4322 }, { "epoch": 411.7142857142857, "grad_norm": 7.644775390625, "learning_rate": 8.097826086956523e-07, "loss": 1.0915, "step": 4323 }, { "epoch": 411.8095238095238, "grad_norm": 12.165261268615723, "learning_rate": 8.099700149925038e-07, "loss": 1.1117, "step": 4324 }, { "epoch": 411.9047619047619, "grad_norm": 10.229143142700195, "learning_rate": 8.101574212893555e-07, "loss": 1.0971, "step": 4325 }, { "epoch": 412.0, "grad_norm": 10.323266983032227, "learning_rate": 8.103448275862069e-07, "loss": 1.0797, "step": 4326 }, { "epoch": 412.0, "eval_accuracy": 0.7426002248032971, "eval_f1": 0.7890696960393, "eval_loss": 0.5459049344062805, "eval_precision": 0.7501459427904261, "eval_recall": 0.8322538860103627, "eval_roc_auc": 0.7969559585492227, "eval_runtime": 1.1416, "eval_samples_per_second": 2337.93, "eval_steps_per_second": 5.256, "step": 4326 }, { "epoch": 412.0952380952381, "grad_norm": 8.321845054626465, "learning_rate": 8.105322338830586e-07, "loss": 1.1491, "step": 4327 }, { "epoch": 412.1904761904762, "grad_norm": 7.4247941970825195, "learning_rate": 8.1071964017991e-07, "loss": 1.138, "step": 4328 }, { "epoch": 412.2857142857143, "grad_norm": 19.407428741455078, "learning_rate": 8.109070464767617e-07, "loss": 1.0456, "step": 4329 }, { "epoch": 412.3809523809524, "grad_norm": 7.326526641845703, "learning_rate": 8.110944527736133e-07, "loss": 1.1331, "step": 4330 }, { "epoch": 412.4761904761905, "grad_norm": 7.689526081085205, "learning_rate": 8.112818590704648e-07, "loss": 1.1223, "step": 4331 }, { "epoch": 412.57142857142856, "grad_norm": 24.991323471069336, "learning_rate": 8.114692653673165e-07, "loss": 1.1007, "step": 4332 }, { "epoch": 412.6666666666667, "grad_norm": 9.8153076171875, "learning_rate": 8.116566716641679e-07, "loss": 1.1181, "step": 4333 }, { "epoch": 412.76190476190476, "grad_norm": 16.506303787231445, "learning_rate": 8.118440779610196e-07, "loss": 1.1094, "step": 4334 }, { "epoch": 412.85714285714283, "grad_norm": 11.549041748046875, "learning_rate": 8.12031484257871e-07, "loss": 1.0732, "step": 4335 }, { "epoch": 412.95238095238096, "grad_norm": 16.197805404663086, "learning_rate": 8.122188905547227e-07, "loss": 1.1104, "step": 4336 }, { "epoch": 412.95238095238096, "eval_accuracy": 0.7470962907455976, "eval_f1": 0.796870297923563, "eval_loss": 0.5461461544036865, "eval_precision": 0.7442383361439011, "eval_recall": 0.8575129533678757, "eval_roc_auc": 0.7971735751295337, "eval_runtime": 1.1432, "eval_samples_per_second": 2334.652, "eval_steps_per_second": 5.248, "step": 4336 }, { "epoch": 413.04761904761904, "grad_norm": 13.857231140136719, "learning_rate": 8.124062968515743e-07, "loss": 1.1105, "step": 4337 }, { "epoch": 413.14285714285717, "grad_norm": 16.94660758972168, "learning_rate": 8.125937031484258e-07, "loss": 1.1081, "step": 4338 }, { "epoch": 413.23809523809524, "grad_norm": 6.674830436706543, "learning_rate": 8.127811094452774e-07, "loss": 1.1017, "step": 4339 }, { "epoch": 413.3333333333333, "grad_norm": 9.114222526550293, "learning_rate": 8.129685157421289e-07, "loss": 1.0578, "step": 4340 }, { "epoch": 413.42857142857144, "grad_norm": 7.696568012237549, "learning_rate": 8.131559220389805e-07, "loss": 1.0933, "step": 4341 }, { "epoch": 413.5238095238095, "grad_norm": 11.424352645874023, "learning_rate": 8.133433283358321e-07, "loss": 1.1116, "step": 4342 }, { "epoch": 413.6190476190476, "grad_norm": 29.731801986694336, "learning_rate": 8.135307346326837e-07, "loss": 1.1092, "step": 4343 }, { "epoch": 413.7142857142857, "grad_norm": 15.900996208190918, "learning_rate": 8.137181409295353e-07, "loss": 1.0991, "step": 4344 }, { "epoch": 413.8095238095238, "grad_norm": 11.894193649291992, "learning_rate": 8.139055472263869e-07, "loss": 1.1348, "step": 4345 }, { "epoch": 413.9047619047619, "grad_norm": 5.878718376159668, "learning_rate": 8.140929535232384e-07, "loss": 1.1219, "step": 4346 }, { "epoch": 414.0, "grad_norm": 7.070927619934082, "learning_rate": 8.1428035982009e-07, "loss": 1.0884, "step": 4347 }, { "epoch": 414.0, "eval_accuracy": 0.7437242412888723, "eval_f1": 0.7934782608695652, "eval_loss": 0.5461050868034363, "eval_precision": 0.7432126696832579, "eval_recall": 0.8510362694300518, "eval_roc_auc": 0.7969974093264248, "eval_runtime": 1.1418, "eval_samples_per_second": 2337.604, "eval_steps_per_second": 5.255, "step": 4347 }, { "epoch": 414.0952380952381, "grad_norm": 8.964851379394531, "learning_rate": 8.144677661169416e-07, "loss": 1.1322, "step": 4348 }, { "epoch": 414.1904761904762, "grad_norm": 7.7236714363098145, "learning_rate": 8.146551724137931e-07, "loss": 1.0882, "step": 4349 }, { "epoch": 414.2857142857143, "grad_norm": 14.21401596069336, "learning_rate": 8.148425787106448e-07, "loss": 1.1519, "step": 4350 }, { "epoch": 414.3809523809524, "grad_norm": 11.415388107299805, "learning_rate": 8.150299850074964e-07, "loss": 1.0953, "step": 4351 }, { "epoch": 414.4761904761905, "grad_norm": 14.541751861572266, "learning_rate": 8.152173913043479e-07, "loss": 1.1454, "step": 4352 }, { "epoch": 414.57142857142856, "grad_norm": 10.25801944732666, "learning_rate": 8.154047976011995e-07, "loss": 1.1093, "step": 4353 }, { "epoch": 414.6666666666667, "grad_norm": 10.318373680114746, "learning_rate": 8.15592203898051e-07, "loss": 1.0564, "step": 4354 }, { "epoch": 414.76190476190476, "grad_norm": 6.321846961975098, "learning_rate": 8.157796101949026e-07, "loss": 1.0697, "step": 4355 }, { "epoch": 414.85714285714283, "grad_norm": 12.549822807312012, "learning_rate": 8.159670164917541e-07, "loss": 1.1069, "step": 4356 }, { "epoch": 414.95238095238096, "grad_norm": 7.117936134338379, "learning_rate": 8.161544227886057e-07, "loss": 1.149, "step": 4357 }, { "epoch": 414.95238095238096, "eval_accuracy": 0.7403521918321468, "eval_f1": 0.7841793833696667, "eval_loss": 0.5456610918045044, "eval_precision": 0.755248950209958, "eval_recall": 0.8154145077720207, "eval_roc_auc": 0.7966873920552677, "eval_runtime": 1.124, "eval_samples_per_second": 2374.473, "eval_steps_per_second": 5.338, "step": 4357 }, { "epoch": 415.04761904761904, "grad_norm": 11.925171852111816, "learning_rate": 8.163418290854574e-07, "loss": 1.1399, "step": 4358 }, { "epoch": 415.14285714285717, "grad_norm": 7.809358596801758, "learning_rate": 8.165292353823088e-07, "loss": 1.1266, "step": 4359 }, { "epoch": 415.23809523809524, "grad_norm": 8.68516731262207, "learning_rate": 8.167166416791605e-07, "loss": 1.1213, "step": 4360 }, { "epoch": 415.3333333333333, "grad_norm": 12.828359603881836, "learning_rate": 8.16904047976012e-07, "loss": 1.1063, "step": 4361 }, { "epoch": 415.42857142857144, "grad_norm": 9.938231468200684, "learning_rate": 8.170914542728636e-07, "loss": 1.1034, "step": 4362 }, { "epoch": 415.5238095238095, "grad_norm": 14.429758071899414, "learning_rate": 8.172788605697151e-07, "loss": 1.1455, "step": 4363 }, { "epoch": 415.6190476190476, "grad_norm": 6.0715718269348145, "learning_rate": 8.174662668665667e-07, "loss": 1.1066, "step": 4364 }, { "epoch": 415.7142857142857, "grad_norm": 10.693117141723633, "learning_rate": 8.176536731634184e-07, "loss": 1.0846, "step": 4365 }, { "epoch": 415.8095238095238, "grad_norm": 6.945085525512695, "learning_rate": 8.178410794602698e-07, "loss": 1.0902, "step": 4366 }, { "epoch": 415.9047619047619, "grad_norm": 7.514898777008057, "learning_rate": 8.180284857571215e-07, "loss": 1.0867, "step": 4367 }, { "epoch": 416.0, "grad_norm": 5.8616509437561035, "learning_rate": 8.18215892053973e-07, "loss": 1.0946, "step": 4368 }, { "epoch": 416.0, "eval_accuracy": 0.741476208317722, "eval_f1": 0.7889908256880734, "eval_loss": 0.5456025004386902, "eval_precision": 0.7473928157589803, "eval_recall": 0.8354922279792746, "eval_roc_auc": 0.7965768566493956, "eval_runtime": 1.1384, "eval_samples_per_second": 2344.428, "eval_steps_per_second": 5.27, "step": 4368 }, { "epoch": 416.0952380952381, "grad_norm": 4.330708980560303, "learning_rate": 8.184032983508246e-07, "loss": 1.1099, "step": 4369 }, { "epoch": 416.1904761904762, "grad_norm": 8.46364688873291, "learning_rate": 8.185907046476762e-07, "loss": 1.1266, "step": 4370 }, { "epoch": 416.2857142857143, "grad_norm": 14.600200653076172, "learning_rate": 8.187781109445278e-07, "loss": 1.0867, "step": 4371 }, { "epoch": 416.3809523809524, "grad_norm": 3.619706392288208, "learning_rate": 8.189655172413794e-07, "loss": 1.1126, "step": 4372 }, { "epoch": 416.4761904761905, "grad_norm": 6.472954750061035, "learning_rate": 8.191529235382309e-07, "loss": 1.1137, "step": 4373 }, { "epoch": 416.57142857142856, "grad_norm": 9.510734558105469, "learning_rate": 8.193403298350826e-07, "loss": 1.1469, "step": 4374 }, { "epoch": 416.6666666666667, "grad_norm": 15.471990585327148, "learning_rate": 8.19527736131934e-07, "loss": 1.0597, "step": 4375 }, { "epoch": 416.76190476190476, "grad_norm": 6.931447505950928, "learning_rate": 8.197151424287857e-07, "loss": 1.1144, "step": 4376 }, { "epoch": 416.85714285714283, "grad_norm": 10.197463035583496, "learning_rate": 8.199025487256371e-07, "loss": 1.0994, "step": 4377 }, { "epoch": 416.95238095238096, "grad_norm": 5.985307693481445, "learning_rate": 8.200899550224888e-07, "loss": 1.0616, "step": 4378 }, { "epoch": 416.95238095238096, "eval_accuracy": 0.7426002248032971, "eval_f1": 0.7894575543977934, "eval_loss": 0.5450717210769653, "eval_precision": 0.7492728330424665, "eval_recall": 0.8341968911917098, "eval_roc_auc": 0.7973819804260218, "eval_runtime": 1.1568, "eval_samples_per_second": 2307.26, "eval_steps_per_second": 5.187, "step": 4378 }, { "epoch": 417.04761904761904, "grad_norm": 6.391446113586426, "learning_rate": 8.202773613193405e-07, "loss": 1.0323, "step": 4379 }, { "epoch": 417.14285714285717, "grad_norm": 12.014970779418945, "learning_rate": 8.204647676161919e-07, "loss": 1.1424, "step": 4380 }, { "epoch": 417.23809523809524, "grad_norm": 10.252963066101074, "learning_rate": 8.206521739130436e-07, "loss": 1.1437, "step": 4381 }, { "epoch": 417.3333333333333, "grad_norm": 10.97370433807373, "learning_rate": 8.20839580209895e-07, "loss": 1.0734, "step": 4382 }, { "epoch": 417.42857142857144, "grad_norm": 5.32800817489624, "learning_rate": 8.210269865067467e-07, "loss": 1.137, "step": 4383 }, { "epoch": 417.5238095238095, "grad_norm": 9.901985168457031, "learning_rate": 8.212143928035981e-07, "loss": 1.1143, "step": 4384 }, { "epoch": 417.6190476190476, "grad_norm": 13.600570678710938, "learning_rate": 8.214017991004498e-07, "loss": 1.101, "step": 4385 }, { "epoch": 417.7142857142857, "grad_norm": 6.229227066040039, "learning_rate": 8.215892053973015e-07, "loss": 1.1173, "step": 4386 }, { "epoch": 417.8095238095238, "grad_norm": 11.53397274017334, "learning_rate": 8.217766116941529e-07, "loss": 1.0949, "step": 4387 }, { "epoch": 417.9047619047619, "grad_norm": 8.325032234191895, "learning_rate": 8.219640179910046e-07, "loss": 1.1058, "step": 4388 }, { "epoch": 418.0, "grad_norm": 11.090065002441406, "learning_rate": 8.22151424287856e-07, "loss": 1.0834, "step": 4389 }, { "epoch": 418.0, "eval_accuracy": 0.741476208317722, "eval_f1": 0.7866419294990723, "eval_loss": 0.545009434223175, "eval_precision": 0.7526627218934911, "eval_recall": 0.8238341968911918, "eval_roc_auc": 0.7973802533103053, "eval_runtime": 1.1366, "eval_samples_per_second": 2348.205, "eval_steps_per_second": 5.279, "step": 4389 }, { "epoch": 418.0952380952381, "grad_norm": 10.562765121459961, "learning_rate": 8.223388305847077e-07, "loss": 1.099, "step": 4390 }, { "epoch": 418.1904761904762, "grad_norm": 10.48797607421875, "learning_rate": 8.225262368815592e-07, "loss": 1.1413, "step": 4391 }, { "epoch": 418.2857142857143, "grad_norm": 11.87863540649414, "learning_rate": 8.227136431784109e-07, "loss": 1.0856, "step": 4392 }, { "epoch": 418.3809523809524, "grad_norm": 16.91488265991211, "learning_rate": 8.229010494752624e-07, "loss": 1.0922, "step": 4393 }, { "epoch": 418.4761904761905, "grad_norm": 12.223773956298828, "learning_rate": 8.23088455772114e-07, "loss": 1.1175, "step": 4394 }, { "epoch": 418.57142857142856, "grad_norm": 10.999683380126953, "learning_rate": 8.232758620689657e-07, "loss": 1.0696, "step": 4395 }, { "epoch": 418.6666666666667, "grad_norm": 6.884599685668945, "learning_rate": 8.234632683658171e-07, "loss": 1.1307, "step": 4396 }, { "epoch": 418.76190476190476, "grad_norm": 9.62916374206543, "learning_rate": 8.236506746626688e-07, "loss": 1.0341, "step": 4397 }, { "epoch": 418.85714285714283, "grad_norm": 11.067511558532715, "learning_rate": 8.238380809595202e-07, "loss": 1.1207, "step": 4398 }, { "epoch": 418.95238095238096, "grad_norm": 12.294548988342285, "learning_rate": 8.240254872563719e-07, "loss": 1.1302, "step": 4399 }, { "epoch": 418.95238095238096, "eval_accuracy": 0.7429748969651555, "eval_f1": 0.7911084043848965, "eval_loss": 0.5447846055030823, "eval_precision": 0.746551724137931, "eval_recall": 0.841321243523316, "eval_roc_auc": 0.798289291882556, "eval_runtime": 1.1598, "eval_samples_per_second": 2301.243, "eval_steps_per_second": 5.173, "step": 4399 }, { "epoch": 419.04761904761904, "grad_norm": 11.96934700012207, "learning_rate": 8.242128935532235e-07, "loss": 1.0869, "step": 4400 }, { "epoch": 419.14285714285717, "grad_norm": 7.476302146911621, "learning_rate": 8.24400299850075e-07, "loss": 1.0941, "step": 4401 }, { "epoch": 419.23809523809524, "grad_norm": 6.855079174041748, "learning_rate": 8.245877061469266e-07, "loss": 1.0963, "step": 4402 }, { "epoch": 419.3333333333333, "grad_norm": 8.352704048156738, "learning_rate": 8.247751124437781e-07, "loss": 1.1144, "step": 4403 }, { "epoch": 419.42857142857144, "grad_norm": 9.372175216674805, "learning_rate": 8.249625187406298e-07, "loss": 1.0919, "step": 4404 }, { "epoch": 419.5238095238095, "grad_norm": 13.681797981262207, "learning_rate": 8.251499250374812e-07, "loss": 1.1232, "step": 4405 }, { "epoch": 419.6190476190476, "grad_norm": 5.431125640869141, "learning_rate": 8.253373313343329e-07, "loss": 1.1242, "step": 4406 }, { "epoch": 419.7142857142857, "grad_norm": 5.640235424041748, "learning_rate": 8.255247376311845e-07, "loss": 1.1382, "step": 4407 }, { "epoch": 419.8095238095238, "grad_norm": 11.201006889343262, "learning_rate": 8.25712143928036e-07, "loss": 1.0872, "step": 4408 }, { "epoch": 419.9047619047619, "grad_norm": 10.526140213012695, "learning_rate": 8.258995502248876e-07, "loss": 1.112, "step": 4409 }, { "epoch": 420.0, "grad_norm": 9.951897621154785, "learning_rate": 8.260869565217391e-07, "loss": 1.1077, "step": 4410 }, { "epoch": 420.0, "eval_accuracy": 0.7437242412888723, "eval_f1": 0.7923497267759563, "eval_loss": 0.5443801879882812, "eval_precision": 0.7457142857142857, "eval_recall": 0.8452072538860104, "eval_roc_auc": 0.7985854922279794, "eval_runtime": 1.1406, "eval_samples_per_second": 2339.901, "eval_steps_per_second": 5.26, "step": 4410 }, { "epoch": 420.0952380952381, "grad_norm": 4.912606716156006, "learning_rate": 8.262743628185907e-07, "loss": 1.0917, "step": 4411 }, { "epoch": 420.1904761904762, "grad_norm": 11.934670448303223, "learning_rate": 8.264617691154423e-07, "loss": 1.1055, "step": 4412 }, { "epoch": 420.2857142857143, "grad_norm": 14.591500282287598, "learning_rate": 8.266491754122938e-07, "loss": 1.1455, "step": 4413 }, { "epoch": 420.3809523809524, "grad_norm": 7.827000617980957, "learning_rate": 8.268365817091455e-07, "loss": 1.0873, "step": 4414 }, { "epoch": 420.4761904761905, "grad_norm": 8.400473594665527, "learning_rate": 8.270239880059971e-07, "loss": 1.0894, "step": 4415 }, { "epoch": 420.57142857142856, "grad_norm": 7.976455211639404, "learning_rate": 8.272113943028486e-07, "loss": 1.0916, "step": 4416 }, { "epoch": 420.6666666666667, "grad_norm": 9.351739883422852, "learning_rate": 8.273988005997002e-07, "loss": 1.1784, "step": 4417 }, { "epoch": 420.76190476190476, "grad_norm": 17.383878707885742, "learning_rate": 8.275862068965518e-07, "loss": 1.0647, "step": 4418 }, { "epoch": 420.85714285714283, "grad_norm": 14.991252899169922, "learning_rate": 8.277736131934033e-07, "loss": 1.125, "step": 4419 }, { "epoch": 420.95238095238096, "grad_norm": 31.164175033569336, "learning_rate": 8.279610194902549e-07, "loss": 1.1136, "step": 4420 }, { "epoch": 420.95238095238096, "eval_accuracy": 0.7347321094042712, "eval_f1": 0.776937618147448, "eval_loss": 0.5451502203941345, "eval_precision": 0.756441717791411, "eval_recall": 0.7985751295336787, "eval_roc_auc": 0.797415371329879, "eval_runtime": 1.1407, "eval_samples_per_second": 2339.842, "eval_steps_per_second": 5.26, "step": 4420 }, { "epoch": 421.04761904761904, "grad_norm": 17.54259490966797, "learning_rate": 8.281484257871066e-07, "loss": 1.1021, "step": 4421 }, { "epoch": 421.14285714285717, "grad_norm": 6.919532299041748, "learning_rate": 8.28335832083958e-07, "loss": 1.1348, "step": 4422 }, { "epoch": 421.23809523809524, "grad_norm": 8.13316822052002, "learning_rate": 8.285232383808097e-07, "loss": 1.1717, "step": 4423 }, { "epoch": 421.3333333333333, "grad_norm": 14.149221420288086, "learning_rate": 8.287106446776612e-07, "loss": 1.1057, "step": 4424 }, { "epoch": 421.42857142857144, "grad_norm": 12.222851753234863, "learning_rate": 8.288980509745128e-07, "loss": 1.1559, "step": 4425 }, { "epoch": 421.5238095238095, "grad_norm": 13.207738876342773, "learning_rate": 8.290854572713643e-07, "loss": 1.0409, "step": 4426 }, { "epoch": 421.6190476190476, "grad_norm": 10.126866340637207, "learning_rate": 8.292728635682159e-07, "loss": 1.0834, "step": 4427 }, { "epoch": 421.7142857142857, "grad_norm": 12.194635391235352, "learning_rate": 8.294602698650676e-07, "loss": 1.1281, "step": 4428 }, { "epoch": 421.8095238095238, "grad_norm": 11.794801712036133, "learning_rate": 8.29647676161919e-07, "loss": 1.0945, "step": 4429 }, { "epoch": 421.9047619047619, "grad_norm": 4.966232776641846, "learning_rate": 8.298350824587707e-07, "loss": 1.0807, "step": 4430 }, { "epoch": 422.0, "grad_norm": 12.192557334899902, "learning_rate": 8.300224887556221e-07, "loss": 1.0746, "step": 4431 }, { "epoch": 422.0, "eval_accuracy": 0.744473585612589, "eval_f1": 0.7902829028290282, "eval_loss": 0.5442536473274231, "eval_precision": 0.7523419203747073, "eval_recall": 0.8322538860103627, "eval_roc_auc": 0.7985938399539436, "eval_runtime": 1.1093, "eval_samples_per_second": 2406.062, "eval_steps_per_second": 5.409, "step": 4431 }, { "epoch": 422.0952380952381, "grad_norm": 10.80482292175293, "learning_rate": 8.302098950524738e-07, "loss": 1.1118, "step": 4432 }, { "epoch": 422.1904761904762, "grad_norm": 7.838009357452393, "learning_rate": 8.303973013493253e-07, "loss": 1.1194, "step": 4433 }, { "epoch": 422.2857142857143, "grad_norm": 10.171510696411133, "learning_rate": 8.305847076461769e-07, "loss": 1.068, "step": 4434 }, { "epoch": 422.3809523809524, "grad_norm": 6.738454818725586, "learning_rate": 8.307721139430286e-07, "loss": 1.1022, "step": 4435 }, { "epoch": 422.4761904761905, "grad_norm": 14.316563606262207, "learning_rate": 8.3095952023988e-07, "loss": 1.0298, "step": 4436 }, { "epoch": 422.57142857142856, "grad_norm": 16.27669334411621, "learning_rate": 8.311469265367317e-07, "loss": 1.0936, "step": 4437 }, { "epoch": 422.6666666666667, "grad_norm": 14.908007621765137, "learning_rate": 8.313343328335832e-07, "loss": 1.1261, "step": 4438 }, { "epoch": 422.76190476190476, "grad_norm": 6.022855281829834, "learning_rate": 8.315217391304348e-07, "loss": 1.0819, "step": 4439 }, { "epoch": 422.85714285714283, "grad_norm": 21.759950637817383, "learning_rate": 8.317091454272863e-07, "loss": 1.1399, "step": 4440 }, { "epoch": 422.95238095238096, "grad_norm": 15.305691719055176, "learning_rate": 8.31896551724138e-07, "loss": 1.0986, "step": 4441 }, { "epoch": 422.95238095238096, "eval_accuracy": 0.7459722742600224, "eval_f1": 0.79491833030853, "eval_loss": 0.5446916818618774, "eval_precision": 0.7457434733257662, "eval_recall": 0.8510362694300518, "eval_roc_auc": 0.797884571099597, "eval_runtime": 1.1207, "eval_samples_per_second": 2381.599, "eval_steps_per_second": 5.354, "step": 4441 }, { "epoch": 423.04761904761904, "grad_norm": 10.44428825378418, "learning_rate": 8.320839580209896e-07, "loss": 1.0861, "step": 4442 }, { "epoch": 423.14285714285717, "grad_norm": 9.53567886352539, "learning_rate": 8.322713643178411e-07, "loss": 1.0823, "step": 4443 }, { "epoch": 423.23809523809524, "grad_norm": 8.419621467590332, "learning_rate": 8.324587706146928e-07, "loss": 1.1237, "step": 4444 }, { "epoch": 423.3333333333333, "grad_norm": 10.934845924377441, "learning_rate": 8.326461769115442e-07, "loss": 1.1218, "step": 4445 }, { "epoch": 423.42857142857144, "grad_norm": 14.968563079833984, "learning_rate": 8.328335832083959e-07, "loss": 1.0985, "step": 4446 }, { "epoch": 423.5238095238095, "grad_norm": 7.92880392074585, "learning_rate": 8.330209895052473e-07, "loss": 1.1011, "step": 4447 }, { "epoch": 423.6190476190476, "grad_norm": 18.306106567382812, "learning_rate": 8.33208395802099e-07, "loss": 1.1, "step": 4448 }, { "epoch": 423.7142857142857, "grad_norm": 9.438417434692383, "learning_rate": 8.333958020989507e-07, "loss": 1.0866, "step": 4449 }, { "epoch": 423.8095238095238, "grad_norm": 9.805127143859863, "learning_rate": 8.335832083958021e-07, "loss": 1.0854, "step": 4450 }, { "epoch": 423.9047619047619, "grad_norm": 7.226143836975098, "learning_rate": 8.337706146926538e-07, "loss": 1.1316, "step": 4451 }, { "epoch": 424.0, "grad_norm": 14.099864959716797, "learning_rate": 8.339580209895052e-07, "loss": 1.0947, "step": 4452 }, { "epoch": 424.0, "eval_accuracy": 0.7452229299363057, "eval_f1": 0.7928092626447288, "eval_loss": 0.544590950012207, "eval_precision": 0.7485615650172612, "eval_recall": 0.8426165803108808, "eval_roc_auc": 0.7976683937823834, "eval_runtime": 1.129, "eval_samples_per_second": 2364.033, "eval_steps_per_second": 5.314, "step": 4452 }, { "epoch": 424.0952380952381, "grad_norm": 14.035150527954102, "learning_rate": 8.341454272863569e-07, "loss": 1.0665, "step": 4453 }, { "epoch": 424.1904761904762, "grad_norm": 17.778974533081055, "learning_rate": 8.343328335832083e-07, "loss": 1.1228, "step": 4454 }, { "epoch": 424.2857142857143, "grad_norm": 6.747130870819092, "learning_rate": 8.3452023988006e-07, "loss": 1.1226, "step": 4455 }, { "epoch": 424.3809523809524, "grad_norm": 12.075554847717285, "learning_rate": 8.347076461769116e-07, "loss": 1.113, "step": 4456 }, { "epoch": 424.4761904761905, "grad_norm": 13.99946403503418, "learning_rate": 8.348950524737631e-07, "loss": 1.1179, "step": 4457 }, { "epoch": 424.57142857142856, "grad_norm": 13.580896377563477, "learning_rate": 8.350824587706148e-07, "loss": 1.0727, "step": 4458 }, { "epoch": 424.6666666666667, "grad_norm": 6.982471466064453, "learning_rate": 8.352698650674662e-07, "loss": 1.1384, "step": 4459 }, { "epoch": 424.76190476190476, "grad_norm": 10.41710090637207, "learning_rate": 8.354572713643179e-07, "loss": 1.0804, "step": 4460 }, { "epoch": 424.85714285714283, "grad_norm": 9.565821647644043, "learning_rate": 8.356446776611694e-07, "loss": 1.1133, "step": 4461 }, { "epoch": 424.95238095238096, "grad_norm": 11.481152534484863, "learning_rate": 8.358320839580211e-07, "loss": 1.0711, "step": 4462 }, { "epoch": 424.95238095238096, "eval_accuracy": 0.7455976020981641, "eval_f1": 0.7921640648913376, "eval_loss": 0.5441731214523315, "eval_precision": 0.751015670342426, "eval_recall": 0.8380829015544041, "eval_roc_auc": 0.7982130109383995, "eval_runtime": 1.1957, "eval_samples_per_second": 2232.225, "eval_steps_per_second": 5.018, "step": 4462 }, { "epoch": 425.04761904761904, "grad_norm": 5.38265323638916, "learning_rate": 8.360194902548726e-07, "loss": 1.1212, "step": 4463 }, { "epoch": 425.14285714285717, "grad_norm": 17.884220123291016, "learning_rate": 8.362068965517242e-07, "loss": 1.1283, "step": 4464 }, { "epoch": 425.23809523809524, "grad_norm": 9.577488899230957, "learning_rate": 8.363943028485758e-07, "loss": 1.121, "step": 4465 }, { "epoch": 425.3333333333333, "grad_norm": 23.120994567871094, "learning_rate": 8.365817091454273e-07, "loss": 1.171, "step": 4466 }, { "epoch": 425.42857142857144, "grad_norm": 10.079218864440918, "learning_rate": 8.36769115442279e-07, "loss": 1.0982, "step": 4467 }, { "epoch": 425.5238095238095, "grad_norm": 5.675240516662598, "learning_rate": 8.369565217391304e-07, "loss": 1.1029, "step": 4468 }, { "epoch": 425.6190476190476, "grad_norm": 13.191267013549805, "learning_rate": 8.371439280359821e-07, "loss": 1.1224, "step": 4469 }, { "epoch": 425.7142857142857, "grad_norm": 17.76279640197754, "learning_rate": 8.373313343328337e-07, "loss": 1.0952, "step": 4470 }, { "epoch": 425.8095238095238, "grad_norm": 17.341856002807617, "learning_rate": 8.375187406296852e-07, "loss": 1.0476, "step": 4471 }, { "epoch": 425.9047619047619, "grad_norm": 22.54288673400879, "learning_rate": 8.377061469265368e-07, "loss": 1.0452, "step": 4472 }, { "epoch": 426.0, "grad_norm": 8.028871536254883, "learning_rate": 8.378935532233883e-07, "loss": 1.0902, "step": 4473 }, { "epoch": 426.0, "eval_accuracy": 0.7452229299363057, "eval_f1": 0.792302993280391, "eval_loss": 0.544516384601593, "eval_precision": 0.7497109826589595, "eval_recall": 0.8400259067357513, "eval_roc_auc": 0.7977124352331606, "eval_runtime": 1.143, "eval_samples_per_second": 2335.014, "eval_steps_per_second": 5.249, "step": 4473 }, { "epoch": 426.0952380952381, "grad_norm": 7.671505928039551, "learning_rate": 8.380809595202399e-07, "loss": 1.0819, "step": 4474 }, { "epoch": 426.1904761904762, "grad_norm": 14.16700267791748, "learning_rate": 8.382683658170914e-07, "loss": 1.1159, "step": 4475 }, { "epoch": 426.2857142857143, "grad_norm": 15.45769214630127, "learning_rate": 8.38455772113943e-07, "loss": 1.1012, "step": 4476 }, { "epoch": 426.3809523809524, "grad_norm": 21.72220802307129, "learning_rate": 8.386431784107947e-07, "loss": 1.0954, "step": 4477 }, { "epoch": 426.4761904761905, "grad_norm": 7.532237529754639, "learning_rate": 8.388305847076462e-07, "loss": 1.0694, "step": 4478 }, { "epoch": 426.57142857142856, "grad_norm": 10.898053169250488, "learning_rate": 8.390179910044978e-07, "loss": 1.0699, "step": 4479 }, { "epoch": 426.6666666666667, "grad_norm": 5.27651309967041, "learning_rate": 8.392053973013493e-07, "loss": 1.1184, "step": 4480 }, { "epoch": 426.76190476190476, "grad_norm": 9.029277801513672, "learning_rate": 8.393928035982009e-07, "loss": 1.1861, "step": 4481 }, { "epoch": 426.85714285714283, "grad_norm": 9.089925765991211, "learning_rate": 8.395802098950525e-07, "loss": 1.1061, "step": 4482 }, { "epoch": 426.95238095238096, "grad_norm": 21.758668899536133, "learning_rate": 8.39767616191904e-07, "loss": 1.0619, "step": 4483 }, { "epoch": 426.95238095238096, "eval_accuracy": 0.7403521918321468, "eval_f1": 0.7832342821395057, "eval_loss": 0.5450884699821472, "eval_precision": 0.7574107683000605, "eval_recall": 0.810880829015544, "eval_roc_auc": 0.7967820955670697, "eval_runtime": 1.1327, "eval_samples_per_second": 2356.249, "eval_steps_per_second": 5.297, "step": 4483 }, { "epoch": 427.04761904761904, "grad_norm": 11.016739845275879, "learning_rate": 8.399550224887557e-07, "loss": 1.0744, "step": 4484 }, { "epoch": 427.14285714285717, "grad_norm": 7.590450763702393, "learning_rate": 8.401424287856073e-07, "loss": 1.1195, "step": 4485 }, { "epoch": 427.23809523809524, "grad_norm": 7.918571949005127, "learning_rate": 8.403298350824588e-07, "loss": 1.1003, "step": 4486 }, { "epoch": 427.3333333333333, "grad_norm": 9.06821060180664, "learning_rate": 8.405172413793104e-07, "loss": 1.1264, "step": 4487 }, { "epoch": 427.42857142857144, "grad_norm": 7.186644554138184, "learning_rate": 8.40704647676162e-07, "loss": 1.1383, "step": 4488 }, { "epoch": 427.5238095238095, "grad_norm": 7.61638069152832, "learning_rate": 8.408920539730135e-07, "loss": 1.1264, "step": 4489 }, { "epoch": 427.6190476190476, "grad_norm": 5.264971733093262, "learning_rate": 8.410794602698651e-07, "loss": 1.0935, "step": 4490 }, { "epoch": 427.7142857142857, "grad_norm": 5.830763339996338, "learning_rate": 8.412668665667168e-07, "loss": 1.0946, "step": 4491 }, { "epoch": 427.8095238095238, "grad_norm": 6.984022617340088, "learning_rate": 8.414542728635682e-07, "loss": 1.0678, "step": 4492 }, { "epoch": 427.9047619047619, "grad_norm": 9.952226638793945, "learning_rate": 8.416416791604199e-07, "loss": 1.1374, "step": 4493 }, { "epoch": 428.0, "grad_norm": 8.996271133422852, "learning_rate": 8.418290854572713e-07, "loss": 1.0751, "step": 4494 }, { "epoch": 428.0, "eval_accuracy": 0.744473585612589, "eval_f1": 0.7924528301886793, "eval_loss": 0.5440096855163574, "eval_precision": 0.7474167623421355, "eval_recall": 0.8432642487046632, "eval_roc_auc": 0.7981594703511802, "eval_runtime": 1.1385, "eval_samples_per_second": 2344.319, "eval_steps_per_second": 5.27, "step": 4494 }, { "epoch": 428.0952380952381, "grad_norm": 8.368547439575195, "learning_rate": 8.42016491754123e-07, "loss": 1.0567, "step": 4495 }, { "epoch": 428.1904761904762, "grad_norm": 17.677316665649414, "learning_rate": 8.422038980509745e-07, "loss": 1.1367, "step": 4496 }, { "epoch": 428.2857142857143, "grad_norm": 9.510296821594238, "learning_rate": 8.423913043478261e-07, "loss": 1.1055, "step": 4497 }, { "epoch": 428.3809523809524, "grad_norm": 9.15112018585205, "learning_rate": 8.425787106446778e-07, "loss": 1.0695, "step": 4498 }, { "epoch": 428.4761904761905, "grad_norm": 7.10412073135376, "learning_rate": 8.427661169415292e-07, "loss": 1.0799, "step": 4499 }, { "epoch": 428.57142857142856, "grad_norm": 17.0451602935791, "learning_rate": 8.429535232383809e-07, "loss": 1.0909, "step": 4500 }, { "epoch": 428.6666666666667, "grad_norm": 9.14710521697998, "learning_rate": 8.431409295352323e-07, "loss": 1.1133, "step": 4501 }, { "epoch": 428.76190476190476, "grad_norm": 5.92765474319458, "learning_rate": 8.43328335832084e-07, "loss": 1.1242, "step": 4502 }, { "epoch": 428.85714285714283, "grad_norm": 9.075368881225586, "learning_rate": 8.435157421289354e-07, "loss": 1.1234, "step": 4503 }, { "epoch": 428.95238095238096, "grad_norm": 7.065749168395996, "learning_rate": 8.437031484257871e-07, "loss": 1.139, "step": 4504 }, { "epoch": 428.95238095238096, "eval_accuracy": 0.7463469464218808, "eval_f1": 0.793029654539896, "eval_loss": 0.5434408187866211, "eval_precision": 0.7510133178922987, "eval_recall": 0.8400259067357513, "eval_roc_auc": 0.7986295336787564, "eval_runtime": 1.1676, "eval_samples_per_second": 2285.891, "eval_steps_per_second": 5.139, "step": 4504 }, { "epoch": 429.04761904761904, "grad_norm": 11.20965576171875, "learning_rate": 8.438905547226388e-07, "loss": 1.0627, "step": 4505 }, { "epoch": 429.14285714285717, "grad_norm": 7.081179618835449, "learning_rate": 8.440779610194902e-07, "loss": 1.1284, "step": 4506 }, { "epoch": 429.23809523809524, "grad_norm": 8.498674392700195, "learning_rate": 8.442653673163419e-07, "loss": 1.0396, "step": 4507 }, { "epoch": 429.3333333333333, "grad_norm": 18.132099151611328, "learning_rate": 8.444527736131934e-07, "loss": 1.1265, "step": 4508 }, { "epoch": 429.42857142857144, "grad_norm": 6.378214359283447, "learning_rate": 8.44640179910045e-07, "loss": 1.115, "step": 4509 }, { "epoch": 429.5238095238095, "grad_norm": 14.451581001281738, "learning_rate": 8.448275862068965e-07, "loss": 1.1077, "step": 4510 }, { "epoch": 429.6190476190476, "grad_norm": 7.9214091300964355, "learning_rate": 8.450149925037482e-07, "loss": 1.0987, "step": 4511 }, { "epoch": 429.7142857142857, "grad_norm": 16.34021759033203, "learning_rate": 8.452023988005999e-07, "loss": 1.0802, "step": 4512 }, { "epoch": 429.8095238095238, "grad_norm": 15.271219253540039, "learning_rate": 8.453898050974513e-07, "loss": 1.0665, "step": 4513 }, { "epoch": 429.9047619047619, "grad_norm": 11.202034950256348, "learning_rate": 8.45577211394303e-07, "loss": 1.0668, "step": 4514 }, { "epoch": 430.0, "grad_norm": 8.156551361083984, "learning_rate": 8.457646176911544e-07, "loss": 1.1641, "step": 4515 }, { "epoch": 430.0, "eval_accuracy": 0.7440989134507306, "eval_f1": 0.7883483111248838, "eval_loss": 0.5432211756706238, "eval_precision": 0.7557932263814616, "eval_recall": 0.8238341968911918, "eval_roc_auc": 0.7988059873344847, "eval_runtime": 1.1164, "eval_samples_per_second": 2390.72, "eval_steps_per_second": 5.374, "step": 4515 }, { "epoch": 430.0952380952381, "grad_norm": 7.362122535705566, "learning_rate": 8.459520239880061e-07, "loss": 1.0955, "step": 4516 }, { "epoch": 430.1904761904762, "grad_norm": 8.211528778076172, "learning_rate": 8.461394302848575e-07, "loss": 1.1396, "step": 4517 }, { "epoch": 430.2857142857143, "grad_norm": 8.908727645874023, "learning_rate": 8.463268365817092e-07, "loss": 1.0972, "step": 4518 }, { "epoch": 430.3809523809524, "grad_norm": 18.641643524169922, "learning_rate": 8.465142428785608e-07, "loss": 1.1222, "step": 4519 }, { "epoch": 430.4761904761905, "grad_norm": 22.6841983795166, "learning_rate": 8.467016491754123e-07, "loss": 1.1085, "step": 4520 }, { "epoch": 430.57142857142856, "grad_norm": 14.652667999267578, "learning_rate": 8.46889055472264e-07, "loss": 1.0766, "step": 4521 }, { "epoch": 430.6666666666667, "grad_norm": 8.085213661193848, "learning_rate": 8.470764617691154e-07, "loss": 1.0738, "step": 4522 }, { "epoch": 430.76190476190476, "grad_norm": 6.663705825805664, "learning_rate": 8.472638680659671e-07, "loss": 1.1414, "step": 4523 }, { "epoch": 430.85714285714283, "grad_norm": 13.765937805175781, "learning_rate": 8.474512743628185e-07, "loss": 1.1053, "step": 4524 }, { "epoch": 430.95238095238096, "grad_norm": 16.29475975036621, "learning_rate": 8.476386806596702e-07, "loss": 1.0983, "step": 4525 }, { "epoch": 430.95238095238096, "eval_accuracy": 0.7388535031847133, "eval_f1": 0.780886513674945, "eval_loss": 0.5443218350410461, "eval_precision": 0.7587049480757483, "eval_recall": 0.8044041450777202, "eval_roc_auc": 0.7975322394933794, "eval_runtime": 1.1195, "eval_samples_per_second": 2384.206, "eval_steps_per_second": 5.36, "step": 4525 }, { "epoch": 431.04761904761904, "grad_norm": 30.667348861694336, "learning_rate": 8.478260869565218e-07, "loss": 1.1, "step": 4526 }, { "epoch": 431.14285714285717, "grad_norm": 8.329144477844238, "learning_rate": 8.480134932533733e-07, "loss": 1.0965, "step": 4527 }, { "epoch": 431.23809523809524, "grad_norm": 16.07992172241211, "learning_rate": 8.482008995502249e-07, "loss": 1.0884, "step": 4528 }, { "epoch": 431.3333333333333, "grad_norm": 9.440814018249512, "learning_rate": 8.483883058470765e-07, "loss": 1.0635, "step": 4529 }, { "epoch": 431.42857142857144, "grad_norm": 15.835526466369629, "learning_rate": 8.48575712143928e-07, "loss": 1.1016, "step": 4530 }, { "epoch": 431.5238095238095, "grad_norm": 13.049686431884766, "learning_rate": 8.487631184407796e-07, "loss": 1.0904, "step": 4531 }, { "epoch": 431.6190476190476, "grad_norm": 14.782801628112793, "learning_rate": 8.489505247376313e-07, "loss": 1.1082, "step": 4532 }, { "epoch": 431.7142857142857, "grad_norm": 22.376850128173828, "learning_rate": 8.491379310344828e-07, "loss": 1.1032, "step": 4533 }, { "epoch": 431.8095238095238, "grad_norm": 20.260921478271484, "learning_rate": 8.493253373313344e-07, "loss": 1.1155, "step": 4534 }, { "epoch": 431.9047619047619, "grad_norm": 7.1398725509643555, "learning_rate": 8.49512743628186e-07, "loss": 1.1355, "step": 4535 }, { "epoch": 432.0, "grad_norm": 12.543313980102539, "learning_rate": 8.497001499250375e-07, "loss": 1.0893, "step": 4536 }, { "epoch": 432.0, "eval_accuracy": 0.7429748969651555, "eval_f1": 0.785892634207241, "eval_loss": 0.543362021446228, "eval_precision": 0.7584337349397591, "eval_recall": 0.8154145077720207, "eval_roc_auc": 0.7986922855497987, "eval_runtime": 1.1357, "eval_samples_per_second": 2350.082, "eval_steps_per_second": 5.283, "step": 4536 }, { "epoch": 432.0952380952381, "grad_norm": 5.253561019897461, "learning_rate": 8.498875562218891e-07, "loss": 1.1291, "step": 4537 }, { "epoch": 432.1904761904762, "grad_norm": 7.003561973571777, "learning_rate": 8.500749625187406e-07, "loss": 1.1181, "step": 4538 }, { "epoch": 432.2857142857143, "grad_norm": 29.183019638061523, "learning_rate": 8.502623688155923e-07, "loss": 1.0722, "step": 4539 }, { "epoch": 432.3809523809524, "grad_norm": 16.94145393371582, "learning_rate": 8.504497751124439e-07, "loss": 1.0844, "step": 4540 }, { "epoch": 432.4761904761905, "grad_norm": 23.304765701293945, "learning_rate": 8.506371814092954e-07, "loss": 1.0513, "step": 4541 }, { "epoch": 432.57142857142856, "grad_norm": 13.36349105834961, "learning_rate": 8.50824587706147e-07, "loss": 1.1252, "step": 4542 }, { "epoch": 432.6666666666667, "grad_norm": 5.048804759979248, "learning_rate": 8.510119940029985e-07, "loss": 1.1111, "step": 4543 }, { "epoch": 432.76190476190476, "grad_norm": 10.21939468383789, "learning_rate": 8.511994002998501e-07, "loss": 1.1129, "step": 4544 }, { "epoch": 432.85714285714283, "grad_norm": 8.018023490905762, "learning_rate": 8.513868065967016e-07, "loss": 1.0731, "step": 4545 }, { "epoch": 432.95238095238096, "grad_norm": 7.383289813995361, "learning_rate": 8.515742128935532e-07, "loss": 1.0969, "step": 4546 }, { "epoch": 432.95238095238096, "eval_accuracy": 0.7463469464218808, "eval_f1": 0.794287450622911, "eval_loss": 0.5438169240951538, "eval_precision": 0.7481396680022896, "eval_recall": 0.8465025906735751, "eval_roc_auc": 0.7986373056994819, "eval_runtime": 1.1242, "eval_samples_per_second": 2374.237, "eval_steps_per_second": 5.337, "step": 4546 }, { "epoch": 433.04761904761904, "grad_norm": 12.807202339172363, "learning_rate": 8.517616191904049e-07, "loss": 1.1247, "step": 4547 }, { "epoch": 433.14285714285717, "grad_norm": 25.507034301757812, "learning_rate": 8.519490254872564e-07, "loss": 1.1461, "step": 4548 }, { "epoch": 433.23809523809524, "grad_norm": 16.608102798461914, "learning_rate": 8.52136431784108e-07, "loss": 1.0899, "step": 4549 }, { "epoch": 433.3333333333333, "grad_norm": 6.582379341125488, "learning_rate": 8.523238380809595e-07, "loss": 1.085, "step": 4550 }, { "epoch": 433.42857142857144, "grad_norm": 7.415794849395752, "learning_rate": 8.525112443778111e-07, "loss": 1.0937, "step": 4551 }, { "epoch": 433.5238095238095, "grad_norm": 9.852500915527344, "learning_rate": 8.526986506746627e-07, "loss": 1.0917, "step": 4552 }, { "epoch": 433.6190476190476, "grad_norm": 7.424929618835449, "learning_rate": 8.528860569715142e-07, "loss": 1.1468, "step": 4553 }, { "epoch": 433.7142857142857, "grad_norm": 11.212461471557617, "learning_rate": 8.530734632683659e-07, "loss": 1.1566, "step": 4554 }, { "epoch": 433.8095238095238, "grad_norm": 7.593562126159668, "learning_rate": 8.532608695652174e-07, "loss": 1.0882, "step": 4555 }, { "epoch": 433.9047619047619, "grad_norm": 29.836612701416016, "learning_rate": 8.53448275862069e-07, "loss": 1.0821, "step": 4556 }, { "epoch": 434.0, "grad_norm": 21.95094108581543, "learning_rate": 8.536356821589206e-07, "loss": 1.0802, "step": 4557 }, { "epoch": 434.0, "eval_accuracy": 0.7429748969651555, "eval_f1": 0.785892634207241, "eval_loss": 0.5434971451759338, "eval_precision": 0.7584337349397591, "eval_recall": 0.8154145077720207, "eval_roc_auc": 0.798743523316062, "eval_runtime": 1.1244, "eval_samples_per_second": 2373.812, "eval_steps_per_second": 5.336, "step": 4557 }, { "epoch": 434.0952380952381, "grad_norm": 9.611329078674316, "learning_rate": 8.538230884557722e-07, "loss": 1.127, "step": 4558 }, { "epoch": 434.1904761904762, "grad_norm": 7.204107761383057, "learning_rate": 8.540104947526237e-07, "loss": 1.1249, "step": 4559 }, { "epoch": 434.2857142857143, "grad_norm": 7.247961044311523, "learning_rate": 8.541979010494753e-07, "loss": 1.0857, "step": 4560 }, { "epoch": 434.3809523809524, "grad_norm": 6.680117607116699, "learning_rate": 8.54385307346327e-07, "loss": 1.0866, "step": 4561 }, { "epoch": 434.4761904761905, "grad_norm": 11.561103820800781, "learning_rate": 8.545727136431784e-07, "loss": 1.1357, "step": 4562 }, { "epoch": 434.57142857142856, "grad_norm": 12.856839179992676, "learning_rate": 8.547601199400301e-07, "loss": 1.0724, "step": 4563 }, { "epoch": 434.6666666666667, "grad_norm": 31.07172203063965, "learning_rate": 8.549475262368815e-07, "loss": 1.1404, "step": 4564 }, { "epoch": 434.76190476190476, "grad_norm": 5.869296550750732, "learning_rate": 8.551349325337332e-07, "loss": 1.0578, "step": 4565 }, { "epoch": 434.85714285714283, "grad_norm": 6.1051926612854, "learning_rate": 8.553223388305846e-07, "loss": 1.1019, "step": 4566 }, { "epoch": 434.95238095238096, "grad_norm": 7.054297924041748, "learning_rate": 8.555097451274363e-07, "loss": 1.075, "step": 4567 }, { "epoch": 434.95238095238096, "eval_accuracy": 0.7452229299363057, "eval_f1": 0.7881619937694704, "eval_loss": 0.5423450469970703, "eval_precision": 0.7593037214885955, "eval_recall": 0.819300518134715, "eval_roc_auc": 0.7997233736327, "eval_runtime": 1.1364, "eval_samples_per_second": 2348.591, "eval_steps_per_second": 5.28, "step": 4567 }, { "epoch": 435.04761904761904, "grad_norm": 6.471446990966797, "learning_rate": 8.55697151424288e-07, "loss": 1.119, "step": 4568 }, { "epoch": 435.14285714285717, "grad_norm": 13.11718463897705, "learning_rate": 8.558845577211394e-07, "loss": 1.0647, "step": 4569 }, { "epoch": 435.23809523809524, "grad_norm": 19.984201431274414, "learning_rate": 8.560719640179911e-07, "loss": 1.0537, "step": 4570 }, { "epoch": 435.3333333333333, "grad_norm": 15.48215389251709, "learning_rate": 8.562593703148425e-07, "loss": 1.0928, "step": 4571 }, { "epoch": 435.42857142857144, "grad_norm": 6.437912940979004, "learning_rate": 8.564467766116942e-07, "loss": 1.0896, "step": 4572 }, { "epoch": 435.5238095238095, "grad_norm": 7.394294738769531, "learning_rate": 8.566341829085456e-07, "loss": 1.0828, "step": 4573 }, { "epoch": 435.6190476190476, "grad_norm": 17.219388961791992, "learning_rate": 8.568215892053973e-07, "loss": 1.1149, "step": 4574 }, { "epoch": 435.7142857142857, "grad_norm": 11.16395378112793, "learning_rate": 8.57008995502249e-07, "loss": 1.1232, "step": 4575 }, { "epoch": 435.8095238095238, "grad_norm": 31.06517791748047, "learning_rate": 8.571964017991004e-07, "loss": 1.1299, "step": 4576 }, { "epoch": 435.9047619047619, "grad_norm": 23.21164894104004, "learning_rate": 8.573838080959521e-07, "loss": 1.1382, "step": 4577 }, { "epoch": 436.0, "grad_norm": 17.702280044555664, "learning_rate": 8.575712143928036e-07, "loss": 1.1218, "step": 4578 }, { "epoch": 436.0, "eval_accuracy": 0.7478456350693143, "eval_f1": 0.7922198209323865, "eval_loss": 0.5420184135437012, "eval_precision": 0.7569321533923303, "eval_recall": 0.8309585492227979, "eval_roc_auc": 0.8004461715601612, "eval_runtime": 1.1459, "eval_samples_per_second": 2329.125, "eval_steps_per_second": 5.236, "step": 4578 }, { "epoch": 436.0952380952381, "grad_norm": 9.291200637817383, "learning_rate": 8.577586206896553e-07, "loss": 1.1037, "step": 4579 }, { "epoch": 436.1904761904762, "grad_norm": 7.206141471862793, "learning_rate": 8.579460269865067e-07, "loss": 1.1025, "step": 4580 }, { "epoch": 436.2857142857143, "grad_norm": 11.635160446166992, "learning_rate": 8.581334332833584e-07, "loss": 1.0913, "step": 4581 }, { "epoch": 436.3809523809524, "grad_norm": 16.53832244873047, "learning_rate": 8.5832083958021e-07, "loss": 1.0808, "step": 4582 }, { "epoch": 436.4761904761905, "grad_norm": 9.663097381591797, "learning_rate": 8.585082458770615e-07, "loss": 1.1139, "step": 4583 }, { "epoch": 436.57142857142856, "grad_norm": 8.36888313293457, "learning_rate": 8.586956521739132e-07, "loss": 1.1056, "step": 4584 }, { "epoch": 436.6666666666667, "grad_norm": 9.114745140075684, "learning_rate": 8.588830584707646e-07, "loss": 1.1214, "step": 4585 }, { "epoch": 436.76190476190476, "grad_norm": 6.442534923553467, "learning_rate": 8.590704647676163e-07, "loss": 1.1052, "step": 4586 }, { "epoch": 436.85714285714283, "grad_norm": 15.638298034667969, "learning_rate": 8.592578710644677e-07, "loss": 1.0966, "step": 4587 }, { "epoch": 436.95238095238096, "grad_norm": 8.274981498718262, "learning_rate": 8.594452773613194e-07, "loss": 1.081, "step": 4588 }, { "epoch": 436.95238095238096, "eval_accuracy": 0.7440989134507306, "eval_f1": 0.7911953531030266, "eval_loss": 0.5425230860710144, "eval_precision": 0.7492762015055009, "eval_recall": 0.8380829015544041, "eval_roc_auc": 0.8002530224525044, "eval_runtime": 1.1221, "eval_samples_per_second": 2378.65, "eval_steps_per_second": 5.347, "step": 4588 }, { "epoch": 437.04761904761904, "grad_norm": 12.262890815734863, "learning_rate": 8.59632683658171e-07, "loss": 1.1309, "step": 4589 }, { "epoch": 437.14285714285717, "grad_norm": 7.5080342292785645, "learning_rate": 8.598200899550225e-07, "loss": 1.0975, "step": 4590 }, { "epoch": 437.23809523809524, "grad_norm": 13.298624992370605, "learning_rate": 8.600074962518741e-07, "loss": 1.0747, "step": 4591 }, { "epoch": 437.3333333333333, "grad_norm": 16.601577758789062, "learning_rate": 8.601949025487256e-07, "loss": 1.1014, "step": 4592 }, { "epoch": 437.42857142857144, "grad_norm": 20.987707138061523, "learning_rate": 8.603823088455773e-07, "loss": 1.1211, "step": 4593 }, { "epoch": 437.5238095238095, "grad_norm": 6.50589656829834, "learning_rate": 8.605697151424287e-07, "loss": 1.1063, "step": 4594 }, { "epoch": 437.6190476190476, "grad_norm": 6.247185707092285, "learning_rate": 8.607571214392804e-07, "loss": 1.1114, "step": 4595 }, { "epoch": 437.7142857142857, "grad_norm": 10.65212345123291, "learning_rate": 8.60944527736132e-07, "loss": 1.1128, "step": 4596 }, { "epoch": 437.8095238095238, "grad_norm": 10.363444328308105, "learning_rate": 8.611319340329835e-07, "loss": 1.0817, "step": 4597 }, { "epoch": 437.9047619047619, "grad_norm": 17.171010971069336, "learning_rate": 8.613193403298351e-07, "loss": 1.1228, "step": 4598 }, { "epoch": 438.0, "grad_norm": 12.242009162902832, "learning_rate": 8.615067466266867e-07, "loss": 1.0522, "step": 4599 }, { "epoch": 438.0, "eval_accuracy": 0.7418508804795804, "eval_f1": 0.787673343605547, "eval_loss": 0.543170154094696, "eval_precision": 0.7513227513227513, "eval_recall": 0.8277202072538861, "eval_roc_auc": 0.7993408175014393, "eval_runtime": 1.1738, "eval_samples_per_second": 2273.718, "eval_steps_per_second": 5.111, "step": 4599 }, { "epoch": 438.0952380952381, "grad_norm": 17.506868362426758, "learning_rate": 8.616941529235382e-07, "loss": 1.0765, "step": 4600 }, { "epoch": 438.1904761904762, "grad_norm": 13.496466636657715, "learning_rate": 8.618815592203898e-07, "loss": 1.0902, "step": 4601 }, { "epoch": 438.2857142857143, "grad_norm": 15.746648788452148, "learning_rate": 8.620689655172415e-07, "loss": 1.0835, "step": 4602 }, { "epoch": 438.3809523809524, "grad_norm": 16.976329803466797, "learning_rate": 8.62256371814093e-07, "loss": 1.1295, "step": 4603 }, { "epoch": 438.4761904761905, "grad_norm": 17.543289184570312, "learning_rate": 8.624437781109446e-07, "loss": 1.0789, "step": 4604 }, { "epoch": 438.57142857142856, "grad_norm": 10.029759407043457, "learning_rate": 8.626311844077962e-07, "loss": 1.069, "step": 4605 }, { "epoch": 438.6666666666667, "grad_norm": 7.242961883544922, "learning_rate": 8.628185907046477e-07, "loss": 1.0864, "step": 4606 }, { "epoch": 438.76190476190476, "grad_norm": 18.78436279296875, "learning_rate": 8.630059970014993e-07, "loss": 1.1065, "step": 4607 }, { "epoch": 438.85714285714283, "grad_norm": 9.78477668762207, "learning_rate": 8.631934032983508e-07, "loss": 1.148, "step": 4608 }, { "epoch": 438.95238095238096, "grad_norm": 5.987895965576172, "learning_rate": 8.633808095952024e-07, "loss": 1.09, "step": 4609 }, { "epoch": 438.95238095238096, "eval_accuracy": 0.7411015361558636, "eval_f1": 0.784399375975039, "eval_loss": 0.5427181124687195, "eval_precision": 0.7567730282962071, "eval_recall": 0.814119170984456, "eval_roc_auc": 0.7998529073114565, "eval_runtime": 1.4168, "eval_samples_per_second": 1883.875, "eval_steps_per_second": 4.235, "step": 4609 }, { "epoch": 439.04761904761904, "grad_norm": 9.344883918762207, "learning_rate": 8.635682158920541e-07, "loss": 1.139, "step": 4610 }, { "epoch": 439.14285714285717, "grad_norm": 16.81941795349121, "learning_rate": 8.637556221889056e-07, "loss": 1.1187, "step": 4611 }, { "epoch": 439.23809523809524, "grad_norm": 6.715560436248779, "learning_rate": 8.639430284857572e-07, "loss": 1.1438, "step": 4612 }, { "epoch": 439.3333333333333, "grad_norm": 12.95703411102295, "learning_rate": 8.641304347826087e-07, "loss": 1.0913, "step": 4613 }, { "epoch": 439.42857142857144, "grad_norm": 8.936210632324219, "learning_rate": 8.643178410794603e-07, "loss": 1.0851, "step": 4614 }, { "epoch": 439.5238095238095, "grad_norm": 6.878790855407715, "learning_rate": 8.64505247376312e-07, "loss": 1.078, "step": 4615 }, { "epoch": 439.6190476190476, "grad_norm": 5.259520530700684, "learning_rate": 8.646926536731634e-07, "loss": 1.1036, "step": 4616 }, { "epoch": 439.7142857142857, "grad_norm": 10.157668113708496, "learning_rate": 8.648800599700151e-07, "loss": 1.0713, "step": 4617 }, { "epoch": 439.8095238095238, "grad_norm": 9.646769523620605, "learning_rate": 8.650674662668665e-07, "loss": 1.1216, "step": 4618 }, { "epoch": 439.9047619047619, "grad_norm": 14.076932907104492, "learning_rate": 8.652548725637182e-07, "loss": 1.0955, "step": 4619 }, { "epoch": 440.0, "grad_norm": 7.416194915771484, "learning_rate": 8.654422788605696e-07, "loss": 1.1111, "step": 4620 }, { "epoch": 440.0, "eval_accuracy": 0.7418508804795804, "eval_f1": 0.7871485943775101, "eval_loss": 0.542918860912323, "eval_precision": 0.7525103366804489, "eval_recall": 0.8251295336787565, "eval_roc_auc": 0.7997084052964881, "eval_runtime": 1.1251, "eval_samples_per_second": 2372.254, "eval_steps_per_second": 5.333, "step": 4620 }, { "epoch": 440.0952380952381, "grad_norm": 9.420711517333984, "learning_rate": 8.656296851574213e-07, "loss": 1.0713, "step": 4621 }, { "epoch": 440.1904761904762, "grad_norm": 10.850831031799316, "learning_rate": 8.65817091454273e-07, "loss": 1.1064, "step": 4622 }, { "epoch": 440.2857142857143, "grad_norm": 11.357560157775879, "learning_rate": 8.660044977511244e-07, "loss": 1.1203, "step": 4623 }, { "epoch": 440.3809523809524, "grad_norm": 7.194850921630859, "learning_rate": 8.661919040479761e-07, "loss": 1.0921, "step": 4624 }, { "epoch": 440.4761904761905, "grad_norm": 18.304948806762695, "learning_rate": 8.663793103448276e-07, "loss": 1.0903, "step": 4625 }, { "epoch": 440.57142857142856, "grad_norm": 10.864583969116211, "learning_rate": 8.665667166416792e-07, "loss": 1.1132, "step": 4626 }, { "epoch": 440.6666666666667, "grad_norm": 16.42337989807129, "learning_rate": 8.667541229385307e-07, "loss": 1.1125, "step": 4627 }, { "epoch": 440.76190476190476, "grad_norm": 14.132763862609863, "learning_rate": 8.669415292353824e-07, "loss": 1.1365, "step": 4628 }, { "epoch": 440.85714285714283, "grad_norm": 10.242207527160645, "learning_rate": 8.67128935532234e-07, "loss": 1.0925, "step": 4629 }, { "epoch": 440.95238095238096, "grad_norm": 23.153173446655273, "learning_rate": 8.673163418290855e-07, "loss": 1.0465, "step": 4630 }, { "epoch": 440.95238095238096, "eval_accuracy": 0.7411015361558636, "eval_f1": 0.7820876695048881, "eval_loss": 0.5431788563728333, "eval_precision": 0.762138905961893, "eval_recall": 0.8031088082901554, "eval_roc_auc": 0.7999170984455958, "eval_runtime": 1.1195, "eval_samples_per_second": 2384.095, "eval_steps_per_second": 5.36, "step": 4630 }, { "epoch": 441.04761904761904, "grad_norm": 25.745071411132812, "learning_rate": 8.675037481259372e-07, "loss": 1.08, "step": 4631 }, { "epoch": 441.14285714285717, "grad_norm": 18.603288650512695, "learning_rate": 8.676911544227886e-07, "loss": 1.0773, "step": 4632 }, { "epoch": 441.23809523809524, "grad_norm": 14.742155075073242, "learning_rate": 8.678785607196403e-07, "loss": 1.1537, "step": 4633 }, { "epoch": 441.3333333333333, "grad_norm": 14.234055519104004, "learning_rate": 8.680659670164917e-07, "loss": 1.0868, "step": 4634 }, { "epoch": 441.42857142857144, "grad_norm": 5.549522399902344, "learning_rate": 8.682533733133434e-07, "loss": 1.1686, "step": 4635 }, { "epoch": 441.5238095238095, "grad_norm": 9.679647445678711, "learning_rate": 8.68440779610195e-07, "loss": 1.1148, "step": 4636 }, { "epoch": 441.6190476190476, "grad_norm": 14.539051055908203, "learning_rate": 8.686281859070465e-07, "loss": 1.1136, "step": 4637 }, { "epoch": 441.7142857142857, "grad_norm": 15.705877304077148, "learning_rate": 8.688155922038982e-07, "loss": 1.0616, "step": 4638 }, { "epoch": 441.8095238095238, "grad_norm": 5.833678245544434, "learning_rate": 8.690029985007496e-07, "loss": 1.0744, "step": 4639 }, { "epoch": 441.9047619047619, "grad_norm": 6.723447322845459, "learning_rate": 8.691904047976013e-07, "loss": 1.0927, "step": 4640 }, { "epoch": 442.0, "grad_norm": 8.890020370483398, "learning_rate": 8.693778110944527e-07, "loss": 1.0845, "step": 4641 }, { "epoch": 442.0, "eval_accuracy": 0.7448482577744474, "eval_f1": 0.7902679396365876, "eval_loss": 0.5423680543899536, "eval_precision": 0.7533763945977686, "eval_recall": 0.8309585492227979, "eval_roc_auc": 0.8005368451352908, "eval_runtime": 1.1212, "eval_samples_per_second": 2380.501, "eval_steps_per_second": 5.351, "step": 4641 }, { "epoch": 442.0952380952381, "grad_norm": 12.400238037109375, "learning_rate": 8.695652173913044e-07, "loss": 1.0651, "step": 4642 }, { "epoch": 442.1904761904762, "grad_norm": 14.389338493347168, "learning_rate": 8.69752623688156e-07, "loss": 1.163, "step": 4643 }, { "epoch": 442.2857142857143, "grad_norm": 11.256134033203125, "learning_rate": 8.699400299850075e-07, "loss": 1.0795, "step": 4644 }, { "epoch": 442.3809523809524, "grad_norm": 7.7698974609375, "learning_rate": 8.701274362818591e-07, "loss": 1.0783, "step": 4645 }, { "epoch": 442.4761904761905, "grad_norm": 8.148468971252441, "learning_rate": 8.703148425787106e-07, "loss": 1.1104, "step": 4646 }, { "epoch": 442.57142857142856, "grad_norm": 6.954012393951416, "learning_rate": 8.705022488755623e-07, "loss": 1.151, "step": 4647 }, { "epoch": 442.6666666666667, "grad_norm": 6.079837322235107, "learning_rate": 8.706896551724138e-07, "loss": 1.1116, "step": 4648 }, { "epoch": 442.76190476190476, "grad_norm": 25.121978759765625, "learning_rate": 8.708770614692655e-07, "loss": 1.1644, "step": 4649 }, { "epoch": 442.85714285714283, "grad_norm": 10.784706115722656, "learning_rate": 8.71064467766117e-07, "loss": 1.0589, "step": 4650 }, { "epoch": 442.95238095238096, "grad_norm": 16.868013381958008, "learning_rate": 8.712518740629686e-07, "loss": 1.0715, "step": 4651 }, { "epoch": 442.95238095238096, "eval_accuracy": 0.744473585612589, "eval_f1": 0.7870081199250468, "eval_loss": 0.5424505472183228, "eval_precision": 0.7599517490952955, "eval_recall": 0.8160621761658031, "eval_roc_auc": 0.8000805987334484, "eval_runtime": 1.1447, "eval_samples_per_second": 2331.708, "eval_steps_per_second": 5.242, "step": 4651 }, { "epoch": 443.04761904761904, "grad_norm": 14.400278091430664, "learning_rate": 8.714392803598202e-07, "loss": 1.1153, "step": 4652 }, { "epoch": 443.14285714285717, "grad_norm": 7.858428955078125, "learning_rate": 8.716266866566717e-07, "loss": 1.1209, "step": 4653 }, { "epoch": 443.23809523809524, "grad_norm": 16.608057022094727, "learning_rate": 8.718140929535233e-07, "loss": 1.1302, "step": 4654 }, { "epoch": 443.3333333333333, "grad_norm": 15.479148864746094, "learning_rate": 8.720014992503748e-07, "loss": 1.0628, "step": 4655 }, { "epoch": 443.42857142857144, "grad_norm": 8.721065521240234, "learning_rate": 8.721889055472265e-07, "loss": 1.1089, "step": 4656 }, { "epoch": 443.5238095238095, "grad_norm": 16.358375549316406, "learning_rate": 8.723763118440781e-07, "loss": 1.1184, "step": 4657 }, { "epoch": 443.6190476190476, "grad_norm": 8.80472469329834, "learning_rate": 8.725637181409296e-07, "loss": 1.0479, "step": 4658 }, { "epoch": 443.7142857142857, "grad_norm": 7.0978569984436035, "learning_rate": 8.727511244377812e-07, "loss": 1.1234, "step": 4659 }, { "epoch": 443.8095238095238, "grad_norm": 9.775753021240234, "learning_rate": 8.729385307346327e-07, "loss": 1.1586, "step": 4660 }, { "epoch": 443.9047619047619, "grad_norm": 6.316388130187988, "learning_rate": 8.731259370314843e-07, "loss": 1.1405, "step": 4661 }, { "epoch": 444.0, "grad_norm": 8.489418983459473, "learning_rate": 8.733133433283358e-07, "loss": 1.0751, "step": 4662 }, { "epoch": 444.0, "eval_accuracy": 0.7467216185837392, "eval_f1": 0.7949029126213593, "eval_loss": 0.5433393716812134, "eval_precision": 0.747716894977169, "eval_recall": 0.8484455958549223, "eval_roc_auc": 0.7994668969487622, "eval_runtime": 1.1107, "eval_samples_per_second": 2402.964, "eval_steps_per_second": 5.402, "step": 4662 }, { "epoch": 444.0952380952381, "grad_norm": 13.219255447387695, "learning_rate": 8.735007496251874e-07, "loss": 1.1428, "step": 4663 }, { "epoch": 444.1904761904762, "grad_norm": 11.721603393554688, "learning_rate": 8.736881559220391e-07, "loss": 1.0618, "step": 4664 }, { "epoch": 444.2857142857143, "grad_norm": 11.433424949645996, "learning_rate": 8.738755622188906e-07, "loss": 1.0972, "step": 4665 }, { "epoch": 444.3809523809524, "grad_norm": 16.360595703125, "learning_rate": 8.740629685157422e-07, "loss": 1.1192, "step": 4666 }, { "epoch": 444.4761904761905, "grad_norm": 6.362143516540527, "learning_rate": 8.742503748125937e-07, "loss": 1.0295, "step": 4667 }, { "epoch": 444.57142857142856, "grad_norm": 21.919862747192383, "learning_rate": 8.744377811094453e-07, "loss": 1.1246, "step": 4668 }, { "epoch": 444.6666666666667, "grad_norm": 10.915406227111816, "learning_rate": 8.746251874062969e-07, "loss": 1.1065, "step": 4669 }, { "epoch": 444.76190476190476, "grad_norm": 13.651249885559082, "learning_rate": 8.748125937031484e-07, "loss": 1.1167, "step": 4670 }, { "epoch": 444.85714285714283, "grad_norm": 7.456640720367432, "learning_rate": 8.750000000000001e-07, "loss": 1.1062, "step": 4671 }, { "epoch": 444.95238095238096, "grad_norm": 15.28005313873291, "learning_rate": 8.751874062968516e-07, "loss": 1.0744, "step": 4672 }, { "epoch": 444.95238095238096, "eval_accuracy": 0.7407268639940052, "eval_f1": 0.7825267127592709, "eval_loss": 0.5431521534919739, "eval_precision": 0.76007326007326, "eval_recall": 0.8063471502590673, "eval_roc_auc": 0.7993408175014393, "eval_runtime": 1.1292, "eval_samples_per_second": 2363.639, "eval_steps_per_second": 5.314, "step": 4672 }, { "epoch": 445.04761904761904, "grad_norm": 9.96605110168457, "learning_rate": 8.753748125937032e-07, "loss": 1.1101, "step": 4673 }, { "epoch": 445.14285714285717, "grad_norm": 13.393614768981934, "learning_rate": 8.755622188905548e-07, "loss": 1.0562, "step": 4674 }, { "epoch": 445.23809523809524, "grad_norm": 9.072484970092773, "learning_rate": 8.757496251874064e-07, "loss": 1.0918, "step": 4675 }, { "epoch": 445.3333333333333, "grad_norm": 11.837943077087402, "learning_rate": 8.759370314842579e-07, "loss": 1.0745, "step": 4676 }, { "epoch": 445.42857142857144, "grad_norm": 10.07509994506836, "learning_rate": 8.761244377811095e-07, "loss": 1.0584, "step": 4677 }, { "epoch": 445.5238095238095, "grad_norm": 10.085092544555664, "learning_rate": 8.763118440779612e-07, "loss": 1.1189, "step": 4678 }, { "epoch": 445.6190476190476, "grad_norm": 8.869654655456543, "learning_rate": 8.764992503748126e-07, "loss": 1.0947, "step": 4679 }, { "epoch": 445.7142857142857, "grad_norm": 13.115325927734375, "learning_rate": 8.766866566716643e-07, "loss": 1.1693, "step": 4680 }, { "epoch": 445.8095238095238, "grad_norm": 9.584236145019531, "learning_rate": 8.768740629685157e-07, "loss": 1.1207, "step": 4681 }, { "epoch": 445.9047619047619, "grad_norm": 6.502092361450195, "learning_rate": 8.770614692653674e-07, "loss": 1.1005, "step": 4682 }, { "epoch": 446.0, "grad_norm": 23.737815856933594, "learning_rate": 8.772488755622189e-07, "loss": 1.0992, "step": 4683 }, { "epoch": 446.0, "eval_accuracy": 0.7493443237167479, "eval_f1": 0.7921714818266542, "eval_loss": 0.5422975420951843, "eval_precision": 0.7611940298507462, "eval_recall": 0.8257772020725389, "eval_roc_auc": 0.7999453080023028, "eval_runtime": 1.1243, "eval_samples_per_second": 2373.991, "eval_steps_per_second": 5.337, "step": 4683 }, { "epoch": 446.0952380952381, "grad_norm": 8.786201477050781, "learning_rate": 8.774362818590705e-07, "loss": 1.0786, "step": 4684 }, { "epoch": 446.1904761904762, "grad_norm": 7.7905449867248535, "learning_rate": 8.776236881559222e-07, "loss": 1.1466, "step": 4685 }, { "epoch": 446.2857142857143, "grad_norm": 29.007137298583984, "learning_rate": 8.778110944527736e-07, "loss": 1.0394, "step": 4686 }, { "epoch": 446.3809523809524, "grad_norm": 13.108345031738281, "learning_rate": 8.779985007496253e-07, "loss": 1.1375, "step": 4687 }, { "epoch": 446.4761904761905, "grad_norm": 6.3110833168029785, "learning_rate": 8.781859070464767e-07, "loss": 1.1257, "step": 4688 }, { "epoch": 446.57142857142856, "grad_norm": 14.847665786743164, "learning_rate": 8.783733133433284e-07, "loss": 1.0791, "step": 4689 }, { "epoch": 446.6666666666667, "grad_norm": 6.850281715393066, "learning_rate": 8.785607196401798e-07, "loss": 1.0899, "step": 4690 }, { "epoch": 446.76190476190476, "grad_norm": 7.7451558113098145, "learning_rate": 8.787481259370315e-07, "loss": 1.0919, "step": 4691 }, { "epoch": 446.85714285714283, "grad_norm": 7.732067108154297, "learning_rate": 8.789355322338832e-07, "loss": 1.0963, "step": 4692 }, { "epoch": 446.95238095238096, "grad_norm": 8.77704906463623, "learning_rate": 8.791229385307346e-07, "loss": 1.0686, "step": 4693 }, { "epoch": 446.95238095238096, "eval_accuracy": 0.7455976020981641, "eval_f1": 0.78932671424139, "eval_loss": 0.5424996614456177, "eval_precision": 0.7575938058368076, "eval_recall": 0.8238341968911918, "eval_roc_auc": 0.800234599884859, "eval_runtime": 1.1375, "eval_samples_per_second": 2346.357, "eval_steps_per_second": 5.275, "step": 4693 }, { "epoch": 447.04761904761904, "grad_norm": 8.154366493225098, "learning_rate": 8.793103448275863e-07, "loss": 1.0908, "step": 4694 }, { "epoch": 447.14285714285717, "grad_norm": 10.25619125366211, "learning_rate": 8.794977511244378e-07, "loss": 1.1514, "step": 4695 }, { "epoch": 447.23809523809524, "grad_norm": 6.668560028076172, "learning_rate": 8.796851574212894e-07, "loss": 1.1213, "step": 4696 }, { "epoch": 447.3333333333333, "grad_norm": 16.608366012573242, "learning_rate": 8.798725637181409e-07, "loss": 1.1177, "step": 4697 }, { "epoch": 447.42857142857144, "grad_norm": 19.34969139099121, "learning_rate": 8.800599700149926e-07, "loss": 1.0898, "step": 4698 }, { "epoch": 447.5238095238095, "grad_norm": 15.329229354858398, "learning_rate": 8.802473763118443e-07, "loss": 1.0809, "step": 4699 }, { "epoch": 447.6190476190476, "grad_norm": 6.398341178894043, "learning_rate": 8.804347826086957e-07, "loss": 1.0798, "step": 4700 }, { "epoch": 447.7142857142857, "grad_norm": 4.657569885253906, "learning_rate": 8.806221889055474e-07, "loss": 1.0742, "step": 4701 }, { "epoch": 447.8095238095238, "grad_norm": 11.059224128723145, "learning_rate": 8.808095952023988e-07, "loss": 1.0735, "step": 4702 }, { "epoch": 447.9047619047619, "grad_norm": 6.843538284301758, "learning_rate": 8.809970014992505e-07, "loss": 1.0878, "step": 4703 }, { "epoch": 448.0, "grad_norm": 16.965524673461914, "learning_rate": 8.811844077961019e-07, "loss": 1.1474, "step": 4704 }, { "epoch": 448.0, "eval_accuracy": 0.7452229299363057, "eval_f1": 0.7941888619854721, "eval_loss": 0.5430591702461243, "eval_precision": 0.7454545454545455, "eval_recall": 0.8497409326424871, "eval_roc_auc": 0.8003316062176166, "eval_runtime": 1.1346, "eval_samples_per_second": 2352.38, "eval_steps_per_second": 5.288, "step": 4704 }, { "epoch": 448.0952380952381, "grad_norm": 7.548295497894287, "learning_rate": 8.813718140929536e-07, "loss": 1.0681, "step": 4705 }, { "epoch": 448.1904761904762, "grad_norm": 11.476109504699707, "learning_rate": 8.815592203898052e-07, "loss": 1.115, "step": 4706 }, { "epoch": 448.2857142857143, "grad_norm": 14.373614311218262, "learning_rate": 8.817466266866567e-07, "loss": 1.1139, "step": 4707 }, { "epoch": 448.3809523809524, "grad_norm": 7.564189434051514, "learning_rate": 8.819340329835083e-07, "loss": 1.07, "step": 4708 }, { "epoch": 448.4761904761905, "grad_norm": 7.844211578369141, "learning_rate": 8.821214392803598e-07, "loss": 1.1186, "step": 4709 }, { "epoch": 448.57142857142856, "grad_norm": 14.013773918151855, "learning_rate": 8.823088455772115e-07, "loss": 1.1451, "step": 4710 }, { "epoch": 448.6666666666667, "grad_norm": 17.679235458374023, "learning_rate": 8.824962518740629e-07, "loss": 1.0677, "step": 4711 }, { "epoch": 448.76190476190476, "grad_norm": 18.348920822143555, "learning_rate": 8.826836581709146e-07, "loss": 1.1012, "step": 4712 }, { "epoch": 448.85714285714283, "grad_norm": 24.86972999572754, "learning_rate": 8.828710644677662e-07, "loss": 1.109, "step": 4713 }, { "epoch": 448.95238095238096, "grad_norm": 19.28120994567871, "learning_rate": 8.830584707646177e-07, "loss": 1.0822, "step": 4714 }, { "epoch": 448.95238095238096, "eval_accuracy": 0.7433495691270139, "eval_f1": 0.7857366280888333, "eval_loss": 0.5428649187088013, "eval_precision": 0.7598306110102844, "eval_recall": 0.8134715025906736, "eval_roc_auc": 0.7996755900978698, "eval_runtime": 1.1342, "eval_samples_per_second": 2353.18, "eval_steps_per_second": 5.29, "step": 4714 }, { "epoch": 449.04761904761904, "grad_norm": 13.300694465637207, "learning_rate": 8.832458770614693e-07, "loss": 1.1411, "step": 4715 }, { "epoch": 449.14285714285717, "grad_norm": 6.260472774505615, "learning_rate": 8.834332833583209e-07, "loss": 1.0958, "step": 4716 }, { "epoch": 449.23809523809524, "grad_norm": 15.211816787719727, "learning_rate": 8.836206896551724e-07, "loss": 1.0312, "step": 4717 }, { "epoch": 449.3333333333333, "grad_norm": 11.604256629943848, "learning_rate": 8.83808095952024e-07, "loss": 1.1285, "step": 4718 }, { "epoch": 449.42857142857144, "grad_norm": 29.347837448120117, "learning_rate": 8.839955022488757e-07, "loss": 1.0759, "step": 4719 }, { "epoch": 449.5238095238095, "grad_norm": 21.008678436279297, "learning_rate": 8.841829085457272e-07, "loss": 1.1071, "step": 4720 }, { "epoch": 449.6190476190476, "grad_norm": 25.13567543029785, "learning_rate": 8.843703148425788e-07, "loss": 1.0805, "step": 4721 }, { "epoch": 449.7142857142857, "grad_norm": 17.31960678100586, "learning_rate": 8.845577211394304e-07, "loss": 1.1092, "step": 4722 }, { "epoch": 449.8095238095238, "grad_norm": 28.314422607421875, "learning_rate": 8.847451274362819e-07, "loss": 1.146, "step": 4723 }, { "epoch": 449.9047619047619, "grad_norm": 20.834762573242188, "learning_rate": 8.849325337331335e-07, "loss": 1.1115, "step": 4724 }, { "epoch": 450.0, "grad_norm": 6.921756267547607, "learning_rate": 8.85119940029985e-07, "loss": 1.1164, "step": 4725 }, { "epoch": 450.0, "eval_accuracy": 0.741476208317722, "eval_f1": 0.783427495291902, "eval_loss": 0.5428313612937927, "eval_precision": 0.7600487210718636, "eval_recall": 0.8082901554404145, "eval_roc_auc": 0.7998077144502015, "eval_runtime": 1.437, "eval_samples_per_second": 1857.39, "eval_steps_per_second": 4.175, "step": 4725 }, { "epoch": 450.0952380952381, "grad_norm": 8.552742004394531, "learning_rate": 8.853073463268366e-07, "loss": 1.1103, "step": 4726 }, { "epoch": 450.1904761904762, "grad_norm": 27.3271541595459, "learning_rate": 8.854947526236883e-07, "loss": 1.1035, "step": 4727 }, { "epoch": 450.2857142857143, "grad_norm": 21.7481689453125, "learning_rate": 8.856821589205398e-07, "loss": 1.1017, "step": 4728 }, { "epoch": 450.3809523809524, "grad_norm": 17.90006446838379, "learning_rate": 8.858695652173914e-07, "loss": 1.0786, "step": 4729 }, { "epoch": 450.4761904761905, "grad_norm": 16.27367401123047, "learning_rate": 8.860569715142429e-07, "loss": 1.1039, "step": 4730 }, { "epoch": 450.57142857142856, "grad_norm": 7.353328227996826, "learning_rate": 8.862443778110945e-07, "loss": 1.0386, "step": 4731 }, { "epoch": 450.6666666666667, "grad_norm": 10.046563148498535, "learning_rate": 8.86431784107946e-07, "loss": 1.0887, "step": 4732 }, { "epoch": 450.76190476190476, "grad_norm": 5.69588041305542, "learning_rate": 8.866191904047976e-07, "loss": 1.1012, "step": 4733 }, { "epoch": 450.85714285714283, "grad_norm": 10.160133361816406, "learning_rate": 8.868065967016493e-07, "loss": 1.0874, "step": 4734 }, { "epoch": 450.95238095238096, "grad_norm": 23.668718338012695, "learning_rate": 8.869940029985007e-07, "loss": 1.1325, "step": 4735 }, { "epoch": 450.95238095238096, "eval_accuracy": 0.7482203072311727, "eval_f1": 0.8015357353809806, "eval_loss": 0.5460059642791748, "eval_precision": 0.7366992399565689, "eval_recall": 0.8788860103626943, "eval_roc_auc": 0.7993002302820955, "eval_runtime": 1.104, "eval_samples_per_second": 2417.465, "eval_steps_per_second": 5.435, "step": 4735 } ], "logging_steps": 1, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 500, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.8974993938336973e+17, "train_batch_size": 512, "trial_name": null, "trial_params": null }