diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,12145 +1,11326 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 15.0, - "global_step": 995085, + "epoch": 14.0, + "global_step": 928746, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, - "learning_rate": 1.998995060723456e-05, - "loss": 0.0895, + "learning_rate": 2.9984925910851835e-05, + "loss": 0.1246, "step": 500 }, { "epoch": 0.02, - "learning_rate": 1.9979901214469116e-05, - "loss": 0.0636, + "learning_rate": 2.9969851821703676e-05, + "loss": 0.0954, "step": 1000 }, { "epoch": 0.02, - "learning_rate": 1.9969851821703673e-05, - "loss": 0.061, + "learning_rate": 2.995477773255551e-05, + "loss": 0.0904, "step": 1500 }, { "epoch": 0.03, - "learning_rate": 1.9959802428938234e-05, - "loss": 0.0595, + "learning_rate": 2.9939703643407347e-05, + "loss": 0.0884, "step": 2000 }, { "epoch": 0.04, - "learning_rate": 1.994975303617279e-05, - "loss": 0.0552, + "learning_rate": 2.9924629554259184e-05, + "loss": 0.0811, "step": 2500 }, { "epoch": 0.05, - "learning_rate": 1.9939703643407348e-05, - "loss": 0.0574, + "learning_rate": 2.9909555465111022e-05, + "loss": 0.0836, "step": 3000 }, { "epoch": 0.05, - "learning_rate": 1.9929654250641905e-05, - "loss": 0.054, + "learning_rate": 2.9894481375962856e-05, + "loss": 0.081, "step": 3500 }, { "epoch": 0.06, - "learning_rate": 1.9919604857876466e-05, - "loss": 0.0522, + "learning_rate": 2.9879407286814693e-05, + "loss": 0.0775, "step": 4000 }, { "epoch": 0.07, - "learning_rate": 1.9909555465111023e-05, - "loss": 0.0567, + "learning_rate": 2.986433319766653e-05, + "loss": 0.0838, "step": 4500 }, { "epoch": 0.08, - "learning_rate": 1.989950607234558e-05, - "loss": 0.0544, + "learning_rate": 2.9849259108518368e-05, + "loss": 0.0793, "step": 5000 }, { "epoch": 0.08, - "learning_rate": 1.9889456679580137e-05, - "loss": 0.0551, + "learning_rate": 2.9834185019370202e-05, + "loss": 0.0796, "step": 5500 }, { "epoch": 0.09, - "learning_rate": 1.9879407286814694e-05, - "loss": 0.0551, + "learning_rate": 2.9819110930222043e-05, + "loss": 0.0796, "step": 6000 }, { "epoch": 0.1, - "learning_rate": 1.986935789404925e-05, - "loss": 0.0514, + "learning_rate": 2.9804036841073877e-05, + "loss": 0.0757, "step": 6500 }, { "epoch": 0.11, - "learning_rate": 1.9859308501283812e-05, - "loss": 0.0515, + "learning_rate": 2.9788962751925715e-05, + "loss": 0.076, "step": 7000 }, { "epoch": 0.11, - "learning_rate": 1.984925910851837e-05, - "loss": 0.0518, + "learning_rate": 2.9773888662777552e-05, + "loss": 0.0754, "step": 7500 }, { "epoch": 0.12, - "learning_rate": 1.9839209715752926e-05, - "loss": 0.0505, + "learning_rate": 2.975881457362939e-05, + "loss": 0.0745, "step": 8000 }, { "epoch": 0.13, - "learning_rate": 1.9829160322987483e-05, - "loss": 0.0504, + "learning_rate": 2.9743740484481223e-05, + "loss": 0.0748, "step": 8500 }, { "epoch": 0.14, - "learning_rate": 1.9819110930222044e-05, - "loss": 0.0501, + "learning_rate": 2.9728666395333064e-05, + "loss": 0.0735, "step": 9000 }, { "epoch": 0.14, - "learning_rate": 1.98090615374566e-05, - "loss": 0.0499, + "learning_rate": 2.9713592306184898e-05, + "loss": 0.0739, "step": 9500 }, { "epoch": 0.15, - "learning_rate": 1.9799012144691158e-05, - "loss": 0.0507, + "learning_rate": 2.9698518217036736e-05, + "loss": 0.074, "step": 10000 }, { "epoch": 0.16, - "learning_rate": 1.9788962751925715e-05, - "loss": 0.0502, + "learning_rate": 2.9683444127888573e-05, + "loss": 0.0725, "step": 10500 }, { "epoch": 0.17, - "learning_rate": 1.9778913359160276e-05, - "loss": 0.0506, + "learning_rate": 2.966837003874041e-05, + "loss": 0.0744, "step": 11000 }, { "epoch": 0.17, - "learning_rate": 1.976886396639483e-05, - "loss": 0.0519, + "learning_rate": 2.9653295949592245e-05, + "loss": 0.0751, "step": 11500 }, { "epoch": 0.18, - "learning_rate": 1.975881457362939e-05, - "loss": 0.0488, + "learning_rate": 2.9638221860444082e-05, + "loss": 0.0714, "step": 12000 }, { "epoch": 0.19, - "learning_rate": 1.9748765180863947e-05, - "loss": 0.0493, + "learning_rate": 2.962314777129592e-05, + "loss": 0.0726, "step": 12500 }, { "epoch": 0.2, - "learning_rate": 1.9738715788098505e-05, - "loss": 0.0481, + "learning_rate": 2.9608073682147757e-05, + "loss": 0.0711, "step": 13000 }, { "epoch": 0.2, - "learning_rate": 1.972866639533306e-05, - "loss": 0.0484, + "learning_rate": 2.959299959299959e-05, + "loss": 0.0716, "step": 13500 }, { "epoch": 0.21, - "learning_rate": 1.9718617002567622e-05, - "loss": 0.0486, + "learning_rate": 2.957792550385143e-05, + "loss": 0.0713, "step": 14000 }, { "epoch": 0.22, - "learning_rate": 1.970856760980218e-05, - "loss": 0.0473, + "learning_rate": 2.9562851414703266e-05, + "loss": 0.07, "step": 14500 }, { "epoch": 0.23, - "learning_rate": 1.9698518217036737e-05, - "loss": 0.0472, + "learning_rate": 2.9547777325555103e-05, + "loss": 0.0688, "step": 15000 }, { "epoch": 0.23, - "learning_rate": 1.9688468824271294e-05, - "loss": 0.0471, + "learning_rate": 2.953270323640694e-05, + "loss": 0.0691, "step": 15500 }, { "epoch": 0.24, - "learning_rate": 1.9678419431505854e-05, - "loss": 0.0481, + "learning_rate": 2.9517629147258778e-05, + "loss": 0.0693, "step": 16000 }, { "epoch": 0.25, - "learning_rate": 1.966837003874041e-05, - "loss": 0.0472, + "learning_rate": 2.9502555058110612e-05, + "loss": 0.0685, "step": 16500 }, { "epoch": 0.26, - "learning_rate": 1.965832064597497e-05, - "loss": 0.0458, + "learning_rate": 2.9487480968962453e-05, + "loss": 0.0686, "step": 17000 }, { "epoch": 0.26, - "learning_rate": 1.9648271253209526e-05, - "loss": 0.0473, + "learning_rate": 2.9472406879814287e-05, + "loss": 0.068, "step": 17500 }, { "epoch": 0.27, - "learning_rate": 1.9638221860444083e-05, - "loss": 0.0487, + "learning_rate": 2.9457332790666124e-05, + "loss": 0.0712, "step": 18000 }, { "epoch": 0.28, - "learning_rate": 1.962817246767864e-05, - "loss": 0.0474, + "learning_rate": 2.944225870151796e-05, + "loss": 0.0683, "step": 18500 }, { "epoch": 0.29, - "learning_rate": 1.96181230749132e-05, - "loss": 0.0452, + "learning_rate": 2.94271846123698e-05, + "loss": 0.067, "step": 19000 }, { "epoch": 0.29, - "learning_rate": 1.9608073682147758e-05, - "loss": 0.0453, + "learning_rate": 2.9412110523221633e-05, + "loss": 0.0663, "step": 19500 }, { "epoch": 0.3, - "learning_rate": 1.9598024289382315e-05, - "loss": 0.0459, + "learning_rate": 2.939703643407347e-05, + "loss": 0.0666, "step": 20000 }, { "epoch": 0.31, - "learning_rate": 1.9587974896616872e-05, - "loss": 0.0462, + "learning_rate": 2.9381962344925308e-05, + "loss": 0.0668, "step": 20500 }, { "epoch": 0.32, - "learning_rate": 1.9577925503851433e-05, - "loss": 0.0467, + "learning_rate": 2.9366888255777145e-05, + "loss": 0.0666, "step": 21000 }, { "epoch": 0.32, - "learning_rate": 1.956787611108599e-05, - "loss": 0.0461, + "learning_rate": 2.935181416662898e-05, + "loss": 0.0664, "step": 21500 }, { "epoch": 0.33, - "learning_rate": 1.9557826718320547e-05, - "loss": 0.0461, + "learning_rate": 2.933674007748082e-05, + "loss": 0.0655, "step": 22000 }, { "epoch": 0.34, - "learning_rate": 1.9547777325555104e-05, - "loss": 0.0454, + "learning_rate": 2.9321665988332654e-05, + "loss": 0.0657, "step": 22500 }, { "epoch": 0.35, - "learning_rate": 1.9537727932789665e-05, - "loss": 0.0459, + "learning_rate": 2.930659189918449e-05, + "loss": 0.0669, "step": 23000 }, { "epoch": 0.35, - "learning_rate": 1.9527678540024218e-05, - "loss": 0.0456, + "learning_rate": 2.929151781003633e-05, + "loss": 0.0667, "step": 23500 }, { "epoch": 0.36, - "learning_rate": 1.951762914725878e-05, - "loss": 0.0458, + "learning_rate": 2.9276443720888167e-05, + "loss": 0.0661, "step": 24000 }, { "epoch": 0.37, - "learning_rate": 1.9507579754493336e-05, - "loss": 0.0452, + "learning_rate": 2.926136963174e-05, + "loss": 0.0644, "step": 24500 }, { "epoch": 0.38, - "learning_rate": 1.9497530361727893e-05, - "loss": 0.0444, + "learning_rate": 2.924629554259184e-05, + "loss": 0.0635, "step": 25000 }, { "epoch": 0.38, - "learning_rate": 1.948748096896245e-05, - "loss": 0.0519, + "learning_rate": 2.9231221453443675e-05, + "loss": 0.0629, "step": 25500 }, { "epoch": 0.39, - "learning_rate": 1.947743157619701e-05, - "loss": 0.0506, + "learning_rate": 2.9216147364295513e-05, + "loss": 0.0673, "step": 26000 }, { "epoch": 0.4, - "learning_rate": 1.9467382183431568e-05, - "loss": 0.046, + "learning_rate": 2.920107327514735e-05, + "loss": 0.0654, "step": 26500 }, { "epoch": 0.41, - "learning_rate": 1.9457332790666125e-05, - "loss": 0.0457, + "learning_rate": 2.9185999185999188e-05, + "loss": 0.0649, "step": 27000 }, { "epoch": 0.41, - "learning_rate": 1.9447283397900682e-05, - "loss": 0.0448, + "learning_rate": 2.9170925096851022e-05, + "loss": 0.0651, "step": 27500 }, { "epoch": 0.42, - "learning_rate": 1.9437234005135243e-05, - "loss": 0.0431, + "learning_rate": 2.915585100770286e-05, + "loss": 0.063, "step": 28000 }, { "epoch": 0.43, - "learning_rate": 1.9427184612369797e-05, - "loss": 0.0446, + "learning_rate": 2.9140776918554697e-05, + "loss": 0.0646, "step": 28500 }, { "epoch": 0.44, - "learning_rate": 1.9417135219604357e-05, - "loss": 0.0428, + "learning_rate": 2.9125702829406534e-05, + "loss": 0.0632, "step": 29000 }, { "epoch": 0.44, - "learning_rate": 1.9407085826838914e-05, - "loss": 0.0452, + "learning_rate": 2.9110628740258368e-05, + "loss": 0.0648, "step": 29500 }, { "epoch": 0.45, - "learning_rate": 1.939703643407347e-05, - "loss": 0.0434, + "learning_rate": 2.909555465111021e-05, + "loss": 0.0628, "step": 30000 }, { "epoch": 0.46, - "learning_rate": 1.938698704130803e-05, - "loss": 0.0426, + "learning_rate": 2.9080480561962043e-05, + "loss": 0.0607, "step": 30500 }, { "epoch": 0.47, - "learning_rate": 1.937693764854259e-05, - "loss": 0.0432, + "learning_rate": 2.906540647281388e-05, + "loss": 0.0622, "step": 31000 }, { "epoch": 0.47, - "learning_rate": 1.9366888255777146e-05, - "loss": 0.0441, + "learning_rate": 2.9050332383665718e-05, + "loss": 0.0645, "step": 31500 }, { "epoch": 0.48, - "learning_rate": 1.9356838863011703e-05, - "loss": 0.0436, + "learning_rate": 2.9035258294517555e-05, + "loss": 0.0633, "step": 32000 }, { "epoch": 0.49, - "learning_rate": 1.934678947024626e-05, - "loss": 0.043, + "learning_rate": 2.902018420536939e-05, + "loss": 0.0617, "step": 32500 }, { "epoch": 0.5, - "learning_rate": 1.933674007748082e-05, - "loss": 0.0437, + "learning_rate": 2.900511011622123e-05, + "loss": 0.0625, "step": 33000 }, { "epoch": 0.5, - "learning_rate": 1.9326690684715378e-05, - "loss": 0.0426, + "learning_rate": 2.8990036027073064e-05, + "loss": 0.0612, "step": 33500 }, { "epoch": 0.51, - "learning_rate": 1.9316641291949935e-05, - "loss": 0.043, + "learning_rate": 2.89749619379249e-05, + "loss": 0.0631, "step": 34000 }, { "epoch": 0.52, - "learning_rate": 1.9306591899184493e-05, - "loss": 0.043, + "learning_rate": 2.8959887848776735e-05, + "loss": 0.0626, "step": 34500 }, { "epoch": 0.53, - "learning_rate": 1.9296542506419053e-05, - "loss": 0.0436, + "learning_rate": 2.8944813759628576e-05, + "loss": 0.0624, "step": 35000 }, { "epoch": 0.54, - "learning_rate": 1.9286493113653607e-05, - "loss": 0.0425, + "learning_rate": 2.892973967048041e-05, + "loss": 0.0602, "step": 35500 }, { "epoch": 0.54, - "learning_rate": 1.9276443720888167e-05, - "loss": 0.0434, + "learning_rate": 2.8914665581332248e-05, + "loss": 0.063, "step": 36000 }, { "epoch": 0.55, - "learning_rate": 1.9266394328122725e-05, - "loss": 0.0425, + "learning_rate": 2.8899591492184085e-05, + "loss": 0.0601, "step": 36500 }, { "epoch": 0.56, - "learning_rate": 1.9256344935357282e-05, - "loss": 0.0417, + "learning_rate": 2.8884517403035923e-05, + "loss": 0.0603, "step": 37000 }, { "epoch": 0.57, - "learning_rate": 1.924629554259184e-05, - "loss": 0.0403, + "learning_rate": 2.8869443313887757e-05, + "loss": 0.059, "step": 37500 }, { "epoch": 0.57, - "learning_rate": 1.92362461498264e-05, - "loss": 0.0401, + "learning_rate": 2.8854369224739597e-05, + "loss": 0.0583, "step": 38000 }, { "epoch": 0.58, - "learning_rate": 1.9226196757060957e-05, - "loss": 0.0426, + "learning_rate": 2.883929513559143e-05, + "loss": 0.0618, "step": 38500 }, { "epoch": 0.59, - "learning_rate": 1.9216147364295514e-05, - "loss": 0.0412, + "learning_rate": 2.882422104644327e-05, + "loss": 0.0607, "step": 39000 }, { "epoch": 0.6, - "learning_rate": 1.920609797153007e-05, - "loss": 0.042, + "learning_rate": 2.8809146957295106e-05, + "loss": 0.0599, "step": 39500 }, { "epoch": 0.6, - "learning_rate": 1.919604857876463e-05, - "loss": 0.0417, + "learning_rate": 2.8794072868146944e-05, + "loss": 0.0609, "step": 40000 }, { "epoch": 0.61, - "learning_rate": 1.9185999185999185e-05, - "loss": 0.0415, + "learning_rate": 2.8778998778998778e-05, + "loss": 0.0607, "step": 40500 }, { "epoch": 0.62, - "learning_rate": 1.9175949793233746e-05, - "loss": 0.0422, + "learning_rate": 2.876392468985062e-05, + "loss": 0.0607, "step": 41000 }, { "epoch": 0.63, - "learning_rate": 1.9165900400468303e-05, - "loss": 0.0405, + "learning_rate": 2.8748850600702453e-05, + "loss": 0.0585, "step": 41500 }, { "epoch": 0.63, - "learning_rate": 1.915585100770286e-05, - "loss": 0.0409, + "learning_rate": 2.873377651155429e-05, + "loss": 0.0598, "step": 42000 }, { "epoch": 0.64, - "learning_rate": 1.9145801614937417e-05, - "loss": 0.0426, + "learning_rate": 2.8718702422406124e-05, + "loss": 0.0596, "step": 42500 }, { "epoch": 0.65, - "learning_rate": 1.9135752222171978e-05, - "loss": 0.0406, + "learning_rate": 2.8703628333257965e-05, + "loss": 0.0585, "step": 43000 }, { "epoch": 0.66, - "learning_rate": 1.9125702829406535e-05, - "loss": 0.0408, + "learning_rate": 2.86885542441098e-05, + "loss": 0.0605, "step": 43500 }, { "epoch": 0.66, - "learning_rate": 1.9115653436641092e-05, - "loss": 0.0416, + "learning_rate": 2.8673480154961636e-05, + "loss": 0.06, "step": 44000 }, { "epoch": 0.67, - "learning_rate": 1.910560404387565e-05, - "loss": 0.0427, + "learning_rate": 2.8658406065813474e-05, + "loss": 0.0607, "step": 44500 }, { "epoch": 0.68, - "learning_rate": 1.909555465111021e-05, - "loss": 0.0402, + "learning_rate": 2.864333197666531e-05, + "loss": 0.0599, "step": 45000 }, { "epoch": 0.69, - "learning_rate": 1.9085505258344767e-05, - "loss": 0.0429, + "learning_rate": 2.8628257887517145e-05, + "loss": 0.062, "step": 45500 }, { "epoch": 0.69, - "learning_rate": 1.9075455865579324e-05, - "loss": 0.0418, + "learning_rate": 2.8613183798368986e-05, + "loss": 0.06, "step": 46000 }, { "epoch": 0.7, - "learning_rate": 1.906540647281388e-05, - "loss": 0.0399, + "learning_rate": 2.859810970922082e-05, + "loss": 0.0594, "step": 46500 }, { "epoch": 0.71, - "learning_rate": 1.9055357080048438e-05, - "loss": 0.0403, + "learning_rate": 2.8583035620072657e-05, + "loss": 0.0589, "step": 47000 }, { "epoch": 0.72, - "learning_rate": 1.9045307687282995e-05, - "loss": 0.0397, + "learning_rate": 2.8567961530924495e-05, + "loss": 0.0578, "step": 47500 }, { "epoch": 0.72, - "learning_rate": 1.9035258294517556e-05, - "loss": 0.041, + "learning_rate": 2.8552887441776332e-05, + "loss": 0.0586, "step": 48000 }, { "epoch": 0.73, - "learning_rate": 1.9025208901752113e-05, - "loss": 0.039, + "learning_rate": 2.8537813352628166e-05, + "loss": 0.0568, "step": 48500 }, { "epoch": 0.74, - "learning_rate": 1.901515950898667e-05, - "loss": 0.0401, + "learning_rate": 2.8522739263480007e-05, + "loss": 0.0579, "step": 49000 }, { "epoch": 0.75, - "learning_rate": 1.9005110116221227e-05, - "loss": 0.0416, + "learning_rate": 2.850766517433184e-05, + "loss": 0.0605, "step": 49500 }, { "epoch": 0.75, - "learning_rate": 1.8995060723455788e-05, - "loss": 0.0382, + "learning_rate": 2.849259108518368e-05, + "loss": 0.0552, "step": 50000 }, { "epoch": 0.76, - "learning_rate": 1.8985011330690345e-05, - "loss": 0.0391, + "learning_rate": 2.8477516996035513e-05, + "loss": 0.0573, "step": 50500 }, { "epoch": 0.77, - "learning_rate": 1.8974961937924902e-05, - "loss": 0.0405, + "learning_rate": 2.8462442906887353e-05, + "loss": 0.0585, "step": 51000 }, { "epoch": 0.78, - "learning_rate": 1.896491254515946e-05, - "loss": 0.0401, + "learning_rate": 2.8447368817739187e-05, + "loss": 0.0577, "step": 51500 }, { "epoch": 0.78, - "learning_rate": 1.895486315239402e-05, - "loss": 0.0379, + "learning_rate": 2.8432294728591025e-05, + "loss": 0.0553, "step": 52000 }, { "epoch": 0.79, - "learning_rate": 1.8944813759628574e-05, - "loss": 0.0408, + "learning_rate": 2.8417220639442862e-05, + "loss": 0.0593, "step": 52500 }, { "epoch": 0.8, - "learning_rate": 1.8934764366863134e-05, - "loss": 0.0407, + "learning_rate": 2.84021465502947e-05, + "loss": 0.0592, "step": 53000 }, { "epoch": 0.81, - "learning_rate": 1.892471497409769e-05, - "loss": 0.039, + "learning_rate": 2.8387072461146534e-05, + "loss": 0.0558, "step": 53500 }, { "epoch": 0.81, - "learning_rate": 1.891466558133225e-05, - "loss": 0.037, + "learning_rate": 2.8371998371998375e-05, + "loss": 0.0548, "step": 54000 }, { "epoch": 0.82, - "learning_rate": 1.8904616188566806e-05, - "loss": 0.0402, + "learning_rate": 2.835692428285021e-05, + "loss": 0.0607, "step": 54500 }, { "epoch": 0.83, - "learning_rate": 1.8894566795801366e-05, - "loss": 0.0413, + "learning_rate": 2.8341850193702046e-05, + "loss": 0.0588, "step": 55000 }, { "epoch": 0.84, - "learning_rate": 1.8884517403035923e-05, - "loss": 0.0398, + "learning_rate": 2.8326776104553883e-05, + "loss": 0.0593, "step": 55500 }, { "epoch": 0.84, - "learning_rate": 1.887446801027048e-05, - "loss": 0.0395, + "learning_rate": 2.831170201540572e-05, + "loss": 0.0577, "step": 56000 }, { "epoch": 0.85, - "learning_rate": 1.8864418617505038e-05, - "loss": 0.0394, + "learning_rate": 2.8296627926257555e-05, + "loss": 0.0567, "step": 56500 }, { "epoch": 0.86, - "learning_rate": 1.8854369224739598e-05, - "loss": 0.0403, + "learning_rate": 2.8281553837109396e-05, + "loss": 0.0589, "step": 57000 }, { "epoch": 0.87, - "learning_rate": 1.8844319831974155e-05, - "loss": 0.0392, + "learning_rate": 2.826647974796123e-05, + "loss": 0.0566, "step": 57500 }, { "epoch": 0.87, - "learning_rate": 1.8834270439208713e-05, - "loss": 0.0378, + "learning_rate": 2.8251405658813067e-05, + "loss": 0.0551, "step": 58000 }, { "epoch": 0.88, - "learning_rate": 1.882422104644327e-05, - "loss": 0.0379, + "learning_rate": 2.82363315696649e-05, + "loss": 0.0542, "step": 58500 }, { "epoch": 0.89, - "learning_rate": 1.8814171653677827e-05, - "loss": 0.0394, + "learning_rate": 2.8221257480516742e-05, + "loss": 0.0558, "step": 59000 }, { "epoch": 0.9, - "learning_rate": 1.8804122260912384e-05, - "loss": 0.0376, + "learning_rate": 2.8206183391368576e-05, + "loss": 0.0552, "step": 59500 }, { "epoch": 0.9, - "learning_rate": 1.8794072868146945e-05, - "loss": 0.0395, + "learning_rate": 2.8191109302220413e-05, + "loss": 0.0566, "step": 60000 }, { "epoch": 0.91, - "learning_rate": 1.8784023475381502e-05, - "loss": 0.0404, + "learning_rate": 2.817603521307225e-05, + "loss": 0.0576, "step": 60500 }, { "epoch": 0.92, - "learning_rate": 1.877397408261606e-05, - "loss": 0.0409, + "learning_rate": 2.816096112392409e-05, + "loss": 0.0594, "step": 61000 }, { "epoch": 0.93, - "learning_rate": 1.8763924689850616e-05, - "loss": 0.0396, + "learning_rate": 2.8145887034775922e-05, + "loss": 0.0578, "step": 61500 }, { "epoch": 0.93, - "learning_rate": 1.8753875297085177e-05, - "loss": 0.0398, + "learning_rate": 2.8130812945627763e-05, + "loss": 0.0561, "step": 62000 }, { "epoch": 0.94, - "learning_rate": 1.8743825904319734e-05, - "loss": 0.0406, + "learning_rate": 2.8115738856479597e-05, + "loss": 0.0586, "step": 62500 }, { "epoch": 0.95, - "learning_rate": 1.873377651155429e-05, - "loss": 0.0397, + "learning_rate": 2.8100664767331435e-05, + "loss": 0.057, "step": 63000 }, { "epoch": 0.96, - "learning_rate": 1.8723727118788848e-05, - "loss": 0.038, + "learning_rate": 2.8085590678183272e-05, + "loss": 0.0542, "step": 63500 }, { "epoch": 0.96, - "learning_rate": 1.871367772602341e-05, - "loss": 0.0387, + "learning_rate": 2.807051658903511e-05, + "loss": 0.0556, "step": 64000 }, { "epoch": 0.97, - "learning_rate": 1.8703628333257962e-05, - "loss": 0.0382, + "learning_rate": 2.8055442499886943e-05, + "loss": 0.0567, "step": 64500 }, { "epoch": 0.98, - "learning_rate": 1.8693578940492523e-05, - "loss": 0.0389, + "learning_rate": 2.804036841073878e-05, + "loss": 0.0557, "step": 65000 }, { "epoch": 0.99, - "learning_rate": 1.868352954772708e-05, - "loss": 0.0392, + "learning_rate": 2.802529432159062e-05, + "loss": 0.0569, "step": 65500 }, { "epoch": 0.99, - "learning_rate": 1.8673480154961637e-05, - "loss": 0.04, + "learning_rate": 2.8010220232442456e-05, + "loss": 0.058, "step": 66000 }, { "epoch": 1.0, - "eval_accuracy": 0.9849140434429086, - "eval_f1": 0.9056098239494281, - "eval_loss": 0.03593166172504425, - "eval_precision": 0.8789740368669595, - "eval_recall": 0.9339103612732053, - "eval_runtime": 257.5124, - "eval_samples_per_second": 457.935, - "eval_steps_per_second": 28.624, + "eval_accuracy": 0.9778978973178251, + "eval_f1": 0.8965530802052815, + "eval_loss": 0.05169834569096565, + "eval_precision": 0.8674113869146362, + "eval_recall": 0.9277209431029291, + "eval_runtime": 281.2887, + "eval_samples_per_second": 419.228, + "eval_steps_per_second": 26.204, "step": 66339 }, { "epoch": 1.0, - "learning_rate": 1.8663430762196194e-05, - "loss": 0.0361, + "learning_rate": 2.799514614329429e-05, + "loss": 0.0535, "step": 66500 }, { "epoch": 1.01, - "learning_rate": 1.8653381369430755e-05, - "loss": 0.0337, + "learning_rate": 2.798007205414613e-05, + "loss": 0.0473, "step": 67000 }, { "epoch": 1.02, - "learning_rate": 1.8643331976665312e-05, - "loss": 0.0339, + "learning_rate": 2.7964997964997965e-05, + "loss": 0.0484, "step": 67500 }, { "epoch": 1.03, - "learning_rate": 1.863328258389987e-05, - "loss": 0.0334, + "learning_rate": 2.7949923875849802e-05, + "loss": 0.0485, "step": 68000 }, { "epoch": 1.03, - "learning_rate": 1.8623233191134426e-05, - "loss": 0.0341, + "learning_rate": 2.793484978670164e-05, + "loss": 0.0497, "step": 68500 }, { "epoch": 1.04, - "learning_rate": 1.8613183798368987e-05, - "loss": 0.0327, + "learning_rate": 2.7919775697553477e-05, + "loss": 0.0487, "step": 69000 }, { "epoch": 1.05, - "learning_rate": 1.8603134405603544e-05, - "loss": 0.0346, + "learning_rate": 2.790470160840531e-05, + "loss": 0.0494, "step": 69500 }, { "epoch": 1.06, - "learning_rate": 1.85930850128381e-05, - "loss": 0.0336, + "learning_rate": 2.7889627519257152e-05, + "loss": 0.0488, "step": 70000 }, { "epoch": 1.06, - "learning_rate": 1.858303562007266e-05, - "loss": 0.034, + "learning_rate": 2.7874553430108986e-05, + "loss": 0.0488, "step": 70500 }, { "epoch": 1.07, - "learning_rate": 1.8572986227307215e-05, - "loss": 0.033, + "learning_rate": 2.7859479340960823e-05, + "loss": 0.0477, "step": 71000 }, { "epoch": 1.08, - "learning_rate": 1.8562936834541773e-05, - "loss": 0.0328, + "learning_rate": 2.784440525181266e-05, + "loss": 0.0478, "step": 71500 }, { "epoch": 1.09, - "learning_rate": 1.8552887441776333e-05, - "loss": 0.0341, + "learning_rate": 2.7829331162664498e-05, + "loss": 0.0488, "step": 72000 }, { "epoch": 1.09, - "learning_rate": 1.854283804901089e-05, - "loss": 0.0331, + "learning_rate": 2.7814257073516332e-05, + "loss": 0.0477, "step": 72500 }, { "epoch": 1.1, - "learning_rate": 1.8532788656245447e-05, - "loss": 0.0344, + "learning_rate": 2.779918298436817e-05, + "loss": 0.049, "step": 73000 }, { "epoch": 1.11, - "learning_rate": 1.8522739263480005e-05, - "loss": 0.0341, + "learning_rate": 2.7784108895220007e-05, + "loss": 0.0496, "step": 73500 }, { "epoch": 1.12, - "learning_rate": 1.8512689870714565e-05, - "loss": 0.0332, + "learning_rate": 2.7769034806071844e-05, + "loss": 0.0487, "step": 74000 }, { "epoch": 1.12, - "learning_rate": 1.8502640477949122e-05, - "loss": 0.0342, + "learning_rate": 2.775396071692368e-05, + "loss": 0.0501, "step": 74500 }, { "epoch": 1.13, - "learning_rate": 1.849259108518368e-05, - "loss": 0.0333, + "learning_rate": 2.773888662777552e-05, + "loss": 0.0489, "step": 75000 }, { "epoch": 1.14, - "learning_rate": 1.8482541692418237e-05, - "loss": 0.0343, + "learning_rate": 2.7723812538627353e-05, + "loss": 0.0493, "step": 75500 }, { "epoch": 1.15, - "learning_rate": 1.8472492299652797e-05, - "loss": 0.0344, + "learning_rate": 2.770873844947919e-05, + "loss": 0.0488, "step": 76000 }, { "epoch": 1.15, - "learning_rate": 1.846244290688735e-05, - "loss": 0.0328, + "learning_rate": 2.7693664360331028e-05, + "loss": 0.047, "step": 76500 }, { "epoch": 1.16, - "learning_rate": 1.845239351412191e-05, - "loss": 0.0325, + "learning_rate": 2.7678590271182865e-05, + "loss": 0.0473, "step": 77000 }, { "epoch": 1.17, - "learning_rate": 1.844234412135647e-05, - "loss": 0.0338, + "learning_rate": 2.76635161820347e-05, + "loss": 0.0495, "step": 77500 }, { "epoch": 1.18, - "learning_rate": 1.8432294728591026e-05, - "loss": 0.0337, + "learning_rate": 2.764844209288654e-05, + "loss": 0.0473, "step": 78000 }, { "epoch": 1.18, - "learning_rate": 1.8422245335825583e-05, - "loss": 0.0344, + "learning_rate": 2.7633368003738374e-05, + "loss": 0.0496, "step": 78500 }, { "epoch": 1.19, - "learning_rate": 1.8412195943060143e-05, - "loss": 0.035, + "learning_rate": 2.7618293914590212e-05, + "loss": 0.0496, "step": 79000 }, { "epoch": 1.2, - "learning_rate": 1.84021465502947e-05, - "loss": 0.0341, + "learning_rate": 2.760321982544205e-05, + "loss": 0.048, "step": 79500 }, { "epoch": 1.21, - "learning_rate": 1.8392097157529258e-05, - "loss": 0.0336, + "learning_rate": 2.7588145736293887e-05, + "loss": 0.0492, "step": 80000 }, { "epoch": 1.21, - "learning_rate": 1.8382047764763815e-05, - "loss": 0.0352, + "learning_rate": 2.757307164714572e-05, + "loss": 0.0507, "step": 80500 }, { "epoch": 1.22, - "learning_rate": 1.8371998371998375e-05, - "loss": 0.0331, + "learning_rate": 2.7557997557997558e-05, + "loss": 0.047, "step": 81000 }, { "epoch": 1.23, - "learning_rate": 1.836194897923293e-05, - "loss": 0.0344, + "learning_rate": 2.7542923468849396e-05, + "loss": 0.0485, "step": 81500 }, { "epoch": 1.24, - "learning_rate": 1.835189958646749e-05, - "loss": 0.0345, + "learning_rate": 2.7527849379701233e-05, + "loss": 0.0487, "step": 82000 }, { "epoch": 1.24, - "learning_rate": 1.8341850193702047e-05, - "loss": 0.0325, + "learning_rate": 2.7512775290553067e-05, + "loss": 0.0469, "step": 82500 }, { "epoch": 1.25, - "learning_rate": 1.8331800800936604e-05, - "loss": 0.0339, + "learning_rate": 2.7497701201404908e-05, + "loss": 0.0494, "step": 83000 }, { "epoch": 1.26, - "learning_rate": 1.832175140817116e-05, - "loss": 0.0315, + "learning_rate": 2.7482627112256742e-05, + "loss": 0.0456, "step": 83500 }, { "epoch": 1.27, - "learning_rate": 1.8311702015405722e-05, - "loss": 0.0342, + "learning_rate": 2.746755302310858e-05, + "loss": 0.0495, "step": 84000 }, { "epoch": 1.27, - "learning_rate": 1.830165262264028e-05, - "loss": 0.0338, + "learning_rate": 2.7452478933960417e-05, + "loss": 0.0481, "step": 84500 }, { "epoch": 1.28, - "learning_rate": 1.8291603229874836e-05, - "loss": 0.0331, + "learning_rate": 2.7437404844812254e-05, + "loss": 0.0472, "step": 85000 }, { "epoch": 1.29, - "learning_rate": 1.8281553837109393e-05, - "loss": 0.0339, + "learning_rate": 2.7422330755664088e-05, + "loss": 0.0484, "step": 85500 }, { "epoch": 1.3, - "learning_rate": 1.8271504444343954e-05, - "loss": 0.033, + "learning_rate": 2.740725666651593e-05, + "loss": 0.0463, "step": 86000 }, { "epoch": 1.3, - "learning_rate": 1.826145505157851e-05, - "loss": 0.0327, + "learning_rate": 2.7392182577367763e-05, + "loss": 0.0468, "step": 86500 }, { "epoch": 1.31, - "learning_rate": 1.8251405658813068e-05, - "loss": 0.0328, + "learning_rate": 2.73771084882196e-05, + "loss": 0.0465, "step": 87000 }, { "epoch": 1.32, - "learning_rate": 1.8241356266047625e-05, - "loss": 0.0322, + "learning_rate": 2.7362034399071438e-05, + "loss": 0.0473, "step": 87500 }, { "epoch": 1.33, - "learning_rate": 1.8231306873282186e-05, - "loss": 0.0325, + "learning_rate": 2.7346960309923275e-05, + "loss": 0.0472, "step": 88000 }, { "epoch": 1.33, - "learning_rate": 1.822125748051674e-05, - "loss": 0.0334, + "learning_rate": 2.733188622077511e-05, + "loss": 0.048, "step": 88500 }, { "epoch": 1.34, - "learning_rate": 1.82112080877513e-05, - "loss": 0.0337, + "learning_rate": 2.7316812131626947e-05, + "loss": 0.0484, "step": 89000 }, { "epoch": 1.35, - "learning_rate": 1.8201158694985857e-05, - "loss": 0.0345, + "learning_rate": 2.7301738042478784e-05, + "loss": 0.0494, "step": 89500 }, { "epoch": 1.36, - "learning_rate": 1.8191109302220414e-05, - "loss": 0.0325, + "learning_rate": 2.728666395333062e-05, + "loss": 0.0466, "step": 90000 }, { "epoch": 1.36, - "learning_rate": 1.818105990945497e-05, - "loss": 0.0341, + "learning_rate": 2.7271589864182456e-05, + "loss": 0.0487, "step": 90500 }, { "epoch": 1.37, - "learning_rate": 1.8171010516689532e-05, - "loss": 0.0331, + "learning_rate": 2.7256515775034296e-05, + "loss": 0.0473, "step": 91000 }, { "epoch": 1.38, - "learning_rate": 1.816096112392409e-05, - "loss": 0.034, + "learning_rate": 2.724144168588613e-05, + "loss": 0.0487, "step": 91500 }, { "epoch": 1.39, - "learning_rate": 1.8150911731158646e-05, - "loss": 0.0323, + "learning_rate": 2.7226367596737968e-05, + "loss": 0.0466, "step": 92000 }, { "epoch": 1.39, - "learning_rate": 1.8140862338393203e-05, - "loss": 0.0333, + "learning_rate": 2.7211293507589805e-05, + "loss": 0.0478, "step": 92500 }, { "epoch": 1.4, - "learning_rate": 1.8130812945627764e-05, - "loss": 0.0334, + "learning_rate": 2.7196219418441643e-05, + "loss": 0.0484, "step": 93000 }, { "epoch": 1.41, - "learning_rate": 1.8120763552862318e-05, - "loss": 0.0312, + "learning_rate": 2.7181145329293477e-05, + "loss": 0.0444, "step": 93500 }, { "epoch": 1.42, - "learning_rate": 1.811071416009688e-05, - "loss": 0.0317, + "learning_rate": 2.7166071240145317e-05, + "loss": 0.046, "step": 94000 }, { "epoch": 1.42, - "learning_rate": 1.8100664767331435e-05, - "loss": 0.0323, + "learning_rate": 2.715099715099715e-05, + "loss": 0.047, "step": 94500 }, { "epoch": 1.43, - "learning_rate": 1.8090615374565993e-05, - "loss": 0.0329, + "learning_rate": 2.713592306184899e-05, + "loss": 0.0462, "step": 95000 }, { "epoch": 1.44, - "learning_rate": 1.808056598180055e-05, - "loss": 0.0322, + "learning_rate": 2.7120848972700823e-05, + "loss": 0.0466, "step": 95500 }, { "epoch": 1.45, - "learning_rate": 1.807051658903511e-05, - "loss": 0.0331, + "learning_rate": 2.7105774883552664e-05, + "loss": 0.0483, "step": 96000 }, { "epoch": 1.45, - "learning_rate": 1.8060467196269667e-05, - "loss": 0.0313, + "learning_rate": 2.7090700794404498e-05, + "loss": 0.046, "step": 96500 }, { "epoch": 1.46, - "learning_rate": 1.8050417803504225e-05, - "loss": 0.032, + "learning_rate": 2.7075626705256335e-05, + "loss": 0.0468, "step": 97000 }, { "epoch": 1.47, - "learning_rate": 1.8040368410738782e-05, - "loss": 0.0326, + "learning_rate": 2.7060552616108173e-05, + "loss": 0.0459, "step": 97500 }, { "epoch": 1.48, - "learning_rate": 1.8030319017973342e-05, - "loss": 0.0335, + "learning_rate": 2.704547852696001e-05, + "loss": 0.0487, "step": 98000 }, { "epoch": 1.48, - "learning_rate": 1.80202696252079e-05, - "loss": 0.0336, + "learning_rate": 2.7030404437811844e-05, + "loss": 0.0473, "step": 98500 }, { "epoch": 1.49, - "learning_rate": 1.8010220232442457e-05, - "loss": 0.0321, + "learning_rate": 2.7015330348663685e-05, + "loss": 0.0453, "step": 99000 }, { "epoch": 1.5, - "learning_rate": 1.8000170839677014e-05, - "loss": 0.0331, + "learning_rate": 2.700025625951552e-05, + "loss": 0.0461, "step": 99500 }, { "epoch": 1.51, - "learning_rate": 1.799012144691157e-05, - "loss": 0.0325, + "learning_rate": 2.6985182170367356e-05, + "loss": 0.0467, "step": 100000 }, { "epoch": 1.51, - "learning_rate": 1.7980072054146128e-05, - "loss": 0.0338, + "learning_rate": 2.6970108081219194e-05, + "loss": 0.0475, "step": 100500 }, { "epoch": 1.52, - "learning_rate": 1.797002266138069e-05, - "loss": 0.0366, + "learning_rate": 2.695503399207103e-05, + "loss": 0.0486, "step": 101000 }, { "epoch": 1.53, - "learning_rate": 1.7959973268615246e-05, - "loss": 0.0363, + "learning_rate": 2.6939959902922865e-05, + "loss": 0.0476, "step": 101500 }, { "epoch": 1.54, - "learning_rate": 1.7949923875849803e-05, - "loss": 0.0345, + "learning_rate": 2.6924885813774706e-05, + "loss": 0.0481, "step": 102000 }, { "epoch": 1.55, - "learning_rate": 1.793987448308436e-05, - "loss": 0.0325, + "learning_rate": 2.690981172462654e-05, + "loss": 0.0458, "step": 102500 }, { "epoch": 1.55, - "learning_rate": 1.792982509031892e-05, - "loss": 0.0324, + "learning_rate": 2.6894737635478378e-05, + "loss": 0.0463, "step": 103000 }, { "epoch": 1.56, - "learning_rate": 1.7919775697553478e-05, - "loss": 0.0337, + "learning_rate": 2.687966354633021e-05, + "loss": 0.0468, "step": 103500 }, { "epoch": 1.57, - "learning_rate": 1.7909726304788035e-05, - "loss": 0.0382, + "learning_rate": 2.6864589457182052e-05, + "loss": 0.0476, "step": 104000 }, { "epoch": 1.58, - "learning_rate": 1.7899676912022592e-05, - "loss": 0.0351, + "learning_rate": 2.6849515368033886e-05, + "loss": 0.0459, "step": 104500 }, { "epoch": 1.58, - "learning_rate": 1.7889627519257153e-05, - "loss": 0.0321, + "learning_rate": 2.6834441278885724e-05, + "loss": 0.0455, "step": 105000 }, { "epoch": 1.59, - "learning_rate": 1.7879578126491706e-05, - "loss": 0.0319, + "learning_rate": 2.681936718973756e-05, + "loss": 0.0465, "step": 105500 }, { "epoch": 1.6, - "learning_rate": 1.7869528733726267e-05, - "loss": 0.0324, + "learning_rate": 2.68042931005894e-05, + "loss": 0.0454, "step": 106000 }, { "epoch": 1.61, - "learning_rate": 1.7859479340960824e-05, - "loss": 0.0344, + "learning_rate": 2.6789219011441233e-05, + "loss": 0.0482, "step": 106500 }, { "epoch": 1.61, - "learning_rate": 1.784942994819538e-05, - "loss": 0.0334, + "learning_rate": 2.6774144922293074e-05, + "loss": 0.0467, "step": 107000 }, { "epoch": 1.62, - "learning_rate": 1.783938055542994e-05, - "loss": 0.035, + "learning_rate": 2.6759070833144908e-05, + "loss": 0.0467, "step": 107500 }, { "epoch": 1.63, - "learning_rate": 1.78293311626645e-05, - "loss": 0.0361, + "learning_rate": 2.6743996743996745e-05, + "loss": 0.0486, "step": 108000 }, { "epoch": 1.64, - "learning_rate": 1.7819281769899056e-05, - "loss": 0.0356, + "learning_rate": 2.6728922654848582e-05, + "loss": 0.0465, "step": 108500 }, { "epoch": 1.64, - "learning_rate": 1.7809232377133613e-05, - "loss": 0.0356, + "learning_rate": 2.671384856570042e-05, + "loss": 0.0476, "step": 109000 }, { "epoch": 1.65, - "learning_rate": 1.779918298436817e-05, - "loss": 0.0329, + "learning_rate": 2.6698774476552254e-05, + "loss": 0.0456, "step": 109500 }, { "epoch": 1.66, - "learning_rate": 1.778913359160273e-05, - "loss": 0.0338, + "learning_rate": 2.6683700387404095e-05, + "loss": 0.0459, "step": 110000 }, { "epoch": 1.67, - "learning_rate": 1.7779084198837288e-05, - "loss": 0.0343, + "learning_rate": 2.666862629825593e-05, + "loss": 0.0478, "step": 110500 }, { "epoch": 1.67, - "learning_rate": 1.7769034806071845e-05, - "loss": 0.0362, + "learning_rate": 2.6653552209107766e-05, + "loss": 0.0459, "step": 111000 }, { "epoch": 1.68, - "learning_rate": 1.7758985413306402e-05, - "loss": 0.0347, + "learning_rate": 2.66384781199596e-05, + "loss": 0.0471, "step": 111500 }, { "epoch": 1.69, - "learning_rate": 1.774893602054096e-05, - "loss": 0.0341, + "learning_rate": 2.662340403081144e-05, + "loss": 0.0459, "step": 112000 }, { "epoch": 1.7, - "learning_rate": 1.7738886627775517e-05, - "loss": 0.0342, + "learning_rate": 2.6608329941663275e-05, + "loss": 0.0479, "step": 112500 }, { "epoch": 1.7, - "learning_rate": 1.7728837235010077e-05, - "loss": 0.0327, + "learning_rate": 2.6593255852515112e-05, + "loss": 0.0459, "step": 113000 }, { "epoch": 1.71, - "learning_rate": 1.7718787842244634e-05, - "loss": 0.0341, + "learning_rate": 2.657818176336695e-05, + "loss": 0.0455, "step": 113500 }, { "epoch": 1.72, - "learning_rate": 1.770873844947919e-05, - "loss": 0.0346, + "learning_rate": 2.6563107674218787e-05, + "loss": 0.0487, "step": 114000 }, { "epoch": 1.73, - "learning_rate": 1.769868905671375e-05, - "loss": 0.0353, + "learning_rate": 2.654803358507062e-05, + "loss": 0.0473, "step": 114500 }, { "epoch": 1.73, - "learning_rate": 1.768863966394831e-05, - "loss": 0.033, + "learning_rate": 2.6532959495922462e-05, + "loss": 0.0461, "step": 115000 }, { "epoch": 1.74, - "learning_rate": 1.7678590271182866e-05, - "loss": 0.0318, + "learning_rate": 2.6517885406774296e-05, + "loss": 0.0455, "step": 115500 }, { "epoch": 1.75, - "learning_rate": 1.7668540878417424e-05, - "loss": 0.0323, + "learning_rate": 2.6502811317626134e-05, + "loss": 0.0451, "step": 116000 }, { "epoch": 1.76, - "learning_rate": 1.765849148565198e-05, - "loss": 0.0322, + "learning_rate": 2.648773722847797e-05, + "loss": 0.046, "step": 116500 }, { "epoch": 1.76, - "learning_rate": 1.764844209288654e-05, - "loss": 0.0322, + "learning_rate": 2.647266313932981e-05, + "loss": 0.047, "step": 117000 }, { "epoch": 1.77, - "learning_rate": 1.7638392700121095e-05, - "loss": 0.0323, + "learning_rate": 2.6457589050181642e-05, + "loss": 0.0461, "step": 117500 }, { "epoch": 1.78, - "learning_rate": 1.7628343307355656e-05, - "loss": 0.0326, + "learning_rate": 2.644251496103348e-05, + "loss": 0.0467, "step": 118000 }, { "epoch": 1.79, - "learning_rate": 1.7618293914590213e-05, - "loss": 0.0326, + "learning_rate": 2.6427440871885317e-05, + "loss": 0.0473, "step": 118500 }, { "epoch": 1.79, - "learning_rate": 1.760824452182477e-05, - "loss": 0.031, + "learning_rate": 2.6412366782737155e-05, + "loss": 0.044, "step": 119000 }, { "epoch": 1.8, - "learning_rate": 1.7598195129059327e-05, - "loss": 0.0337, + "learning_rate": 2.639729269358899e-05, + "loss": 0.0474, "step": 119500 }, { "epoch": 1.81, - "learning_rate": 1.7588145736293888e-05, - "loss": 0.0327, + "learning_rate": 2.638221860444083e-05, + "loss": 0.0465, "step": 120000 }, { "epoch": 1.82, - "learning_rate": 1.7578096343528445e-05, - "loss": 0.0334, + "learning_rate": 2.6367144515292664e-05, + "loss": 0.048, "step": 120500 }, { "epoch": 1.82, - "learning_rate": 1.7568046950763002e-05, - "loss": 0.0324, + "learning_rate": 2.63520704261445e-05, + "loss": 0.0462, "step": 121000 }, { "epoch": 1.83, - "learning_rate": 1.755799755799756e-05, - "loss": 0.0323, + "learning_rate": 2.633699633699634e-05, + "loss": 0.0466, "step": 121500 }, { "epoch": 1.84, - "learning_rate": 1.754794816523212e-05, - "loss": 0.0313, + "learning_rate": 2.6321922247848176e-05, + "loss": 0.0459, "step": 122000 }, { "epoch": 1.85, - "learning_rate": 1.7537898772466673e-05, - "loss": 0.0325, + "learning_rate": 2.630684815870001e-05, + "loss": 0.0462, "step": 122500 }, { "epoch": 1.85, - "learning_rate": 1.7527849379701234e-05, - "loss": 0.0313, + "learning_rate": 2.629177406955185e-05, + "loss": 0.0444, "step": 123000 }, { "epoch": 1.86, - "learning_rate": 1.751779998693579e-05, - "loss": 0.0323, + "learning_rate": 2.6276699980403685e-05, + "loss": 0.0458, "step": 123500 }, { "epoch": 1.87, - "learning_rate": 1.7507750594170348e-05, - "loss": 0.0321, + "learning_rate": 2.6261625891255522e-05, + "loss": 0.0448, "step": 124000 }, { "epoch": 1.88, - "learning_rate": 1.7497701201404905e-05, - "loss": 0.0317, + "learning_rate": 2.624655180210736e-05, + "loss": 0.0452, "step": 124500 }, { "epoch": 1.88, - "learning_rate": 1.7487651808639466e-05, - "loss": 0.0335, + "learning_rate": 2.6231477712959197e-05, + "loss": 0.0473, "step": 125000 }, { "epoch": 1.89, - "learning_rate": 1.7477602415874023e-05, - "loss": 0.0309, + "learning_rate": 2.621640362381103e-05, + "loss": 0.0446, "step": 125500 }, { "epoch": 1.9, - "learning_rate": 1.746755302310858e-05, - "loss": 0.0316, + "learning_rate": 2.620132953466287e-05, + "loss": 0.0442, "step": 126000 }, { "epoch": 1.91, - "learning_rate": 1.7457503630343137e-05, - "loss": 0.0323, + "learning_rate": 2.6186255445514706e-05, + "loss": 0.0464, "step": 126500 }, { "epoch": 1.91, - "learning_rate": 1.7447454237577698e-05, - "loss": 0.0318, + "learning_rate": 2.6171181356366543e-05, + "loss": 0.0454, "step": 127000 }, { "epoch": 1.92, - "learning_rate": 1.7437404844812255e-05, - "loss": 0.0324, + "learning_rate": 2.6156107267218377e-05, + "loss": 0.046, "step": 127500 }, { "epoch": 1.93, - "learning_rate": 1.7427355452046812e-05, - "loss": 0.0314, + "learning_rate": 2.6141033178070218e-05, + "loss": 0.0441, "step": 128000 }, { "epoch": 1.94, - "learning_rate": 1.741730605928137e-05, - "loss": 0.0316, + "learning_rate": 2.6125959088922052e-05, + "loss": 0.0451, "step": 128500 }, { "epoch": 1.94, - "learning_rate": 1.740725666651593e-05, - "loss": 0.0308, + "learning_rate": 2.611088499977389e-05, + "loss": 0.0445, "step": 129000 }, { "epoch": 1.95, - "learning_rate": 1.7397207273750484e-05, - "loss": 0.0314, + "learning_rate": 2.6095810910625727e-05, + "loss": 0.0447, "step": 129500 }, { "epoch": 1.96, - "learning_rate": 1.7387157880985044e-05, - "loss": 0.0313, + "learning_rate": 2.6080736821477564e-05, + "loss": 0.046, "step": 130000 }, { "epoch": 1.97, - "learning_rate": 1.73771084882196e-05, - "loss": 0.0286, + "learning_rate": 2.60656627323294e-05, + "loss": 0.041, "step": 130500 }, { "epoch": 1.97, - "learning_rate": 1.736705909545416e-05, - "loss": 0.033, + "learning_rate": 2.605058864318124e-05, + "loss": 0.0469, "step": 131000 }, { "epoch": 1.98, - "learning_rate": 1.7357009702688716e-05, - "loss": 0.0302, + "learning_rate": 2.6035514554033073e-05, + "loss": 0.0443, "step": 131500 }, { "epoch": 1.99, - "learning_rate": 1.7346960309923276e-05, - "loss": 0.03, + "learning_rate": 2.602044046488491e-05, + "loss": 0.0435, "step": 132000 }, { "epoch": 2.0, - "learning_rate": 1.7336910917157833e-05, - "loss": 0.0322, + "learning_rate": 2.6005366375736748e-05, + "loss": 0.0461, "step": 132500 }, { "epoch": 2.0, - "eval_accuracy": 0.9869833259524916, - "eval_f1": 0.9189582903868618, - "eval_loss": 0.03191002085804939, - "eval_precision": 0.8878090230754933, - "eval_recall": 0.9523728137640289, - "eval_runtime": 232.9867, - "eval_samples_per_second": 506.141, - "eval_steps_per_second": 31.637, + "eval_accuracy": 0.9813823655715507, + "eval_f1": 0.9126315393792152, + "eval_loss": 0.04644881188869476, + "eval_precision": 0.8880811658933832, + "eval_recall": 0.9385778580787981, + "eval_runtime": 245.1843, + "eval_samples_per_second": 480.961, + "eval_steps_per_second": 30.063, "step": 132678 }, { "epoch": 2.0, - "learning_rate": 1.732686152439239e-05, - "loss": 0.0269, + "learning_rate": 2.5990292286588586e-05, + "loss": 0.0385, "step": 133000 }, { "epoch": 2.01, - "learning_rate": 1.7316812131626948e-05, - "loss": 0.0264, + "learning_rate": 2.597521819744042e-05, + "loss": 0.036, "step": 133500 }, { "epoch": 2.02, - "learning_rate": 1.7306762738861508e-05, - "loss": 0.0256, + "learning_rate": 2.5960144108292257e-05, + "loss": 0.037, "step": 134000 }, { "epoch": 2.03, - "learning_rate": 1.7296713346096062e-05, - "loss": 0.0247, + "learning_rate": 2.5945070019144094e-05, + "loss": 0.035, "step": 134500 }, { "epoch": 2.04, - "learning_rate": 1.7286663953330622e-05, - "loss": 0.0269, + "learning_rate": 2.5929995929995932e-05, + "loss": 0.0391, "step": 135000 }, { "epoch": 2.04, - "learning_rate": 1.727661456056518e-05, - "loss": 0.0246, + "learning_rate": 2.5914921840847766e-05, + "loss": 0.0349, "step": 135500 }, { "epoch": 2.05, - "learning_rate": 1.7266565167799737e-05, - "loss": 0.0265, + "learning_rate": 2.5899847751699607e-05, + "loss": 0.0383, "step": 136000 }, { "epoch": 2.06, - "learning_rate": 1.7256515775034294e-05, - "loss": 0.0257, + "learning_rate": 2.588477366255144e-05, + "loss": 0.038, "step": 136500 }, { "epoch": 2.07, - "learning_rate": 1.7246466382268854e-05, - "loss": 0.0274, + "learning_rate": 2.5869699573403278e-05, + "loss": 0.038, "step": 137000 }, { "epoch": 2.07, - "learning_rate": 1.723641698950341e-05, - "loss": 0.0258, + "learning_rate": 2.5854625484255116e-05, + "loss": 0.0373, "step": 137500 }, { "epoch": 2.08, - "learning_rate": 1.722636759673797e-05, - "loss": 0.026, + "learning_rate": 2.5839551395106953e-05, + "loss": 0.0367, "step": 138000 }, { "epoch": 2.09, - "learning_rate": 1.7216318203972526e-05, - "loss": 0.0255, + "learning_rate": 2.5824477305958787e-05, + "loss": 0.0375, "step": 138500 }, { "epoch": 2.1, - "learning_rate": 1.7206268811207086e-05, - "loss": 0.0254, + "learning_rate": 2.5809403216810628e-05, + "loss": 0.0363, "step": 139000 }, { "epoch": 2.1, - "learning_rate": 1.7196219418441644e-05, - "loss": 0.0267, + "learning_rate": 2.5794329127662462e-05, + "loss": 0.0384, "step": 139500 }, { "epoch": 2.11, - "learning_rate": 1.71861700256762e-05, - "loss": 0.0255, + "learning_rate": 2.57792550385143e-05, + "loss": 0.0374, "step": 140000 }, { "epoch": 2.12, - "learning_rate": 1.7176120632910758e-05, - "loss": 0.028, + "learning_rate": 2.5764180949366137e-05, + "loss": 0.0372, "step": 140500 }, { "epoch": 2.13, - "learning_rate": 1.7166071240145315e-05, - "loss": 0.0292, + "learning_rate": 2.5749106860217974e-05, + "loss": 0.0386, "step": 141000 }, { "epoch": 2.13, - "learning_rate": 1.7156021847379872e-05, - "loss": 0.0261, + "learning_rate": 2.5734032771069808e-05, + "loss": 0.0375, "step": 141500 }, { "epoch": 2.14, - "learning_rate": 1.7145972454614433e-05, - "loss": 0.0276, + "learning_rate": 2.5718958681921646e-05, + "loss": 0.0386, "step": 142000 }, { "epoch": 2.15, - "learning_rate": 1.713592306184899e-05, - "loss": 0.0267, + "learning_rate": 2.5703884592773483e-05, + "loss": 0.0383, "step": 142500 }, { "epoch": 2.16, - "learning_rate": 1.7125873669083547e-05, - "loss": 0.0255, + "learning_rate": 2.568881050362532e-05, + "loss": 0.0369, "step": 143000 }, { "epoch": 2.16, - "learning_rate": 1.7115824276318104e-05, - "loss": 0.0259, + "learning_rate": 2.5673736414477155e-05, + "loss": 0.0381, "step": 143500 }, { "epoch": 2.17, - "learning_rate": 1.7105774883552665e-05, - "loss": 0.0265, + "learning_rate": 2.5658662325328995e-05, + "loss": 0.036, "step": 144000 }, { "epoch": 2.18, - "learning_rate": 1.7095725490787222e-05, - "loss": 0.0266, + "learning_rate": 2.564358823618083e-05, + "loss": 0.0378, "step": 144500 }, { "epoch": 2.19, - "learning_rate": 1.708567609802178e-05, - "loss": 0.0267, + "learning_rate": 2.5628514147032667e-05, + "loss": 0.0376, "step": 145000 }, { "epoch": 2.19, - "learning_rate": 1.7075626705256336e-05, - "loss": 0.0253, + "learning_rate": 2.5613440057884504e-05, + "loss": 0.0373, "step": 145500 }, { "epoch": 2.2, - "learning_rate": 1.7065577312490897e-05, - "loss": 0.0258, + "learning_rate": 2.559836596873634e-05, + "loss": 0.0381, "step": 146000 }, { "epoch": 2.21, - "learning_rate": 1.705552791972545e-05, - "loss": 0.0267, + "learning_rate": 2.5583291879588176e-05, + "loss": 0.0372, "step": 146500 }, { "epoch": 2.22, - "learning_rate": 1.704547852696001e-05, - "loss": 0.0255, + "learning_rate": 2.5568217790440016e-05, + "loss": 0.0369, "step": 147000 }, { "epoch": 2.22, - "learning_rate": 1.7035429134194568e-05, - "loss": 0.0255, + "learning_rate": 2.555314370129185e-05, + "loss": 0.0386, "step": 147500 }, { "epoch": 2.23, - "learning_rate": 1.7025379741429125e-05, - "loss": 0.0258, + "learning_rate": 2.5538069612143688e-05, + "loss": 0.037, "step": 148000 }, { "epoch": 2.24, - "learning_rate": 1.7015330348663682e-05, - "loss": 0.0266, + "learning_rate": 2.5522995522995522e-05, + "loss": 0.0398, "step": 148500 }, { "epoch": 2.25, - "learning_rate": 1.7005280955898243e-05, - "loss": 0.0258, + "learning_rate": 2.5507921433847363e-05, + "loss": 0.0368, "step": 149000 }, { "epoch": 2.25, - "learning_rate": 1.69952315631328e-05, - "loss": 0.0264, + "learning_rate": 2.5492847344699197e-05, + "loss": 0.0368, "step": 149500 }, { "epoch": 2.26, - "learning_rate": 1.6985182170367357e-05, - "loss": 0.0271, + "learning_rate": 2.547777325555103e-05, + "loss": 0.0382, "step": 150000 }, { "epoch": 2.27, - "learning_rate": 1.6975132777601914e-05, - "loss": 0.0271, + "learning_rate": 2.546269916640287e-05, + "loss": 0.04, "step": 150500 }, { "epoch": 2.28, - "learning_rate": 1.696508338483647e-05, - "loss": 0.0263, + "learning_rate": 2.5447625077254706e-05, + "loss": 0.0373, "step": 151000 }, { "epoch": 2.28, - "learning_rate": 1.6955033992071032e-05, - "loss": 0.0265, + "learning_rate": 2.5432550988106543e-05, + "loss": 0.0378, "step": 151500 }, { "epoch": 2.29, - "learning_rate": 1.6944984599305586e-05, - "loss": 0.0272, + "learning_rate": 2.541747689895838e-05, + "loss": 0.0395, "step": 152000 }, { "epoch": 2.3, - "learning_rate": 1.6934935206540146e-05, - "loss": 0.0272, + "learning_rate": 2.5402402809810218e-05, + "loss": 0.0377, "step": 152500 }, { "epoch": 2.31, - "learning_rate": 1.6924885813774704e-05, - "loss": 0.0263, + "learning_rate": 2.5387328720662052e-05, + "loss": 0.0383, "step": 153000 }, { "epoch": 2.31, - "learning_rate": 1.691483642100926e-05, - "loss": 0.027, + "learning_rate": 2.5372254631513893e-05, + "loss": 0.0374, "step": 153500 }, { "epoch": 2.32, - "learning_rate": 1.6904787028243818e-05, - "loss": 0.0267, + "learning_rate": 2.5357180542365727e-05, + "loss": 0.0393, "step": 154000 }, { "epoch": 2.33, - "learning_rate": 1.689473763547838e-05, - "loss": 0.0259, + "learning_rate": 2.5342106453217564e-05, + "loss": 0.0377, "step": 154500 }, { "epoch": 2.34, - "learning_rate": 1.6884688242712936e-05, - "loss": 0.0266, + "learning_rate": 2.53270323640694e-05, + "loss": 0.0376, "step": 155000 }, { "epoch": 2.34, - "learning_rate": 1.6874638849947493e-05, - "loss": 0.0267, + "learning_rate": 2.531195827492124e-05, + "loss": 0.0388, "step": 155500 }, { "epoch": 2.35, - "learning_rate": 1.686458945718205e-05, - "loss": 0.0262, + "learning_rate": 2.5296884185773073e-05, + "loss": 0.0369, "step": 156000 }, { "epoch": 2.36, - "learning_rate": 1.685454006441661e-05, - "loss": 0.0267, + "learning_rate": 2.528181009662491e-05, + "loss": 0.038, "step": 156500 }, { "epoch": 2.37, - "learning_rate": 1.6844490671651164e-05, - "loss": 0.0264, + "learning_rate": 2.5266736007476748e-05, + "loss": 0.0386, "step": 157000 }, { "epoch": 2.37, - "learning_rate": 1.6834441278885725e-05, - "loss": 0.0267, + "learning_rate": 2.5251661918328585e-05, + "loss": 0.0389, "step": 157500 }, { "epoch": 2.38, - "learning_rate": 1.6824391886120282e-05, - "loss": 0.0259, + "learning_rate": 2.523658782918042e-05, + "loss": 0.0374, "step": 158000 }, { "epoch": 2.39, - "learning_rate": 1.681434249335484e-05, - "loss": 0.0264, + "learning_rate": 2.522151374003226e-05, + "loss": 0.0377, "step": 158500 }, { "epoch": 2.4, - "learning_rate": 1.6804293100589396e-05, - "loss": 0.0282, + "learning_rate": 2.5206439650884094e-05, + "loss": 0.039, "step": 159000 }, { "epoch": 2.4, - "learning_rate": 1.6794243707823957e-05, - "loss": 0.0272, + "learning_rate": 2.519136556173593e-05, + "loss": 0.0392, "step": 159500 }, { "epoch": 2.41, - "learning_rate": 1.6784194315058514e-05, - "loss": 0.0279, + "learning_rate": 2.517629147258777e-05, + "loss": 0.038, "step": 160000 }, { "epoch": 2.42, - "learning_rate": 1.677414492229307e-05, - "loss": 0.027, + "learning_rate": 2.5161217383439607e-05, + "loss": 0.0383, "step": 160500 }, { "epoch": 2.43, - "learning_rate": 1.6764095529527628e-05, - "loss": 0.0254, + "learning_rate": 2.514614329429144e-05, + "loss": 0.0364, "step": 161000 }, { "epoch": 2.43, - "learning_rate": 1.675404613676219e-05, - "loss": 0.0259, + "learning_rate": 2.513106920514328e-05, + "loss": 0.0366, "step": 161500 }, { "epoch": 2.44, - "learning_rate": 1.6743996743996746e-05, - "loss": 0.0271, + "learning_rate": 2.5115995115995115e-05, + "loss": 0.0387, "step": 162000 }, { "epoch": 2.45, - "learning_rate": 1.6733947351231303e-05, - "loss": 0.026, + "learning_rate": 2.5100921026846953e-05, + "loss": 0.0369, "step": 162500 }, { "epoch": 2.46, - "learning_rate": 1.672389795846586e-05, - "loss": 0.0253, + "learning_rate": 2.508584693769879e-05, + "loss": 0.0384, "step": 163000 }, { "epoch": 2.46, - "learning_rate": 1.671384856570042e-05, - "loss": 0.0275, + "learning_rate": 2.5070772848550628e-05, + "loss": 0.0386, "step": 163500 }, { "epoch": 2.47, - "learning_rate": 1.6703799172934974e-05, - "loss": 0.0268, + "learning_rate": 2.5055698759402462e-05, + "loss": 0.0377, "step": 164000 }, { "epoch": 2.48, - "learning_rate": 1.6693749780169535e-05, - "loss": 0.0263, + "learning_rate": 2.50406246702543e-05, + "loss": 0.0366, "step": 164500 }, { "epoch": 2.49, - "learning_rate": 1.6683700387404092e-05, - "loss": 0.0274, + "learning_rate": 2.5025550581106137e-05, + "loss": 0.0379, "step": 165000 }, { "epoch": 2.49, - "learning_rate": 1.667365099463865e-05, - "loss": 0.0273, + "learning_rate": 2.5010476491957974e-05, + "loss": 0.0381, "step": 165500 }, { "epoch": 2.5, - "learning_rate": 1.6663601601873206e-05, - "loss": 0.0259, + "learning_rate": 2.4995402402809808e-05, + "loss": 0.0377, "step": 166000 }, { "epoch": 2.51, - "learning_rate": 1.6653552209107767e-05, - "loss": 0.0279, + "learning_rate": 2.498032831366165e-05, + "loss": 0.0399, "step": 166500 }, { "epoch": 2.52, - "learning_rate": 1.6643502816342324e-05, - "loss": 0.0273, + "learning_rate": 2.4965254224513483e-05, + "loss": 0.0388, "step": 167000 }, { "epoch": 2.52, - "learning_rate": 1.663345342357688e-05, - "loss": 0.0266, + "learning_rate": 2.495018013536532e-05, + "loss": 0.0391, "step": 167500 }, { "epoch": 2.53, - "learning_rate": 1.662340403081144e-05, - "loss": 0.0262, + "learning_rate": 2.4935106046217158e-05, + "loss": 0.0379, "step": 168000 }, { "epoch": 2.54, - "learning_rate": 1.6613354638046e-05, - "loss": 0.027, + "learning_rate": 2.4920031957068995e-05, + "loss": 0.0381, "step": 168500 }, { "epoch": 2.55, - "learning_rate": 1.6603305245280553e-05, - "loss": 0.0268, + "learning_rate": 2.490495786792083e-05, + "loss": 0.0378, "step": 169000 }, { "epoch": 2.56, - "learning_rate": 1.6593255852515113e-05, - "loss": 0.0268, + "learning_rate": 2.488988377877267e-05, + "loss": 0.039, "step": 169500 }, { "epoch": 2.56, - "learning_rate": 1.658320645974967e-05, - "loss": 0.0278, + "learning_rate": 2.4874809689624504e-05, + "loss": 0.0386, "step": 170000 }, { "epoch": 2.57, - "learning_rate": 1.6573157066984228e-05, - "loss": 0.0269, + "learning_rate": 2.485973560047634e-05, + "loss": 0.0387, "step": 170500 }, { "epoch": 2.58, - "learning_rate": 1.6563107674218785e-05, - "loss": 0.0264, + "learning_rate": 2.484466151132818e-05, + "loss": 0.0372, "step": 171000 }, { "epoch": 2.59, - "learning_rate": 1.6553058281453345e-05, - "loss": 0.0267, + "learning_rate": 2.4829587422180016e-05, + "loss": 0.0387, "step": 171500 }, { "epoch": 2.59, - "learning_rate": 1.6543008888687902e-05, - "loss": 0.0254, + "learning_rate": 2.481451333303185e-05, + "loss": 0.0367, "step": 172000 }, { "epoch": 2.6, - "learning_rate": 1.653295949592246e-05, - "loss": 0.0274, + "learning_rate": 2.4799439243883688e-05, + "loss": 0.0383, "step": 172500 }, { "epoch": 2.61, - "learning_rate": 1.6522910103157017e-05, - "loss": 0.0266, + "learning_rate": 2.4784365154735525e-05, + "loss": 0.037, "step": 173000 }, { "epoch": 2.62, - "learning_rate": 1.6512860710391577e-05, - "loss": 0.0253, + "learning_rate": 2.4769291065587363e-05, + "loss": 0.0366, "step": 173500 }, { "epoch": 2.62, - "learning_rate": 1.6502811317626134e-05, - "loss": 0.0255, + "learning_rate": 2.4754216976439197e-05, + "loss": 0.0366, "step": 174000 }, { "epoch": 2.63, - "learning_rate": 1.649276192486069e-05, - "loss": 0.0265, + "learning_rate": 2.4739142887291037e-05, + "loss": 0.0386, "step": 174500 }, { "epoch": 2.64, - "learning_rate": 1.648271253209525e-05, - "loss": 0.0259, + "learning_rate": 2.472406879814287e-05, + "loss": 0.038, "step": 175000 }, { "epoch": 2.65, - "learning_rate": 1.6472663139329806e-05, - "loss": 0.0265, + "learning_rate": 2.470899470899471e-05, + "loss": 0.0382, "step": 175500 }, { "epoch": 2.65, - "learning_rate": 1.6462613746564363e-05, - "loss": 0.0279, + "learning_rate": 2.4693920619846546e-05, + "loss": 0.0405, "step": 176000 }, { "epoch": 2.66, - "learning_rate": 1.6452564353798924e-05, - "loss": 0.0259, + "learning_rate": 2.4678846530698384e-05, + "loss": 0.0364, "step": 176500 }, { "epoch": 2.67, - "learning_rate": 1.644251496103348e-05, - "loss": 0.0255, + "learning_rate": 2.4663772441550218e-05, + "loss": 0.0371, "step": 177000 }, { "epoch": 2.68, - "learning_rate": 1.6432465568268038e-05, - "loss": 0.0252, + "learning_rate": 2.464869835240206e-05, + "loss": 0.037, "step": 177500 }, { "epoch": 2.68, - "learning_rate": 1.6422416175502595e-05, - "loss": 0.0264, + "learning_rate": 2.4633624263253893e-05, + "loss": 0.0372, "step": 178000 }, { "epoch": 2.69, - "learning_rate": 1.6412366782737156e-05, - "loss": 0.0268, + "learning_rate": 2.461855017410573e-05, + "loss": 0.0377, "step": 178500 }, { "epoch": 2.7, - "learning_rate": 1.6402317389971713e-05, - "loss": 0.0258, + "learning_rate": 2.4603476084957567e-05, + "loss": 0.0363, "step": 179000 }, { "epoch": 2.71, - "learning_rate": 1.639226799720627e-05, - "loss": 0.027, + "learning_rate": 2.4588401995809405e-05, + "loss": 0.0401, "step": 179500 }, { "epoch": 2.71, - "learning_rate": 1.6382218604440827e-05, - "loss": 0.0254, + "learning_rate": 2.457332790666124e-05, + "loss": 0.0373, "step": 180000 }, { "epoch": 2.72, - "learning_rate": 1.6372169211675388e-05, - "loss": 0.0266, + "learning_rate": 2.4558253817513076e-05, + "loss": 0.0376, "step": 180500 }, { "epoch": 2.73, - "learning_rate": 1.636211981890994e-05, - "loss": 0.0258, + "learning_rate": 2.4543179728364914e-05, + "loss": 0.0371, "step": 181000 }, { "epoch": 2.74, - "learning_rate": 1.6352070426144502e-05, - "loss": 0.0252, + "learning_rate": 2.452810563921675e-05, + "loss": 0.0371, "step": 181500 }, { "epoch": 2.74, - "learning_rate": 1.634202103337906e-05, - "loss": 0.0259, + "learning_rate": 2.4513031550068585e-05, + "loss": 0.0377, "step": 182000 }, { "epoch": 2.75, - "learning_rate": 1.6331971640613616e-05, - "loss": 0.0264, + "learning_rate": 2.4497957460920426e-05, + "loss": 0.0387, "step": 182500 }, { "epoch": 2.76, - "learning_rate": 1.6321922247848173e-05, - "loss": 0.0267, + "learning_rate": 2.448288337177226e-05, + "loss": 0.0389, "step": 183000 }, { "epoch": 2.77, - "learning_rate": 1.6311872855082734e-05, - "loss": 0.0254, + "learning_rate": 2.4467809282624097e-05, + "loss": 0.0379, "step": 183500 }, { "epoch": 2.77, - "learning_rate": 1.630182346231729e-05, - "loss": 0.0262, + "learning_rate": 2.4452735193475935e-05, + "loss": 0.037, "step": 184000 }, { "epoch": 2.78, - "learning_rate": 1.6291774069551848e-05, - "loss": 0.0252, + "learning_rate": 2.4437661104327772e-05, + "loss": 0.037, "step": 184500 }, { "epoch": 2.79, - "learning_rate": 1.6281724676786405e-05, - "loss": 0.026, + "learning_rate": 2.4422587015179606e-05, + "loss": 0.0383, "step": 185000 }, { "epoch": 2.8, - "learning_rate": 1.6271675284020966e-05, - "loss": 0.0261, + "learning_rate": 2.4407512926031447e-05, + "loss": 0.0374, "step": 185500 }, { "epoch": 2.8, - "learning_rate": 1.6261625891255523e-05, - "loss": 0.0263, + "learning_rate": 2.439243883688328e-05, + "loss": 0.0376, "step": 186000 }, { "epoch": 2.81, - "learning_rate": 1.625157649849008e-05, - "loss": 0.0262, + "learning_rate": 2.437736474773512e-05, + "loss": 0.0378, "step": 186500 }, { "epoch": 2.82, - "learning_rate": 1.6241527105724637e-05, - "loss": 0.0251, + "learning_rate": 2.4362290658586953e-05, + "loss": 0.037, "step": 187000 }, { "epoch": 2.83, - "learning_rate": 1.6231477712959194e-05, - "loss": 0.0246, + "learning_rate": 2.4347216569438793e-05, + "loss": 0.0364, "step": 187500 }, { "epoch": 2.83, - "learning_rate": 1.622142832019375e-05, - "loss": 0.025, + "learning_rate": 2.4332142480290627e-05, + "loss": 0.0382, "step": 188000 }, { "epoch": 2.84, - "learning_rate": 1.6211378927428312e-05, - "loss": 0.0268, + "learning_rate": 2.4317068391142465e-05, + "loss": 0.0372, "step": 188500 }, { "epoch": 2.85, - "learning_rate": 1.620132953466287e-05, - "loss": 0.0263, + "learning_rate": 2.4301994301994302e-05, + "loss": 0.0381, "step": 189000 }, { "epoch": 2.86, - "learning_rate": 1.6191280141897426e-05, - "loss": 0.0272, + "learning_rate": 2.428692021284614e-05, + "loss": 0.0387, "step": 189500 }, { "epoch": 2.86, - "learning_rate": 1.6181230749131984e-05, - "loss": 0.0264, + "learning_rate": 2.4271846123697974e-05, + "loss": 0.0384, "step": 190000 }, { "epoch": 2.87, - "learning_rate": 1.6171181356366544e-05, - "loss": 0.0258, + "learning_rate": 2.4256772034549815e-05, + "loss": 0.0381, "step": 190500 }, { "epoch": 2.88, - "learning_rate": 1.61611319636011e-05, - "loss": 0.0262, + "learning_rate": 2.424169794540165e-05, + "loss": 0.0376, "step": 191000 }, { "epoch": 2.89, - "learning_rate": 1.615108257083566e-05, - "loss": 0.0259, + "learning_rate": 2.4226623856253486e-05, + "loss": 0.0383, "step": 191500 }, { "epoch": 2.89, - "learning_rate": 1.6141033178070216e-05, - "loss": 0.0265, + "learning_rate": 2.4211549767105323e-05, + "loss": 0.0381, "step": 192000 }, { "epoch": 2.9, - "learning_rate": 1.6130983785304776e-05, - "loss": 0.0253, + "learning_rate": 2.419647567795716e-05, + "loss": 0.0366, "step": 192500 }, { "epoch": 2.91, - "learning_rate": 1.612093439253933e-05, - "loss": 0.0266, + "learning_rate": 2.4181401588808995e-05, + "loss": 0.0391, "step": 193000 }, { "epoch": 2.92, - "learning_rate": 1.611088499977389e-05, - "loss": 0.0269, + "learning_rate": 2.4166327499660836e-05, + "loss": 0.038, "step": 193500 }, { "epoch": 2.92, - "learning_rate": 1.6100835607008448e-05, - "loss": 0.0257, + "learning_rate": 2.415125341051267e-05, + "loss": 0.0384, "step": 194000 }, { "epoch": 2.93, - "learning_rate": 1.6090786214243005e-05, - "loss": 0.0261, + "learning_rate": 2.4136179321364507e-05, + "loss": 0.0384, "step": 194500 }, { "epoch": 2.94, - "learning_rate": 1.6080736821477562e-05, - "loss": 0.026, + "learning_rate": 2.412110523221634e-05, + "loss": 0.0375, "step": 195000 }, { "epoch": 2.95, - "learning_rate": 1.6070687428712122e-05, - "loss": 0.0253, + "learning_rate": 2.4106031143068182e-05, + "loss": 0.038, "step": 195500 }, { "epoch": 2.95, - "learning_rate": 1.606063803594668e-05, - "loss": 0.0247, + "learning_rate": 2.4090957053920016e-05, + "loss": 0.0349, "step": 196000 }, { "epoch": 2.96, - "learning_rate": 1.6050588643181237e-05, - "loss": 0.0255, + "learning_rate": 2.4075882964771853e-05, + "loss": 0.0368, "step": 196500 }, { "epoch": 2.97, - "learning_rate": 1.6040539250415794e-05, - "loss": 0.0274, + "learning_rate": 2.406080887562369e-05, + "loss": 0.0393, "step": 197000 }, { "epoch": 2.98, - "learning_rate": 1.6030489857650354e-05, - "loss": 0.0246, + "learning_rate": 2.404573478647553e-05, + "loss": 0.0367, "step": 197500 }, { "epoch": 2.98, - "learning_rate": 1.6020440464884908e-05, - "loss": 0.0263, + "learning_rate": 2.4030660697327362e-05, + "loss": 0.0381, "step": 198000 }, { "epoch": 2.99, - "learning_rate": 1.601039107211947e-05, - "loss": 0.026, + "learning_rate": 2.4015586608179203e-05, + "loss": 0.0375, "step": 198500 }, { "epoch": 3.0, - "learning_rate": 1.6000341679354026e-05, - "loss": 0.0265, + "learning_rate": 2.4000512519031037e-05, + "loss": 0.0375, "step": 199000 }, { "epoch": 3.0, - "eval_accuracy": 0.9884570743553258, - "eval_f1": 0.9280521554337581, - "eval_loss": 0.030215181410312653, - "eval_precision": 0.9129746648556074, - "eval_recall": 0.943636008496716, - "eval_runtime": 228.7531, - "eval_samples_per_second": 515.508, - "eval_steps_per_second": 32.223, + "eval_accuracy": 0.9830589596281374, + "eval_f1": 0.9202532892026535, + "eval_loss": 0.04450139403343201, + "eval_precision": 0.9090566202540189, + "eval_recall": 0.9317292119377233, + "eval_runtime": 241.3574, + "eval_samples_per_second": 488.587, + "eval_steps_per_second": 30.54, "step": 199017 }, { "epoch": 3.01, - "learning_rate": 1.5990292286588583e-05, - "loss": 0.0213, + "learning_rate": 2.3985438429882875e-05, + "loss": 0.0305, "step": 199500 }, { "epoch": 3.01, - "learning_rate": 1.598024289382314e-05, - "loss": 0.0206, + "learning_rate": 2.3970364340734712e-05, + "loss": 0.0303, "step": 200000 }, { "epoch": 3.02, - "learning_rate": 1.59701935010577e-05, - "loss": 0.0213, + "learning_rate": 2.395529025158655e-05, + "loss": 0.0309, "step": 200500 }, { "epoch": 3.03, - "learning_rate": 1.5960144108292258e-05, - "loss": 0.0213, + "learning_rate": 2.3940216162438383e-05, + "loss": 0.0296, "step": 201000 }, { "epoch": 3.04, - "learning_rate": 1.5950094715526815e-05, - "loss": 0.0222, + "learning_rate": 2.3925142073290224e-05, + "loss": 0.0315, "step": 201500 }, { "epoch": 3.04, - "learning_rate": 1.5940045322761372e-05, - "loss": 0.0202, + "learning_rate": 2.391006798414206e-05, + "loss": 0.0295, "step": 202000 }, { "epoch": 3.05, - "learning_rate": 1.5929995929995933e-05, - "loss": 0.0212, + "learning_rate": 2.3894993894993896e-05, + "loss": 0.0308, "step": 202500 }, { "epoch": 3.06, - "learning_rate": 1.591994653723049e-05, - "loss": 0.0209, + "learning_rate": 2.387991980584573e-05, + "loss": 0.0309, "step": 203000 }, { "epoch": 3.07, - "learning_rate": 1.5909897144465047e-05, - "loss": 0.0215, + "learning_rate": 2.386484571669757e-05, + "loss": 0.031, "step": 203500 }, { "epoch": 3.08, - "learning_rate": 1.5899847751699604e-05, - "loss": 0.0223, + "learning_rate": 2.3849771627549405e-05, + "loss": 0.0312, "step": 204000 }, { "epoch": 3.08, - "learning_rate": 1.5889798358934165e-05, - "loss": 0.0208, + "learning_rate": 2.3834697538401242e-05, + "loss": 0.0304, "step": 204500 }, { "epoch": 3.09, - "learning_rate": 1.587974896616872e-05, - "loss": 0.0212, + "learning_rate": 2.381962344925308e-05, + "loss": 0.03, "step": 205000 }, { "epoch": 3.1, - "learning_rate": 1.586969957340328e-05, - "loss": 0.0206, + "learning_rate": 2.3804549360104917e-05, + "loss": 0.0301, "step": 205500 }, { "epoch": 3.11, - "learning_rate": 1.5859650180637836e-05, - "loss": 0.021, + "learning_rate": 2.378947527095675e-05, + "loss": 0.0301, "step": 206000 }, { "epoch": 3.11, - "learning_rate": 1.5849600787872393e-05, - "loss": 0.0202, + "learning_rate": 2.3774401181808592e-05, + "loss": 0.0293, "step": 206500 }, { "epoch": 3.12, - "learning_rate": 1.583955139510695e-05, - "loss": 0.02, + "learning_rate": 2.3759327092660426e-05, + "loss": 0.0306, "step": 207000 }, { "epoch": 3.13, - "learning_rate": 1.582950200234151e-05, - "loss": 0.021, + "learning_rate": 2.3744253003512263e-05, + "loss": 0.0296, "step": 207500 }, { "epoch": 3.14, - "learning_rate": 1.5819452609576068e-05, - "loss": 0.0208, + "learning_rate": 2.37291789143641e-05, + "loss": 0.0302, "step": 208000 }, { "epoch": 3.14, - "learning_rate": 1.5809403216810625e-05, - "loss": 0.0215, + "learning_rate": 2.3714104825215938e-05, + "loss": 0.0303, "step": 208500 }, { "epoch": 3.15, - "learning_rate": 1.5799353824045183e-05, - "loss": 0.0217, + "learning_rate": 2.3699030736067772e-05, + "loss": 0.0317, "step": 209000 }, { "epoch": 3.16, - "learning_rate": 1.5789304431279743e-05, - "loss": 0.0221, + "learning_rate": 2.368395664691961e-05, + "loss": 0.0318, "step": 209500 }, { "epoch": 3.17, - "learning_rate": 1.5779255038514297e-05, - "loss": 0.0221, + "learning_rate": 2.3668882557771447e-05, + "loss": 0.0314, "step": 210000 }, { "epoch": 3.17, - "learning_rate": 1.5769205645748857e-05, - "loss": 0.0199, + "learning_rate": 2.3653808468623284e-05, + "loss": 0.0286, "step": 210500 }, { "epoch": 3.18, - "learning_rate": 1.5759156252983415e-05, - "loss": 0.0201, + "learning_rate": 2.363873437947512e-05, + "loss": 0.0306, "step": 211000 }, { "epoch": 3.19, - "learning_rate": 1.574910686021797e-05, - "loss": 0.0217, + "learning_rate": 2.362366029032696e-05, + "loss": 0.0321, "step": 211500 }, { "epoch": 3.2, - "learning_rate": 1.573905746745253e-05, - "loss": 0.0218, + "learning_rate": 2.3608586201178793e-05, + "loss": 0.031, "step": 212000 }, { "epoch": 3.2, - "learning_rate": 1.572900807468709e-05, - "loss": 0.02, + "learning_rate": 2.359351211203063e-05, + "loss": 0.0282, "step": 212500 }, { "epoch": 3.21, - "learning_rate": 1.5718958681921647e-05, - "loss": 0.0214, + "learning_rate": 2.3578438022882468e-05, + "loss": 0.0312, "step": 213000 }, { "epoch": 3.22, - "learning_rate": 1.5708909289156204e-05, - "loss": 0.0207, + "learning_rate": 2.3563363933734305e-05, + "loss": 0.0311, "step": 213500 }, { "epoch": 3.23, - "learning_rate": 1.569885989639076e-05, - "loss": 0.021, + "learning_rate": 2.354828984458614e-05, + "loss": 0.0309, "step": 214000 }, { "epoch": 3.23, - "learning_rate": 1.568881050362532e-05, - "loss": 0.0223, + "learning_rate": 2.353321575543798e-05, + "loss": 0.0315, "step": 214500 }, { "epoch": 3.24, - "learning_rate": 1.567876111085988e-05, - "loss": 0.0213, + "learning_rate": 2.3518141666289814e-05, + "loss": 0.0304, "step": 215000 }, { "epoch": 3.25, - "learning_rate": 1.5668711718094436e-05, - "loss": 0.0209, + "learning_rate": 2.3503067577141652e-05, + "loss": 0.0309, "step": 215500 }, { "epoch": 3.26, - "learning_rate": 1.5658662325328993e-05, - "loss": 0.0215, + "learning_rate": 2.348799348799349e-05, + "loss": 0.0308, "step": 216000 }, { "epoch": 3.26, - "learning_rate": 1.564861293256355e-05, - "loss": 0.0205, + "learning_rate": 2.3472919398845327e-05, + "loss": 0.0304, "step": 216500 }, { "epoch": 3.27, - "learning_rate": 1.5638563539798107e-05, - "loss": 0.0205, + "learning_rate": 2.345784530969716e-05, + "loss": 0.0303, "step": 217000 }, { "epoch": 3.28, - "learning_rate": 1.5628514147032668e-05, - "loss": 0.0206, + "learning_rate": 2.3442771220548998e-05, + "loss": 0.0302, "step": 217500 }, { "epoch": 3.29, - "learning_rate": 1.5618464754267225e-05, - "loss": 0.0201, + "learning_rate": 2.3427697131400836e-05, + "loss": 0.0298, "step": 218000 }, { "epoch": 3.29, - "learning_rate": 1.5608415361501782e-05, - "loss": 0.0199, + "learning_rate": 2.3412623042252673e-05, + "loss": 0.0297, "step": 218500 }, { "epoch": 3.3, - "learning_rate": 1.559836596873634e-05, - "loss": 0.022, + "learning_rate": 2.3397548953104507e-05, + "loss": 0.0318, "step": 219000 }, { "epoch": 3.31, - "learning_rate": 1.55883165759709e-05, - "loss": 0.0218, + "learning_rate": 2.3382474863956348e-05, + "loss": 0.0321, "step": 219500 }, { "epoch": 3.32, - "learning_rate": 1.5578267183205457e-05, - "loss": 0.0217, + "learning_rate": 2.3367400774808182e-05, + "loss": 0.0322, "step": 220000 }, { "epoch": 3.32, - "learning_rate": 1.5568217790440014e-05, - "loss": 0.0218, + "learning_rate": 2.335232668566002e-05, + "loss": 0.0321, "step": 220500 }, { "epoch": 3.33, - "learning_rate": 1.555816839767457e-05, - "loss": 0.0206, + "learning_rate": 2.3337252596511857e-05, + "loss": 0.0291, "step": 221000 }, { "epoch": 3.34, - "learning_rate": 1.554811900490913e-05, - "loss": 0.0213, + "learning_rate": 2.3322178507363694e-05, + "loss": 0.0319, "step": 221500 }, { "epoch": 3.35, - "learning_rate": 1.5538069612143685e-05, - "loss": 0.0216, + "learning_rate": 2.3307104418215528e-05, + "loss": 0.0306, "step": 222000 }, { "epoch": 3.35, - "learning_rate": 1.5528020219378246e-05, - "loss": 0.0215, + "learning_rate": 2.329203032906737e-05, + "loss": 0.0313, "step": 222500 }, { "epoch": 3.36, - "learning_rate": 1.5517970826612803e-05, - "loss": 0.0218, + "learning_rate": 2.3276956239919203e-05, + "loss": 0.0313, "step": 223000 }, { "epoch": 3.37, - "learning_rate": 1.550792143384736e-05, - "loss": 0.02, + "learning_rate": 2.326188215077104e-05, + "loss": 0.0297, "step": 223500 }, { "epoch": 3.38, - "learning_rate": 1.5497872041081917e-05, - "loss": 0.0204, + "learning_rate": 2.3246808061622878e-05, + "loss": 0.0302, "step": 224000 }, { "epoch": 3.38, - "learning_rate": 1.5487822648316478e-05, - "loss": 0.021, + "learning_rate": 2.3231733972474715e-05, + "loss": 0.0321, "step": 224500 }, { "epoch": 3.39, - "learning_rate": 1.5477773255551035e-05, - "loss": 0.0222, + "learning_rate": 2.321665988332655e-05, + "loss": 0.0326, "step": 225000 }, { "epoch": 3.4, - "learning_rate": 1.5467723862785592e-05, - "loss": 0.0208, + "learning_rate": 2.3201585794178387e-05, + "loss": 0.0294, "step": 225500 }, { "epoch": 3.41, - "learning_rate": 1.545767447002015e-05, - "loss": 0.0212, + "learning_rate": 2.3186511705030224e-05, + "loss": 0.0306, "step": 226000 }, { "epoch": 3.41, - "learning_rate": 1.544762507725471e-05, - "loss": 0.0212, + "learning_rate": 2.317143761588206e-05, + "loss": 0.0316, "step": 226500 }, { "epoch": 3.42, - "learning_rate": 1.5437575684489267e-05, - "loss": 0.0209, + "learning_rate": 2.3156363526733896e-05, + "loss": 0.0307, "step": 227000 }, { "epoch": 3.43, - "learning_rate": 1.5427526291723824e-05, - "loss": 0.0208, + "learning_rate": 2.3141289437585736e-05, + "loss": 0.0316, "step": 227500 }, { "epoch": 3.44, - "learning_rate": 1.541747689895838e-05, - "loss": 0.0213, + "learning_rate": 2.312621534843757e-05, + "loss": 0.0308, "step": 228000 }, { "epoch": 3.44, - "learning_rate": 1.540742750619294e-05, - "loss": 0.0224, + "learning_rate": 2.3111141259289408e-05, + "loss": 0.0327, "step": 228500 }, { "epoch": 3.45, - "learning_rate": 1.5397378113427496e-05, - "loss": 0.0214, + "learning_rate": 2.3096067170141245e-05, + "loss": 0.032, "step": 229000 }, { "epoch": 3.46, - "learning_rate": 1.5387328720662056e-05, - "loss": 0.0218, + "learning_rate": 2.3080993080993083e-05, + "loss": 0.0324, "step": 229500 }, { "epoch": 3.47, - "learning_rate": 1.5377279327896613e-05, - "loss": 0.0222, + "learning_rate": 2.3065918991844917e-05, + "loss": 0.0315, "step": 230000 }, { "epoch": 3.47, - "learning_rate": 1.536722993513117e-05, - "loss": 0.0207, + "learning_rate": 2.3050844902696758e-05, + "loss": 0.03, "step": 230500 }, { "epoch": 3.48, - "learning_rate": 1.5357180542365728e-05, - "loss": 0.0223, + "learning_rate": 2.303577081354859e-05, + "loss": 0.0331, "step": 231000 }, { "epoch": 3.49, - "learning_rate": 1.5347131149600288e-05, - "loss": 0.0208, + "learning_rate": 2.302069672440043e-05, + "loss": 0.0316, "step": 231500 }, { "epoch": 3.5, - "learning_rate": 1.5337081756834845e-05, - "loss": 0.0223, + "learning_rate": 2.3005622635252266e-05, + "loss": 0.0319, "step": 232000 }, { "epoch": 3.5, - "learning_rate": 1.5327032364069403e-05, - "loss": 0.0205, + "learning_rate": 2.2990548546104104e-05, + "loss": 0.0317, "step": 232500 }, { "epoch": 3.51, - "learning_rate": 1.531698297130396e-05, - "loss": 0.0221, + "learning_rate": 2.2975474456955938e-05, + "loss": 0.033, "step": 233000 }, { "epoch": 3.52, - "learning_rate": 1.530693357853852e-05, - "loss": 0.0225, + "learning_rate": 2.2960400367807775e-05, + "loss": 0.0328, "step": 233500 }, { "epoch": 3.53, - "learning_rate": 1.5296884185773074e-05, - "loss": 0.0228, + "learning_rate": 2.2945326278659613e-05, + "loss": 0.0341, "step": 234000 }, { "epoch": 3.53, - "learning_rate": 1.5286834793007635e-05, - "loss": 0.0215, + "learning_rate": 2.293025218951145e-05, + "loss": 0.0319, "step": 234500 }, { "epoch": 3.54, - "learning_rate": 1.527678540024219e-05, - "loss": 0.0212, + "learning_rate": 2.2915178100363284e-05, + "loss": 0.0314, "step": 235000 }, { "epoch": 3.55, - "learning_rate": 1.526673600747675e-05, - "loss": 0.0213, + "learning_rate": 2.2900104011215125e-05, + "loss": 0.0314, "step": 235500 }, { "epoch": 3.56, - "learning_rate": 1.5256686614711308e-05, - "loss": 0.021, + "learning_rate": 2.288502992206696e-05, + "loss": 0.0314, "step": 236000 }, { "epoch": 3.57, - "learning_rate": 1.5246637221945867e-05, - "loss": 0.0218, + "learning_rate": 2.2869955832918796e-05, + "loss": 0.0309, "step": 236500 }, { "epoch": 3.57, - "learning_rate": 1.5236587829180422e-05, - "loss": 0.0221, + "learning_rate": 2.2854881743770634e-05, + "loss": 0.0323, "step": 237000 }, { "epoch": 3.58, - "learning_rate": 1.522653843641498e-05, - "loss": 0.022, + "learning_rate": 2.283980765462247e-05, + "loss": 0.0331, "step": 237500 }, { "epoch": 3.59, - "learning_rate": 1.5216489043649538e-05, - "loss": 0.0202, + "learning_rate": 2.2824733565474305e-05, + "loss": 0.029, "step": 238000 }, { "epoch": 3.6, - "learning_rate": 1.5206439650884097e-05, - "loss": 0.0219, + "learning_rate": 2.2809659476326146e-05, + "loss": 0.0331, "step": 238500 }, { "epoch": 3.6, - "learning_rate": 1.5196390258118654e-05, - "loss": 0.0204, + "learning_rate": 2.279458538717798e-05, + "loss": 0.0308, "step": 239000 }, { "epoch": 3.61, - "learning_rate": 1.5186340865353213e-05, - "loss": 0.0226, + "learning_rate": 2.2779511298029818e-05, + "loss": 0.0332, "step": 239500 }, { "epoch": 3.62, - "learning_rate": 1.517629147258777e-05, - "loss": 0.0215, + "learning_rate": 2.276443720888165e-05, + "loss": 0.0322, "step": 240000 }, { "epoch": 3.63, - "learning_rate": 1.5166242079822329e-05, - "loss": 0.022, + "learning_rate": 2.2749363119733492e-05, + "loss": 0.0315, "step": 240500 }, { "epoch": 3.63, - "learning_rate": 1.5156192687056886e-05, - "loss": 0.0213, + "learning_rate": 2.2734289030585326e-05, + "loss": 0.0314, "step": 241000 }, { "epoch": 3.64, - "learning_rate": 1.5146143294291445e-05, - "loss": 0.0213, + "learning_rate": 2.2719214941437164e-05, + "loss": 0.0314, "step": 241500 }, { "epoch": 3.65, - "learning_rate": 1.5136093901526e-05, - "loss": 0.0223, + "learning_rate": 2.2704140852289e-05, + "loss": 0.0335, "step": 242000 }, { "epoch": 3.66, - "learning_rate": 1.512604450876056e-05, - "loss": 0.0212, + "learning_rate": 2.268906676314084e-05, + "loss": 0.031, "step": 242500 }, { "epoch": 3.66, - "learning_rate": 1.5115995115995116e-05, - "loss": 0.0221, + "learning_rate": 2.2673992673992673e-05, + "loss": 0.0319, "step": 243000 }, { "epoch": 3.67, - "learning_rate": 1.5105945723229675e-05, - "loss": 0.0216, + "learning_rate": 2.2658918584844514e-05, + "loss": 0.0322, "step": 243500 }, { "epoch": 3.68, - "learning_rate": 1.5095896330464232e-05, - "loss": 0.0211, + "learning_rate": 2.2643844495696348e-05, + "loss": 0.031, "step": 244000 }, { "epoch": 3.69, - "learning_rate": 1.5085846937698791e-05, - "loss": 0.022, + "learning_rate": 2.2628770406548185e-05, + "loss": 0.0318, "step": 244500 }, { "epoch": 3.69, - "learning_rate": 1.5075797544933348e-05, - "loss": 0.0207, + "learning_rate": 2.2613696317400022e-05, + "loss": 0.0297, "step": 245000 }, { "epoch": 3.7, - "learning_rate": 1.5065748152167907e-05, - "loss": 0.0207, + "learning_rate": 2.259862222825186e-05, + "loss": 0.031, "step": 245500 }, { "epoch": 3.71, - "learning_rate": 1.5055698759402464e-05, - "loss": 0.0206, + "learning_rate": 2.2583548139103694e-05, + "loss": 0.0298, "step": 246000 }, { "epoch": 3.72, - "learning_rate": 1.5045649366637023e-05, - "loss": 0.0222, + "learning_rate": 2.2568474049955535e-05, + "loss": 0.0323, "step": 246500 }, { "epoch": 3.72, - "learning_rate": 1.503559997387158e-05, - "loss": 0.0218, + "learning_rate": 2.255339996080737e-05, + "loss": 0.032, "step": 247000 }, { "epoch": 3.73, - "learning_rate": 1.5025550581106139e-05, - "loss": 0.0216, + "learning_rate": 2.2538325871659206e-05, + "loss": 0.0309, "step": 247500 }, { "epoch": 3.74, - "learning_rate": 1.5015501188340695e-05, - "loss": 0.022, + "learning_rate": 2.252325178251104e-05, + "loss": 0.0325, "step": 248000 }, { "epoch": 3.75, - "learning_rate": 1.5005451795575255e-05, - "loss": 0.022, + "learning_rate": 2.250817769336288e-05, + "loss": 0.0315, "step": 248500 }, { "epoch": 3.75, - "learning_rate": 1.499540240280981e-05, - "loss": 0.0213, + "learning_rate": 2.2493103604214715e-05, + "loss": 0.031, "step": 249000 }, { "epoch": 3.76, - "learning_rate": 1.498535301004437e-05, - "loss": 0.022, + "learning_rate": 2.2478029515066552e-05, + "loss": 0.0333, "step": 249500 }, { "epoch": 3.77, - "learning_rate": 1.4975303617278927e-05, - "loss": 0.0212, + "learning_rate": 2.246295542591839e-05, + "loss": 0.0322, "step": 250000 }, { "epoch": 3.78, - "learning_rate": 1.4965254224513485e-05, - "loss": 0.0214, + "learning_rate": 2.2447881336770227e-05, + "loss": 0.0308, "step": 250500 }, { "epoch": 3.78, - "learning_rate": 1.4955204831748043e-05, - "loss": 0.0201, + "learning_rate": 2.243280724762206e-05, + "loss": 0.0293, "step": 251000 }, { "epoch": 3.79, - "learning_rate": 1.4945155438982601e-05, - "loss": 0.0213, + "learning_rate": 2.2417733158473902e-05, + "loss": 0.033, "step": 251500 }, { "epoch": 3.8, - "learning_rate": 1.4935106046217159e-05, - "loss": 0.0209, + "learning_rate": 2.2402659069325736e-05, + "loss": 0.0325, "step": 252000 }, { "epoch": 3.81, - "learning_rate": 1.4925056653451717e-05, - "loss": 0.0208, + "learning_rate": 2.2387584980177574e-05, + "loss": 0.0318, "step": 252500 }, { "epoch": 3.81, - "learning_rate": 1.4915007260686275e-05, - "loss": 0.0208, + "learning_rate": 2.237251089102941e-05, + "loss": 0.0308, "step": 253000 }, { "epoch": 3.82, - "learning_rate": 1.4904957867920833e-05, - "loss": 0.0228, + "learning_rate": 2.235743680188125e-05, + "loss": 0.0339, "step": 253500 }, { "epoch": 3.83, - "learning_rate": 1.4894908475155389e-05, - "loss": 0.0209, + "learning_rate": 2.2342362712733082e-05, + "loss": 0.0306, "step": 254000 }, { "epoch": 3.84, - "learning_rate": 1.488485908238995e-05, - "loss": 0.0211, + "learning_rate": 2.2327288623584923e-05, + "loss": 0.0314, "step": 254500 }, { "epoch": 3.84, - "learning_rate": 1.4874809689624505e-05, - "loss": 0.0209, + "learning_rate": 2.2312214534436757e-05, + "loss": 0.0307, "step": 255000 }, { "epoch": 3.85, - "learning_rate": 1.4864760296859064e-05, - "loss": 0.0221, + "learning_rate": 2.2297140445288595e-05, + "loss": 0.0311, "step": 255500 }, { "epoch": 3.86, - "learning_rate": 1.4854710904093621e-05, - "loss": 0.021, + "learning_rate": 2.228206635614043e-05, + "loss": 0.0319, "step": 256000 }, { "epoch": 3.87, - "learning_rate": 1.484466151132818e-05, - "loss": 0.0215, + "learning_rate": 2.226699226699227e-05, + "loss": 0.0314, "step": 256500 }, { "epoch": 3.87, - "learning_rate": 1.4834612118562737e-05, - "loss": 0.0216, + "learning_rate": 2.2251918177844104e-05, + "loss": 0.0312, "step": 257000 }, { "epoch": 3.88, - "learning_rate": 1.4824562725797296e-05, - "loss": 0.0227, + "learning_rate": 2.223684408869594e-05, + "loss": 0.0321, "step": 257500 }, { "epoch": 3.89, - "learning_rate": 1.4814513333031853e-05, - "loss": 0.0223, + "learning_rate": 2.222176999954778e-05, + "loss": 0.032, "step": 258000 }, { "epoch": 3.9, - "learning_rate": 1.4804463940266412e-05, - "loss": 0.0221, + "learning_rate": 2.2206695910399616e-05, + "loss": 0.0327, "step": 258500 }, { "epoch": 3.9, - "learning_rate": 1.4794414547500969e-05, - "loss": 0.021, + "learning_rate": 2.219162182125145e-05, + "loss": 0.0298, "step": 259000 }, { "epoch": 3.91, - "learning_rate": 1.4784365154735528e-05, - "loss": 0.0205, + "learning_rate": 2.217654773210329e-05, + "loss": 0.0311, "step": 259500 }, { "epoch": 3.92, - "learning_rate": 1.4774315761970083e-05, - "loss": 0.0213, + "learning_rate": 2.2161473642955125e-05, + "loss": 0.0305, "step": 260000 }, { "epoch": 3.93, - "learning_rate": 1.4764266369204642e-05, - "loss": 0.021, + "learning_rate": 2.2146399553806962e-05, + "loss": 0.0306, "step": 260500 }, { "epoch": 3.93, - "learning_rate": 1.47542169764392e-05, - "loss": 0.0209, + "learning_rate": 2.21313254646588e-05, + "loss": 0.0311, "step": 261000 }, { "epoch": 3.94, - "learning_rate": 1.4744167583673758e-05, - "loss": 0.022, + "learning_rate": 2.2116251375510637e-05, + "loss": 0.0336, "step": 261500 }, { "epoch": 3.95, - "learning_rate": 1.4734118190908315e-05, - "loss": 0.0219, + "learning_rate": 2.210117728636247e-05, + "loss": 0.0305, "step": 262000 }, { "epoch": 3.96, - "learning_rate": 1.4724068798142874e-05, - "loss": 0.0209, + "learning_rate": 2.208610319721431e-05, + "loss": 0.0312, "step": 262500 }, { "epoch": 3.96, - "learning_rate": 1.4714019405377431e-05, - "loss": 0.0212, + "learning_rate": 2.2071029108066146e-05, + "loss": 0.0325, "step": 263000 }, { "epoch": 3.97, - "learning_rate": 1.470397001261199e-05, - "loss": 0.0217, + "learning_rate": 2.2055955018917983e-05, + "loss": 0.0321, "step": 263500 }, { "epoch": 3.98, - "learning_rate": 1.4693920619846547e-05, - "loss": 0.0211, + "learning_rate": 2.2040880929769817e-05, + "loss": 0.031, "step": 264000 }, { "epoch": 3.99, - "learning_rate": 1.4683871227081106e-05, - "loss": 0.0224, + "learning_rate": 2.2025806840621658e-05, + "loss": 0.0325, "step": 264500 }, { "epoch": 3.99, - "learning_rate": 1.4673821834315663e-05, - "loss": 0.0211, + "learning_rate": 2.2010732751473492e-05, + "loss": 0.0317, "step": 265000 }, { "epoch": 4.0, - "eval_accuracy": 0.9894537148623533, - "eval_f1": 0.9339812534401504, - "eval_loss": 0.028693294152617455, - "eval_precision": 0.9222831434284444, - "eval_recall": 0.9459799301706668, - "eval_runtime": 232.064, - "eval_samples_per_second": 508.153, - "eval_steps_per_second": 31.763, + "eval_accuracy": 0.9846929860097222, + "eval_f1": 0.929668210030934, + "eval_loss": 0.04334929585456848, + "eval_precision": 0.914498960760849, + "eval_recall": 0.9453491873591002, + "eval_runtime": 269.8313, + "eval_samples_per_second": 437.029, + "eval_steps_per_second": 27.317, "step": 265356 }, { "epoch": 4.0, - "learning_rate": 1.4663772441550222e-05, - "loss": 0.0195, + "learning_rate": 2.199565866232533e-05, + "loss": 0.0277, "step": 265500 }, { "epoch": 4.01, - "learning_rate": 1.4653723048784777e-05, - "loss": 0.0167, + "learning_rate": 2.1980584573177167e-05, + "loss": 0.0244, "step": 266000 }, { "epoch": 4.02, - "learning_rate": 1.4643673656019336e-05, - "loss": 0.0171, + "learning_rate": 2.1965510484029004e-05, + "loss": 0.0244, "step": 266500 }, { "epoch": 4.02, - "learning_rate": 1.4633624263253893e-05, - "loss": 0.0174, + "learning_rate": 2.195043639488084e-05, + "loss": 0.0252, "step": 267000 }, { "epoch": 4.03, - "learning_rate": 1.4623574870488452e-05, - "loss": 0.0171, + "learning_rate": 2.193536230573268e-05, + "loss": 0.0242, "step": 267500 }, { "epoch": 4.04, - "learning_rate": 1.461352547772301e-05, - "loss": 0.017, + "learning_rate": 2.1920288216584513e-05, + "loss": 0.0245, "step": 268000 }, { "epoch": 4.05, - "learning_rate": 1.4603476084957568e-05, - "loss": 0.0172, + "learning_rate": 2.190521412743635e-05, + "loss": 0.0253, "step": 268500 }, { "epoch": 4.05, - "learning_rate": 1.4593426692192125e-05, - "loss": 0.0166, + "learning_rate": 2.1890140038288188e-05, + "loss": 0.0246, "step": 269000 }, { "epoch": 4.06, - "learning_rate": 1.4583377299426684e-05, - "loss": 0.0176, + "learning_rate": 2.1875065949140026e-05, + "loss": 0.0255, "step": 269500 }, { "epoch": 4.07, - "learning_rate": 1.4573327906661241e-05, - "loss": 0.0168, + "learning_rate": 2.185999185999186e-05, + "loss": 0.0259, "step": 270000 }, { "epoch": 4.08, - "learning_rate": 1.45632785138958e-05, - "loss": 0.0164, + "learning_rate": 2.1844917770843697e-05, + "loss": 0.0232, "step": 270500 }, { "epoch": 4.09, - "learning_rate": 1.4553229121130357e-05, - "loss": 0.0167, + "learning_rate": 2.1829843681695534e-05, + "loss": 0.0239, "step": 271000 }, { "epoch": 4.09, - "learning_rate": 1.4543179728364916e-05, - "loss": 0.0168, + "learning_rate": 2.1814769592547372e-05, + "loss": 0.0254, "step": 271500 }, { "epoch": 4.1, - "learning_rate": 1.4533130335599472e-05, - "loss": 0.0161, + "learning_rate": 2.1799695503399206e-05, + "loss": 0.0249, "step": 272000 }, { "epoch": 4.11, - "learning_rate": 1.452308094283403e-05, - "loss": 0.017, + "learning_rate": 2.1784621414251047e-05, + "loss": 0.0247, "step": 272500 }, { "epoch": 4.12, - "learning_rate": 1.4513031550068588e-05, - "loss": 0.017, + "learning_rate": 2.176954732510288e-05, + "loss": 0.0253, "step": 273000 }, { "epoch": 4.12, - "learning_rate": 1.4502982157303147e-05, - "loss": 0.0178, + "learning_rate": 2.1754473235954718e-05, + "loss": 0.0259, "step": 273500 }, { "epoch": 4.13, - "learning_rate": 1.4492932764537704e-05, - "loss": 0.0175, + "learning_rate": 2.1739399146806556e-05, + "loss": 0.0258, "step": 274000 }, { "epoch": 4.14, - "learning_rate": 1.4482883371772263e-05, - "loss": 0.0179, + "learning_rate": 2.1724325057658393e-05, + "loss": 0.0247, "step": 274500 }, { "epoch": 4.15, - "learning_rate": 1.447283397900682e-05, - "loss": 0.0184, + "learning_rate": 2.1709250968510227e-05, + "loss": 0.0271, "step": 275000 }, { "epoch": 4.15, - "learning_rate": 1.4462784586241379e-05, - "loss": 0.0174, + "learning_rate": 2.1694176879362068e-05, + "loss": 0.0253, "step": 275500 }, { "epoch": 4.16, - "learning_rate": 1.4452735193475936e-05, - "loss": 0.0171, + "learning_rate": 2.1679102790213902e-05, + "loss": 0.0258, "step": 276000 }, { "epoch": 4.17, - "learning_rate": 1.4442685800710495e-05, - "loss": 0.0173, + "learning_rate": 2.166402870106574e-05, + "loss": 0.0258, "step": 276500 }, { "epoch": 4.18, - "learning_rate": 1.4432636407945052e-05, - "loss": 0.018, + "learning_rate": 2.1648954611917577e-05, + "loss": 0.0261, "step": 277000 }, { "epoch": 4.18, - "learning_rate": 1.442258701517961e-05, - "loss": 0.0165, + "learning_rate": 2.1633880522769414e-05, + "loss": 0.0243, "step": 277500 }, { "epoch": 4.19, - "learning_rate": 1.4412537622414166e-05, - "loss": 0.0173, + "learning_rate": 2.1618806433621248e-05, + "loss": 0.0253, "step": 278000 }, { "epoch": 4.2, - "learning_rate": 1.4402488229648725e-05, - "loss": 0.0171, + "learning_rate": 2.1603732344473086e-05, + "loss": 0.0244, "step": 278500 }, { "epoch": 4.21, - "learning_rate": 1.4392438836883282e-05, - "loss": 0.0175, + "learning_rate": 2.1588658255324923e-05, + "loss": 0.0266, "step": 279000 }, { "epoch": 4.21, - "learning_rate": 1.4382389444117841e-05, - "loss": 0.017, + "learning_rate": 2.157358416617676e-05, + "loss": 0.0251, "step": 279500 }, { "epoch": 4.22, - "learning_rate": 1.4372340051352398e-05, - "loss": 0.0165, + "learning_rate": 2.1558510077028595e-05, + "loss": 0.0242, "step": 280000 }, { "epoch": 4.23, - "learning_rate": 1.4362290658586957e-05, - "loss": 0.0178, + "learning_rate": 2.1543435987880435e-05, + "loss": 0.0264, "step": 280500 }, { "epoch": 4.24, - "learning_rate": 1.4352241265821514e-05, - "loss": 0.0166, + "learning_rate": 2.152836189873227e-05, + "loss": 0.025, "step": 281000 }, { "epoch": 4.24, - "learning_rate": 1.4342191873056073e-05, - "loss": 0.0174, + "learning_rate": 2.1513287809584107e-05, + "loss": 0.0257, "step": 281500 }, { "epoch": 4.25, - "learning_rate": 1.433214248029063e-05, - "loss": 0.0176, + "learning_rate": 2.1498213720435944e-05, + "loss": 0.026, "step": 282000 }, { "epoch": 4.26, - "learning_rate": 1.4322093087525189e-05, - "loss": 0.0174, + "learning_rate": 2.148313963128778e-05, + "loss": 0.0274, "step": 282500 }, { "epoch": 4.27, - "learning_rate": 1.4312043694759746e-05, - "loss": 0.0181, + "learning_rate": 2.1468065542139616e-05, + "loss": 0.0264, "step": 283000 }, { "epoch": 4.27, - "learning_rate": 1.4301994301994305e-05, - "loss": 0.0163, + "learning_rate": 2.1452991452991456e-05, + "loss": 0.0239, "step": 283500 }, { "epoch": 4.28, - "learning_rate": 1.429194490922886e-05, - "loss": 0.017, + "learning_rate": 2.143791736384329e-05, + "loss": 0.0258, "step": 284000 }, { "epoch": 4.29, - "learning_rate": 1.428189551646342e-05, - "loss": 0.0169, + "learning_rate": 2.1422843274695128e-05, + "loss": 0.0253, "step": 284500 }, { "epoch": 4.3, - "learning_rate": 1.4271846123697976e-05, - "loss": 0.0174, + "learning_rate": 2.1407769185546965e-05, + "loss": 0.026, "step": 285000 }, { "epoch": 4.3, - "learning_rate": 1.4261796730932535e-05, - "loss": 0.0176, + "learning_rate": 2.1392695096398803e-05, + "loss": 0.0253, "step": 285500 }, { "epoch": 4.31, - "learning_rate": 1.4251747338167092e-05, - "loss": 0.0177, + "learning_rate": 2.1377621007250637e-05, + "loss": 0.0255, "step": 286000 }, { "epoch": 4.32, - "learning_rate": 1.4241697945401651e-05, - "loss": 0.0186, + "learning_rate": 2.1362546918102474e-05, + "loss": 0.0283, "step": 286500 }, { "epoch": 4.33, - "learning_rate": 1.4231648552636208e-05, - "loss": 0.0181, + "learning_rate": 2.134747282895431e-05, + "loss": 0.0261, "step": 287000 }, { "epoch": 4.33, - "learning_rate": 1.4221599159870767e-05, - "loss": 0.0163, + "learning_rate": 2.133239873980615e-05, + "loss": 0.0253, "step": 287500 }, { "epoch": 4.34, - "learning_rate": 1.4211549767105324e-05, - "loss": 0.0182, + "learning_rate": 2.1317324650657983e-05, + "loss": 0.0271, "step": 288000 }, { "epoch": 4.35, - "learning_rate": 1.4201500374339883e-05, - "loss": 0.0176, + "learning_rate": 2.1302250561509824e-05, + "loss": 0.0252, "step": 288500 }, { "epoch": 4.36, - "learning_rate": 1.4191450981574439e-05, - "loss": 0.017, + "learning_rate": 2.1287176472361658e-05, + "loss": 0.025, "step": 289000 }, { "epoch": 4.36, - "learning_rate": 1.4181401588809e-05, - "loss": 0.0171, + "learning_rate": 2.1272102383213495e-05, + "loss": 0.0256, "step": 289500 }, { "epoch": 4.37, - "learning_rate": 1.4171352196043555e-05, - "loss": 0.0175, + "learning_rate": 2.1257028294065333e-05, + "loss": 0.0253, "step": 290000 }, { "epoch": 4.38, - "learning_rate": 1.4161302803278113e-05, - "loss": 0.0176, + "learning_rate": 2.124195420491717e-05, + "loss": 0.0259, "step": 290500 }, { "epoch": 4.39, - "learning_rate": 1.415125341051267e-05, - "loss": 0.0177, + "learning_rate": 2.1226880115769004e-05, + "loss": 0.0271, "step": 291000 }, { "epoch": 4.39, - "learning_rate": 1.414120401774723e-05, - "loss": 0.0181, + "learning_rate": 2.1211806026620845e-05, + "loss": 0.0272, "step": 291500 }, { "epoch": 4.4, - "learning_rate": 1.4131154624981787e-05, - "loss": 0.018, + "learning_rate": 2.119673193747268e-05, + "loss": 0.0253, "step": 292000 }, { "epoch": 4.41, - "learning_rate": 1.4121105232216345e-05, - "loss": 0.0175, + "learning_rate": 2.1181657848324517e-05, + "loss": 0.0262, "step": 292500 }, { "epoch": 4.42, - "learning_rate": 1.4111055839450903e-05, - "loss": 0.0175, + "learning_rate": 2.1166583759176354e-05, + "loss": 0.0251, "step": 293000 }, { "epoch": 4.42, - "learning_rate": 1.4101006446685461e-05, - "loss": 0.0184, + "learning_rate": 2.115150967002819e-05, + "loss": 0.0264, "step": 293500 }, { "epoch": 4.43, - "learning_rate": 1.4090957053920019e-05, - "loss": 0.0176, + "learning_rate": 2.1136435580880025e-05, + "loss": 0.0258, "step": 294000 }, { "epoch": 4.44, - "learning_rate": 1.4080907661154577e-05, - "loss": 0.0174, + "learning_rate": 2.1121361491731863e-05, + "loss": 0.0251, "step": 294500 }, { "epoch": 4.45, - "learning_rate": 1.4070858268389133e-05, - "loss": 0.0171, + "learning_rate": 2.11062874025837e-05, + "loss": 0.0244, "step": 295000 }, { "epoch": 4.45, - "learning_rate": 1.4060808875623693e-05, - "loss": 0.0176, + "learning_rate": 2.1091213313435538e-05, + "loss": 0.026, "step": 295500 }, { "epoch": 4.46, - "learning_rate": 1.4050759482858249e-05, - "loss": 0.0176, + "learning_rate": 2.107613922428737e-05, + "loss": 0.0262, "step": 296000 }, { "epoch": 4.47, - "learning_rate": 1.4040710090092808e-05, - "loss": 0.0172, + "learning_rate": 2.1061065135139212e-05, + "loss": 0.0261, "step": 296500 }, { "epoch": 4.48, - "learning_rate": 1.4030660697327365e-05, - "loss": 0.0164, + "learning_rate": 2.1045991045991047e-05, + "loss": 0.0259, "step": 297000 }, { "epoch": 4.48, - "learning_rate": 1.4020611304561924e-05, - "loss": 0.0172, + "learning_rate": 2.1030916956842884e-05, + "loss": 0.0254, "step": 297500 }, { "epoch": 4.49, - "learning_rate": 1.4010561911796481e-05, - "loss": 0.0185, + "learning_rate": 2.101584286769472e-05, + "loss": 0.0268, "step": 298000 }, { "epoch": 4.5, - "learning_rate": 1.4000512519031038e-05, - "loss": 0.018, + "learning_rate": 2.1000768778546555e-05, + "loss": 0.0276, "step": 298500 }, { "epoch": 4.51, - "learning_rate": 1.3990463126265597e-05, - "loss": 0.0175, + "learning_rate": 2.0985694689398393e-05, + "loss": 0.0259, "step": 299000 }, { "epoch": 4.51, - "learning_rate": 1.3980413733500154e-05, - "loss": 0.0176, + "learning_rate": 2.097062060025023e-05, + "loss": 0.0259, "step": 299500 }, { "epoch": 4.52, - "learning_rate": 1.3970364340734713e-05, - "loss": 0.0182, + "learning_rate": 2.0955546511102068e-05, + "loss": 0.0265, "step": 300000 }, { "epoch": 4.53, - "learning_rate": 1.3960314947969268e-05, - "loss": 0.0179, + "learning_rate": 2.0940472421953902e-05, + "loss": 0.0268, "step": 300500 }, { "epoch": 4.54, - "learning_rate": 1.3950265555203827e-05, - "loss": 0.0173, + "learning_rate": 2.092539833280574e-05, + "loss": 0.0253, "step": 301000 }, { "epoch": 4.54, - "learning_rate": 1.3940216162438384e-05, - "loss": 0.0168, + "learning_rate": 2.0910324243657577e-05, + "loss": 0.0258, "step": 301500 }, { "epoch": 4.55, - "learning_rate": 1.3930166769672943e-05, - "loss": 0.0172, + "learning_rate": 2.0895250154509414e-05, + "loss": 0.0262, "step": 302000 }, { "epoch": 4.56, - "learning_rate": 1.39201173769075e-05, - "loss": 0.0191, + "learning_rate": 2.0880176065361248e-05, + "loss": 0.0272, "step": 302500 }, { "epoch": 4.57, - "learning_rate": 1.391006798414206e-05, - "loss": 0.0175, + "learning_rate": 2.086510197621309e-05, + "loss": 0.0256, "step": 303000 }, { "epoch": 4.57, - "learning_rate": 1.3900018591376616e-05, - "loss": 0.0176, + "learning_rate": 2.0850027887064923e-05, + "loss": 0.0272, "step": 303500 }, { "epoch": 4.58, - "learning_rate": 1.3889969198611175e-05, - "loss": 0.0175, + "learning_rate": 2.083495379791676e-05, + "loss": 0.0257, "step": 304000 }, { "epoch": 4.59, - "learning_rate": 1.3879919805845732e-05, - "loss": 0.018, + "learning_rate": 2.0819879708768598e-05, + "loss": 0.0267, "step": 304500 }, { "epoch": 4.6, - "learning_rate": 1.3869870413080291e-05, - "loss": 0.0189, + "learning_rate": 2.0804805619620435e-05, + "loss": 0.0275, "step": 305000 }, { "epoch": 4.61, - "learning_rate": 1.3859821020314848e-05, - "loss": 0.0184, + "learning_rate": 2.078973153047227e-05, + "loss": 0.0271, "step": 305500 }, { "epoch": 4.61, - "learning_rate": 1.3849771627549407e-05, - "loss": 0.018, + "learning_rate": 2.077465744132411e-05, + "loss": 0.0272, "step": 306000 }, { "epoch": 4.62, - "learning_rate": 1.3839722234783963e-05, - "loss": 0.0178, + "learning_rate": 2.0759583352175944e-05, + "loss": 0.0263, "step": 306500 }, { "epoch": 4.63, - "learning_rate": 1.3829672842018522e-05, - "loss": 0.0182, + "learning_rate": 2.074450926302778e-05, + "loss": 0.0262, "step": 307000 }, { "epoch": 4.64, - "learning_rate": 1.3819623449253079e-05, - "loss": 0.0174, + "learning_rate": 2.072943517387962e-05, + "loss": 0.0261, "step": 307500 }, { "epoch": 4.64, - "learning_rate": 1.3809574056487638e-05, - "loss": 0.017, + "learning_rate": 2.0714361084731456e-05, + "loss": 0.0252, "step": 308000 }, { "epoch": 4.65, - "learning_rate": 1.3799524663722195e-05, - "loss": 0.0174, + "learning_rate": 2.069928699558329e-05, + "loss": 0.0258, "step": 308500 }, { "epoch": 4.66, - "learning_rate": 1.3789475270956754e-05, - "loss": 0.0173, + "learning_rate": 2.0684212906435128e-05, + "loss": 0.0263, "step": 309000 }, { "epoch": 4.67, - "learning_rate": 1.377942587819131e-05, - "loss": 0.0174, + "learning_rate": 2.0669138817286965e-05, + "loss": 0.0261, "step": 309500 }, { "epoch": 4.67, - "learning_rate": 1.376937648542587e-05, - "loss": 0.0175, + "learning_rate": 2.0654064728138803e-05, + "loss": 0.0268, "step": 310000 }, { "epoch": 4.68, - "learning_rate": 1.3759327092660427e-05, - "loss": 0.0176, + "learning_rate": 2.0638990638990637e-05, + "loss": 0.0259, "step": 310500 }, { "epoch": 4.69, - "learning_rate": 1.3749277699894986e-05, - "loss": 0.0167, + "learning_rate": 2.0623916549842477e-05, + "loss": 0.0245, "step": 311000 }, { "epoch": 4.7, - "learning_rate": 1.3739228307129543e-05, - "loss": 0.0174, + "learning_rate": 2.060884246069431e-05, + "loss": 0.0255, "step": 311500 }, { "epoch": 4.7, - "learning_rate": 1.3729178914364102e-05, - "loss": 0.0183, + "learning_rate": 2.059376837154615e-05, + "loss": 0.0269, "step": 312000 }, { "epoch": 4.71, - "learning_rate": 1.3719129521598657e-05, - "loss": 0.0175, + "learning_rate": 2.0578694282397986e-05, + "loss": 0.0276, "step": 312500 }, { "epoch": 4.72, - "learning_rate": 1.3709080128833216e-05, - "loss": 0.0163, + "learning_rate": 2.0563620193249824e-05, + "loss": 0.025, "step": 313000 }, { "epoch": 4.73, - "learning_rate": 1.3699030736067773e-05, - "loss": 0.0173, + "learning_rate": 2.0548546104101658e-05, + "loss": 0.0257, "step": 313500 }, { "epoch": 4.73, - "learning_rate": 1.3688981343302332e-05, - "loss": 0.0192, + "learning_rate": 2.05334720149535e-05, + "loss": 0.0289, "step": 314000 }, { "epoch": 4.74, - "learning_rate": 1.3678931950536889e-05, - "loss": 0.0174, + "learning_rate": 2.0518397925805333e-05, + "loss": 0.0263, "step": 314500 }, { "epoch": 4.75, - "learning_rate": 1.3668882557771448e-05, - "loss": 0.0177, + "learning_rate": 2.050332383665717e-05, + "loss": 0.026, "step": 315000 }, { "epoch": 4.76, - "learning_rate": 1.3658833165006005e-05, - "loss": 0.0172, + "learning_rate": 2.0488249747509007e-05, + "loss": 0.0256, "step": 315500 }, { "epoch": 4.76, - "learning_rate": 1.3648783772240564e-05, - "loss": 0.0179, + "learning_rate": 2.0473175658360845e-05, + "loss": 0.0261, "step": 316000 }, { "epoch": 4.77, - "learning_rate": 1.3638734379475121e-05, - "loss": 0.0191, + "learning_rate": 2.045810156921268e-05, + "loss": 0.0271, "step": 316500 }, { "epoch": 4.78, - "learning_rate": 1.362868498670968e-05, - "loss": 0.0179, + "learning_rate": 2.0443027480064516e-05, + "loss": 0.0259, "step": 317000 }, { "epoch": 4.79, - "learning_rate": 1.3618635593944235e-05, - "loss": 0.0185, + "learning_rate": 2.0427953390916354e-05, + "loss": 0.0256, "step": 317500 }, { "epoch": 4.79, - "learning_rate": 1.3608586201178796e-05, - "loss": 0.018, + "learning_rate": 2.041287930176819e-05, + "loss": 0.0272, "step": 318000 }, { "epoch": 4.8, - "learning_rate": 1.3598536808413351e-05, - "loss": 0.0178, + "learning_rate": 2.0397805212620025e-05, + "loss": 0.0264, "step": 318500 }, { "epoch": 4.81, - "learning_rate": 1.358848741564791e-05, - "loss": 0.0194, + "learning_rate": 2.0382731123471866e-05, + "loss": 0.0285, "step": 319000 }, { "epoch": 4.82, - "learning_rate": 1.3578438022882467e-05, - "loss": 0.0179, + "learning_rate": 2.03676570343237e-05, + "loss": 0.027, "step": 319500 }, { "epoch": 4.82, - "learning_rate": 1.3568388630117026e-05, - "loss": 0.0179, + "learning_rate": 2.0352582945175537e-05, + "loss": 0.0267, "step": 320000 }, { "epoch": 4.83, - "learning_rate": 1.3558339237351583e-05, - "loss": 0.018, + "learning_rate": 2.0337508856027375e-05, + "loss": 0.0267, "step": 320500 }, { "epoch": 4.84, - "learning_rate": 1.3548289844586142e-05, - "loss": 0.0179, + "learning_rate": 2.0322434766879212e-05, + "loss": 0.0257, "step": 321000 }, { "epoch": 4.85, - "learning_rate": 1.35382404518207e-05, - "loss": 0.0173, + "learning_rate": 2.0307360677731046e-05, + "loss": 0.0262, "step": 321500 }, { "epoch": 4.85, - "learning_rate": 1.3528191059055258e-05, - "loss": 0.0189, + "learning_rate": 2.0292286588582887e-05, + "loss": 0.0274, "step": 322000 }, { "epoch": 4.86, - "learning_rate": 1.3518141666289815e-05, - "loss": 0.0184, + "learning_rate": 2.027721249943472e-05, + "loss": 0.0275, "step": 322500 }, { "epoch": 4.87, - "learning_rate": 1.3508092273524374e-05, - "loss": 0.0173, + "learning_rate": 2.026213841028656e-05, + "loss": 0.0261, "step": 323000 }, { "epoch": 4.88, - "learning_rate": 1.349804288075893e-05, - "loss": 0.018, + "learning_rate": 2.0247064321138396e-05, + "loss": 0.0265, "step": 323500 }, { "epoch": 4.88, - "learning_rate": 1.348799348799349e-05, - "loss": 0.0173, + "learning_rate": 2.0231990231990233e-05, + "loss": 0.0261, "step": 324000 }, { "epoch": 4.89, - "learning_rate": 1.3477944095228046e-05, - "loss": 0.0173, + "learning_rate": 2.0216916142842067e-05, + "loss": 0.027, "step": 324500 }, { "epoch": 4.9, - "learning_rate": 1.3467894702462604e-05, - "loss": 0.0178, + "learning_rate": 2.0201842053693905e-05, + "loss": 0.0263, "step": 325000 }, { "epoch": 4.91, - "learning_rate": 1.3457845309697162e-05, - "loss": 0.0185, + "learning_rate": 2.0186767964545742e-05, + "loss": 0.0267, "step": 325500 }, { "epoch": 4.91, - "learning_rate": 1.344779591693172e-05, - "loss": 0.0176, + "learning_rate": 2.017169387539758e-05, + "loss": 0.0263, "step": 326000 }, { "epoch": 4.92, - "learning_rate": 1.3437746524166278e-05, - "loss": 0.0179, + "learning_rate": 2.0156619786249414e-05, + "loss": 0.026, "step": 326500 }, { "epoch": 4.93, - "learning_rate": 1.3427697131400836e-05, - "loss": 0.0174, + "learning_rate": 2.0141545697101255e-05, + "loss": 0.0266, "step": 327000 }, { "epoch": 4.94, - "learning_rate": 1.3417647738635394e-05, - "loss": 0.0174, + "learning_rate": 2.012647160795309e-05, + "loss": 0.0254, "step": 327500 }, { "epoch": 4.94, - "learning_rate": 1.3407598345869952e-05, - "loss": 0.0176, + "learning_rate": 2.0111397518804926e-05, + "loss": 0.0279, "step": 328000 }, { "epoch": 4.95, - "learning_rate": 1.339754895310451e-05, - "loss": 0.0184, + "learning_rate": 2.0096323429656763e-05, + "loss": 0.0277, "step": 328500 }, { "epoch": 4.96, - "learning_rate": 1.3387499560339068e-05, - "loss": 0.0177, + "learning_rate": 2.00812493405086e-05, + "loss": 0.0262, "step": 329000 }, { "epoch": 4.97, - "learning_rate": 1.3377450167573624e-05, - "loss": 0.0178, + "learning_rate": 2.0066175251360435e-05, + "loss": 0.0273, "step": 329500 }, { "epoch": 4.97, - "learning_rate": 1.3367400774808184e-05, - "loss": 0.0176, + "learning_rate": 2.0051101162212276e-05, + "loss": 0.0263, "step": 330000 }, { "epoch": 4.98, - "learning_rate": 1.335735138204274e-05, - "loss": 0.0175, + "learning_rate": 2.003602707306411e-05, + "loss": 0.0266, "step": 330500 }, { "epoch": 4.99, - "learning_rate": 1.3347301989277299e-05, - "loss": 0.0171, + "learning_rate": 2.0020952983915947e-05, + "loss": 0.0262, "step": 331000 }, { "epoch": 5.0, - "learning_rate": 1.3337252596511856e-05, - "loss": 0.0172, + "learning_rate": 2.000587889476778e-05, + "loss": 0.0257, "step": 331500 }, { "epoch": 5.0, - "eval_accuracy": 0.9900642223980718, - "eval_f1": 0.9395505775009235, - "eval_loss": 0.030921388417482376, - "eval_precision": 0.922276705745856, - "eval_recall": 0.9574838651919493, - "eval_runtime": 228.9445, - "eval_samples_per_second": 515.077, - "eval_steps_per_second": 32.196, + "eval_accuracy": 0.9854380991780325, + "eval_f1": 0.9340379855331353, + "eval_loss": 0.04560817405581474, + "eval_precision": 0.9195563196454343, + "eval_recall": 0.9489830798154162, + "eval_runtime": 243.78, + "eval_samples_per_second": 483.731, + "eval_steps_per_second": 30.236, "step": 331695 }, { "epoch": 5.0, - "learning_rate": 1.3327203203746415e-05, - "loss": 0.0156, + "learning_rate": 1.9990804805619622e-05, + "loss": 0.0227, "step": 332000 }, { "epoch": 5.01, - "learning_rate": 1.3317153810980972e-05, - "loss": 0.0137, + "learning_rate": 1.9975730716471456e-05, + "loss": 0.0196, "step": 332500 }, { "epoch": 5.02, - "learning_rate": 1.330710441821553e-05, - "loss": 0.0128, + "learning_rate": 1.9960656627323293e-05, + "loss": 0.0191, "step": 333000 }, { "epoch": 5.03, - "learning_rate": 1.3297055025450088e-05, - "loss": 0.0147, + "learning_rate": 1.994558253817513e-05, + "loss": 0.0217, "step": 333500 }, { "epoch": 5.03, - "learning_rate": 1.3287005632684647e-05, - "loss": 0.0136, + "learning_rate": 1.993050844902697e-05, + "loss": 0.0207, "step": 334000 }, { "epoch": 5.04, - "learning_rate": 1.3276956239919204e-05, - "loss": 0.0136, + "learning_rate": 1.9915434359878802e-05, + "loss": 0.021, "step": 334500 }, { "epoch": 5.05, - "learning_rate": 1.3266906847153763e-05, - "loss": 0.0139, + "learning_rate": 1.9900360270730643e-05, + "loss": 0.0202, "step": 335000 }, { "epoch": 5.06, - "learning_rate": 1.3256857454388318e-05, - "loss": 0.0131, + "learning_rate": 1.9885286181582477e-05, + "loss": 0.0207, "step": 335500 }, { "epoch": 5.06, - "learning_rate": 1.3246808061622877e-05, - "loss": 0.0134, + "learning_rate": 1.9870212092434315e-05, + "loss": 0.0202, "step": 336000 }, { "epoch": 5.07, - "learning_rate": 1.3236758668857434e-05, - "loss": 0.0142, + "learning_rate": 1.9855138003286152e-05, + "loss": 0.0208, "step": 336500 }, { "epoch": 5.08, - "learning_rate": 1.3226709276091993e-05, - "loss": 0.0134, + "learning_rate": 1.984006391413799e-05, + "loss": 0.021, "step": 337000 }, { "epoch": 5.09, - "learning_rate": 1.321665988332655e-05, - "loss": 0.0133, + "learning_rate": 1.9824989824989823e-05, + "loss": 0.0198, "step": 337500 }, { "epoch": 5.1, - "learning_rate": 1.3206610490561109e-05, - "loss": 0.014, + "learning_rate": 1.9809915735841664e-05, + "loss": 0.0204, "step": 338000 }, { "epoch": 5.1, - "learning_rate": 1.3196561097795666e-05, - "loss": 0.0136, + "learning_rate": 1.97948416466935e-05, + "loss": 0.0204, "step": 338500 }, { "epoch": 5.11, - "learning_rate": 1.3186511705030225e-05, - "loss": 0.0139, + "learning_rate": 1.9779767557545336e-05, + "loss": 0.0207, "step": 339000 }, { "epoch": 5.12, - "learning_rate": 1.3176462312264782e-05, - "loss": 0.0138, + "learning_rate": 1.976469346839717e-05, + "loss": 0.0203, "step": 339500 }, { "epoch": 5.13, - "learning_rate": 1.3166412919499341e-05, - "loss": 0.0141, + "learning_rate": 1.974961937924901e-05, + "loss": 0.021, "step": 340000 }, { "epoch": 5.13, - "learning_rate": 1.3156363526733898e-05, - "loss": 0.0142, + "learning_rate": 1.9734545290100845e-05, + "loss": 0.0205, "step": 340500 }, { "epoch": 5.14, - "learning_rate": 1.3146314133968457e-05, - "loss": 0.014, + "learning_rate": 1.9719471200952682e-05, + "loss": 0.0204, "step": 341000 }, { "epoch": 5.15, - "learning_rate": 1.3136264741203012e-05, - "loss": 0.0137, + "learning_rate": 1.970439711180452e-05, + "loss": 0.0202, "step": 341500 }, { "epoch": 5.16, - "learning_rate": 1.3126215348437571e-05, - "loss": 0.014, + "learning_rate": 1.9689323022656357e-05, + "loss": 0.0206, "step": 342000 }, { "epoch": 5.16, - "learning_rate": 1.3116165955672128e-05, - "loss": 0.0144, + "learning_rate": 1.967424893350819e-05, + "loss": 0.0216, "step": 342500 }, { "epoch": 5.17, - "learning_rate": 1.3106116562906687e-05, - "loss": 0.0143, + "learning_rate": 1.9659174844360032e-05, + "loss": 0.0214, "step": 343000 }, { "epoch": 5.18, - "learning_rate": 1.3096067170141244e-05, - "loss": 0.0143, + "learning_rate": 1.9644100755211866e-05, + "loss": 0.022, "step": 343500 }, { "epoch": 5.19, - "learning_rate": 1.3086017777375803e-05, - "loss": 0.0141, + "learning_rate": 1.9629026666063703e-05, + "loss": 0.0211, "step": 344000 }, { "epoch": 5.19, - "learning_rate": 1.307596838461036e-05, - "loss": 0.0147, + "learning_rate": 1.961395257691554e-05, + "loss": 0.0208, "step": 344500 }, { "epoch": 5.2, - "learning_rate": 1.306591899184492e-05, - "loss": 0.0145, + "learning_rate": 1.9598878487767378e-05, + "loss": 0.0216, "step": 345000 }, { "epoch": 5.21, - "learning_rate": 1.3055869599079476e-05, - "loss": 0.0147, + "learning_rate": 1.9583804398619212e-05, + "loss": 0.0228, "step": 345500 }, { "epoch": 5.22, - "learning_rate": 1.3045820206314035e-05, - "loss": 0.0149, + "learning_rate": 1.9568730309471053e-05, + "loss": 0.0218, "step": 346000 }, { "epoch": 5.22, - "learning_rate": 1.3035770813548592e-05, - "loss": 0.0163, + "learning_rate": 1.9553656220322887e-05, + "loss": 0.0237, "step": 346500 }, { "epoch": 5.23, - "learning_rate": 1.3025721420783151e-05, - "loss": 0.0149, + "learning_rate": 1.9538582131174724e-05, + "loss": 0.0214, "step": 347000 }, { "epoch": 5.24, - "learning_rate": 1.3015672028017707e-05, - "loss": 0.015, + "learning_rate": 1.952350804202656e-05, + "loss": 0.0223, "step": 347500 }, { "epoch": 5.25, - "learning_rate": 1.3005622635252266e-05, - "loss": 0.0149, + "learning_rate": 1.95084339528784e-05, + "loss": 0.0226, "step": 348000 }, { "epoch": 5.25, - "learning_rate": 1.2995573242486823e-05, - "loss": 0.0144, + "learning_rate": 1.9493359863730233e-05, + "loss": 0.0219, "step": 348500 }, { "epoch": 5.26, - "learning_rate": 1.2985523849721382e-05, - "loss": 0.0146, + "learning_rate": 1.947828577458207e-05, + "loss": 0.0235, "step": 349000 }, { "epoch": 5.27, - "learning_rate": 1.2975474456955939e-05, - "loss": 0.014, + "learning_rate": 1.9463211685433908e-05, + "loss": 0.0201, "step": 349500 }, { "epoch": 5.28, - "learning_rate": 1.2965425064190498e-05, - "loss": 0.0141, + "learning_rate": 1.9448137596285745e-05, + "loss": 0.0216, "step": 350000 }, { "epoch": 5.28, - "learning_rate": 1.2955375671425055e-05, - "loss": 0.014, + "learning_rate": 1.943306350713758e-05, + "loss": 0.0218, "step": 350500 }, { "epoch": 5.29, - "learning_rate": 1.2945326278659614e-05, - "loss": 0.0142, + "learning_rate": 1.941798941798942e-05, + "loss": 0.0219, "step": 351000 }, { "epoch": 5.3, - "learning_rate": 1.293527688589417e-05, - "loss": 0.0136, + "learning_rate": 1.9402915328841254e-05, + "loss": 0.022, "step": 351500 }, { "epoch": 5.31, - "learning_rate": 1.292522749312873e-05, - "loss": 0.0156, + "learning_rate": 1.9387841239693092e-05, + "loss": 0.0231, "step": 352000 }, { "epoch": 5.31, - "learning_rate": 1.2915178100363287e-05, - "loss": 0.0143, + "learning_rate": 1.937276715054493e-05, + "loss": 0.0216, "step": 352500 }, { "epoch": 5.32, - "learning_rate": 1.2905128707597846e-05, - "loss": 0.0144, + "learning_rate": 1.9357693061396767e-05, + "loss": 0.0205, "step": 353000 }, { "epoch": 5.33, - "learning_rate": 1.2895079314832401e-05, - "loss": 0.0146, + "learning_rate": 1.93426189722486e-05, + "loss": 0.0212, "step": 353500 }, { "epoch": 5.34, - "learning_rate": 1.288502992206696e-05, - "loss": 0.0153, + "learning_rate": 1.9327544883100438e-05, + "loss": 0.021, "step": 354000 }, { "epoch": 5.34, - "learning_rate": 1.2874980529301517e-05, - "loss": 0.0148, + "learning_rate": 1.9312470793952276e-05, + "loss": 0.0227, "step": 354500 }, { "epoch": 5.35, - "learning_rate": 1.2864931136536076e-05, - "loss": 0.0148, + "learning_rate": 1.9297396704804113e-05, + "loss": 0.0233, "step": 355000 }, { "epoch": 5.36, - "learning_rate": 1.2854881743770633e-05, - "loss": 0.0151, + "learning_rate": 1.9282322615655947e-05, + "loss": 0.0232, "step": 355500 }, { "epoch": 5.37, - "learning_rate": 1.2844832351005192e-05, - "loss": 0.0143, + "learning_rate": 1.9267248526507788e-05, + "loss": 0.0214, "step": 356000 }, { "epoch": 5.37, - "learning_rate": 1.2834782958239749e-05, - "loss": 0.0137, + "learning_rate": 1.9252174437359622e-05, + "loss": 0.0213, "step": 356500 }, { "epoch": 5.38, - "learning_rate": 1.2824733565474308e-05, - "loss": 0.0146, + "learning_rate": 1.923710034821146e-05, + "loss": 0.0216, "step": 357000 }, { "epoch": 5.39, - "learning_rate": 1.2814684172708865e-05, - "loss": 0.0141, + "learning_rate": 1.9222026259063297e-05, + "loss": 0.0224, "step": 357500 }, { "epoch": 5.4, - "learning_rate": 1.2804634779943424e-05, - "loss": 0.0151, + "learning_rate": 1.9206952169915134e-05, + "loss": 0.0218, "step": 358000 }, { "epoch": 5.4, - "learning_rate": 1.279458538717798e-05, - "loss": 0.0145, + "learning_rate": 1.9191878080766968e-05, + "loss": 0.0216, "step": 358500 }, { "epoch": 5.41, - "learning_rate": 1.278453599441254e-05, - "loss": 0.0142, + "learning_rate": 1.917680399161881e-05, + "loss": 0.0221, "step": 359000 }, { "epoch": 5.42, - "learning_rate": 1.2774486601647095e-05, - "loss": 0.0144, + "learning_rate": 1.9161729902470643e-05, + "loss": 0.0226, "step": 359500 }, { "epoch": 5.43, - "learning_rate": 1.2764437208881654e-05, - "loss": 0.0151, + "learning_rate": 1.914665581332248e-05, + "loss": 0.0225, "step": 360000 }, { "epoch": 5.43, - "learning_rate": 1.2754387816116211e-05, - "loss": 0.0147, + "learning_rate": 1.9131581724174318e-05, + "loss": 0.0216, "step": 360500 }, { "epoch": 5.44, - "learning_rate": 1.274433842335077e-05, - "loss": 0.0142, + "learning_rate": 1.9116507635026155e-05, + "loss": 0.0218, "step": 361000 }, { "epoch": 5.45, - "learning_rate": 1.2734289030585327e-05, - "loss": 0.0158, + "learning_rate": 1.910143354587799e-05, + "loss": 0.0232, "step": 361500 }, { "epoch": 5.46, - "learning_rate": 1.2724239637819886e-05, - "loss": 0.0139, + "learning_rate": 1.9086359456729827e-05, + "loss": 0.0216, "step": 362000 }, { "epoch": 5.46, - "learning_rate": 1.2714190245054443e-05, - "loss": 0.0146, + "learning_rate": 1.9071285367581664e-05, + "loss": 0.0216, "step": 362500 }, { "epoch": 5.47, - "learning_rate": 1.2704140852289002e-05, - "loss": 0.0135, + "learning_rate": 1.90562112784335e-05, + "loss": 0.0213, "step": 363000 }, { "epoch": 5.48, - "learning_rate": 1.269409145952356e-05, - "loss": 0.0153, + "learning_rate": 1.9041137189285336e-05, + "loss": 0.0217, "step": 363500 }, { "epoch": 5.49, - "learning_rate": 1.2684042066758118e-05, - "loss": 0.0152, + "learning_rate": 1.9026063100137176e-05, + "loss": 0.0225, "step": 364000 }, { "epoch": 5.49, - "learning_rate": 1.2673992673992674e-05, - "loss": 0.0147, + "learning_rate": 1.901098901098901e-05, + "loss": 0.0216, "step": 364500 }, { "epoch": 5.5, - "learning_rate": 1.2663943281227234e-05, - "loss": 0.0143, + "learning_rate": 1.8995914921840848e-05, + "loss": 0.0224, "step": 365000 }, { "epoch": 5.51, - "learning_rate": 1.265389388846179e-05, - "loss": 0.0144, + "learning_rate": 1.8980840832692685e-05, + "loss": 0.0218, "step": 365500 }, { "epoch": 5.52, - "learning_rate": 1.2643844495696348e-05, - "loss": 0.0152, + "learning_rate": 1.8965766743544523e-05, + "loss": 0.0237, "step": 366000 }, { "epoch": 5.52, - "learning_rate": 1.2633795102930906e-05, - "loss": 0.015, + "learning_rate": 1.8950692654396357e-05, + "loss": 0.0216, "step": 366500 }, { "epoch": 5.53, - "learning_rate": 1.2623745710165464e-05, - "loss": 0.0139, + "learning_rate": 1.8935618565248198e-05, + "loss": 0.0213, "step": 367000 }, { "epoch": 5.54, - "learning_rate": 1.2613696317400022e-05, - "loss": 0.0159, + "learning_rate": 1.892054447610003e-05, + "loss": 0.0227, "step": 367500 }, { "epoch": 5.55, - "learning_rate": 1.260364692463458e-05, - "loss": 0.015, + "learning_rate": 1.890547038695187e-05, + "loss": 0.0226, "step": 368000 }, { "epoch": 5.55, - "learning_rate": 1.2593597531869138e-05, - "loss": 0.0146, + "learning_rate": 1.8890396297803706e-05, + "loss": 0.0223, "step": 368500 }, { "epoch": 5.56, - "learning_rate": 1.2583548139103696e-05, - "loss": 0.0152, + "learning_rate": 1.8875322208655544e-05, + "loss": 0.0216, "step": 369000 }, { "epoch": 5.57, - "learning_rate": 1.2573498746338254e-05, - "loss": 0.0147, + "learning_rate": 1.8860248119507378e-05, + "loss": 0.0214, "step": 369500 }, { "epoch": 5.58, - "learning_rate": 1.2563449353572812e-05, - "loss": 0.0153, + "learning_rate": 1.8845174030359215e-05, + "loss": 0.0227, "step": 370000 }, { "epoch": 5.58, - "learning_rate": 1.2553399960807368e-05, - "loss": 0.0151, + "learning_rate": 1.8830099941211053e-05, + "loss": 0.0223, "step": 370500 }, { "epoch": 5.59, - "learning_rate": 1.2543350568041928e-05, - "loss": 0.015, + "learning_rate": 1.881502585206289e-05, + "loss": 0.0221, "step": 371000 }, { "epoch": 5.6, - "learning_rate": 1.2533301175276484e-05, - "loss": 0.0144, + "learning_rate": 1.8799951762914724e-05, + "loss": 0.0224, "step": 371500 }, { "epoch": 5.61, - "learning_rate": 1.2523251782511043e-05, - "loss": 0.015, + "learning_rate": 1.8784877673766565e-05, + "loss": 0.0221, "step": 372000 }, { "epoch": 5.62, - "learning_rate": 1.25132023897456e-05, - "loss": 0.015, + "learning_rate": 1.87698035846184e-05, + "loss": 0.0221, "step": 372500 }, { "epoch": 5.62, - "learning_rate": 1.2503152996980159e-05, - "loss": 0.0147, + "learning_rate": 1.8754729495470236e-05, + "loss": 0.022, "step": 373000 }, { "epoch": 5.63, - "learning_rate": 1.2493103604214716e-05, - "loss": 0.0147, + "learning_rate": 1.8739655406322074e-05, + "loss": 0.0224, "step": 373500 }, { "epoch": 5.64, - "learning_rate": 1.2483054211449275e-05, - "loss": 0.0149, + "learning_rate": 1.872458131717391e-05, + "loss": 0.023, "step": 374000 }, { "epoch": 5.65, - "learning_rate": 1.2473004818683832e-05, - "loss": 0.0153, + "learning_rate": 1.8709507228025745e-05, + "loss": 0.0231, "step": 374500 }, { "epoch": 5.65, - "learning_rate": 1.246295542591839e-05, - "loss": 0.0163, + "learning_rate": 1.8694433138877586e-05, + "loss": 0.0247, "step": 375000 }, { "epoch": 5.66, - "learning_rate": 1.2452906033152948e-05, - "loss": 0.0155, + "learning_rate": 1.867935904972942e-05, + "loss": 0.0226, "step": 375500 }, { "epoch": 5.67, - "learning_rate": 1.2442856640387507e-05, - "loss": 0.0154, + "learning_rate": 1.8664284960581258e-05, + "loss": 0.0215, "step": 376000 }, { "epoch": 5.68, - "learning_rate": 1.2432807247622062e-05, - "loss": 0.0149, + "learning_rate": 1.8649210871433095e-05, + "loss": 0.023, "step": 376500 }, { "epoch": 5.68, - "learning_rate": 1.2422757854856623e-05, - "loss": 0.0156, + "learning_rate": 1.8634136782284932e-05, + "loss": 0.0233, "step": 377000 }, { "epoch": 5.69, - "learning_rate": 1.2412708462091178e-05, - "loss": 0.0149, + "learning_rate": 1.8619062693136766e-05, + "loss": 0.0225, "step": 377500 }, { "epoch": 5.7, - "learning_rate": 1.2402659069325737e-05, - "loss": 0.0142, + "learning_rate": 1.8603988603988604e-05, + "loss": 0.0231, "step": 378000 }, { "epoch": 5.71, - "learning_rate": 1.2392609676560294e-05, - "loss": 0.0145, + "learning_rate": 1.858891451484044e-05, + "loss": 0.0225, "step": 378500 }, { "epoch": 5.71, - "learning_rate": 1.2382560283794853e-05, - "loss": 0.0158, + "learning_rate": 1.857384042569228e-05, + "loss": 0.0237, "step": 379000 }, { "epoch": 5.72, - "learning_rate": 1.237251089102941e-05, - "loss": 0.0153, + "learning_rate": 1.8558766336544113e-05, + "loss": 0.022, "step": 379500 }, { "epoch": 5.73, - "learning_rate": 1.2362461498263969e-05, - "loss": 0.015, + "learning_rate": 1.8543692247395954e-05, + "loss": 0.0223, "step": 380000 }, { "epoch": 5.74, - "learning_rate": 1.2352412105498526e-05, - "loss": 0.0155, + "learning_rate": 1.8528618158247788e-05, + "loss": 0.0218, "step": 380500 }, { "epoch": 5.74, - "learning_rate": 1.2342362712733085e-05, - "loss": 0.0162, + "learning_rate": 1.8513544069099625e-05, + "loss": 0.0243, "step": 381000 }, { "epoch": 5.75, - "learning_rate": 1.2332313319967642e-05, - "loss": 0.0154, + "learning_rate": 1.8498469979951462e-05, + "loss": 0.0218, "step": 381500 }, { "epoch": 5.76, - "learning_rate": 1.2322263927202201e-05, - "loss": 0.0153, + "learning_rate": 1.84833958908033e-05, + "loss": 0.0229, "step": 382000 }, { "epoch": 5.77, - "learning_rate": 1.2312214534436756e-05, - "loss": 0.0153, + "learning_rate": 1.8468321801655134e-05, + "loss": 0.0226, "step": 382500 }, { "epoch": 5.77, - "learning_rate": 1.2302165141671315e-05, - "loss": 0.0139, + "learning_rate": 1.8453247712506975e-05, + "loss": 0.0207, "step": 383000 }, { "epoch": 5.78, - "learning_rate": 1.2292115748905872e-05, - "loss": 0.0152, + "learning_rate": 1.843817362335881e-05, + "loss": 0.0217, "step": 383500 }, { "epoch": 5.79, - "learning_rate": 1.2282066356140431e-05, - "loss": 0.015, + "learning_rate": 1.8423099534210646e-05, + "loss": 0.0218, "step": 384000 }, { "epoch": 5.8, - "learning_rate": 1.2272016963374988e-05, - "loss": 0.015, + "learning_rate": 1.840802544506248e-05, + "loss": 0.0225, "step": 384500 }, { "epoch": 5.8, - "learning_rate": 1.2261967570609547e-05, - "loss": 0.0153, + "learning_rate": 1.839295135591432e-05, + "loss": 0.0233, "step": 385000 }, { "epoch": 5.81, - "learning_rate": 1.2251918177844104e-05, - "loss": 0.0145, + "learning_rate": 1.8377877266766155e-05, + "loss": 0.0211, "step": 385500 }, { "epoch": 5.82, - "learning_rate": 1.2241868785078663e-05, - "loss": 0.015, + "learning_rate": 1.8362803177617992e-05, + "loss": 0.0213, "step": 386000 }, { "epoch": 5.83, - "learning_rate": 1.223181939231322e-05, - "loss": 0.0145, + "learning_rate": 1.834772908846983e-05, + "loss": 0.0233, "step": 386500 }, { "epoch": 5.83, - "learning_rate": 1.222176999954778e-05, - "loss": 0.0144, + "learning_rate": 1.8332654999321667e-05, + "loss": 0.0221, "step": 387000 }, { "epoch": 5.84, - "learning_rate": 1.2211720606782336e-05, - "loss": 0.0152, + "learning_rate": 1.83175809101735e-05, + "loss": 0.0224, "step": 387500 }, { "epoch": 5.85, - "learning_rate": 1.2201671214016895e-05, - "loss": 0.016, + "learning_rate": 1.8302506821025342e-05, + "loss": 0.0228, "step": 388000 }, { "epoch": 5.86, - "learning_rate": 1.219162182125145e-05, - "loss": 0.014, + "learning_rate": 1.8287432731877176e-05, + "loss": 0.0209, "step": 388500 }, { "epoch": 5.86, - "learning_rate": 1.218157242848601e-05, - "loss": 0.0152, + "learning_rate": 1.8272358642729014e-05, + "loss": 0.0224, "step": 389000 }, { "epoch": 5.87, - "learning_rate": 1.2171523035720567e-05, - "loss": 0.0156, + "learning_rate": 1.825728455358085e-05, + "loss": 0.0232, "step": 389500 }, { "epoch": 5.88, - "learning_rate": 1.2161473642955126e-05, - "loss": 0.0151, + "learning_rate": 1.824221046443269e-05, + "loss": 0.0224, "step": 390000 }, { "epoch": 5.89, - "learning_rate": 1.2151424250189683e-05, - "loss": 0.015, + "learning_rate": 1.8227136375284522e-05, + "loss": 0.022, "step": 390500 }, { "epoch": 5.89, - "learning_rate": 1.2141374857424242e-05, - "loss": 0.0156, + "learning_rate": 1.8212062286136363e-05, + "loss": 0.0222, "step": 391000 }, { "epoch": 5.9, - "learning_rate": 1.2131325464658799e-05, - "loss": 0.0154, + "learning_rate": 1.8196988196988197e-05, + "loss": 0.0218, "step": 391500 }, { "epoch": 5.91, - "learning_rate": 1.2121276071893358e-05, - "loss": 0.0138, + "learning_rate": 1.8181914107840035e-05, + "loss": 0.0205, "step": 392000 }, { "epoch": 5.92, - "learning_rate": 1.2111226679127915e-05, - "loss": 0.0157, + "learning_rate": 1.816684001869187e-05, + "loss": 0.0225, "step": 392500 }, { "epoch": 5.92, - "learning_rate": 1.2101177286362474e-05, - "loss": 0.0146, + "learning_rate": 1.815176592954371e-05, + "loss": 0.0221, "step": 393000 }, { "epoch": 5.93, - "learning_rate": 1.209112789359703e-05, - "loss": 0.0135, + "learning_rate": 1.8136691840395544e-05, + "loss": 0.0198, "step": 393500 }, { "epoch": 5.94, - "learning_rate": 1.208107850083159e-05, - "loss": 0.0144, + "learning_rate": 1.812161775124738e-05, + "loss": 0.0217, "step": 394000 }, { "epoch": 5.95, - "learning_rate": 1.2071029108066145e-05, - "loss": 0.0146, + "learning_rate": 1.810654366209922e-05, + "loss": 0.0207, "step": 394500 }, { "epoch": 5.95, - "learning_rate": 1.2060979715300704e-05, - "loss": 0.0156, + "learning_rate": 1.8091469572951056e-05, + "loss": 0.0229, "step": 395000 }, { "epoch": 5.96, - "learning_rate": 1.2050930322535261e-05, - "loss": 0.0153, + "learning_rate": 1.807639548380289e-05, + "loss": 0.0246, "step": 395500 }, { "epoch": 5.97, - "learning_rate": 1.204088092976982e-05, - "loss": 0.015, + "learning_rate": 1.806132139465473e-05, + "loss": 0.0222, "step": 396000 }, { "epoch": 5.98, - "learning_rate": 1.2030831537004377e-05, - "loss": 0.0149, + "learning_rate": 1.8046247305506565e-05, + "loss": 0.0217, "step": 396500 }, { "epoch": 5.98, - "learning_rate": 1.2020782144238936e-05, - "loss": 0.0174, + "learning_rate": 1.8031173216358402e-05, + "loss": 0.024, "step": 397000 }, { "epoch": 5.99, - "learning_rate": 1.2010732751473493e-05, - "loss": 0.0144, + "learning_rate": 1.801609912721024e-05, + "loss": 0.0212, "step": 397500 }, { "epoch": 6.0, - "learning_rate": 1.2000683358708052e-05, - "loss": 0.0142, + "learning_rate": 1.8001025038062077e-05, + "loss": 0.0215, "step": 398000 }, { "epoch": 6.0, - "eval_accuracy": 0.9905363391508886, - "eval_f1": 0.9411826216354806, - "eval_loss": 0.029992301017045975, - "eval_precision": 0.9303300032201254, - "eval_recall": 0.9522914275947946, - "eval_runtime": 228.6577, - "eval_samples_per_second": 515.723, - "eval_steps_per_second": 32.236, + "eval_accuracy": 0.9862980377553815, + "eval_f1": 0.9387226916343153, + "eval_loss": 0.04899383336305618, + "eval_precision": 0.9244231922482088, + "eval_recall": 0.9534715270486933, + "eval_runtime": 249.5171, + "eval_samples_per_second": 472.609, + "eval_steps_per_second": 29.541, "step": 398034 }, { "epoch": 6.01, - "learning_rate": 1.1990633965942609e-05, - "loss": 0.012, + "learning_rate": 1.798595094891391e-05, + "loss": 0.0174, "step": 398500 }, { "epoch": 6.01, - "learning_rate": 1.1980584573177168e-05, - "loss": 0.0117, + "learning_rate": 1.7970876859765752e-05, + "loss": 0.0181, "step": 399000 }, { "epoch": 6.02, - "learning_rate": 1.1970535180411725e-05, - "loss": 0.0109, + "learning_rate": 1.7955802770617586e-05, + "loss": 0.0163, "step": 399500 }, { "epoch": 6.03, - "learning_rate": 1.1960485787646284e-05, - "loss": 0.0111, + "learning_rate": 1.7940728681469423e-05, + "loss": 0.0163, "step": 400000 }, { "epoch": 6.04, - "learning_rate": 1.195043639488084e-05, - "loss": 0.0105, + "learning_rate": 1.7925654592321257e-05, + "loss": 0.0166, "step": 400500 }, { "epoch": 6.04, - "learning_rate": 1.1940387002115398e-05, - "loss": 0.0112, + "learning_rate": 1.7910580503173098e-05, + "loss": 0.0174, "step": 401000 }, { "epoch": 6.05, - "learning_rate": 1.1930337609349955e-05, - "loss": 0.0118, + "learning_rate": 1.7895506414024932e-05, + "loss": 0.0177, "step": 401500 }, { "epoch": 6.06, - "learning_rate": 1.1920288216584514e-05, - "loss": 0.0118, + "learning_rate": 1.788043232487677e-05, + "loss": 0.0175, "step": 402000 }, { "epoch": 6.07, - "learning_rate": 1.1910238823819071e-05, - "loss": 0.0116, + "learning_rate": 1.7865358235728607e-05, + "loss": 0.0176, "step": 402500 }, { "epoch": 6.07, - "learning_rate": 1.190018943105363e-05, - "loss": 0.0116, + "learning_rate": 1.7850284146580444e-05, + "loss": 0.0171, "step": 403000 }, { "epoch": 6.08, - "learning_rate": 1.1890140038288187e-05, - "loss": 0.0123, + "learning_rate": 1.783521005743228e-05, + "loss": 0.0184, "step": 403500 }, { "epoch": 6.09, - "learning_rate": 1.1880090645522746e-05, - "loss": 0.0119, + "learning_rate": 1.782013596828412e-05, + "loss": 0.0172, "step": 404000 }, { "epoch": 6.1, - "learning_rate": 1.1870041252757303e-05, - "loss": 0.0123, + "learning_rate": 1.7805061879135953e-05, + "loss": 0.0179, "step": 404500 }, { "epoch": 6.11, - "learning_rate": 1.1859991859991862e-05, - "loss": 0.0116, + "learning_rate": 1.778998778998779e-05, + "loss": 0.0171, "step": 405000 }, { "epoch": 6.11, - "learning_rate": 1.1849942467226418e-05, - "loss": 0.0114, + "learning_rate": 1.7774913700839628e-05, + "loss": 0.0173, "step": 405500 }, { "epoch": 6.12, - "learning_rate": 1.1839893074460978e-05, - "loss": 0.0119, + "learning_rate": 1.7759839611691466e-05, + "loss": 0.018, "step": 406000 }, { "epoch": 6.13, - "learning_rate": 1.1829843681695534e-05, - "loss": 0.0114, + "learning_rate": 1.77447655225433e-05, + "loss": 0.0178, "step": 406500 }, { "epoch": 6.14, - "learning_rate": 1.1819794288930093e-05, - "loss": 0.012, + "learning_rate": 1.772969143339514e-05, + "loss": 0.0174, "step": 407000 }, { "epoch": 6.14, - "learning_rate": 1.180974489616465e-05, - "loss": 0.0124, + "learning_rate": 1.7714617344246974e-05, + "loss": 0.0188, "step": 407500 }, { "epoch": 6.15, - "learning_rate": 1.1799695503399208e-05, - "loss": 0.012, + "learning_rate": 1.7699543255098812e-05, + "loss": 0.0182, "step": 408000 }, { "epoch": 6.16, - "learning_rate": 1.1789646110633766e-05, - "loss": 0.0118, + "learning_rate": 1.7684469165950646e-05, + "loss": 0.0177, "step": 408500 }, { "epoch": 6.17, - "learning_rate": 1.1779596717868324e-05, - "loss": 0.0121, + "learning_rate": 1.7669395076802487e-05, + "loss": 0.0182, "step": 409000 }, { "epoch": 6.17, - "learning_rate": 1.1769547325102882e-05, - "loss": 0.013, + "learning_rate": 1.765432098765432e-05, + "loss": 0.0191, "step": 409500 }, { "epoch": 6.18, - "learning_rate": 1.175949793233744e-05, - "loss": 0.0132, + "learning_rate": 1.7639246898506158e-05, + "loss": 0.0192, "step": 410000 }, { "epoch": 6.19, - "learning_rate": 1.1749448539571998e-05, - "loss": 0.0122, + "learning_rate": 1.7624172809357996e-05, + "loss": 0.0176, "step": 410500 }, { "epoch": 6.2, - "learning_rate": 1.1739399146806556e-05, - "loss": 0.0108, + "learning_rate": 1.7609098720209833e-05, + "loss": 0.0165, "step": 411000 }, { "epoch": 6.2, - "learning_rate": 1.1729349754041112e-05, - "loss": 0.0114, + "learning_rate": 1.7594024631061667e-05, + "loss": 0.017, "step": 411500 }, { "epoch": 6.21, - "learning_rate": 1.1719300361275672e-05, - "loss": 0.0121, + "learning_rate": 1.7578950541913508e-05, + "loss": 0.017, "step": 412000 }, { "epoch": 6.22, - "learning_rate": 1.1709250968510228e-05, - "loss": 0.0118, + "learning_rate": 1.7563876452765342e-05, + "loss": 0.0174, "step": 412500 }, { "epoch": 6.23, - "learning_rate": 1.1699201575744787e-05, - "loss": 0.013, + "learning_rate": 1.754880236361718e-05, + "loss": 0.0191, "step": 413000 }, { "epoch": 6.23, - "learning_rate": 1.1689152182979344e-05, - "loss": 0.0117, + "learning_rate": 1.7533728274469017e-05, + "loss": 0.017, "step": 413500 }, { "epoch": 6.24, - "learning_rate": 1.1679102790213903e-05, - "loss": 0.0112, + "learning_rate": 1.7518654185320854e-05, + "loss": 0.0176, "step": 414000 }, { "epoch": 6.25, - "learning_rate": 1.166905339744846e-05, - "loss": 0.0122, + "learning_rate": 1.7503580096172688e-05, + "loss": 0.0171, "step": 414500 }, { "epoch": 6.26, - "learning_rate": 1.1659004004683019e-05, - "loss": 0.0117, + "learning_rate": 1.7488506007024526e-05, + "loss": 0.018, "step": 415000 }, { "epoch": 6.26, - "learning_rate": 1.1648954611917576e-05, - "loss": 0.012, + "learning_rate": 1.7473431917876363e-05, + "loss": 0.0189, "step": 415500 }, { "epoch": 6.27, - "learning_rate": 1.1638905219152135e-05, - "loss": 0.013, + "learning_rate": 1.74583578287282e-05, + "loss": 0.0198, "step": 416000 }, { "epoch": 6.28, - "learning_rate": 1.1628855826386692e-05, - "loss": 0.012, + "learning_rate": 1.7443283739580035e-05, + "loss": 0.0193, "step": 416500 }, { "epoch": 6.29, - "learning_rate": 1.161880643362125e-05, - "loss": 0.0123, + "learning_rate": 1.7428209650431875e-05, + "loss": 0.0177, "step": 417000 }, { "epoch": 6.29, - "learning_rate": 1.1608757040855806e-05, - "loss": 0.0118, + "learning_rate": 1.741313556128371e-05, + "loss": 0.0182, "step": 417500 }, { "epoch": 6.3, - "learning_rate": 1.1598707648090367e-05, - "loss": 0.0118, + "learning_rate": 1.7398061472135547e-05, + "loss": 0.0183, "step": 418000 }, { "epoch": 6.31, - "learning_rate": 1.1588658255324922e-05, - "loss": 0.012, + "learning_rate": 1.7382987382987384e-05, + "loss": 0.0178, "step": 418500 }, { "epoch": 6.32, - "learning_rate": 1.1578608862559481e-05, - "loss": 0.0122, + "learning_rate": 1.736791329383922e-05, + "loss": 0.0175, "step": 419000 }, { "epoch": 6.32, - "learning_rate": 1.1568559469794038e-05, - "loss": 0.0125, + "learning_rate": 1.7352839204691056e-05, + "loss": 0.018, "step": 419500 }, { "epoch": 6.33, - "learning_rate": 1.1558510077028597e-05, - "loss": 0.0124, + "learning_rate": 1.7337765115542896e-05, + "loss": 0.0186, "step": 420000 }, { "epoch": 6.34, - "learning_rate": 1.1548460684263154e-05, - "loss": 0.0122, + "learning_rate": 1.732269102639473e-05, + "loss": 0.017, "step": 420500 }, { "epoch": 6.35, - "learning_rate": 1.1538411291497713e-05, - "loss": 0.0117, + "learning_rate": 1.7307616937246568e-05, + "loss": 0.0173, "step": 421000 }, { "epoch": 6.35, - "learning_rate": 1.152836189873227e-05, - "loss": 0.0124, + "learning_rate": 1.7292542848098405e-05, + "loss": 0.0185, "step": 421500 }, { "epoch": 6.36, - "learning_rate": 1.1518312505966829e-05, - "loss": 0.0119, + "learning_rate": 1.7277468758950243e-05, + "loss": 0.0177, "step": 422000 }, { "epoch": 6.37, - "learning_rate": 1.1508263113201386e-05, - "loss": 0.0125, + "learning_rate": 1.7262394669802077e-05, + "loss": 0.0196, "step": 422500 }, { "epoch": 6.38, - "learning_rate": 1.1498213720435945e-05, - "loss": 0.0128, + "learning_rate": 1.7247320580653914e-05, + "loss": 0.0189, "step": 423000 }, { "epoch": 6.38, - "learning_rate": 1.14881643276705e-05, - "loss": 0.0135, + "learning_rate": 1.723224649150575e-05, + "loss": 0.0189, "step": 423500 }, { "epoch": 6.39, - "learning_rate": 1.1478114934905061e-05, - "loss": 0.0119, + "learning_rate": 1.721717240235759e-05, + "loss": 0.0199, "step": 424000 }, { "epoch": 6.4, - "learning_rate": 1.1468065542139617e-05, - "loss": 0.0121, + "learning_rate": 1.7202098313209423e-05, + "loss": 0.0188, "step": 424500 }, { "epoch": 6.41, - "learning_rate": 1.1458016149374175e-05, - "loss": 0.0129, + "learning_rate": 1.7187024224061264e-05, + "loss": 0.0189, "step": 425000 }, { "epoch": 6.41, - "learning_rate": 1.1447966756608733e-05, - "loss": 0.0121, + "learning_rate": 1.7171950134913098e-05, + "loss": 0.0182, "step": 425500 }, { "epoch": 6.42, - "learning_rate": 1.1437917363843291e-05, - "loss": 0.0123, + "learning_rate": 1.7156876045764935e-05, + "loss": 0.0182, "step": 426000 }, { "epoch": 6.43, - "learning_rate": 1.1427867971077849e-05, - "loss": 0.0123, + "learning_rate": 1.7141801956616773e-05, + "loss": 0.0186, "step": 426500 }, { "epoch": 6.44, - "learning_rate": 1.1417818578312407e-05, - "loss": 0.0124, + "learning_rate": 1.712672786746861e-05, + "loss": 0.0196, "step": 427000 }, { "epoch": 6.44, - "learning_rate": 1.1407769185546965e-05, - "loss": 0.0117, + "learning_rate": 1.7111653778320444e-05, + "loss": 0.0178, "step": 427500 }, { "epoch": 6.45, - "learning_rate": 1.1397719792781523e-05, - "loss": 0.0128, + "learning_rate": 1.7096579689172285e-05, + "loss": 0.0187, "step": 428000 }, { "epoch": 6.46, - "learning_rate": 1.138767040001608e-05, - "loss": 0.0126, + "learning_rate": 1.708150560002412e-05, + "loss": 0.0189, "step": 428500 }, { "epoch": 6.47, - "learning_rate": 1.137762100725064e-05, - "loss": 0.0129, + "learning_rate": 1.7066431510875957e-05, + "loss": 0.0191, "step": 429000 }, { "epoch": 6.47, - "learning_rate": 1.1367571614485195e-05, - "loss": 0.0119, + "learning_rate": 1.7051357421727794e-05, + "loss": 0.0188, "step": 429500 }, { "epoch": 6.48, - "learning_rate": 1.1357522221719754e-05, - "loss": 0.0124, + "learning_rate": 1.703628333257963e-05, + "loss": 0.0199, "step": 430000 }, { "epoch": 6.49, - "learning_rate": 1.134747282895431e-05, - "loss": 0.0126, + "learning_rate": 1.7021209243431465e-05, + "loss": 0.0176, "step": 430500 }, { "epoch": 6.5, - "learning_rate": 1.133742343618887e-05, - "loss": 0.0129, + "learning_rate": 1.7006135154283303e-05, + "loss": 0.0187, "step": 431000 }, { "epoch": 6.5, - "learning_rate": 1.1327374043423427e-05, - "loss": 0.012, + "learning_rate": 1.699106106513514e-05, + "loss": 0.0186, "step": 431500 }, { "epoch": 6.51, - "learning_rate": 1.1317324650657986e-05, - "loss": 0.0117, + "learning_rate": 1.6975986975986978e-05, + "loss": 0.0185, "step": 432000 }, { "epoch": 6.52, - "learning_rate": 1.1307275257892543e-05, - "loss": 0.0116, + "learning_rate": 1.6960912886838812e-05, + "loss": 0.0177, "step": 432500 }, { "epoch": 6.53, - "learning_rate": 1.1297225865127102e-05, - "loss": 0.0116, + "learning_rate": 1.6945838797690653e-05, + "loss": 0.0187, "step": 433000 }, { "epoch": 6.53, - "learning_rate": 1.1287176472361659e-05, - "loss": 0.0124, + "learning_rate": 1.6930764708542487e-05, + "loss": 0.0192, "step": 433500 }, { "epoch": 6.54, - "learning_rate": 1.1277127079596218e-05, - "loss": 0.0123, + "learning_rate": 1.6915690619394324e-05, + "loss": 0.0186, "step": 434000 }, { "epoch": 6.55, - "learning_rate": 1.1267077686830775e-05, - "loss": 0.0124, + "learning_rate": 1.690061653024616e-05, + "loss": 0.0187, "step": 434500 }, { "epoch": 6.56, - "learning_rate": 1.1257028294065334e-05, - "loss": 0.0122, + "learning_rate": 1.6885542441098e-05, + "loss": 0.0189, "step": 435000 }, { "epoch": 6.56, - "learning_rate": 1.1246978901299889e-05, - "loss": 0.013, + "learning_rate": 1.6870468351949833e-05, + "loss": 0.0193, "step": 435500 }, { "epoch": 6.57, - "learning_rate": 1.1236929508534448e-05, - "loss": 0.0121, + "learning_rate": 1.6855394262801674e-05, + "loss": 0.0173, "step": 436000 }, { "epoch": 6.58, - "learning_rate": 1.1226880115769005e-05, - "loss": 0.0119, + "learning_rate": 1.6840320173653508e-05, + "loss": 0.0192, "step": 436500 }, { "epoch": 6.59, - "learning_rate": 1.1216830723003564e-05, - "loss": 0.0134, + "learning_rate": 1.6825246084505345e-05, + "loss": 0.0195, "step": 437000 }, { "epoch": 6.59, - "learning_rate": 1.1206781330238121e-05, - "loss": 0.0121, + "learning_rate": 1.6810171995357183e-05, + "loss": 0.0173, "step": 437500 }, { "epoch": 6.6, - "learning_rate": 1.119673193747268e-05, - "loss": 0.0125, + "learning_rate": 1.679509790620902e-05, + "loss": 0.0196, "step": 438000 }, { "epoch": 6.61, - "learning_rate": 1.1186682544707237e-05, - "loss": 0.0122, + "learning_rate": 1.6780023817060854e-05, + "loss": 0.019, "step": 438500 }, { "epoch": 6.62, - "learning_rate": 1.1176633151941796e-05, - "loss": 0.0127, + "learning_rate": 1.676494972791269e-05, + "loss": 0.0174, "step": 439000 }, { "epoch": 6.63, - "learning_rate": 1.1166583759176353e-05, - "loss": 0.0118, + "learning_rate": 1.674987563876453e-05, + "loss": 0.0183, "step": 439500 }, { "epoch": 6.63, - "learning_rate": 1.1156534366410912e-05, - "loss": 0.0124, + "learning_rate": 1.6734801549616366e-05, + "loss": 0.0179, "step": 440000 }, { "epoch": 6.64, - "learning_rate": 1.1146484973645469e-05, - "loss": 0.0131, + "learning_rate": 1.67197274604682e-05, + "loss": 0.0198, "step": 440500 }, { "epoch": 6.65, - "learning_rate": 1.1136435580880028e-05, - "loss": 0.0129, + "learning_rate": 1.670465337132004e-05, + "loss": 0.0193, "step": 441000 }, { "epoch": 6.66, - "learning_rate": 1.1126386188114583e-05, - "loss": 0.0121, + "learning_rate": 1.6689579282171875e-05, + "loss": 0.0187, "step": 441500 }, { "epoch": 6.66, - "learning_rate": 1.1116336795349142e-05, - "loss": 0.0132, + "learning_rate": 1.6674505193023713e-05, + "loss": 0.0204, "step": 442000 }, { "epoch": 6.67, - "learning_rate": 1.11062874025837e-05, - "loss": 0.013, + "learning_rate": 1.665943110387555e-05, + "loss": 0.0192, "step": 442500 }, { "epoch": 6.68, - "learning_rate": 1.1096238009818258e-05, - "loss": 0.0128, + "learning_rate": 1.6644357014727387e-05, + "loss": 0.0177, "step": 443000 }, { "epoch": 6.69, - "learning_rate": 1.1086188617052815e-05, - "loss": 0.012, + "learning_rate": 1.662928292557922e-05, + "loss": 0.0182, "step": 443500 }, { "epoch": 6.69, - "learning_rate": 1.1076139224287374e-05, - "loss": 0.0132, + "learning_rate": 1.6614208836431062e-05, + "loss": 0.0196, "step": 444000 }, { "epoch": 6.7, - "learning_rate": 1.1066089831521931e-05, - "loss": 0.0127, + "learning_rate": 1.6599134747282896e-05, + "loss": 0.0195, "step": 444500 }, { "epoch": 6.71, - "learning_rate": 1.105604043875649e-05, - "loss": 0.0123, + "learning_rate": 1.6584060658134734e-05, + "loss": 0.0178, "step": 445000 }, { "epoch": 6.72, - "learning_rate": 1.1045991045991047e-05, - "loss": 0.0122, + "learning_rate": 1.6568986568986568e-05, + "loss": 0.0196, "step": 445500 }, { "epoch": 6.72, - "learning_rate": 1.1035941653225606e-05, - "loss": 0.0121, + "learning_rate": 1.655391247983841e-05, + "loss": 0.0197, "step": 446000 }, { "epoch": 6.73, - "learning_rate": 1.1025892260460163e-05, - "loss": 0.0126, + "learning_rate": 1.6538838390690243e-05, + "loss": 0.0187, "step": 446500 }, { "epoch": 6.74, - "learning_rate": 1.1015842867694722e-05, - "loss": 0.0116, + "learning_rate": 1.652376430154208e-05, + "loss": 0.0192, "step": 447000 }, { "epoch": 6.75, - "learning_rate": 1.1005793474929278e-05, - "loss": 0.0135, + "learning_rate": 1.6508690212393917e-05, + "loss": 0.0194, "step": 447500 }, { "epoch": 6.75, - "learning_rate": 1.0995744082163835e-05, - "loss": 0.0134, + "learning_rate": 1.649361612324575e-05, + "loss": 0.0198, "step": 448000 }, { "epoch": 6.76, - "learning_rate": 1.0985694689398394e-05, - "loss": 0.0131, + "learning_rate": 1.647854203409759e-05, + "loss": 0.0194, "step": 448500 }, { "epoch": 6.77, - "learning_rate": 1.0975645296632951e-05, - "loss": 0.0119, + "learning_rate": 1.6463467944949426e-05, + "loss": 0.0187, "step": 449000 }, { "epoch": 6.78, - "learning_rate": 1.096559590386751e-05, - "loss": 0.0122, + "learning_rate": 1.6448393855801264e-05, + "loss": 0.02, "step": 449500 }, { "epoch": 6.78, - "learning_rate": 1.0955546511102067e-05, - "loss": 0.0128, + "learning_rate": 1.6433319766653098e-05, + "loss": 0.0199, "step": 450000 }, { "epoch": 6.79, - "learning_rate": 1.0945497118336626e-05, - "loss": 0.0122, + "learning_rate": 1.641824567750494e-05, + "loss": 0.0185, "step": 450500 }, { "epoch": 6.8, - "learning_rate": 1.0935447725571183e-05, - "loss": 0.0135, + "learning_rate": 1.6403171588356773e-05, + "loss": 0.0204, "step": 451000 }, { "epoch": 6.81, - "learning_rate": 1.0925398332805742e-05, - "loss": 0.0127, + "learning_rate": 1.638809749920861e-05, + "loss": 0.0181, "step": 451500 }, { "epoch": 6.81, - "learning_rate": 1.0915348940040297e-05, - "loss": 0.0129, + "learning_rate": 1.6373023410060447e-05, + "loss": 0.0194, "step": 452000 }, { "epoch": 6.82, - "learning_rate": 1.0905299547274856e-05, - "loss": 0.0126, + "learning_rate": 1.6357949320912285e-05, + "loss": 0.0178, "step": 452500 }, { "epoch": 6.83, - "learning_rate": 1.0895250154509413e-05, - "loss": 0.0122, + "learning_rate": 1.634287523176412e-05, + "loss": 0.0186, "step": 453000 }, { "epoch": 6.84, - "learning_rate": 1.0885200761743972e-05, - "loss": 0.0118, + "learning_rate": 1.6327801142615956e-05, + "loss": 0.0187, "step": 453500 }, { "epoch": 6.84, - "learning_rate": 1.087515136897853e-05, - "loss": 0.0131, + "learning_rate": 1.6312727053467794e-05, + "loss": 0.0192, "step": 454000 }, { "epoch": 6.85, - "learning_rate": 1.0865101976213088e-05, - "loss": 0.0131, + "learning_rate": 1.629765296431963e-05, + "loss": 0.0193, "step": 454500 }, { "epoch": 6.86, - "learning_rate": 1.0855052583447645e-05, - "loss": 0.0129, + "learning_rate": 1.6282578875171465e-05, + "loss": 0.0192, "step": 455000 }, { "epoch": 6.87, - "learning_rate": 1.0845003190682204e-05, - "loss": 0.0124, + "learning_rate": 1.6267504786023306e-05, + "loss": 0.0197, "step": 455500 }, { "epoch": 6.87, - "learning_rate": 1.0834953797916761e-05, - "loss": 0.0136, + "learning_rate": 1.625243069687514e-05, + "loss": 0.0199, "step": 456000 }, { "epoch": 6.88, - "learning_rate": 1.082490440515132e-05, - "loss": 0.0133, + "learning_rate": 1.6237356607726977e-05, + "loss": 0.0195, "step": 456500 }, { "epoch": 6.89, - "learning_rate": 1.0814855012385877e-05, - "loss": 0.0128, + "learning_rate": 1.6222282518578815e-05, + "loss": 0.0196, "step": 457000 }, { "epoch": 6.9, - "learning_rate": 1.0804805619620436e-05, - "loss": 0.0136, + "learning_rate": 1.6207208429430652e-05, + "loss": 0.0194, "step": 457500 }, { "epoch": 6.9, - "learning_rate": 1.0794756226854991e-05, - "loss": 0.0121, + "learning_rate": 1.6192134340282486e-05, + "loss": 0.0189, "step": 458000 }, { "epoch": 6.91, - "learning_rate": 1.078470683408955e-05, - "loss": 0.0133, + "learning_rate": 1.6177060251134327e-05, + "loss": 0.0208, "step": 458500 }, { "epoch": 6.92, - "learning_rate": 1.0774657441324107e-05, - "loss": 0.0122, + "learning_rate": 1.616198616198616e-05, + "loss": 0.0188, "step": 459000 }, { "epoch": 6.93, - "learning_rate": 1.0764608048558666e-05, - "loss": 0.0125, + "learning_rate": 1.6146912072838e-05, + "loss": 0.0187, "step": 459500 }, { "epoch": 6.93, - "learning_rate": 1.0754558655793223e-05, - "loss": 0.012, + "learning_rate": 1.6131837983689836e-05, + "loss": 0.0176, "step": 460000 }, { "epoch": 6.94, - "learning_rate": 1.0744509263027782e-05, - "loss": 0.0124, + "learning_rate": 1.6116763894541673e-05, + "loss": 0.0185, "step": 460500 }, { "epoch": 6.95, - "learning_rate": 1.073445987026234e-05, - "loss": 0.0129, + "learning_rate": 1.6101689805393507e-05, + "loss": 0.0197, "step": 461000 }, { "epoch": 6.96, - "learning_rate": 1.0724410477496898e-05, - "loss": 0.0128, + "learning_rate": 1.6086615716245345e-05, + "loss": 0.0193, "step": 461500 }, { "epoch": 6.96, - "learning_rate": 1.0714361084731455e-05, - "loss": 0.0129, + "learning_rate": 1.6071541627097182e-05, + "loss": 0.0192, "step": 462000 }, { "epoch": 6.97, - "learning_rate": 1.0704311691966014e-05, - "loss": 0.0128, + "learning_rate": 1.605646753794902e-05, + "loss": 0.0182, "step": 462500 }, { "epoch": 6.98, - "learning_rate": 1.0694262299200571e-05, - "loss": 0.0128, + "learning_rate": 1.6041393448800854e-05, + "loss": 0.0199, "step": 463000 }, { "epoch": 6.99, - "learning_rate": 1.068421290643513e-05, - "loss": 0.0129, + "learning_rate": 1.6026319359652695e-05, + "loss": 0.0194, "step": 463500 }, { "epoch": 6.99, - "learning_rate": 1.0674163513669686e-05, - "loss": 0.0127, + "learning_rate": 1.601124527050453e-05, + "loss": 0.0187, "step": 464000 }, { "epoch": 7.0, - "eval_accuracy": 0.9909258751742362, - "eval_f1": 0.9459397100867732, - "eval_loss": 0.032830607146024704, - "eval_precision": 0.9329476062893032, - "eval_recall": 0.9592987767658764, - "eval_runtime": 230.9084, - "eval_samples_per_second": 510.696, - "eval_steps_per_second": 31.922, + "eval_accuracy": 0.9867848831812434, + "eval_f1": 0.9414967774830111, + "eval_loss": 0.04958844557404518, + "eval_precision": 0.9220167739418763, + "eval_recall": 0.9618176787036811, + "eval_runtime": 215.2851, + "eval_samples_per_second": 547.757, + "eval_steps_per_second": 34.238, "step": 464373 }, { "epoch": 7.0, - "learning_rate": 1.0664114120904245e-05, - "loss": 0.0119, + "learning_rate": 1.5996171181356366e-05, + "loss": 0.0178, "step": 464500 }, { "epoch": 7.01, - "learning_rate": 1.0654064728138802e-05, - "loss": 0.0096, + "learning_rate": 1.5981097092208203e-05, + "loss": 0.0159, "step": 465000 }, { "epoch": 7.02, - "learning_rate": 1.064401533537336e-05, - "loss": 0.01, + "learning_rate": 1.596602300306004e-05, + "loss": 0.0156, "step": 465500 }, { "epoch": 7.02, - "learning_rate": 1.0633965942607918e-05, - "loss": 0.0092, + "learning_rate": 1.5950948913911875e-05, + "loss": 0.0141, "step": 466000 }, { "epoch": 7.03, - "learning_rate": 1.0623916549842477e-05, - "loss": 0.0098, + "learning_rate": 1.5935874824763716e-05, + "loss": 0.0136, "step": 466500 }, { "epoch": 7.04, - "learning_rate": 1.0613867157077034e-05, - "loss": 0.009, + "learning_rate": 1.592080073561555e-05, + "loss": 0.0139, "step": 467000 }, { "epoch": 7.05, - "learning_rate": 1.0603817764311593e-05, - "loss": 0.0096, + "learning_rate": 1.5905726646467387e-05, + "loss": 0.0144, "step": 467500 }, { "epoch": 7.05, - "learning_rate": 1.059376837154615e-05, - "loss": 0.0096, + "learning_rate": 1.5890652557319225e-05, + "loss": 0.0142, "step": 468000 }, { "epoch": 7.06, - "learning_rate": 1.0583718978780709e-05, - "loss": 0.01, + "learning_rate": 1.5875578468171062e-05, + "loss": 0.0146, "step": 468500 }, { "epoch": 7.07, - "learning_rate": 1.0573669586015266e-05, - "loss": 0.0099, + "learning_rate": 1.5860504379022896e-05, + "loss": 0.0157, "step": 469000 }, { "epoch": 7.08, - "learning_rate": 1.0563620193249825e-05, - "loss": 0.0095, + "learning_rate": 1.5845430289874733e-05, + "loss": 0.0144, "step": 469500 }, { "epoch": 7.08, - "learning_rate": 1.055357080048438e-05, - "loss": 0.01, + "learning_rate": 1.583035620072657e-05, + "loss": 0.0153, "step": 470000 }, { "epoch": 7.09, - "learning_rate": 1.0543521407718939e-05, - "loss": 0.0092, + "learning_rate": 1.581528211157841e-05, + "loss": 0.0141, "step": 470500 }, { "epoch": 7.1, - "learning_rate": 1.0533472014953496e-05, - "loss": 0.0102, + "learning_rate": 1.5800208022430242e-05, + "loss": 0.014, "step": 471000 }, { "epoch": 7.11, - "learning_rate": 1.0523422622188055e-05, - "loss": 0.0101, + "learning_rate": 1.5785133933282083e-05, + "loss": 0.0149, "step": 471500 }, { "epoch": 7.11, - "learning_rate": 1.0513373229422612e-05, - "loss": 0.0099, + "learning_rate": 1.5770059844133917e-05, + "loss": 0.015, "step": 472000 }, { "epoch": 7.12, - "learning_rate": 1.0503323836657171e-05, - "loss": 0.0093, + "learning_rate": 1.5754985754985755e-05, + "loss": 0.0137, "step": 472500 }, { "epoch": 7.13, - "learning_rate": 1.0493274443891728e-05, - "loss": 0.0106, + "learning_rate": 1.5739911665837592e-05, + "loss": 0.0157, "step": 473000 }, { "epoch": 7.14, - "learning_rate": 1.0483225051126287e-05, - "loss": 0.0104, + "learning_rate": 1.572483757668943e-05, + "loss": 0.0153, "step": 473500 }, { "epoch": 7.15, - "learning_rate": 1.0473175658360844e-05, - "loss": 0.0102, + "learning_rate": 1.5709763487541264e-05, + "loss": 0.0148, "step": 474000 }, { "epoch": 7.15, - "learning_rate": 1.0463126265595403e-05, - "loss": 0.0099, + "learning_rate": 1.5694689398393104e-05, + "loss": 0.0153, "step": 474500 }, { "epoch": 7.16, - "learning_rate": 1.045307687282996e-05, - "loss": 0.0097, + "learning_rate": 1.567961530924494e-05, + "loss": 0.0147, "step": 475000 }, { "epoch": 7.17, - "learning_rate": 1.0443027480064519e-05, - "loss": 0.0099, + "learning_rate": 1.5664541220096776e-05, + "loss": 0.0156, "step": 475500 }, { "epoch": 7.18, - "learning_rate": 1.0432978087299074e-05, - "loss": 0.0103, + "learning_rate": 1.564946713094861e-05, + "loss": 0.0148, "step": 476000 }, { "epoch": 7.18, - "learning_rate": 1.0422928694533633e-05, - "loss": 0.011, + "learning_rate": 1.563439304180045e-05, + "loss": 0.0162, "step": 476500 }, { "epoch": 7.19, - "learning_rate": 1.041287930176819e-05, - "loss": 0.0109, + "learning_rate": 1.5619318952652285e-05, + "loss": 0.0155, "step": 477000 }, { "epoch": 7.2, - "learning_rate": 1.040282990900275e-05, - "loss": 0.0103, + "learning_rate": 1.5604244863504122e-05, + "loss": 0.015, "step": 477500 }, { "epoch": 7.21, - "learning_rate": 1.0392780516237306e-05, - "loss": 0.0102, + "learning_rate": 1.558917077435596e-05, + "loss": 0.0149, "step": 478000 }, { "epoch": 7.21, - "learning_rate": 1.0382731123471865e-05, - "loss": 0.0094, + "learning_rate": 1.5574096685207797e-05, + "loss": 0.0148, "step": 478500 }, { "epoch": 7.22, - "learning_rate": 1.0372681730706422e-05, - "loss": 0.0097, + "learning_rate": 1.555902259605963e-05, + "loss": 0.0153, "step": 479000 }, { "epoch": 7.23, - "learning_rate": 1.0362632337940981e-05, - "loss": 0.0102, + "learning_rate": 1.5543948506911472e-05, + "loss": 0.0163, "step": 479500 }, { "epoch": 7.24, - "learning_rate": 1.0352582945175538e-05, - "loss": 0.0103, + "learning_rate": 1.5528874417763306e-05, + "loss": 0.0168, "step": 480000 }, { "epoch": 7.24, - "learning_rate": 1.0342533552410097e-05, - "loss": 0.011, + "learning_rate": 1.5513800328615143e-05, + "loss": 0.0151, "step": 480500 }, { "epoch": 7.25, - "learning_rate": 1.0332484159644653e-05, - "loss": 0.0101, + "learning_rate": 1.549872623946698e-05, + "loss": 0.0148, "step": 481000 }, { "epoch": 7.26, - "learning_rate": 1.0322434766879213e-05, - "loss": 0.01, + "learning_rate": 1.5483652150318818e-05, + "loss": 0.0152, "step": 481500 }, { "epoch": 7.27, - "learning_rate": 1.0312385374113769e-05, - "loss": 0.0102, + "learning_rate": 1.5468578061170652e-05, + "loss": 0.0165, "step": 482000 }, { "epoch": 7.27, - "learning_rate": 1.0302335981348327e-05, - "loss": 0.0106, + "learning_rate": 1.5453503972022493e-05, + "loss": 0.0162, "step": 482500 }, { "epoch": 7.28, - "learning_rate": 1.0292286588582885e-05, - "loss": 0.0102, + "learning_rate": 1.5438429882874327e-05, + "loss": 0.0152, "step": 483000 }, { "epoch": 7.29, - "learning_rate": 1.0282237195817443e-05, - "loss": 0.0111, + "learning_rate": 1.5423355793726164e-05, + "loss": 0.0174, "step": 483500 }, { "epoch": 7.3, - "learning_rate": 1.0272187803052e-05, - "loss": 0.0101, + "learning_rate": 1.5408281704578e-05, + "loss": 0.0151, "step": 484000 }, { "epoch": 7.3, - "learning_rate": 1.026213841028656e-05, - "loss": 0.0103, + "learning_rate": 1.539320761542984e-05, + "loss": 0.0147, "step": 484500 }, { "epoch": 7.31, - "learning_rate": 1.0252089017521117e-05, - "loss": 0.0104, + "learning_rate": 1.5378133526281673e-05, + "loss": 0.0156, "step": 485000 }, { "epoch": 7.32, - "learning_rate": 1.0242039624755675e-05, - "loss": 0.01, + "learning_rate": 1.536305943713351e-05, + "loss": 0.0151, "step": 485500 }, { "epoch": 7.33, - "learning_rate": 1.0231990231990233e-05, - "loss": 0.0094, + "learning_rate": 1.5347985347985348e-05, + "loss": 0.0153, "step": 486000 }, { "epoch": 7.33, - "learning_rate": 1.0221940839224791e-05, - "loss": 0.0113, + "learning_rate": 1.5332911258837186e-05, + "loss": 0.0164, "step": 486500 }, { "epoch": 7.34, - "learning_rate": 1.0211891446459347e-05, - "loss": 0.0107, + "learning_rate": 1.531783716968902e-05, + "loss": 0.0159, "step": 487000 }, { "epoch": 7.35, - "learning_rate": 1.0201842053693907e-05, - "loss": 0.0107, + "learning_rate": 1.530276308054086e-05, + "loss": 0.0152, "step": 487500 }, { "epoch": 7.36, - "learning_rate": 1.0191792660928463e-05, - "loss": 0.0102, + "learning_rate": 1.5287688991392694e-05, + "loss": 0.0161, "step": 488000 }, { "epoch": 7.36, - "learning_rate": 1.0181743268163022e-05, - "loss": 0.0103, + "learning_rate": 1.5272614902244532e-05, + "loss": 0.0157, "step": 488500 }, { "epoch": 7.37, - "learning_rate": 1.0171693875397579e-05, - "loss": 0.0102, + "learning_rate": 1.5257540813096368e-05, + "loss": 0.0154, "step": 489000 }, { "epoch": 7.38, - "learning_rate": 1.0161644482632138e-05, - "loss": 0.0105, + "learning_rate": 1.5242466723948207e-05, + "loss": 0.0155, "step": 489500 }, { "epoch": 7.39, - "learning_rate": 1.0151595089866695e-05, - "loss": 0.0107, + "learning_rate": 1.522739263480004e-05, + "loss": 0.0155, "step": 490000 }, { "epoch": 7.39, - "learning_rate": 1.0141545697101254e-05, - "loss": 0.0104, + "learning_rate": 1.521231854565188e-05, + "loss": 0.0164, "step": 490500 }, { "epoch": 7.4, - "learning_rate": 1.0131496304335811e-05, - "loss": 0.0113, + "learning_rate": 1.5197244456503716e-05, + "loss": 0.0158, "step": 491000 }, { "epoch": 7.41, - "learning_rate": 1.012144691157037e-05, - "loss": 0.0098, + "learning_rate": 1.5182170367355553e-05, + "loss": 0.0144, "step": 491500 }, { "epoch": 7.42, - "learning_rate": 1.0111397518804927e-05, - "loss": 0.0106, + "learning_rate": 1.5167096278207389e-05, + "loss": 0.0153, "step": 492000 }, { "epoch": 7.42, - "learning_rate": 1.0101348126039486e-05, - "loss": 0.0104, + "learning_rate": 1.5152022189059228e-05, + "loss": 0.0159, "step": 492500 }, { "epoch": 7.43, - "learning_rate": 1.0091298733274041e-05, - "loss": 0.0107, + "learning_rate": 1.5136948099911062e-05, + "loss": 0.0158, "step": 493000 }, { "epoch": 7.44, - "learning_rate": 1.0081249340508602e-05, - "loss": 0.0106, + "learning_rate": 1.5121874010762901e-05, + "loss": 0.0163, "step": 493500 }, { "epoch": 7.45, - "learning_rate": 1.0071199947743157e-05, - "loss": 0.011, + "learning_rate": 1.5106799921614735e-05, + "loss": 0.0154, "step": 494000 }, { "epoch": 7.45, - "learning_rate": 1.0061150554977716e-05, - "loss": 0.0101, + "learning_rate": 1.5091725832466574e-05, + "loss": 0.0155, "step": 494500 }, { "epoch": 7.46, - "learning_rate": 1.0051101162212273e-05, - "loss": 0.01, + "learning_rate": 1.507665174331841e-05, + "loss": 0.0152, "step": 495000 }, { "epoch": 7.47, - "learning_rate": 1.0041051769446832e-05, - "loss": 0.0098, + "learning_rate": 1.5061577654170247e-05, + "loss": 0.0143, "step": 495500 }, { "epoch": 7.48, - "learning_rate": 1.003100237668139e-05, - "loss": 0.0098, + "learning_rate": 1.5046503565022083e-05, + "loss": 0.0145, "step": 496000 }, { "epoch": 7.48, - "learning_rate": 1.0020952983915948e-05, - "loss": 0.0108, + "learning_rate": 1.5031429475873922e-05, + "loss": 0.0161, "step": 496500 }, { "epoch": 7.49, - "learning_rate": 1.0010903591150505e-05, - "loss": 0.0102, + "learning_rate": 1.5016355386725756e-05, + "loss": 0.0156, "step": 497000 }, { "epoch": 7.5, - "learning_rate": 1.0000854198385064e-05, - "loss": 0.0105, + "learning_rate": 1.5001281297577595e-05, + "loss": 0.0163, "step": 497500 }, { "epoch": 7.51, - "learning_rate": 9.990804805619621e-06, - "loss": 0.0106, + "learning_rate": 1.4986207208429431e-05, + "loss": 0.0159, "step": 498000 }, { "epoch": 7.51, - "learning_rate": 9.98075541285418e-06, - "loss": 0.0095, + "learning_rate": 1.4971133119281268e-05, + "loss": 0.0149, "step": 498500 }, { "epoch": 7.52, - "learning_rate": 9.970706020088737e-06, - "loss": 0.0106, + "learning_rate": 1.4956059030133106e-05, + "loss": 0.0166, "step": 499000 }, { "epoch": 7.53, - "learning_rate": 9.960656627323294e-06, - "loss": 0.0105, + "learning_rate": 1.4940984940984942e-05, + "loss": 0.0154, "step": 499500 }, { "epoch": 7.54, - "learning_rate": 9.950607234557853e-06, - "loss": 0.0108, + "learning_rate": 1.4925910851836779e-05, + "loss": 0.0147, "step": 500000 }, { "epoch": 7.54, - "learning_rate": 9.94055784179241e-06, - "loss": 0.0109, + "learning_rate": 1.4910836762688615e-05, + "loss": 0.0151, "step": 500500 }, { "epoch": 7.55, - "learning_rate": 9.93050844902697e-06, - "loss": 0.0096, + "learning_rate": 1.4895762673540452e-05, + "loss": 0.0141, "step": 501000 }, { "epoch": 7.56, - "learning_rate": 9.920459056261526e-06, - "loss": 0.0103, + "learning_rate": 1.488068858439229e-05, + "loss": 0.0149, "step": 501500 }, { "epoch": 7.57, - "learning_rate": 9.910409663496085e-06, - "loss": 0.0108, + "learning_rate": 1.4865614495244125e-05, + "loss": 0.0165, "step": 502000 }, { "epoch": 7.57, - "learning_rate": 9.900360270730642e-06, - "loss": 0.0108, + "learning_rate": 1.4850540406095963e-05, + "loss": 0.0165, "step": 502500 }, { "epoch": 7.58, - "learning_rate": 9.8903108779652e-06, - "loss": 0.0113, + "learning_rate": 1.48354663169478e-05, + "loss": 0.0169, "step": 503000 }, { "epoch": 7.59, - "learning_rate": 9.880261485199758e-06, - "loss": 0.0106, + "learning_rate": 1.4820392227799636e-05, + "loss": 0.0163, "step": 503500 }, { "epoch": 7.6, - "learning_rate": 9.870212092434315e-06, - "loss": 0.0102, + "learning_rate": 1.4805318138651473e-05, + "loss": 0.0155, "step": 504000 }, { "epoch": 7.6, - "learning_rate": 9.860162699668874e-06, - "loss": 0.0104, + "learning_rate": 1.4790244049503309e-05, + "loss": 0.0166, "step": 504500 }, { "epoch": 7.61, - "learning_rate": 9.850113306903431e-06, - "loss": 0.0094, + "learning_rate": 1.4775169960355146e-05, + "loss": 0.015, "step": 505000 }, { "epoch": 7.62, - "learning_rate": 9.840063914137989e-06, - "loss": 0.011, + "learning_rate": 1.4760095871206984e-05, + "loss": 0.0153, "step": 505500 }, { "epoch": 7.63, - "learning_rate": 9.830014521372547e-06, - "loss": 0.0109, + "learning_rate": 1.474502178205882e-05, + "loss": 0.016, "step": 506000 }, { "epoch": 7.64, - "learning_rate": 9.819965128607105e-06, - "loss": 0.0106, + "learning_rate": 1.4729947692910657e-05, + "loss": 0.0167, "step": 506500 }, { "epoch": 7.64, - "learning_rate": 9.809915735841663e-06, - "loss": 0.0106, + "learning_rate": 1.4714873603762494e-05, + "loss": 0.0165, "step": 507000 }, { "epoch": 7.65, - "learning_rate": 9.79986634307622e-06, - "loss": 0.0111, + "learning_rate": 1.469979951461433e-05, + "loss": 0.0161, "step": 507500 }, { "epoch": 7.66, - "learning_rate": 9.78981695031078e-06, - "loss": 0.0104, + "learning_rate": 1.4684725425466168e-05, + "loss": 0.0162, "step": 508000 }, { "epoch": 7.67, - "learning_rate": 9.779767557545337e-06, - "loss": 0.0108, + "learning_rate": 1.4669651336318003e-05, + "loss": 0.0158, "step": 508500 }, { "epoch": 7.67, - "learning_rate": 9.769718164779894e-06, - "loss": 0.0099, + "learning_rate": 1.465457724716984e-05, + "loss": 0.0147, "step": 509000 }, { "epoch": 7.68, - "learning_rate": 9.759668772014453e-06, - "loss": 0.0111, + "learning_rate": 1.4639503158021678e-05, + "loss": 0.0169, "step": 509500 }, { "epoch": 7.69, - "learning_rate": 9.74961937924901e-06, - "loss": 0.0097, + "learning_rate": 1.4624429068873514e-05, + "loss": 0.0159, "step": 510000 }, { "epoch": 7.7, - "learning_rate": 9.739569986483569e-06, - "loss": 0.0104, + "learning_rate": 1.4609354979725351e-05, + "loss": 0.0156, "step": 510500 }, { "epoch": 7.7, - "learning_rate": 9.729520593718126e-06, - "loss": 0.0103, + "learning_rate": 1.4594280890577189e-05, + "loss": 0.0156, "step": 511000 }, { "epoch": 7.71, - "learning_rate": 9.719471200952683e-06, - "loss": 0.0108, + "learning_rate": 1.4579206801429024e-05, + "loss": 0.0167, "step": 511500 }, { "epoch": 7.72, - "learning_rate": 9.709421808187242e-06, - "loss": 0.0109, + "learning_rate": 1.4564132712280862e-05, + "loss": 0.0161, "step": 512000 }, { "epoch": 7.73, - "learning_rate": 9.699372415421799e-06, - "loss": 0.0103, + "learning_rate": 1.4549058623132698e-05, + "loss": 0.0164, "step": 512500 }, { "epoch": 7.73, - "learning_rate": 9.689323022656358e-06, - "loss": 0.0101, + "learning_rate": 1.4533984533984535e-05, + "loss": 0.015, "step": 513000 }, { "epoch": 7.74, - "learning_rate": 9.679273629890915e-06, - "loss": 0.0109, + "learning_rate": 1.4518910444836372e-05, + "loss": 0.0157, "step": 513500 }, { "epoch": 7.75, - "learning_rate": 9.669224237125474e-06, - "loss": 0.0106, + "learning_rate": 1.4503836355688208e-05, + "loss": 0.0163, "step": 514000 }, { "epoch": 7.76, - "learning_rate": 9.659174844360031e-06, - "loss": 0.011, + "learning_rate": 1.4488762266540046e-05, + "loss": 0.0158, "step": 514500 }, { "epoch": 7.76, - "learning_rate": 9.649125451594588e-06, - "loss": 0.0108, + "learning_rate": 1.4473688177391883e-05, + "loss": 0.0164, "step": 515000 }, { "epoch": 7.77, - "learning_rate": 9.639076058829147e-06, - "loss": 0.0104, + "learning_rate": 1.4458614088243719e-05, + "loss": 0.0164, "step": 515500 }, { "epoch": 7.78, - "learning_rate": 9.629026666063704e-06, - "loss": 0.0105, + "learning_rate": 1.4443539999095556e-05, + "loss": 0.0155, "step": 516000 }, { "epoch": 7.79, - "learning_rate": 9.618977273298263e-06, - "loss": 0.0108, + "learning_rate": 1.4428465909947392e-05, + "loss": 0.0168, "step": 516500 }, { "epoch": 7.79, - "learning_rate": 9.60892788053282e-06, - "loss": 0.0106, + "learning_rate": 1.441339182079923e-05, + "loss": 0.0163, "step": 517000 }, { "epoch": 7.8, - "learning_rate": 9.598878487767377e-06, - "loss": 0.0101, + "learning_rate": 1.4398317731651067e-05, + "loss": 0.0154, "step": 517500 }, { "epoch": 7.81, - "learning_rate": 9.588829095001936e-06, - "loss": 0.0113, + "learning_rate": 1.4383243642502902e-05, + "loss": 0.0159, "step": 518000 }, { "epoch": 7.82, - "learning_rate": 9.578779702236493e-06, - "loss": 0.0108, + "learning_rate": 1.436816955335474e-05, + "loss": 0.0175, "step": 518500 }, { "epoch": 7.82, - "learning_rate": 9.568730309471052e-06, - "loss": 0.0109, + "learning_rate": 1.4353095464206577e-05, + "loss": 0.0164, "step": 519000 }, { "epoch": 7.83, - "learning_rate": 9.55868091670561e-06, - "loss": 0.0104, + "learning_rate": 1.4338021375058413e-05, + "loss": 0.0155, "step": 519500 }, { "epoch": 7.84, - "learning_rate": 9.548631523940168e-06, - "loss": 0.0107, + "learning_rate": 1.432294728591025e-05, + "loss": 0.0167, "step": 520000 }, { "epoch": 7.85, - "learning_rate": 9.538582131174725e-06, - "loss": 0.0102, + "learning_rate": 1.4307873196762086e-05, + "loss": 0.0157, "step": 520500 }, { "epoch": 7.85, - "learning_rate": 9.528532738409282e-06, - "loss": 0.0111, + "learning_rate": 1.4292799107613924e-05, + "loss": 0.0165, "step": 521000 }, { "epoch": 7.86, - "learning_rate": 9.518483345643841e-06, - "loss": 0.0111, + "learning_rate": 1.4277725018465761e-05, + "loss": 0.016, "step": 521500 }, { "epoch": 7.87, - "learning_rate": 9.508433952878398e-06, - "loss": 0.0106, + "learning_rate": 1.4262650929317597e-05, + "loss": 0.0161, "step": 522000 }, { "epoch": 7.88, - "learning_rate": 9.498384560112956e-06, - "loss": 0.0103, + "learning_rate": 1.4247576840169432e-05, + "loss": 0.0155, "step": 522500 }, { "epoch": 7.88, - "learning_rate": 9.488335167347513e-06, - "loss": 0.0101, + "learning_rate": 1.423250275102127e-05, + "loss": 0.015, "step": 523000 }, { "epoch": 7.89, - "learning_rate": 9.478285774582072e-06, - "loss": 0.0108, + "learning_rate": 1.4217428661873106e-05, + "loss": 0.0157, "step": 523500 }, { "epoch": 7.9, - "learning_rate": 9.468236381816629e-06, - "loss": 0.0105, + "learning_rate": 1.4202354572724943e-05, + "loss": 0.016, "step": 524000 }, { "epoch": 7.91, - "learning_rate": 9.458186989051188e-06, - "loss": 0.0107, + "learning_rate": 1.4187280483576779e-05, + "loss": 0.0159, "step": 524500 }, { "epoch": 7.91, - "learning_rate": 9.448137596285745e-06, - "loss": 0.0107, + "learning_rate": 1.4172206394428616e-05, + "loss": 0.0169, "step": 525000 }, { "epoch": 7.92, - "learning_rate": 9.438088203520302e-06, - "loss": 0.0105, + "learning_rate": 1.4157132305280454e-05, + "loss": 0.0157, "step": 525500 }, { "epoch": 7.93, - "learning_rate": 9.42803881075486e-06, - "loss": 0.0107, + "learning_rate": 1.414205821613229e-05, + "loss": 0.0161, "step": 526000 }, { "epoch": 7.94, - "learning_rate": 9.417989417989418e-06, - "loss": 0.0103, + "learning_rate": 1.4126984126984127e-05, + "loss": 0.0155, "step": 526500 }, { "epoch": 7.94, - "learning_rate": 9.407940025223977e-06, - "loss": 0.0119, + "learning_rate": 1.4111910037835964e-05, + "loss": 0.0165, "step": 527000 }, { "epoch": 7.95, - "learning_rate": 9.397890632458534e-06, - "loss": 0.01, + "learning_rate": 1.40968359486878e-05, + "loss": 0.0148, "step": 527500 }, { "epoch": 7.96, - "learning_rate": 9.387841239693091e-06, - "loss": 0.0104, + "learning_rate": 1.4081761859539637e-05, + "loss": 0.0155, "step": 528000 }, { "epoch": 7.97, - "learning_rate": 9.37779184692765e-06, - "loss": 0.0103, + "learning_rate": 1.4066687770391473e-05, + "loss": 0.0148, "step": 528500 }, { "epoch": 7.97, - "learning_rate": 9.367742454162207e-06, - "loss": 0.0115, + "learning_rate": 1.405161368124331e-05, + "loss": 0.0161, "step": 529000 }, { "epoch": 7.98, - "learning_rate": 9.357693061396766e-06, - "loss": 0.0102, + "learning_rate": 1.4036539592095148e-05, + "loss": 0.0161, "step": 529500 }, { "epoch": 7.99, - "learning_rate": 9.347643668631323e-06, - "loss": 0.0109, + "learning_rate": 1.4021465502946984e-05, + "loss": 0.0164, "step": 530000 }, { "epoch": 8.0, - "learning_rate": 9.337594275865882e-06, - "loss": 0.0114, + "learning_rate": 1.4006391413798821e-05, + "loss": 0.0168, "step": 530500 }, { "epoch": 8.0, - "eval_accuracy": 0.9908770980950169, - "eval_f1": 0.9460637647588714, - "eval_loss": 0.0413435734808445, - "eval_precision": 0.9350758786259967, - "eval_recall": 0.957312954236557, - "eval_runtime": 232.1729, - "eval_samples_per_second": 507.914, - "eval_steps_per_second": 31.748, + "eval_accuracy": 0.9867512582951073, + "eval_f1": 0.9419972937708403, + "eval_loss": 0.05773118510842323, + "eval_precision": 0.9323328948941767, + "eval_recall": 0.951864150206314, + "eval_runtime": 228.8409, + "eval_samples_per_second": 515.31, + "eval_steps_per_second": 32.21, "step": 530712 }, { "epoch": 8.0, - "learning_rate": 9.327544883100439e-06, - "loss": 0.009, + "learning_rate": 1.3991317324650657e-05, + "loss": 0.0136, "step": 531000 }, { "epoch": 8.01, - "learning_rate": 9.317495490334996e-06, - "loss": 0.009, + "learning_rate": 1.3976243235502494e-05, + "loss": 0.0122, "step": 531500 }, { "epoch": 8.02, - "learning_rate": 9.307446097569555e-06, - "loss": 0.0088, + "learning_rate": 1.3961169146354332e-05, + "loss": 0.0137, "step": 532000 }, { "epoch": 8.03, - "learning_rate": 9.297396704804112e-06, - "loss": 0.0076, + "learning_rate": 1.3946095057206167e-05, + "loss": 0.0119, "step": 532500 }, { "epoch": 8.03, - "learning_rate": 9.287347312038671e-06, - "loss": 0.008, + "learning_rate": 1.3931020968058005e-05, + "loss": 0.0123, "step": 533000 }, { "epoch": 8.04, - "learning_rate": 9.277297919273228e-06, - "loss": 0.0085, + "learning_rate": 1.3915946878909842e-05, + "loss": 0.0138, "step": 533500 }, { "epoch": 8.05, - "learning_rate": 9.267248526507785e-06, - "loss": 0.0088, + "learning_rate": 1.3900872789761678e-05, + "loss": 0.0129, "step": 534000 }, { "epoch": 8.06, - "learning_rate": 9.257199133742344e-06, - "loss": 0.0078, + "learning_rate": 1.3885798700613515e-05, + "loss": 0.0117, "step": 534500 }, { "epoch": 8.06, - "learning_rate": 9.247149740976901e-06, - "loss": 0.008, + "learning_rate": 1.3870724611465351e-05, + "loss": 0.0117, "step": 535000 }, { "epoch": 8.07, - "learning_rate": 9.23710034821146e-06, - "loss": 0.0086, + "learning_rate": 1.3855650522317188e-05, + "loss": 0.0124, "step": 535500 }, { "epoch": 8.08, - "learning_rate": 9.227050955446017e-06, - "loss": 0.0076, + "learning_rate": 1.3840576433169026e-05, + "loss": 0.0123, "step": 536000 }, { "epoch": 8.09, - "learning_rate": 9.217001562680576e-06, - "loss": 0.0085, + "learning_rate": 1.3825502344020862e-05, + "loss": 0.0125, "step": 536500 }, { "epoch": 8.09, - "learning_rate": 9.206952169915133e-06, - "loss": 0.0086, + "learning_rate": 1.3810428254872699e-05, + "loss": 0.0127, "step": 537000 }, { "epoch": 8.1, - "learning_rate": 9.19690277714969e-06, - "loss": 0.0087, + "learning_rate": 1.3795354165724536e-05, + "loss": 0.0128, "step": 537500 }, { "epoch": 8.11, - "learning_rate": 9.18685338438425e-06, - "loss": 0.0075, + "learning_rate": 1.3780280076576372e-05, + "loss": 0.0125, "step": 538000 }, { "epoch": 8.12, - "learning_rate": 9.176803991618806e-06, - "loss": 0.0077, + "learning_rate": 1.376520598742821e-05, + "loss": 0.0121, "step": 538500 }, { "epoch": 8.12, - "learning_rate": 9.166754598853365e-06, - "loss": 0.0083, + "learning_rate": 1.3750131898280045e-05, + "loss": 0.0133, "step": 539000 }, { "epoch": 8.13, - "learning_rate": 9.156705206087922e-06, - "loss": 0.0086, + "learning_rate": 1.3735057809131883e-05, + "loss": 0.0122, "step": 539500 }, { "epoch": 8.14, - "learning_rate": 9.14665581332248e-06, - "loss": 0.0088, + "learning_rate": 1.371998371998372e-05, + "loss": 0.0129, "step": 540000 }, { "epoch": 8.15, - "learning_rate": 9.136606420557038e-06, - "loss": 0.0086, + "learning_rate": 1.3704909630835556e-05, + "loss": 0.0129, "step": 540500 }, { "epoch": 8.16, - "learning_rate": 9.126557027791596e-06, - "loss": 0.0088, + "learning_rate": 1.3689835541687393e-05, + "loss": 0.0143, "step": 541000 }, { "epoch": 8.16, - "learning_rate": 9.116507635026154e-06, - "loss": 0.0095, + "learning_rate": 1.367476145253923e-05, + "loss": 0.0134, "step": 541500 }, { "epoch": 8.17, - "learning_rate": 9.106458242260712e-06, - "loss": 0.0088, + "learning_rate": 1.3659687363391066e-05, + "loss": 0.0126, "step": 542000 }, { "epoch": 8.18, - "learning_rate": 9.09640884949527e-06, - "loss": 0.0087, + "learning_rate": 1.3644613274242904e-05, + "loss": 0.0123, "step": 542500 }, { "epoch": 8.19, - "learning_rate": 9.086359456729828e-06, - "loss": 0.0083, + "learning_rate": 1.362953918509474e-05, + "loss": 0.0133, "step": 543000 }, { "epoch": 8.19, - "learning_rate": 9.076310063964385e-06, - "loss": 0.0095, + "learning_rate": 1.3614465095946577e-05, + "loss": 0.013, "step": 543500 }, { "epoch": 8.2, - "learning_rate": 9.066260671198944e-06, - "loss": 0.0087, + "learning_rate": 1.3599391006798414e-05, + "loss": 0.0127, "step": 544000 }, { "epoch": 8.21, - "learning_rate": 9.0562112784335e-06, - "loss": 0.0086, + "learning_rate": 1.358431691765025e-05, + "loss": 0.0124, "step": 544500 }, { "epoch": 8.22, - "learning_rate": 9.04616188566806e-06, - "loss": 0.0093, + "learning_rate": 1.3569242828502088e-05, + "loss": 0.0131, "step": 545000 }, { "epoch": 8.22, - "learning_rate": 9.036112492902617e-06, - "loss": 0.0083, + "learning_rate": 1.3554168739353925e-05, + "loss": 0.0129, "step": 545500 }, { "epoch": 8.23, - "learning_rate": 9.026063100137174e-06, - "loss": 0.0084, + "learning_rate": 1.353909465020576e-05, + "loss": 0.0137, "step": 546000 }, { "epoch": 8.24, - "learning_rate": 9.016013707371733e-06, - "loss": 0.0087, + "learning_rate": 1.3524020561057598e-05, + "loss": 0.0119, "step": 546500 }, { "epoch": 8.25, - "learning_rate": 9.00596431460629e-06, - "loss": 0.008, + "learning_rate": 1.3508946471909434e-05, + "loss": 0.0133, "step": 547000 }, { "epoch": 8.25, - "learning_rate": 8.995914921840849e-06, - "loss": 0.0087, + "learning_rate": 1.3493872382761271e-05, + "loss": 0.0134, "step": 547500 }, { "epoch": 8.26, - "learning_rate": 8.985865529075406e-06, - "loss": 0.0088, + "learning_rate": 1.3478798293613109e-05, + "loss": 0.0128, "step": 548000 }, { "epoch": 8.27, - "learning_rate": 8.975816136309963e-06, - "loss": 0.0091, + "learning_rate": 1.3463724204464945e-05, + "loss": 0.0141, "step": 548500 }, { "epoch": 8.28, - "learning_rate": 8.965766743544522e-06, - "loss": 0.0087, + "learning_rate": 1.3448650115316782e-05, + "loss": 0.0132, "step": 549000 }, { "epoch": 8.28, - "learning_rate": 8.955717350779079e-06, - "loss": 0.0098, + "learning_rate": 1.343357602616862e-05, + "loss": 0.0144, "step": 549500 }, { "epoch": 8.29, - "learning_rate": 8.945667958013638e-06, - "loss": 0.0095, + "learning_rate": 1.3418501937020455e-05, + "loss": 0.0136, "step": 550000 }, { "epoch": 8.3, - "learning_rate": 8.935618565248195e-06, - "loss": 0.0096, + "learning_rate": 1.3403427847872293e-05, + "loss": 0.0129, "step": 550500 }, { "epoch": 8.31, - "learning_rate": 8.925569172482754e-06, - "loss": 0.0089, + "learning_rate": 1.3388353758724128e-05, + "loss": 0.0134, "step": 551000 }, { "epoch": 8.31, - "learning_rate": 8.915519779717311e-06, - "loss": 0.0089, + "learning_rate": 1.3373279669575966e-05, + "loss": 0.0131, "step": 551500 }, { "epoch": 8.32, - "learning_rate": 8.905470386951868e-06, - "loss": 0.0079, + "learning_rate": 1.3358205580427803e-05, + "loss": 0.0124, "step": 552000 }, { "epoch": 8.33, - "learning_rate": 8.895420994186427e-06, - "loss": 0.0084, + "learning_rate": 1.3343131491279639e-05, + "loss": 0.0127, "step": 552500 }, { "epoch": 8.34, - "learning_rate": 8.885371601420984e-06, - "loss": 0.0083, + "learning_rate": 1.3328057402131476e-05, + "loss": 0.0128, "step": 553000 }, { "epoch": 8.34, - "learning_rate": 8.875322208655543e-06, - "loss": 0.01, + "learning_rate": 1.3312983312983314e-05, + "loss": 0.0142, "step": 553500 }, { "epoch": 8.35, - "learning_rate": 8.8652728158901e-06, - "loss": 0.0088, + "learning_rate": 1.329790922383515e-05, + "loss": 0.0127, "step": 554000 }, { "epoch": 8.36, - "learning_rate": 8.855223423124657e-06, - "loss": 0.009, + "learning_rate": 1.3282835134686987e-05, + "loss": 0.0127, "step": 554500 }, { "epoch": 8.37, - "learning_rate": 8.845174030359216e-06, - "loss": 0.0092, + "learning_rate": 1.3267761045538823e-05, + "loss": 0.0135, "step": 555000 }, { "epoch": 8.37, - "learning_rate": 8.835124637593773e-06, - "loss": 0.0084, + "learning_rate": 1.325268695639066e-05, + "loss": 0.0142, "step": 555500 }, { "epoch": 8.38, - "learning_rate": 8.825075244828332e-06, - "loss": 0.0086, + "learning_rate": 1.3237612867242497e-05, + "loss": 0.0126, "step": 556000 }, { "epoch": 8.39, - "learning_rate": 8.81502585206289e-06, - "loss": 0.0084, + "learning_rate": 1.3222538778094333e-05, + "loss": 0.013, "step": 556500 }, { "epoch": 8.4, - "learning_rate": 8.804976459297448e-06, - "loss": 0.0078, + "learning_rate": 1.320746468894617e-05, + "loss": 0.0127, "step": 557000 }, { "epoch": 8.4, - "learning_rate": 8.794927066532005e-06, - "loss": 0.0091, + "learning_rate": 1.3192390599798008e-05, + "loss": 0.0136, "step": 557500 }, { "epoch": 8.41, - "learning_rate": 8.784877673766562e-06, - "loss": 0.0092, + "learning_rate": 1.3177316510649844e-05, + "loss": 0.0132, "step": 558000 }, { "epoch": 8.42, - "learning_rate": 8.774828281001121e-06, - "loss": 0.0093, + "learning_rate": 1.3162242421501681e-05, + "loss": 0.0141, "step": 558500 }, { "epoch": 8.43, - "learning_rate": 8.764778888235678e-06, - "loss": 0.0085, + "learning_rate": 1.3147168332353517e-05, + "loss": 0.0121, "step": 559000 }, { "epoch": 8.43, - "learning_rate": 8.754729495470237e-06, - "loss": 0.0082, + "learning_rate": 1.3132094243205354e-05, + "loss": 0.0129, "step": 559500 }, { "epoch": 8.44, - "learning_rate": 8.744680102704794e-06, - "loss": 0.0084, + "learning_rate": 1.3117020154057192e-05, + "loss": 0.0138, "step": 560000 }, { "epoch": 8.45, - "learning_rate": 8.734630709939352e-06, - "loss": 0.0091, + "learning_rate": 1.3101946064909027e-05, + "loss": 0.0132, "step": 560500 }, { "epoch": 8.46, - "learning_rate": 8.72458131717391e-06, - "loss": 0.0093, + "learning_rate": 1.3086871975760865e-05, + "loss": 0.0131, "step": 561000 }, { "epoch": 8.46, - "learning_rate": 8.714531924408468e-06, - "loss": 0.0092, + "learning_rate": 1.30717978866127e-05, + "loss": 0.0133, "step": 561500 }, { "epoch": 8.47, - "learning_rate": 8.704482531643026e-06, - "loss": 0.0086, + "learning_rate": 1.3056723797464538e-05, + "loss": 0.013, "step": 562000 }, { "epoch": 8.48, - "learning_rate": 8.694433138877584e-06, - "loss": 0.0097, + "learning_rate": 1.3041649708316375e-05, + "loss": 0.0152, "step": 562500 }, { "epoch": 8.49, - "learning_rate": 8.684383746112142e-06, - "loss": 0.0092, + "learning_rate": 1.3026575619168211e-05, + "loss": 0.0142, "step": 563000 }, { "epoch": 8.49, - "learning_rate": 8.6743343533467e-06, - "loss": 0.0085, + "learning_rate": 1.3011501530020049e-05, + "loss": 0.0132, "step": 563500 }, { "epoch": 8.5, - "learning_rate": 8.664284960581257e-06, - "loss": 0.0099, + "learning_rate": 1.2996427440871886e-05, + "loss": 0.0138, "step": 564000 }, { "epoch": 8.51, - "learning_rate": 8.654235567815816e-06, - "loss": 0.0083, + "learning_rate": 1.2981353351723722e-05, + "loss": 0.0127, "step": 564500 }, { "epoch": 8.52, - "learning_rate": 8.644186175050373e-06, - "loss": 0.0092, + "learning_rate": 1.2966279262575559e-05, + "loss": 0.0135, "step": 565000 }, { "epoch": 8.52, - "learning_rate": 8.634136782284932e-06, - "loss": 0.0093, + "learning_rate": 1.2951205173427395e-05, + "loss": 0.014, "step": 565500 }, { "epoch": 8.53, - "learning_rate": 8.624087389519489e-06, - "loss": 0.009, + "learning_rate": 1.2936131084279232e-05, + "loss": 0.014, "step": 566000 }, { "epoch": 8.54, - "learning_rate": 8.614037996754046e-06, - "loss": 0.0092, + "learning_rate": 1.292105699513107e-05, + "loss": 0.0131, "step": 566500 }, { "epoch": 8.55, - "learning_rate": 8.603988603988605e-06, - "loss": 0.0093, + "learning_rate": 1.2905982905982905e-05, + "loss": 0.0137, "step": 567000 }, { "epoch": 8.55, - "learning_rate": 8.593939211223162e-06, - "loss": 0.0092, + "learning_rate": 1.2890908816834743e-05, + "loss": 0.0141, "step": 567500 }, { "epoch": 8.56, - "learning_rate": 8.58388981845772e-06, - "loss": 0.0094, + "learning_rate": 1.287583472768658e-05, + "loss": 0.0142, "step": 568000 }, { "epoch": 8.57, - "learning_rate": 8.573840425692278e-06, - "loss": 0.009, + "learning_rate": 1.2860760638538416e-05, + "loss": 0.0131, "step": 568500 }, { "epoch": 8.58, - "learning_rate": 8.563791032926837e-06, - "loss": 0.009, + "learning_rate": 1.2845686549390253e-05, + "loss": 0.0129, "step": 569000 }, { "epoch": 8.58, - "learning_rate": 8.553741640161394e-06, - "loss": 0.0097, + "learning_rate": 1.2830612460242089e-05, + "loss": 0.0139, "step": 569500 }, { "epoch": 8.59, - "learning_rate": 8.543692247395951e-06, - "loss": 0.009, + "learning_rate": 1.2815538371093927e-05, + "loss": 0.0134, "step": 570000 }, { "epoch": 8.6, - "learning_rate": 8.53364285463051e-06, - "loss": 0.0088, + "learning_rate": 1.2800464281945764e-05, + "loss": 0.0136, "step": 570500 }, { "epoch": 8.61, - "learning_rate": 8.523593461865067e-06, - "loss": 0.0092, + "learning_rate": 1.27853901927976e-05, + "loss": 0.0139, "step": 571000 }, { "epoch": 8.61, - "learning_rate": 8.513544069099626e-06, - "loss": 0.0093, + "learning_rate": 1.2770316103649437e-05, + "loss": 0.0143, "step": 571500 }, { "epoch": 8.62, - "learning_rate": 8.503494676334183e-06, - "loss": 0.0093, + "learning_rate": 1.2755242014501275e-05, + "loss": 0.0138, "step": 572000 }, { "epoch": 8.63, - "learning_rate": 8.49344528356874e-06, - "loss": 0.0092, + "learning_rate": 1.274016792535311e-05, + "loss": 0.0138, "step": 572500 }, { "epoch": 8.64, - "learning_rate": 8.483395890803299e-06, - "loss": 0.0102, + "learning_rate": 1.2725093836204948e-05, + "loss": 0.0135, "step": 573000 }, { "epoch": 8.64, - "learning_rate": 8.473346498037856e-06, - "loss": 0.0096, + "learning_rate": 1.2710019747056783e-05, + "loss": 0.0138, "step": 573500 }, { "epoch": 8.65, - "learning_rate": 8.463297105272415e-06, - "loss": 0.0091, + "learning_rate": 1.269494565790862e-05, + "loss": 0.0139, "step": 574000 }, { "epoch": 8.66, - "learning_rate": 8.453247712506972e-06, - "loss": 0.0096, + "learning_rate": 1.2679871568760458e-05, + "loss": 0.0132, "step": 574500 }, { "epoch": 8.67, - "learning_rate": 8.44319831974153e-06, - "loss": 0.0089, + "learning_rate": 1.2664797479612294e-05, + "loss": 0.0139, "step": 575000 }, { "epoch": 8.68, - "learning_rate": 8.433148926976088e-06, - "loss": 0.0093, + "learning_rate": 1.2649723390464131e-05, + "loss": 0.0123, "step": 575500 }, { "epoch": 8.68, - "learning_rate": 8.423099534210645e-06, - "loss": 0.0091, + "learning_rate": 1.2634649301315969e-05, + "loss": 0.0135, "step": 576000 }, { "epoch": 8.69, - "learning_rate": 8.413050141445204e-06, - "loss": 0.0095, + "learning_rate": 1.2619575212167805e-05, + "loss": 0.015, "step": 576500 }, { "epoch": 8.7, - "learning_rate": 8.403000748679761e-06, - "loss": 0.0086, + "learning_rate": 1.2604501123019642e-05, + "loss": 0.014, "step": 577000 }, { "epoch": 8.71, - "learning_rate": 8.39295135591432e-06, - "loss": 0.0091, + "learning_rate": 1.2589427033871478e-05, + "loss": 0.0134, "step": 577500 }, { "epoch": 8.71, - "learning_rate": 8.382901963148877e-06, - "loss": 0.0098, + "learning_rate": 1.2574352944723315e-05, + "loss": 0.0132, "step": 578000 }, { "epoch": 8.72, - "learning_rate": 8.372852570383434e-06, - "loss": 0.0086, + "learning_rate": 1.2559278855575153e-05, + "loss": 0.0123, "step": 578500 }, { "epoch": 8.73, - "learning_rate": 8.362803177617993e-06, - "loss": 0.0089, + "learning_rate": 1.2544204766426988e-05, + "loss": 0.0128, "step": 579000 }, { "epoch": 8.74, - "learning_rate": 8.35275378485255e-06, - "loss": 0.009, + "learning_rate": 1.2529130677278826e-05, + "loss": 0.0127, "step": 579500 }, { "epoch": 8.74, - "learning_rate": 8.34270439208711e-06, - "loss": 0.0091, + "learning_rate": 1.2514056588130663e-05, + "loss": 0.013, "step": 580000 }, { "epoch": 8.75, - "learning_rate": 8.332654999321666e-06, - "loss": 0.0096, + "learning_rate": 1.2498982498982499e-05, + "loss": 0.0127, "step": 580500 }, { "epoch": 8.76, - "learning_rate": 8.322605606556224e-06, - "loss": 0.0102, + "learning_rate": 1.2483908409834336e-05, + "loss": 0.0143, "step": 581000 }, { "epoch": 8.77, - "learning_rate": 8.312556213790782e-06, - "loss": 0.009, + "learning_rate": 1.2468834320686172e-05, + "loss": 0.0128, "step": 581500 }, { "epoch": 8.77, - "learning_rate": 8.30250682102534e-06, - "loss": 0.0091, + "learning_rate": 1.245376023153801e-05, + "loss": 0.0141, "step": 582000 }, { "epoch": 8.78, - "learning_rate": 8.292457428259898e-06, - "loss": 0.0093, + "learning_rate": 1.2438686142389847e-05, + "loss": 0.0142, "step": 582500 }, { "epoch": 8.79, - "learning_rate": 8.282408035494456e-06, - "loss": 0.0085, + "learning_rate": 1.2423612053241683e-05, + "loss": 0.0126, "step": 583000 }, { "epoch": 8.8, - "learning_rate": 8.272358642729014e-06, - "loss": 0.0092, + "learning_rate": 1.240853796409352e-05, + "loss": 0.014, "step": 583500 }, { "epoch": 8.8, - "learning_rate": 8.262309249963572e-06, - "loss": 0.0095, + "learning_rate": 1.2393463874945357e-05, + "loss": 0.0127, "step": 584000 }, { "epoch": 8.81, - "learning_rate": 8.252259857198129e-06, - "loss": 0.0091, + "learning_rate": 1.2378389785797193e-05, + "loss": 0.0138, "step": 584500 }, { "epoch": 8.82, - "learning_rate": 8.242210464432688e-06, - "loss": 0.0094, + "learning_rate": 1.236331569664903e-05, + "loss": 0.0135, "step": 585000 }, { "epoch": 8.83, - "learning_rate": 8.232161071667245e-06, - "loss": 0.0088, + "learning_rate": 1.2348241607500866e-05, + "loss": 0.0138, "step": 585500 }, { "epoch": 8.83, - "learning_rate": 8.222111678901804e-06, - "loss": 0.0099, + "learning_rate": 1.2333167518352704e-05, + "loss": 0.0139, "step": 586000 }, { "epoch": 8.84, - "learning_rate": 8.21206228613636e-06, - "loss": 0.0091, + "learning_rate": 1.2318093429204541e-05, + "loss": 0.0141, "step": 586500 }, { "epoch": 8.85, - "learning_rate": 8.202012893370918e-06, - "loss": 0.009, + "learning_rate": 1.2303019340056377e-05, + "loss": 0.014, "step": 587000 }, { "epoch": 8.86, - "learning_rate": 8.191963500605477e-06, - "loss": 0.0101, + "learning_rate": 1.2287945250908214e-05, + "loss": 0.0149, "step": 587500 }, { "epoch": 8.86, - "learning_rate": 8.181914107840034e-06, - "loss": 0.0085, + "learning_rate": 1.227287116176005e-05, + "loss": 0.0129, "step": 588000 }, { "epoch": 8.87, - "learning_rate": 8.171864715074593e-06, - "loss": 0.0091, + "learning_rate": 1.2257797072611887e-05, + "loss": 0.0141, "step": 588500 }, { "epoch": 8.88, - "learning_rate": 8.16181532230915e-06, - "loss": 0.0085, + "learning_rate": 1.2242722983463725e-05, + "loss": 0.0136, "step": 589000 }, { "epoch": 8.89, - "learning_rate": 8.151765929543709e-06, - "loss": 0.0094, + "learning_rate": 1.222764889431556e-05, + "loss": 0.0125, "step": 589500 }, { "epoch": 8.89, - "learning_rate": 8.141716536778266e-06, - "loss": 0.0096, + "learning_rate": 1.2212574805167398e-05, + "loss": 0.0138, "step": 590000 }, { "epoch": 8.9, - "learning_rate": 8.131667144012823e-06, - "loss": 0.009, + "learning_rate": 1.2197500716019235e-05, + "loss": 0.013, "step": 590500 }, { "epoch": 8.91, - "learning_rate": 8.121617751247382e-06, - "loss": 0.0091, + "learning_rate": 1.2182426626871071e-05, + "loss": 0.0142, "step": 591000 }, { "epoch": 8.92, - "learning_rate": 8.111568358481939e-06, - "loss": 0.0095, + "learning_rate": 1.2167352537722909e-05, + "loss": 0.0136, "step": 591500 }, { "epoch": 8.92, - "learning_rate": 8.101518965716498e-06, - "loss": 0.0089, + "learning_rate": 1.2152278448574744e-05, + "loss": 0.0132, "step": 592000 }, { "epoch": 8.93, - "learning_rate": 8.091469572951055e-06, - "loss": 0.0091, + "learning_rate": 1.2137204359426582e-05, + "loss": 0.0142, "step": 592500 }, { "epoch": 8.94, - "learning_rate": 8.081420180185612e-06, - "loss": 0.0094, + "learning_rate": 1.212213027027842e-05, + "loss": 0.0139, "step": 593000 }, { "epoch": 8.95, - "learning_rate": 8.071370787420171e-06, - "loss": 0.0102, + "learning_rate": 1.2107056181130255e-05, + "loss": 0.0143, "step": 593500 }, { "epoch": 8.95, - "learning_rate": 8.061321394654728e-06, - "loss": 0.0101, + "learning_rate": 1.2091982091982092e-05, + "loss": 0.0144, "step": 594000 }, { "epoch": 8.96, - "learning_rate": 8.051272001889287e-06, - "loss": 0.0089, + "learning_rate": 1.207690800283393e-05, + "loss": 0.0136, "step": 594500 }, { "epoch": 8.97, - "learning_rate": 8.041222609123844e-06, - "loss": 0.009, + "learning_rate": 1.2061833913685765e-05, + "loss": 0.0138, "step": 595000 }, { "epoch": 8.98, - "learning_rate": 8.031173216358401e-06, - "loss": 0.0091, + "learning_rate": 1.2046759824537603e-05, + "loss": 0.0122, "step": 595500 }, { "epoch": 8.98, - "learning_rate": 8.02112382359296e-06, - "loss": 0.0086, + "learning_rate": 1.2031685735389439e-05, + "loss": 0.0135, "step": 596000 }, { "epoch": 8.99, - "learning_rate": 8.011074430827517e-06, - "loss": 0.0097, + "learning_rate": 1.2016611646241276e-05, + "loss": 0.0135, "step": 596500 }, { "epoch": 9.0, - "learning_rate": 8.001025038062076e-06, - "loss": 0.0095, + "learning_rate": 1.2001537557093113e-05, + "loss": 0.0138, "step": 597000 }, { "epoch": 9.0, - "eval_accuracy": 0.9913156380677198, - "eval_f1": 0.9489966790216164, - "eval_loss": 0.03690173476934433, - "eval_precision": 0.9366557803284926, - "eval_recall": 0.9616671142905975, - "eval_runtime": 233.9661, - "eval_samples_per_second": 504.022, - "eval_steps_per_second": 31.505, + "eval_accuracy": 0.9875720274572235, + "eval_f1": 0.9454708265624094, + "eval_loss": 0.05194343999028206, + "eval_precision": 0.929096987338888, + "eval_recall": 0.9624321442814008, + "eval_runtime": 244.5713, + "eval_samples_per_second": 482.166, + "eval_steps_per_second": 30.138, "step": 597051 }, { "epoch": 9.01, - "learning_rate": 7.990975645296633e-06, - "loss": 0.0072, + "learning_rate": 1.198646346794495e-05, + "loss": 0.0115, "step": 597500 }, { "epoch": 9.01, - "learning_rate": 7.980926252531192e-06, - "loss": 0.007, + "learning_rate": 1.1971389378796787e-05, + "loss": 0.0096, "step": 598000 }, { "epoch": 9.02, - "learning_rate": 7.97087685976575e-06, - "loss": 0.0074, + "learning_rate": 1.1956315289648624e-05, + "loss": 0.0106, "step": 598500 }, { "epoch": 9.03, - "learning_rate": 7.960827467000306e-06, - "loss": 0.0073, + "learning_rate": 1.194124120050046e-05, + "loss": 0.0114, "step": 599000 }, { "epoch": 9.04, - "learning_rate": 7.950778074234865e-06, - "loss": 0.0073, + "learning_rate": 1.1926167111352297e-05, + "loss": 0.0111, "step": 599500 }, { "epoch": 9.04, - "learning_rate": 7.940728681469422e-06, - "loss": 0.007, + "learning_rate": 1.1911093022204133e-05, + "loss": 0.0105, "step": 600000 }, { "epoch": 9.05, - "learning_rate": 7.930679288703981e-06, - "loss": 0.0075, + "learning_rate": 1.189601893305597e-05, + "loss": 0.0104, "step": 600500 }, { "epoch": 9.06, - "learning_rate": 7.920629895938538e-06, - "loss": 0.0078, + "learning_rate": 1.1880944843907808e-05, + "loss": 0.0109, "step": 601000 }, { "epoch": 9.07, - "learning_rate": 7.910580503173096e-06, - "loss": 0.0081, + "learning_rate": 1.1865870754759643e-05, + "loss": 0.0118, "step": 601500 }, { "epoch": 9.07, - "learning_rate": 7.900531110407654e-06, - "loss": 0.007, + "learning_rate": 1.1850796665611481e-05, + "loss": 0.0109, "step": 602000 }, { "epoch": 9.08, - "learning_rate": 7.890481717642212e-06, - "loss": 0.0069, + "learning_rate": 1.1835722576463318e-05, + "loss": 0.0111, "step": 602500 }, { "epoch": 9.09, - "learning_rate": 7.88043232487677e-06, - "loss": 0.0072, + "learning_rate": 1.1820648487315154e-05, + "loss": 0.0106, "step": 603000 }, { "epoch": 9.1, - "learning_rate": 7.870382932111328e-06, - "loss": 0.0075, + "learning_rate": 1.1805574398166991e-05, + "loss": 0.0112, "step": 603500 }, { "epoch": 9.1, - "learning_rate": 7.860333539345886e-06, - "loss": 0.007, + "learning_rate": 1.1790500309018827e-05, + "loss": 0.0099, "step": 604000 }, { "epoch": 9.11, - "learning_rate": 7.850284146580444e-06, - "loss": 0.0081, + "learning_rate": 1.1775426219870665e-05, + "loss": 0.0108, "step": 604500 }, { "epoch": 9.12, - "learning_rate": 7.840234753815e-06, - "loss": 0.0073, + "learning_rate": 1.1760352130722502e-05, + "loss": 0.0104, "step": 605000 }, { "epoch": 9.13, - "learning_rate": 7.83018536104956e-06, - "loss": 0.0079, + "learning_rate": 1.1745278041574338e-05, + "loss": 0.0117, "step": 605500 }, { "epoch": 9.13, - "learning_rate": 7.820135968284117e-06, - "loss": 0.0079, + "learning_rate": 1.1730203952426175e-05, + "loss": 0.0108, "step": 606000 }, { "epoch": 9.14, - "learning_rate": 7.810086575518676e-06, - "loss": 0.0078, + "learning_rate": 1.1715129863278013e-05, + "loss": 0.0111, "step": 606500 }, { "epoch": 9.15, - "learning_rate": 7.800037182753233e-06, - "loss": 0.0075, + "learning_rate": 1.1700055774129848e-05, + "loss": 0.0104, "step": 607000 }, { "epoch": 9.16, - "learning_rate": 7.78998778998779e-06, - "loss": 0.0076, + "learning_rate": 1.1684981684981686e-05, + "loss": 0.0118, "step": 607500 }, { "epoch": 9.17, - "learning_rate": 7.779938397222349e-06, - "loss": 0.0075, + "learning_rate": 1.1669907595833521e-05, + "loss": 0.0106, "step": 608000 }, { "epoch": 9.17, - "learning_rate": 7.769889004456906e-06, - "loss": 0.0073, + "learning_rate": 1.1654833506685359e-05, + "loss": 0.0107, "step": 608500 }, { "epoch": 9.18, - "learning_rate": 7.759839611691465e-06, - "loss": 0.0068, + "learning_rate": 1.1639759417537196e-05, + "loss": 0.0109, "step": 609000 }, { "epoch": 9.19, - "learning_rate": 7.749790218926022e-06, - "loss": 0.0079, + "learning_rate": 1.1624685328389032e-05, + "loss": 0.0109, "step": 609500 }, { "epoch": 9.2, - "learning_rate": 7.73974082616058e-06, - "loss": 0.0071, + "learning_rate": 1.160961123924087e-05, + "loss": 0.0105, "step": 610000 }, { "epoch": 9.2, - "learning_rate": 7.729691433395138e-06, - "loss": 0.0073, + "learning_rate": 1.1594537150092707e-05, + "loss": 0.0111, "step": 610500 }, { "epoch": 9.21, - "learning_rate": 7.719642040629695e-06, - "loss": 0.008, + "learning_rate": 1.1579463060944543e-05, + "loss": 0.0113, "step": 611000 }, { "epoch": 9.22, - "learning_rate": 7.709592647864254e-06, - "loss": 0.0064, + "learning_rate": 1.156438897179638e-05, + "loss": 0.0101, "step": 611500 }, { "epoch": 9.23, - "learning_rate": 7.699543255098811e-06, - "loss": 0.0078, + "learning_rate": 1.1549314882648216e-05, + "loss": 0.0105, "step": 612000 }, { "epoch": 9.23, - "learning_rate": 7.68949386233337e-06, - "loss": 0.0075, + "learning_rate": 1.1534240793500053e-05, + "loss": 0.0115, "step": 612500 }, { "epoch": 9.24, - "learning_rate": 7.679444469567927e-06, - "loss": 0.0081, + "learning_rate": 1.151916670435189e-05, + "loss": 0.0113, "step": 613000 }, { "epoch": 9.25, - "learning_rate": 7.669395076802484e-06, - "loss": 0.0077, + "learning_rate": 1.1504092615203726e-05, + "loss": 0.0111, "step": 613500 }, { "epoch": 9.26, - "learning_rate": 7.659345684037043e-06, - "loss": 0.0078, + "learning_rate": 1.1489018526055564e-05, + "loss": 0.0115, "step": 614000 }, { "epoch": 9.26, - "learning_rate": 7.6492962912716e-06, - "loss": 0.0073, + "learning_rate": 1.1473944436907401e-05, + "loss": 0.0109, "step": 614500 }, { "epoch": 9.27, - "learning_rate": 7.639246898506159e-06, - "loss": 0.0075, + "learning_rate": 1.1458870347759237e-05, + "loss": 0.012, "step": 615000 }, { "epoch": 9.28, - "learning_rate": 7.629197505740716e-06, - "loss": 0.0078, + "learning_rate": 1.1443796258611074e-05, + "loss": 0.0119, "step": 615500 }, { "epoch": 9.29, - "learning_rate": 7.619148112975274e-06, - "loss": 0.0074, + "learning_rate": 1.142872216946291e-05, + "loss": 0.0101, "step": 616000 }, { "epoch": 9.29, - "learning_rate": 7.609098720209832e-06, - "loss": 0.007, + "learning_rate": 1.1413648080314747e-05, + "loss": 0.0111, "step": 616500 }, { "epoch": 9.3, - "learning_rate": 7.59904932744439e-06, - "loss": 0.0076, + "learning_rate": 1.1398573991166585e-05, + "loss": 0.0121, "step": 617000 }, { "epoch": 9.31, - "learning_rate": 7.588999934678947e-06, - "loss": 0.0069, + "learning_rate": 1.138349990201842e-05, + "loss": 0.011, "step": 617500 }, { "epoch": 9.32, - "learning_rate": 7.578950541913505e-06, - "loss": 0.0079, + "learning_rate": 1.1368425812870258e-05, + "loss": 0.011, "step": 618000 }, { "epoch": 9.32, - "learning_rate": 7.568901149148063e-06, - "loss": 0.0074, + "learning_rate": 1.1353351723722094e-05, + "loss": 0.0114, "step": 618500 }, { "epoch": 9.33, - "learning_rate": 7.558851756382621e-06, - "loss": 0.0075, + "learning_rate": 1.1338277634573931e-05, + "loss": 0.0119, "step": 619000 }, { "epoch": 9.34, - "learning_rate": 7.548802363617179e-06, - "loss": 0.0073, + "learning_rate": 1.1323203545425769e-05, + "loss": 0.0098, "step": 619500 }, { "epoch": 9.35, - "learning_rate": 7.538752970851737e-06, - "loss": 0.0081, + "learning_rate": 1.1308129456277604e-05, + "loss": 0.0115, "step": 620000 }, { "epoch": 9.35, - "learning_rate": 7.5287035780862945e-06, - "loss": 0.0075, + "learning_rate": 1.1293055367129442e-05, + "loss": 0.0114, "step": 620500 }, { "epoch": 9.36, - "learning_rate": 7.5186541853208525e-06, - "loss": 0.0078, + "learning_rate": 1.127798127798128e-05, + "loss": 0.0124, "step": 621000 }, { "epoch": 9.37, - "learning_rate": 7.5086047925554105e-06, - "loss": 0.0077, + "learning_rate": 1.1262907188833115e-05, + "loss": 0.0118, "step": 621500 }, { "epoch": 9.38, - "learning_rate": 7.4985553997899685e-06, - "loss": 0.0075, + "learning_rate": 1.1247833099684952e-05, + "loss": 0.0109, "step": 622000 }, { "epoch": 9.38, - "learning_rate": 7.4885060070245265e-06, - "loss": 0.0077, + "learning_rate": 1.1232759010536788e-05, + "loss": 0.0122, "step": 622500 }, { "epoch": 9.39, - "learning_rate": 7.4784566142590845e-06, - "loss": 0.008, + "learning_rate": 1.1217684921388626e-05, + "loss": 0.0124, "step": 623000 }, { "epoch": 9.4, - "learning_rate": 7.468407221493642e-06, - "loss": 0.0078, + "learning_rate": 1.1202610832240463e-05, + "loss": 0.012, "step": 623500 }, { "epoch": 9.41, - "learning_rate": 7.4583578287282e-06, - "loss": 0.0076, + "learning_rate": 1.1187536743092299e-05, + "loss": 0.011, "step": 624000 }, { "epoch": 9.41, - "learning_rate": 7.448308435962758e-06, - "loss": 0.0071, + "learning_rate": 1.1172462653944136e-05, + "loss": 0.0107, "step": 624500 }, { "epoch": 9.42, - "learning_rate": 7.438259043197316e-06, - "loss": 0.0074, + "learning_rate": 1.1157388564795974e-05, + "loss": 0.0114, "step": 625000 }, { "epoch": 9.43, - "learning_rate": 7.428209650431874e-06, - "loss": 0.0074, + "learning_rate": 1.114231447564781e-05, + "loss": 0.0122, "step": 625500 }, { "epoch": 9.44, - "learning_rate": 7.418160257666432e-06, - "loss": 0.0077, + "learning_rate": 1.1127240386499647e-05, + "loss": 0.0128, "step": 626000 }, { "epoch": 9.44, - "learning_rate": 7.408110864900989e-06, - "loss": 0.0077, + "learning_rate": 1.1112166297351482e-05, + "loss": 0.0115, "step": 626500 }, { "epoch": 9.45, - "learning_rate": 7.398061472135547e-06, - "loss": 0.0076, + "learning_rate": 1.109709220820332e-05, + "loss": 0.011, "step": 627000 }, { "epoch": 9.46, - "learning_rate": 7.388012079370105e-06, - "loss": 0.0075, + "learning_rate": 1.1082018119055157e-05, + "loss": 0.0114, "step": 627500 }, { "epoch": 9.47, - "learning_rate": 7.377962686604663e-06, - "loss": 0.0078, + "learning_rate": 1.1066944029906993e-05, + "loss": 0.0115, "step": 628000 }, { "epoch": 9.47, - "learning_rate": 7.367913293839221e-06, - "loss": 0.0079, + "learning_rate": 1.105186994075883e-05, + "loss": 0.0114, "step": 628500 }, { "epoch": 9.48, - "learning_rate": 7.357863901073779e-06, - "loss": 0.008, + "learning_rate": 1.1036795851610668e-05, + "loss": 0.0117, "step": 629000 }, { "epoch": 9.49, - "learning_rate": 7.347814508308336e-06, - "loss": 0.0084, + "learning_rate": 1.1021721762462504e-05, + "loss": 0.0124, "step": 629500 }, { "epoch": 9.5, - "learning_rate": 7.337765115542894e-06, - "loss": 0.0075, + "learning_rate": 1.1006647673314341e-05, + "loss": 0.0124, "step": 630000 }, { "epoch": 9.5, - "learning_rate": 7.327715722777452e-06, - "loss": 0.008, + "learning_rate": 1.0991573584166177e-05, + "loss": 0.0123, "step": 630500 }, { "epoch": 9.51, - "learning_rate": 7.31766633001201e-06, - "loss": 0.0077, + "learning_rate": 1.0976499495018014e-05, + "loss": 0.0111, "step": 631000 }, { "epoch": 9.52, - "learning_rate": 7.307616937246568e-06, - "loss": 0.0079, + "learning_rate": 1.0961425405869852e-05, + "loss": 0.0115, "step": 631500 }, { "epoch": 9.53, - "learning_rate": 7.297567544481126e-06, - "loss": 0.0079, + "learning_rate": 1.0946351316721687e-05, + "loss": 0.0113, "step": 632000 }, { "epoch": 9.53, - "learning_rate": 7.287518151715683e-06, - "loss": 0.0081, + "learning_rate": 1.0931277227573525e-05, + "loss": 0.0123, "step": 632500 }, { "epoch": 9.54, - "learning_rate": 7.277468758950241e-06, - "loss": 0.0081, + "learning_rate": 1.0916203138425362e-05, + "loss": 0.0122, "step": 633000 }, { "epoch": 9.55, - "learning_rate": 7.267419366184799e-06, - "loss": 0.0077, + "learning_rate": 1.0901129049277198e-05, + "loss": 0.0114, "step": 633500 }, { "epoch": 9.56, - "learning_rate": 7.257369973419357e-06, - "loss": 0.0076, + "learning_rate": 1.0886054960129035e-05, + "loss": 0.0114, "step": 634000 }, { "epoch": 9.56, - "learning_rate": 7.247320580653915e-06, - "loss": 0.008, + "learning_rate": 1.0870980870980871e-05, + "loss": 0.0114, "step": 634500 }, { "epoch": 9.57, - "learning_rate": 7.237271187888473e-06, - "loss": 0.0075, + "learning_rate": 1.0855906781832708e-05, + "loss": 0.0119, "step": 635000 }, { "epoch": 9.58, - "learning_rate": 7.22722179512303e-06, - "loss": 0.0075, + "learning_rate": 1.0840832692684546e-05, + "loss": 0.0105, "step": 635500 }, { "epoch": 9.59, - "learning_rate": 7.217172402357588e-06, - "loss": 0.0083, + "learning_rate": 1.0825758603536382e-05, + "loss": 0.0121, "step": 636000 }, { "epoch": 9.59, - "learning_rate": 7.207123009592146e-06, - "loss": 0.007, + "learning_rate": 1.0810684514388219e-05, + "loss": 0.0107, "step": 636500 }, { "epoch": 9.6, - "learning_rate": 7.197073616826704e-06, - "loss": 0.0072, + "learning_rate": 1.0795610425240056e-05, + "loss": 0.0116, "step": 637000 }, { "epoch": 9.61, - "learning_rate": 7.187024224061262e-06, - "loss": 0.0077, + "learning_rate": 1.0780536336091892e-05, + "loss": 0.012, "step": 637500 }, { "epoch": 9.62, - "learning_rate": 7.17697483129582e-06, - "loss": 0.007, + "learning_rate": 1.076546224694373e-05, + "loss": 0.0111, "step": 638000 }, { "epoch": 9.62, - "learning_rate": 7.166925438530377e-06, - "loss": 0.0073, + "learning_rate": 1.0750388157795565e-05, + "loss": 0.0123, "step": 638500 }, { "epoch": 9.63, - "learning_rate": 7.156876045764935e-06, - "loss": 0.0074, + "learning_rate": 1.0735314068647403e-05, + "loss": 0.0123, "step": 639000 }, { "epoch": 9.64, - "learning_rate": 7.146826652999493e-06, - "loss": 0.0081, + "learning_rate": 1.072023997949924e-05, + "loss": 0.0125, "step": 639500 }, { "epoch": 9.65, - "learning_rate": 7.136777260234051e-06, - "loss": 0.0075, + "learning_rate": 1.0705165890351076e-05, + "loss": 0.0119, "step": 640000 }, { "epoch": 9.65, - "learning_rate": 7.126727867468609e-06, - "loss": 0.0074, + "learning_rate": 1.0690091801202913e-05, + "loss": 0.0118, "step": 640500 }, { "epoch": 9.66, - "learning_rate": 7.1166784747031666e-06, - "loss": 0.0085, + "learning_rate": 1.067501771205475e-05, + "loss": 0.0113, "step": 641000 }, { "epoch": 9.67, - "learning_rate": 7.1066290819377246e-06, - "loss": 0.0074, + "learning_rate": 1.0659943622906586e-05, + "loss": 0.0111, "step": 641500 }, { "epoch": 9.68, - "learning_rate": 7.0965796891722826e-06, - "loss": 0.0081, + "learning_rate": 1.0644869533758424e-05, + "loss": 0.0114, "step": 642000 }, { "epoch": 9.69, - "learning_rate": 7.0865302964068405e-06, - "loss": 0.0083, + "learning_rate": 1.062979544461026e-05, + "loss": 0.0126, "step": 642500 }, { "epoch": 9.69, - "learning_rate": 7.0764809036413985e-06, - "loss": 0.0071, + "learning_rate": 1.0614721355462097e-05, + "loss": 0.0118, "step": 643000 }, { "epoch": 9.7, - "learning_rate": 7.0664315108759565e-06, - "loss": 0.0074, + "learning_rate": 1.0599647266313934e-05, + "loss": 0.0113, "step": 643500 }, { "epoch": 9.71, - "learning_rate": 7.056382118110514e-06, - "loss": 0.0073, + "learning_rate": 1.058457317716577e-05, + "loss": 0.0121, "step": 644000 }, { "epoch": 9.72, - "learning_rate": 7.046332725345072e-06, - "loss": 0.0082, + "learning_rate": 1.0569499088017608e-05, + "loss": 0.0123, "step": 644500 }, { "epoch": 9.72, - "learning_rate": 7.03628333257963e-06, - "loss": 0.0076, + "learning_rate": 1.0554424998869443e-05, + "loss": 0.0119, "step": 645000 }, { "epoch": 9.73, - "learning_rate": 7.026233939814188e-06, - "loss": 0.0079, + "learning_rate": 1.053935090972128e-05, + "loss": 0.0123, "step": 645500 }, { "epoch": 9.74, - "learning_rate": 7.016184547048746e-06, - "loss": 0.0077, + "learning_rate": 1.0524276820573118e-05, + "loss": 0.0111, "step": 646000 }, { "epoch": 9.75, - "learning_rate": 7.006135154283304e-06, - "loss": 0.0073, + "learning_rate": 1.0509202731424954e-05, + "loss": 0.0112, "step": 646500 }, { "epoch": 9.75, - "learning_rate": 6.996085761517861e-06, - "loss": 0.0085, + "learning_rate": 1.0494128642276791e-05, + "loss": 0.0114, "step": 647000 }, { "epoch": 9.76, - "learning_rate": 6.986036368752419e-06, - "loss": 0.0076, + "learning_rate": 1.0479054553128629e-05, + "loss": 0.0112, "step": 647500 }, { "epoch": 9.77, - "learning_rate": 6.975986975986977e-06, - "loss": 0.007, + "learning_rate": 1.0463980463980464e-05, + "loss": 0.0119, "step": 648000 }, { "epoch": 9.78, - "learning_rate": 6.965937583221535e-06, - "loss": 0.0076, + "learning_rate": 1.0448906374832302e-05, + "loss": 0.0113, "step": 648500 }, { "epoch": 9.78, - "learning_rate": 6.955888190456093e-06, - "loss": 0.0076, + "learning_rate": 1.0433832285684138e-05, + "loss": 0.0112, "step": 649000 }, { "epoch": 9.79, - "learning_rate": 6.945838797690651e-06, - "loss": 0.0075, + "learning_rate": 1.0418758196535975e-05, + "loss": 0.0118, "step": 649500 }, { "epoch": 9.8, - "learning_rate": 6.935789404925208e-06, - "loss": 0.008, + "learning_rate": 1.0403684107387812e-05, + "loss": 0.0118, "step": 650000 }, { "epoch": 9.81, - "learning_rate": 6.925740012159766e-06, - "loss": 0.0082, + "learning_rate": 1.0388610018239648e-05, + "loss": 0.0114, "step": 650500 }, { "epoch": 9.81, - "learning_rate": 6.915690619394324e-06, - "loss": 0.0079, + "learning_rate": 1.0373535929091486e-05, + "loss": 0.0124, "step": 651000 }, { "epoch": 9.82, - "learning_rate": 6.905641226628882e-06, - "loss": 0.0074, + "learning_rate": 1.0358461839943323e-05, + "loss": 0.0127, "step": 651500 }, { "epoch": 9.83, - "learning_rate": 6.89559183386344e-06, - "loss": 0.0075, + "learning_rate": 1.0343387750795159e-05, + "loss": 0.0122, "step": 652000 }, { "epoch": 9.84, - "learning_rate": 6.885542441097998e-06, - "loss": 0.0085, + "learning_rate": 1.0328313661646996e-05, + "loss": 0.0117, "step": 652500 }, { "epoch": 9.84, - "learning_rate": 6.875493048332555e-06, - "loss": 0.0083, + "learning_rate": 1.0313239572498832e-05, + "loss": 0.013, "step": 653000 }, { "epoch": 9.85, - "learning_rate": 6.865443655567113e-06, - "loss": 0.0079, + "learning_rate": 1.029816548335067e-05, + "loss": 0.0121, "step": 653500 }, { "epoch": 9.86, - "learning_rate": 6.855394262801671e-06, - "loss": 0.0082, + "learning_rate": 1.0283091394202507e-05, + "loss": 0.0123, "step": 654000 }, { "epoch": 9.87, - "learning_rate": 6.845344870036229e-06, - "loss": 0.007, + "learning_rate": 1.0268017305054342e-05, + "loss": 0.0115, "step": 654500 }, { "epoch": 9.87, - "learning_rate": 6.835295477270787e-06, - "loss": 0.0081, + "learning_rate": 1.025294321590618e-05, + "loss": 0.0122, "step": 655000 }, { "epoch": 9.88, - "learning_rate": 6.825246084505345e-06, - "loss": 0.0075, + "learning_rate": 1.0237869126758017e-05, + "loss": 0.0112, "step": 655500 }, { "epoch": 9.89, - "learning_rate": 6.815196691739902e-06, - "loss": 0.0071, + "learning_rate": 1.0222795037609853e-05, + "loss": 0.0119, "step": 656000 }, { "epoch": 9.9, - "learning_rate": 6.80514729897446e-06, - "loss": 0.0082, + "learning_rate": 1.020772094846169e-05, + "loss": 0.0119, "step": 656500 }, { "epoch": 9.9, - "learning_rate": 6.795097906209018e-06, - "loss": 0.0078, + "learning_rate": 1.0192646859313526e-05, + "loss": 0.0119, "step": 657000 }, { "epoch": 9.91, - "learning_rate": 6.785048513443576e-06, - "loss": 0.0078, + "learning_rate": 1.0177572770165364e-05, + "loss": 0.0123, "step": 657500 }, { "epoch": 9.92, - "learning_rate": 6.774999120678134e-06, - "loss": 0.008, + "learning_rate": 1.0162498681017201e-05, + "loss": 0.0121, "step": 658000 }, { "epoch": 9.93, - "learning_rate": 6.764949727912692e-06, - "loss": 0.0074, + "learning_rate": 1.0147424591869037e-05, + "loss": 0.0117, "step": 658500 }, { "epoch": 9.93, - "learning_rate": 6.7549003351472494e-06, - "loss": 0.0076, + "learning_rate": 1.0132350502720874e-05, + "loss": 0.0121, "step": 659000 }, { "epoch": 9.94, - "learning_rate": 6.7448509423818074e-06, - "loss": 0.0078, + "learning_rate": 1.0117276413572712e-05, + "loss": 0.0112, "step": 659500 }, { "epoch": 9.95, - "learning_rate": 6.7348015496163654e-06, - "loss": 0.0076, + "learning_rate": 1.0102202324424547e-05, + "loss": 0.0116, "step": 660000 }, { "epoch": 9.96, - "learning_rate": 6.7247521568509234e-06, - "loss": 0.0074, + "learning_rate": 1.0087128235276385e-05, + "loss": 0.0113, "step": 660500 }, { "epoch": 9.96, - "learning_rate": 6.7147027640854814e-06, - "loss": 0.0071, + "learning_rate": 1.007205414612822e-05, + "loss": 0.012, "step": 661000 }, { "epoch": 9.97, - "learning_rate": 6.7046533713200394e-06, - "loss": 0.0074, + "learning_rate": 1.0056980056980058e-05, + "loss": 0.0121, "step": 661500 }, { "epoch": 9.98, - "learning_rate": 6.694603978554597e-06, - "loss": 0.0089, + "learning_rate": 1.0041905967831895e-05, + "loss": 0.0132, "step": 662000 }, { "epoch": 9.99, - "learning_rate": 6.684554585789155e-06, - "loss": 0.0076, + "learning_rate": 1.0026831878683731e-05, + "loss": 0.0118, "step": 662500 }, { "epoch": 9.99, - "learning_rate": 6.674505193023713e-06, - "loss": 0.0075, + "learning_rate": 1.0011757789535568e-05, + "loss": 0.0121, "step": 663000 }, { "epoch": 10.0, - "eval_accuracy": 0.9912074210128935, - "eval_f1": 0.9486804962579738, - "eval_loss": 0.04348108172416687, - "eval_precision": 0.9369322533662985, - "eval_recall": 0.9607271040359401, - "eval_runtime": 258.6008, - "eval_samples_per_second": 456.008, - "eval_steps_per_second": 28.503, + "eval_accuracy": 0.9875053142522836, + "eval_f1": 0.9440039434689302, + "eval_loss": 0.05791113153100014, + "eval_precision": 0.9299003991141325, + "eval_recall": 0.9585418853919965, + "eval_runtime": 248.4287, + "eval_samples_per_second": 474.679, + "eval_steps_per_second": 29.67, "step": 663390 }, { "epoch": 10.0, - "learning_rate": 6.6644558002582706e-06, - "loss": 0.0073, + "learning_rate": 9.996683700387406e-06, + "loss": 0.0104, "step": 663500 }, { "epoch": 10.01, - "learning_rate": 6.6544064074928286e-06, - "loss": 0.0059, + "learning_rate": 9.981609611239242e-06, + "loss": 0.009, "step": 664000 }, { "epoch": 10.02, - "learning_rate": 6.644357014727386e-06, - "loss": 0.0064, + "learning_rate": 9.966535522091079e-06, + "loss": 0.0099, "step": 664500 }, { "epoch": 10.02, - "learning_rate": 6.634307621961944e-06, - "loss": 0.0064, + "learning_rate": 9.951461432942915e-06, + "loss": 0.01, "step": 665000 }, { "epoch": 10.03, - "learning_rate": 6.624258229196502e-06, - "loss": 0.0063, + "learning_rate": 9.936387343794752e-06, + "loss": 0.01, "step": 665500 }, { "epoch": 10.04, - "learning_rate": 6.61420883643106e-06, - "loss": 0.0062, + "learning_rate": 9.92131325464659e-06, + "loss": 0.0096, "step": 666000 }, { "epoch": 10.05, - "learning_rate": 6.604159443665618e-06, - "loss": 0.0061, + "learning_rate": 9.906239165498425e-06, + "loss": 0.0098, "step": 666500 }, { "epoch": 10.05, - "learning_rate": 6.594110050900176e-06, - "loss": 0.0062, + "learning_rate": 9.891165076350263e-06, + "loss": 0.0091, "step": 667000 }, { "epoch": 10.06, - "learning_rate": 6.584060658134733e-06, - "loss": 0.0066, + "learning_rate": 9.8760909872021e-06, + "loss": 0.01, "step": 667500 }, { "epoch": 10.07, - "learning_rate": 6.574011265369291e-06, - "loss": 0.0064, + "learning_rate": 9.861016898053936e-06, + "loss": 0.0095, "step": 668000 }, { "epoch": 10.08, - "learning_rate": 6.563961872603849e-06, - "loss": 0.0068, + "learning_rate": 9.845942808905773e-06, + "loss": 0.0093, "step": 668500 }, { "epoch": 10.08, - "learning_rate": 6.553912479838407e-06, - "loss": 0.0071, + "learning_rate": 9.830868719757609e-06, + "loss": 0.0096, "step": 669000 }, { "epoch": 10.09, - "learning_rate": 6.543863087072965e-06, - "loss": 0.0063, + "learning_rate": 9.815794630609446e-06, + "loss": 0.0097, "step": 669500 }, { "epoch": 10.1, - "learning_rate": 6.533813694307523e-06, - "loss": 0.0064, + "learning_rate": 9.800720541461284e-06, + "loss": 0.0099, "step": 670000 }, { "epoch": 10.11, - "learning_rate": 6.52376430154208e-06, - "loss": 0.0061, + "learning_rate": 9.78564645231312e-06, + "loss": 0.0095, "step": 670500 }, { "epoch": 10.11, - "learning_rate": 6.513714908776638e-06, - "loss": 0.006, + "learning_rate": 9.770572363164957e-06, + "loss": 0.0088, "step": 671000 }, { "epoch": 10.12, - "learning_rate": 6.503665516011195e-06, - "loss": 0.0063, + "learning_rate": 9.755498274016793e-06, + "loss": 0.01, "step": 671500 }, { "epoch": 10.13, - "learning_rate": 6.493616123245753e-06, - "loss": 0.0064, + "learning_rate": 9.740424184868628e-06, + "loss": 0.0104, "step": 672000 }, { "epoch": 10.14, - "learning_rate": 6.48356673048031e-06, - "loss": 0.0066, + "learning_rate": 9.725350095720466e-06, + "loss": 0.0102, "step": 672500 }, { "epoch": 10.14, - "learning_rate": 6.473517337714868e-06, - "loss": 0.0064, + "learning_rate": 9.710276006572302e-06, + "loss": 0.0098, "step": 673000 }, { "epoch": 10.15, - "learning_rate": 6.463467944949426e-06, - "loss": 0.0068, + "learning_rate": 9.695201917424139e-06, + "loss": 0.0102, "step": 673500 }, { "epoch": 10.16, - "learning_rate": 6.453418552183984e-06, - "loss": 0.0069, + "learning_rate": 9.680127828275976e-06, + "loss": 0.0093, "step": 674000 }, { "epoch": 10.17, - "learning_rate": 6.443369159418542e-06, - "loss": 0.0065, + "learning_rate": 9.665053739127812e-06, + "loss": 0.0099, "step": 674500 }, { "epoch": 10.18, - "learning_rate": 6.4333197666531e-06, - "loss": 0.0067, + "learning_rate": 9.64997964997965e-06, + "loss": 0.0091, "step": 675000 }, { "epoch": 10.18, - "learning_rate": 6.4232703738876575e-06, - "loss": 0.006, + "learning_rate": 9.634905560831487e-06, + "loss": 0.0099, "step": 675500 }, { "epoch": 10.19, - "learning_rate": 6.4132209811222155e-06, - "loss": 0.0065, + "learning_rate": 9.619831471683323e-06, + "loss": 0.0095, "step": 676000 }, { "epoch": 10.2, - "learning_rate": 6.4031715883567735e-06, - "loss": 0.0068, + "learning_rate": 9.60475738253516e-06, + "loss": 0.0102, "step": 676500 }, { "epoch": 10.21, - "learning_rate": 6.3931221955913315e-06, - "loss": 0.0062, + "learning_rate": 9.589683293386996e-06, + "loss": 0.0094, "step": 677000 }, { "epoch": 10.21, - "learning_rate": 6.3830728028258895e-06, - "loss": 0.0064, + "learning_rate": 9.574609204238833e-06, + "loss": 0.0106, "step": 677500 }, { "epoch": 10.22, - "learning_rate": 6.3730234100604475e-06, - "loss": 0.0065, + "learning_rate": 9.55953511509067e-06, + "loss": 0.0087, "step": 678000 }, { "epoch": 10.23, - "learning_rate": 6.362974017295005e-06, - "loss": 0.0068, + "learning_rate": 9.544461025942506e-06, + "loss": 0.0107, "step": 678500 }, { "epoch": 10.24, - "learning_rate": 6.352924624529563e-06, - "loss": 0.0069, + "learning_rate": 9.529386936794344e-06, + "loss": 0.0091, "step": 679000 }, { "epoch": 10.24, - "learning_rate": 6.342875231764121e-06, - "loss": 0.0069, + "learning_rate": 9.51431284764618e-06, + "loss": 0.0097, "step": 679500 }, { "epoch": 10.25, - "learning_rate": 6.332825838998679e-06, - "loss": 0.0063, + "learning_rate": 9.499238758498017e-06, + "loss": 0.0095, "step": 680000 }, { "epoch": 10.26, - "learning_rate": 6.322776446233237e-06, - "loss": 0.0065, + "learning_rate": 9.484164669349854e-06, + "loss": 0.0096, "step": 680500 }, { "epoch": 10.27, - "learning_rate": 6.312727053467795e-06, - "loss": 0.0067, + "learning_rate": 9.46909058020169e-06, + "loss": 0.01, "step": 681000 }, { "epoch": 10.27, - "learning_rate": 6.302677660702352e-06, - "loss": 0.0064, + "learning_rate": 9.454016491053528e-06, + "loss": 0.0097, "step": 681500 }, { "epoch": 10.28, - "learning_rate": 6.29262826793691e-06, - "loss": 0.0065, + "learning_rate": 9.438942401905365e-06, + "loss": 0.0091, "step": 682000 }, { "epoch": 10.29, - "learning_rate": 6.282578875171468e-06, - "loss": 0.0068, + "learning_rate": 9.4238683127572e-06, + "loss": 0.01, "step": 682500 }, { "epoch": 10.3, - "learning_rate": 6.272529482406026e-06, - "loss": 0.0058, + "learning_rate": 9.408794223609038e-06, + "loss": 0.0085, "step": 683000 }, { "epoch": 10.3, - "learning_rate": 6.262480089640584e-06, - "loss": 0.007, + "learning_rate": 9.393720134460874e-06, + "loss": 0.0099, "step": 683500 }, { "epoch": 10.31, - "learning_rate": 6.252430696875142e-06, - "loss": 0.0064, + "learning_rate": 9.378646045312711e-06, + "loss": 0.0094, "step": 684000 }, { "epoch": 10.32, - "learning_rate": 6.242381304109699e-06, - "loss": 0.0061, + "learning_rate": 9.363571956164549e-06, + "loss": 0.0082, "step": 684500 }, { "epoch": 10.33, - "learning_rate": 6.232331911344257e-06, - "loss": 0.0065, + "learning_rate": 9.348497867016385e-06, + "loss": 0.0099, "step": 685000 }, { "epoch": 10.33, - "learning_rate": 6.222282518578815e-06, - "loss": 0.007, + "learning_rate": 9.333423777868222e-06, + "loss": 0.0096, "step": 685500 }, { "epoch": 10.34, - "learning_rate": 6.212233125813373e-06, - "loss": 0.0065, + "learning_rate": 9.31834968872006e-06, + "loss": 0.0107, "step": 686000 }, { "epoch": 10.35, - "learning_rate": 6.202183733047931e-06, - "loss": 0.0061, + "learning_rate": 9.303275599571895e-06, + "loss": 0.0095, "step": 686500 }, { "epoch": 10.36, - "learning_rate": 6.192134340282489e-06, - "loss": 0.0063, + "learning_rate": 9.288201510423733e-06, + "loss": 0.01, "step": 687000 }, { "epoch": 10.36, - "learning_rate": 6.182084947517046e-06, - "loss": 0.0063, + "learning_rate": 9.273127421275568e-06, + "loss": 0.009, "step": 687500 }, { "epoch": 10.37, - "learning_rate": 6.172035554751604e-06, - "loss": 0.0067, + "learning_rate": 9.258053332127406e-06, + "loss": 0.0089, "step": 688000 }, { "epoch": 10.38, - "learning_rate": 6.161986161986162e-06, - "loss": 0.0066, + "learning_rate": 9.242979242979243e-06, + "loss": 0.01, "step": 688500 }, { "epoch": 10.39, - "learning_rate": 6.15193676922072e-06, - "loss": 0.0066, + "learning_rate": 9.227905153831079e-06, + "loss": 0.0091, "step": 689000 }, { "epoch": 10.39, - "learning_rate": 6.141887376455278e-06, - "loss": 0.006, + "learning_rate": 9.212831064682916e-06, + "loss": 0.0094, "step": 689500 }, { "epoch": 10.4, - "learning_rate": 6.131837983689836e-06, - "loss": 0.0066, + "learning_rate": 9.197756975534754e-06, + "loss": 0.0097, "step": 690000 }, { "epoch": 10.41, - "learning_rate": 6.121788590924393e-06, - "loss": 0.0065, + "learning_rate": 9.18268288638659e-06, + "loss": 0.0096, "step": 690500 }, { "epoch": 10.42, - "learning_rate": 6.111739198158951e-06, - "loss": 0.0072, + "learning_rate": 9.167608797238427e-06, + "loss": 0.0103, "step": 691000 }, { "epoch": 10.42, - "learning_rate": 6.101689805393509e-06, - "loss": 0.0066, + "learning_rate": 9.152534708090263e-06, + "loss": 0.0096, "step": 691500 }, { "epoch": 10.43, - "learning_rate": 6.091640412628067e-06, - "loss": 0.0064, + "learning_rate": 9.1374606189421e-06, + "loss": 0.0093, "step": 692000 }, { "epoch": 10.44, - "learning_rate": 6.081591019862625e-06, - "loss": 0.0069, + "learning_rate": 9.122386529793937e-06, + "loss": 0.0098, "step": 692500 }, { "epoch": 10.45, - "learning_rate": 6.071541627097182e-06, - "loss": 0.0072, + "learning_rate": 9.107312440645773e-06, + "loss": 0.0102, "step": 693000 }, { "epoch": 10.45, - "learning_rate": 6.06149223433174e-06, - "loss": 0.0072, + "learning_rate": 9.09223835149761e-06, + "loss": 0.0098, "step": 693500 }, { "epoch": 10.46, - "learning_rate": 6.051442841566298e-06, - "loss": 0.0068, + "learning_rate": 9.077164262349448e-06, + "loss": 0.0097, "step": 694000 }, { "epoch": 10.47, - "learning_rate": 6.041393448800856e-06, - "loss": 0.007, + "learning_rate": 9.062090173201284e-06, + "loss": 0.0103, "step": 694500 }, { "epoch": 10.48, - "learning_rate": 6.031344056035414e-06, - "loss": 0.0072, + "learning_rate": 9.047016084053121e-06, + "loss": 0.0099, "step": 695000 }, { "epoch": 10.48, - "learning_rate": 6.021294663269972e-06, - "loss": 0.0065, + "learning_rate": 9.031941994904957e-06, + "loss": 0.0092, "step": 695500 }, { "epoch": 10.49, - "learning_rate": 6.0112452705045295e-06, - "loss": 0.0062, + "learning_rate": 9.016867905756794e-06, + "loss": 0.0093, "step": 696000 }, { "epoch": 10.5, - "learning_rate": 6.0011958777390875e-06, - "loss": 0.0061, + "learning_rate": 9.001793816608632e-06, + "loss": 0.0094, "step": 696500 }, { "epoch": 10.51, - "learning_rate": 5.9911464849736455e-06, - "loss": 0.0071, + "learning_rate": 8.986719727460467e-06, + "loss": 0.0097, "step": 697000 }, { "epoch": 10.51, - "learning_rate": 5.9810970922082035e-06, - "loss": 0.007, + "learning_rate": 8.971645638312305e-06, + "loss": 0.0092, "step": 697500 }, { "epoch": 10.52, - "learning_rate": 5.9710476994427615e-06, - "loss": 0.0075, + "learning_rate": 8.956571549164142e-06, + "loss": 0.0107, "step": 698000 }, { "epoch": 10.53, - "learning_rate": 5.9609983066773195e-06, - "loss": 0.0066, + "learning_rate": 8.941497460015978e-06, + "loss": 0.01, "step": 698500 }, { "epoch": 10.54, - "learning_rate": 5.950948913911877e-06, - "loss": 0.0071, + "learning_rate": 8.926423370867815e-06, + "loss": 0.0098, "step": 699000 }, { "epoch": 10.54, - "learning_rate": 5.940899521146435e-06, - "loss": 0.006, + "learning_rate": 8.911349281719651e-06, + "loss": 0.0093, "step": 699500 }, { "epoch": 10.55, - "learning_rate": 5.930850128380993e-06, - "loss": 0.0071, + "learning_rate": 8.896275192571489e-06, + "loss": 0.0102, "step": 700000 }, { "epoch": 10.56, - "learning_rate": 5.920800735615551e-06, - "loss": 0.0059, + "learning_rate": 8.881201103423326e-06, + "loss": 0.0098, "step": 700500 }, { "epoch": 10.57, - "learning_rate": 5.910751342850109e-06, - "loss": 0.0067, + "learning_rate": 8.866127014275162e-06, + "loss": 0.0106, "step": 701000 }, { "epoch": 10.57, - "learning_rate": 5.900701950084667e-06, - "loss": 0.0068, + "learning_rate": 8.851052925126999e-06, + "loss": 0.0095, "step": 701500 }, { "epoch": 10.58, - "learning_rate": 5.890652557319224e-06, - "loss": 0.0073, + "learning_rate": 8.835978835978837e-06, + "loss": 0.0105, "step": 702000 }, { "epoch": 10.59, - "learning_rate": 5.880603164553782e-06, - "loss": 0.0072, + "learning_rate": 8.820904746830672e-06, + "loss": 0.0105, "step": 702500 }, { "epoch": 10.6, - "learning_rate": 5.87055377178834e-06, - "loss": 0.007, + "learning_rate": 8.80583065768251e-06, + "loss": 0.0103, "step": 703000 }, { "epoch": 10.6, - "learning_rate": 5.860504379022898e-06, - "loss": 0.0064, + "learning_rate": 8.790756568534345e-06, + "loss": 0.0101, "step": 703500 }, { "epoch": 10.61, - "learning_rate": 5.850454986257456e-06, - "loss": 0.0065, + "learning_rate": 8.775682479386183e-06, + "loss": 0.0095, "step": 704000 }, { "epoch": 10.62, - "learning_rate": 5.840405593492014e-06, - "loss": 0.0071, + "learning_rate": 8.76060839023802e-06, + "loss": 0.0103, "step": 704500 }, { "epoch": 10.63, - "learning_rate": 5.830356200726571e-06, - "loss": 0.0067, + "learning_rate": 8.745534301089856e-06, + "loss": 0.0102, "step": 705000 }, { "epoch": 10.63, - "learning_rate": 5.820306807961129e-06, - "loss": 0.0072, + "learning_rate": 8.730460211941693e-06, + "loss": 0.0102, "step": 705500 }, { "epoch": 10.64, - "learning_rate": 5.810257415195687e-06, - "loss": 0.0067, + "learning_rate": 8.715386122793529e-06, + "loss": 0.01, "step": 706000 }, { "epoch": 10.65, - "learning_rate": 5.800208022430245e-06, - "loss": 0.0068, + "learning_rate": 8.700312033645367e-06, + "loss": 0.0098, "step": 706500 }, { "epoch": 10.66, - "learning_rate": 5.790158629664803e-06, - "loss": 0.0063, + "learning_rate": 8.685237944497204e-06, + "loss": 0.0091, "step": 707000 }, { "epoch": 10.66, - "learning_rate": 5.780109236899361e-06, - "loss": 0.0069, + "learning_rate": 8.67016385534904e-06, + "loss": 0.0106, "step": 707500 }, { "epoch": 10.67, - "learning_rate": 5.770059844133918e-06, - "loss": 0.0066, + "learning_rate": 8.655089766200877e-06, + "loss": 0.0102, "step": 708000 }, { "epoch": 10.68, - "learning_rate": 5.760010451368476e-06, - "loss": 0.0063, + "learning_rate": 8.640015677052715e-06, + "loss": 0.0094, "step": 708500 }, { "epoch": 10.69, - "learning_rate": 5.749961058603034e-06, - "loss": 0.007, + "learning_rate": 8.62494158790455e-06, + "loss": 0.0101, "step": 709000 }, { "epoch": 10.7, - "learning_rate": 5.739911665837592e-06, - "loss": 0.0066, + "learning_rate": 8.609867498756388e-06, + "loss": 0.0099, "step": 709500 }, { "epoch": 10.7, - "learning_rate": 5.72986227307215e-06, - "loss": 0.0067, + "learning_rate": 8.594793409608223e-06, + "loss": 0.0095, "step": 710000 }, { "epoch": 10.71, - "learning_rate": 5.719812880306708e-06, - "loss": 0.0073, + "learning_rate": 8.57971932046006e-06, + "loss": 0.0098, "step": 710500 }, { "epoch": 10.72, - "learning_rate": 5.709763487541265e-06, - "loss": 0.0065, + "learning_rate": 8.564645231311898e-06, + "loss": 0.0094, "step": 711000 }, { "epoch": 10.73, - "learning_rate": 5.699714094775823e-06, - "loss": 0.0066, + "learning_rate": 8.549571142163734e-06, + "loss": 0.0097, "step": 711500 }, { "epoch": 10.73, - "learning_rate": 5.689664702010381e-06, - "loss": 0.0062, + "learning_rate": 8.534497053015571e-06, + "loss": 0.0099, "step": 712000 }, { "epoch": 10.74, - "learning_rate": 5.679615309244939e-06, - "loss": 0.0068, + "learning_rate": 8.519422963867409e-06, + "loss": 0.0104, "step": 712500 }, { "epoch": 10.75, - "learning_rate": 5.669565916479497e-06, - "loss": 0.0058, + "learning_rate": 8.504348874719245e-06, + "loss": 0.0086, "step": 713000 }, { "epoch": 10.76, - "learning_rate": 5.659516523714054e-06, - "loss": 0.0066, + "learning_rate": 8.489274785571082e-06, + "loss": 0.0099, "step": 713500 }, { "epoch": 10.76, - "learning_rate": 5.649467130948612e-06, - "loss": 0.0068, + "learning_rate": 8.474200696422918e-06, + "loss": 0.0101, "step": 714000 }, { "epoch": 10.77, - "learning_rate": 5.63941773818317e-06, - "loss": 0.007, + "learning_rate": 8.459126607274755e-06, + "loss": 0.0099, "step": 714500 }, { "epoch": 10.78, - "learning_rate": 5.629368345417728e-06, - "loss": 0.0073, + "learning_rate": 8.444052518126593e-06, + "loss": 0.0097, "step": 715000 }, { "epoch": 10.79, - "learning_rate": 5.619318952652286e-06, - "loss": 0.0069, + "learning_rate": 8.428978428978428e-06, + "loss": 0.0093, "step": 715500 }, { "epoch": 10.79, - "learning_rate": 5.609269559886844e-06, - "loss": 0.0066, + "learning_rate": 8.413904339830266e-06, + "loss": 0.0105, "step": 716000 }, { "epoch": 10.8, - "learning_rate": 5.5992201671214015e-06, - "loss": 0.0063, + "learning_rate": 8.398830250682103e-06, + "loss": 0.0086, "step": 716500 }, { "epoch": 10.81, - "learning_rate": 5.5891707743559595e-06, - "loss": 0.0075, + "learning_rate": 8.383756161533939e-06, + "loss": 0.0105, "step": 717000 }, { "epoch": 10.82, - "learning_rate": 5.5791213815905175e-06, - "loss": 0.0069, + "learning_rate": 8.368682072385776e-06, + "loss": 0.0105, "step": 717500 }, { "epoch": 10.82, - "learning_rate": 5.5690719888250755e-06, - "loss": 0.0072, + "learning_rate": 8.353607983237612e-06, + "loss": 0.0094, "step": 718000 }, { "epoch": 10.83, - "learning_rate": 5.5590225960596335e-06, - "loss": 0.0067, + "learning_rate": 8.33853389408945e-06, + "loss": 0.0098, "step": 718500 }, { "epoch": 10.84, - "learning_rate": 5.5489732032941915e-06, - "loss": 0.0061, + "learning_rate": 8.323459804941287e-06, + "loss": 0.0095, "step": 719000 }, { "epoch": 10.85, - "learning_rate": 5.538923810528749e-06, - "loss": 0.0066, + "learning_rate": 8.308385715793123e-06, + "loss": 0.01, "step": 719500 }, { "epoch": 10.85, - "learning_rate": 5.528874417763307e-06, - "loss": 0.007, + "learning_rate": 8.29331162664496e-06, + "loss": 0.0091, "step": 720000 }, { "epoch": 10.86, - "learning_rate": 5.518825024997865e-06, - "loss": 0.0064, + "learning_rate": 8.278237537496797e-06, + "loss": 0.0095, "step": 720500 }, { "epoch": 10.87, - "learning_rate": 5.508775632232423e-06, - "loss": 0.0069, + "learning_rate": 8.263163448348633e-06, + "loss": 0.0107, "step": 721000 }, { "epoch": 10.88, - "learning_rate": 5.498726239466981e-06, - "loss": 0.007, + "learning_rate": 8.24808935920047e-06, + "loss": 0.011, "step": 721500 }, { "epoch": 10.88, - "learning_rate": 5.488676846701539e-06, - "loss": 0.0058, + "learning_rate": 8.233015270052306e-06, + "loss": 0.0083, "step": 722000 }, { "epoch": 10.89, - "learning_rate": 5.478627453936096e-06, - "loss": 0.0068, + "learning_rate": 8.217941180904144e-06, + "loss": 0.0102, "step": 722500 }, { "epoch": 10.9, - "learning_rate": 5.468578061170654e-06, - "loss": 0.0067, + "learning_rate": 8.202867091755981e-06, + "loss": 0.0101, "step": 723000 }, { "epoch": 10.91, - "learning_rate": 5.458528668405212e-06, - "loss": 0.0069, + "learning_rate": 8.187793002607817e-06, + "loss": 0.0105, "step": 723500 }, { "epoch": 10.91, - "learning_rate": 5.44847927563977e-06, - "loss": 0.0068, + "learning_rate": 8.172718913459654e-06, + "loss": 0.0105, "step": 724000 }, { "epoch": 10.92, - "learning_rate": 5.438429882874328e-06, - "loss": 0.0067, + "learning_rate": 8.157644824311492e-06, + "loss": 0.0101, "step": 724500 }, { "epoch": 10.93, - "learning_rate": 5.428380490108886e-06, - "loss": 0.0064, + "learning_rate": 8.142570735163327e-06, + "loss": 0.0095, "step": 725000 }, { "epoch": 10.94, - "learning_rate": 5.418331097343443e-06, - "loss": 0.0069, + "learning_rate": 8.127496646015165e-06, + "loss": 0.0101, "step": 725500 }, { "epoch": 10.94, - "learning_rate": 5.408281704578001e-06, - "loss": 0.0069, + "learning_rate": 8.112422556867e-06, + "loss": 0.0109, "step": 726000 }, { "epoch": 10.95, - "learning_rate": 5.398232311812559e-06, - "loss": 0.0071, + "learning_rate": 8.097348467718838e-06, + "loss": 0.0097, "step": 726500 }, { "epoch": 10.96, - "learning_rate": 5.388182919047117e-06, - "loss": 0.0064, + "learning_rate": 8.082274378570675e-06, + "loss": 0.0093, "step": 727000 }, { "epoch": 10.97, - "learning_rate": 5.378133526281675e-06, - "loss": 0.0072, + "learning_rate": 8.067200289422511e-06, + "loss": 0.01, "step": 727500 }, { "epoch": 10.97, - "learning_rate": 5.368084133516233e-06, - "loss": 0.0062, + "learning_rate": 8.052126200274349e-06, + "loss": 0.0089, "step": 728000 }, { "epoch": 10.98, - "learning_rate": 5.35803474075079e-06, - "loss": 0.007, + "learning_rate": 8.037052111126186e-06, + "loss": 0.0101, "step": 728500 }, { "epoch": 10.99, - "learning_rate": 5.347985347985348e-06, - "loss": 0.0072, + "learning_rate": 8.021978021978022e-06, + "loss": 0.0101, "step": 729000 }, { "epoch": 11.0, - "learning_rate": 5.337935955219906e-06, - "loss": 0.0069, + "learning_rate": 8.00690393282986e-06, + "loss": 0.0099, "step": 729500 }, { "epoch": 11.0, - "eval_accuracy": 0.9915817567371356, - "eval_f1": 0.9504698590824685, - "eval_loss": 0.045706018805503845, - "eval_precision": 0.9389368443622098, - "eval_recall": 0.9622897184852406, - "eval_runtime": 231.9373, - "eval_samples_per_second": 508.43, - "eval_steps_per_second": 31.78, + "eval_accuracy": 0.9879735586773043, + "eval_f1": 0.9489221995763095, + "eval_loss": 0.06842657923698425, + "eval_precision": 0.9366573774195978, + "eval_recall": 0.9615124805690521, + "eval_runtime": 214.3851, + "eval_samples_per_second": 550.057, + "eval_steps_per_second": 34.382, "step": 729729 }, { "epoch": 11.0, - "learning_rate": 5.327886562454464e-06, - "loss": 0.0055, + "learning_rate": 7.991829843681695e-06, + "loss": 0.0083, "step": 730000 }, { "epoch": 11.01, - "learning_rate": 5.317837169689022e-06, - "loss": 0.0059, + "learning_rate": 7.976755754533532e-06, + "loss": 0.0087, "step": 730500 }, { "epoch": 11.02, - "learning_rate": 5.30778777692358e-06, - "loss": 0.0057, + "learning_rate": 7.96168166538537e-06, + "loss": 0.0079, "step": 731000 }, { "epoch": 11.03, - "learning_rate": 5.297738384158137e-06, - "loss": 0.0052, + "learning_rate": 7.946607576237205e-06, + "loss": 0.0077, "step": 731500 }, { "epoch": 11.03, - "learning_rate": 5.287688991392695e-06, - "loss": 0.0055, + "learning_rate": 7.931533487089043e-06, + "loss": 0.0071, "step": 732000 }, { "epoch": 11.04, - "learning_rate": 5.277639598627253e-06, - "loss": 0.0058, + "learning_rate": 7.91645939794088e-06, + "loss": 0.0078, "step": 732500 }, { "epoch": 11.05, - "learning_rate": 5.267590205861811e-06, - "loss": 0.0055, + "learning_rate": 7.901385308792716e-06, + "loss": 0.0075, "step": 733000 }, { "epoch": 11.06, - "learning_rate": 5.257540813096369e-06, - "loss": 0.0054, + "learning_rate": 7.886311219644553e-06, + "loss": 0.008, "step": 733500 }, { "epoch": 11.06, - "learning_rate": 5.247491420330927e-06, - "loss": 0.0053, + "learning_rate": 7.87123713049639e-06, + "loss": 0.0079, "step": 734000 }, { "epoch": 11.07, - "learning_rate": 5.237442027565484e-06, - "loss": 0.0058, + "learning_rate": 7.856163041348227e-06, + "loss": 0.0077, "step": 734500 }, { "epoch": 11.08, - "learning_rate": 5.227392634800042e-06, - "loss": 0.0057, + "learning_rate": 7.841088952200064e-06, + "loss": 0.0082, "step": 735000 }, { "epoch": 11.09, - "learning_rate": 5.2173432420346e-06, - "loss": 0.0054, + "learning_rate": 7.8260148630519e-06, + "loss": 0.0084, "step": 735500 }, { "epoch": 11.09, - "learning_rate": 5.207293849269158e-06, - "loss": 0.0057, + "learning_rate": 7.810940773903737e-06, + "loss": 0.0078, "step": 736000 }, { "epoch": 11.1, - "learning_rate": 5.197244456503716e-06, - "loss": 0.0052, + "learning_rate": 7.795866684755573e-06, + "loss": 0.0079, "step": 736500 }, { "epoch": 11.11, - "learning_rate": 5.1871950637382736e-06, - "loss": 0.0058, + "learning_rate": 7.78079259560741e-06, + "loss": 0.0078, "step": 737000 }, { "epoch": 11.12, - "learning_rate": 5.1771456709728316e-06, - "loss": 0.0054, + "learning_rate": 7.765718506459248e-06, + "loss": 0.0079, "step": 737500 }, { "epoch": 11.12, - "learning_rate": 5.1670962782073895e-06, - "loss": 0.0052, + "learning_rate": 7.750644417311083e-06, + "loss": 0.0075, "step": 738000 }, { "epoch": 11.13, - "learning_rate": 5.1570468854419475e-06, - "loss": 0.0054, + "learning_rate": 7.735570328162921e-06, + "loss": 0.0075, "step": 738500 }, { "epoch": 11.14, - "learning_rate": 5.1469974926765055e-06, - "loss": 0.0058, + "learning_rate": 7.720496239014758e-06, + "loss": 0.0076, "step": 739000 }, { "epoch": 11.15, - "learning_rate": 5.1369480999110635e-06, - "loss": 0.0049, + "learning_rate": 7.705422149866594e-06, + "loss": 0.0085, "step": 739500 }, { "epoch": 11.15, - "learning_rate": 5.126898707145621e-06, - "loss": 0.0057, + "learning_rate": 7.690348060718431e-06, + "loss": 0.0082, "step": 740000 }, { "epoch": 11.16, - "learning_rate": 5.116849314380179e-06, - "loss": 0.0056, + "learning_rate": 7.675273971570267e-06, + "loss": 0.0072, "step": 740500 }, { "epoch": 11.17, - "learning_rate": 5.106799921614737e-06, - "loss": 0.0055, + "learning_rate": 7.660199882422105e-06, + "loss": 0.0083, "step": 741000 }, { "epoch": 11.18, - "learning_rate": 5.096750528849295e-06, - "loss": 0.0052, + "learning_rate": 7.645125793273942e-06, + "loss": 0.0073, "step": 741500 }, { "epoch": 11.18, - "learning_rate": 5.086701136083853e-06, - "loss": 0.0054, + "learning_rate": 7.630051704125778e-06, + "loss": 0.0079, "step": 742000 }, { "epoch": 11.19, - "learning_rate": 5.076651743318411e-06, - "loss": 0.0057, + "learning_rate": 7.614977614977615e-06, + "loss": 0.0077, "step": 742500 }, { "epoch": 11.2, - "learning_rate": 5.066602350552968e-06, - "loss": 0.006, + "learning_rate": 7.599903525829452e-06, + "loss": 0.0095, "step": 743000 }, { "epoch": 11.21, - "learning_rate": 5.056552957787526e-06, - "loss": 0.0059, + "learning_rate": 7.584829436681288e-06, + "loss": 0.0088, "step": 743500 }, { "epoch": 11.22, - "learning_rate": 5.046503565022084e-06, - "loss": 0.0057, + "learning_rate": 7.569755347533126e-06, + "loss": 0.0084, "step": 744000 }, { "epoch": 11.22, - "learning_rate": 5.036454172256642e-06, - "loss": 0.0058, + "learning_rate": 7.554681258384962e-06, + "loss": 0.0087, "step": 744500 }, { "epoch": 11.23, - "learning_rate": 5.0264047794912e-06, - "loss": 0.0058, + "learning_rate": 7.539607169236799e-06, + "loss": 0.0079, "step": 745000 }, { "epoch": 11.24, - "learning_rate": 5.016355386725758e-06, - "loss": 0.0066, + "learning_rate": 7.5245330800886355e-06, + "loss": 0.0093, "step": 745500 }, { "epoch": 11.25, - "learning_rate": 5.006305993960315e-06, - "loss": 0.0058, + "learning_rate": 7.509458990940473e-06, + "loss": 0.0079, "step": 746000 }, { "epoch": 11.25, - "learning_rate": 4.996256601194873e-06, - "loss": 0.0057, + "learning_rate": 7.4943849017923095e-06, + "loss": 0.0079, "step": 746500 }, { "epoch": 11.26, - "learning_rate": 4.986207208429431e-06, - "loss": 0.0061, + "learning_rate": 7.479310812644146e-06, + "loss": 0.0082, "step": 747000 }, { "epoch": 11.27, - "learning_rate": 4.976157815663989e-06, - "loss": 0.0054, + "learning_rate": 7.464236723495983e-06, + "loss": 0.0082, "step": 747500 }, { "epoch": 11.28, - "learning_rate": 4.966108422898547e-06, - "loss": 0.0056, + "learning_rate": 7.44916263434782e-06, + "loss": 0.0083, "step": 748000 }, { "epoch": 11.28, - "learning_rate": 4.956059030133105e-06, - "loss": 0.0053, + "learning_rate": 7.434088545199657e-06, + "loss": 0.008, "step": 748500 }, { "epoch": 11.29, - "learning_rate": 4.946009637367662e-06, - "loss": 0.0061, + "learning_rate": 7.419014456051493e-06, + "loss": 0.0091, "step": 749000 }, { "epoch": 11.3, - "learning_rate": 4.93596024460222e-06, - "loss": 0.006, + "learning_rate": 7.40394036690333e-06, + "loss": 0.0079, "step": 749500 }, { "epoch": 11.31, - "learning_rate": 4.925910851836778e-06, - "loss": 0.0061, + "learning_rate": 7.388866277755167e-06, + "loss": 0.0079, "step": 750000 }, { "epoch": 11.31, - "learning_rate": 4.915861459071336e-06, - "loss": 0.0051, + "learning_rate": 7.373792188607004e-06, + "loss": 0.0076, "step": 750500 }, { "epoch": 11.32, - "learning_rate": 4.905812066305894e-06, - "loss": 0.0056, + "learning_rate": 7.35871809945884e-06, + "loss": 0.0083, "step": 751000 }, { "epoch": 11.33, - "learning_rate": 4.895762673540452e-06, - "loss": 0.0056, + "learning_rate": 7.343644010310677e-06, + "loss": 0.0083, "step": 751500 }, { "epoch": 11.34, - "learning_rate": 4.885713280775009e-06, - "loss": 0.0057, + "learning_rate": 7.328569921162514e-06, + "loss": 0.0088, "step": 752000 }, { "epoch": 11.34, - "learning_rate": 4.875663888009567e-06, - "loss": 0.0057, + "learning_rate": 7.313495832014351e-06, + "loss": 0.0085, "step": 752500 }, { "epoch": 11.35, - "learning_rate": 4.865614495244125e-06, - "loss": 0.0055, + "learning_rate": 7.2984217428661875e-06, + "loss": 0.0078, "step": 753000 }, { "epoch": 11.36, - "learning_rate": 4.855565102478683e-06, - "loss": 0.0065, + "learning_rate": 7.283347653718024e-06, + "loss": 0.009, "step": 753500 }, { "epoch": 11.37, - "learning_rate": 4.845515709713241e-06, - "loss": 0.006, + "learning_rate": 7.2682735645698615e-06, + "loss": 0.0084, "step": 754000 }, { "epoch": 11.37, - "learning_rate": 4.835466316947799e-06, - "loss": 0.0053, + "learning_rate": 7.253199475421698e-06, + "loss": 0.0077, "step": 754500 }, { "epoch": 11.38, - "learning_rate": 4.8254169241823564e-06, - "loss": 0.0056, + "learning_rate": 7.238125386273535e-06, + "loss": 0.0082, "step": 755000 }, { "epoch": 11.39, - "learning_rate": 4.8153675314169144e-06, - "loss": 0.0062, + "learning_rate": 7.223051297125371e-06, + "loss": 0.0089, "step": 755500 }, { "epoch": 11.4, - "learning_rate": 4.8053181386514724e-06, - "loss": 0.0055, + "learning_rate": 7.207977207977209e-06, + "loss": 0.0083, "step": 756000 }, { "epoch": 11.4, - "learning_rate": 4.7952687458860304e-06, - "loss": 0.0055, + "learning_rate": 7.192903118829045e-06, + "loss": 0.0077, "step": 756500 }, { "epoch": 11.41, - "learning_rate": 4.7852193531205884e-06, - "loss": 0.0063, + "learning_rate": 7.177829029680882e-06, + "loss": 0.0089, "step": 757000 }, { "epoch": 11.42, - "learning_rate": 4.7751699603551464e-06, - "loss": 0.0057, + "learning_rate": 7.162754940532718e-06, + "loss": 0.0083, "step": 757500 }, { "epoch": 11.43, - "learning_rate": 4.765120567589704e-06, - "loss": 0.006, + "learning_rate": 7.147680851384556e-06, + "loss": 0.0082, "step": 758000 }, { "epoch": 11.43, - "learning_rate": 4.755071174824262e-06, - "loss": 0.0054, + "learning_rate": 7.132606762236392e-06, + "loss": 0.0076, "step": 758500 }, { "epoch": 11.44, - "learning_rate": 4.7450217820588196e-06, - "loss": 0.006, + "learning_rate": 7.117532673088229e-06, + "loss": 0.0091, "step": 759000 }, { "epoch": 11.45, - "learning_rate": 4.7349723892933776e-06, - "loss": 0.0062, + "learning_rate": 7.1024585839400655e-06, + "loss": 0.0091, "step": 759500 }, { "epoch": 11.46, - "learning_rate": 4.7249229965279356e-06, - "loss": 0.0058, + "learning_rate": 7.087384494791903e-06, + "loss": 0.0086, "step": 760000 }, { "epoch": 11.46, - "learning_rate": 4.714873603762493e-06, - "loss": 0.0066, + "learning_rate": 7.0723104056437395e-06, + "loss": 0.0082, "step": 760500 }, { "epoch": 11.47, - "learning_rate": 4.704824210997051e-06, - "loss": 0.0062, + "learning_rate": 7.057236316495576e-06, + "loss": 0.0085, "step": 761000 }, { "epoch": 11.48, - "learning_rate": 4.694774818231609e-06, - "loss": 0.0061, + "learning_rate": 7.042162227347413e-06, + "loss": 0.0093, "step": 761500 }, { "epoch": 11.49, - "learning_rate": 4.684725425466167e-06, - "loss": 0.0055, + "learning_rate": 7.02708813819925e-06, + "loss": 0.0082, "step": 762000 }, { "epoch": 11.49, - "learning_rate": 4.674676032700725e-06, - "loss": 0.0057, + "learning_rate": 7.012014049051087e-06, + "loss": 0.0081, "step": 762500 }, { "epoch": 11.5, - "learning_rate": 4.664626639935283e-06, - "loss": 0.0055, + "learning_rate": 6.996939959902923e-06, + "loss": 0.0088, "step": 763000 }, { "epoch": 11.51, - "learning_rate": 4.65457724716984e-06, - "loss": 0.0065, + "learning_rate": 6.98186587075476e-06, + "loss": 0.0084, "step": 763500 }, { "epoch": 11.52, - "learning_rate": 4.644527854404398e-06, - "loss": 0.0055, + "learning_rate": 6.966791781606596e-06, + "loss": 0.0084, "step": 764000 }, { "epoch": 11.52, - "learning_rate": 4.634478461638956e-06, - "loss": 0.0059, + "learning_rate": 6.951717692458434e-06, + "loss": 0.0083, "step": 764500 }, { "epoch": 11.53, - "learning_rate": 4.624429068873514e-06, - "loss": 0.0053, + "learning_rate": 6.93664360331027e-06, + "loss": 0.0081, "step": 765000 }, { "epoch": 11.54, - "learning_rate": 4.614379676108072e-06, - "loss": 0.006, + "learning_rate": 6.921569514162107e-06, + "loss": 0.0086, "step": 765500 }, { "epoch": 11.55, - "learning_rate": 4.60433028334263e-06, - "loss": 0.0056, + "learning_rate": 6.9064954250139435e-06, + "loss": 0.0079, "step": 766000 }, { "epoch": 11.55, - "learning_rate": 4.594280890577187e-06, - "loss": 0.0056, + "learning_rate": 6.891421335865781e-06, + "loss": 0.0077, "step": 766500 }, { "epoch": 11.56, - "learning_rate": 4.584231497811745e-06, - "loss": 0.0058, + "learning_rate": 6.8763472467176175e-06, + "loss": 0.0077, "step": 767000 }, { "epoch": 11.57, - "learning_rate": 4.574182105046303e-06, - "loss": 0.0054, + "learning_rate": 6.861273157569454e-06, + "loss": 0.0083, "step": 767500 }, { "epoch": 11.58, - "learning_rate": 4.564132712280861e-06, - "loss": 0.0057, + "learning_rate": 6.846199068421291e-06, + "loss": 0.0086, "step": 768000 }, { "epoch": 11.58, - "learning_rate": 4.554083319515419e-06, - "loss": 0.0056, + "learning_rate": 6.831124979273128e-06, + "loss": 0.008, "step": 768500 }, { "epoch": 11.59, - "learning_rate": 4.544033926749977e-06, - "loss": 0.0062, + "learning_rate": 6.816050890124965e-06, + "loss": 0.009, "step": 769000 }, { "epoch": 11.6, - "learning_rate": 4.533984533984534e-06, - "loss": 0.0054, + "learning_rate": 6.800976800976801e-06, + "loss": 0.0089, "step": 769500 }, { "epoch": 11.61, - "learning_rate": 4.523935141219092e-06, - "loss": 0.0059, + "learning_rate": 6.785902711828638e-06, + "loss": 0.0079, "step": 770000 }, { "epoch": 11.61, - "learning_rate": 4.51388574845365e-06, - "loss": 0.0059, + "learning_rate": 6.770828622680475e-06, + "loss": 0.009, "step": 770500 }, { "epoch": 11.62, - "learning_rate": 4.503836355688208e-06, - "loss": 0.0054, + "learning_rate": 6.755754533532312e-06, + "loss": 0.0078, "step": 771000 }, { "epoch": 11.63, - "learning_rate": 4.493786962922766e-06, - "loss": 0.0055, + "learning_rate": 6.740680444384148e-06, + "loss": 0.0082, "step": 771500 }, { "epoch": 11.64, - "learning_rate": 4.483737570157324e-06, - "loss": 0.0059, + "learning_rate": 6.725606355235985e-06, + "loss": 0.0087, "step": 772000 }, { "epoch": 11.64, - "learning_rate": 4.473688177391881e-06, - "loss": 0.0058, + "learning_rate": 6.710532266087822e-06, + "loss": 0.0078, "step": 772500 }, { "epoch": 11.65, - "learning_rate": 4.463638784626439e-06, - "loss": 0.0051, + "learning_rate": 6.695458176939659e-06, + "loss": 0.0084, "step": 773000 }, { "epoch": 11.66, - "learning_rate": 4.453589391860997e-06, - "loss": 0.0056, + "learning_rate": 6.6803840877914955e-06, + "loss": 0.0087, "step": 773500 }, { "epoch": 11.67, - "learning_rate": 4.443539999095555e-06, - "loss": 0.0057, + "learning_rate": 6.665309998643332e-06, + "loss": 0.009, "step": 774000 }, { "epoch": 11.67, - "learning_rate": 4.433490606330113e-06, - "loss": 0.0058, + "learning_rate": 6.6502359094951695e-06, + "loss": 0.0086, "step": 774500 }, { "epoch": 11.68, - "learning_rate": 4.423441213564671e-06, - "loss": 0.0056, + "learning_rate": 6.635161820347006e-06, + "loss": 0.0084, "step": 775000 }, { "epoch": 11.69, - "learning_rate": 4.4133918207992285e-06, - "loss": 0.0057, + "learning_rate": 6.620087731198843e-06, + "loss": 0.0088, "step": 775500 }, { "epoch": 11.7, - "learning_rate": 4.4033424280337865e-06, - "loss": 0.0064, + "learning_rate": 6.605013642050679e-06, + "loss": 0.0088, "step": 776000 }, { "epoch": 11.71, - "learning_rate": 4.3932930352683445e-06, - "loss": 0.0065, + "learning_rate": 6.589939552902517e-06, + "loss": 0.0091, "step": 776500 }, { "epoch": 11.71, - "learning_rate": 4.3832436425029025e-06, - "loss": 0.0062, + "learning_rate": 6.574865463754353e-06, + "loss": 0.0087, "step": 777000 }, { "epoch": 11.72, - "learning_rate": 4.3731942497374605e-06, - "loss": 0.0056, + "learning_rate": 6.55979137460619e-06, + "loss": 0.0082, "step": 777500 }, { "epoch": 11.73, - "learning_rate": 4.3631448569720185e-06, - "loss": 0.0059, + "learning_rate": 6.544717285458026e-06, + "loss": 0.008, "step": 778000 }, { "epoch": 11.74, - "learning_rate": 4.353095464206576e-06, - "loss": 0.0064, + "learning_rate": 6.529643196309864e-06, + "loss": 0.0079, "step": 778500 }, { "epoch": 11.74, - "learning_rate": 4.343046071441134e-06, - "loss": 0.0056, + "learning_rate": 6.5145691071617e-06, + "loss": 0.0076, "step": 779000 }, { "epoch": 11.75, - "learning_rate": 4.332996678675692e-06, - "loss": 0.0061, + "learning_rate": 6.499495018013537e-06, + "loss": 0.0078, "step": 779500 }, { "epoch": 11.76, - "learning_rate": 4.32294728591025e-06, - "loss": 0.0058, + "learning_rate": 6.4844209288653736e-06, + "loss": 0.0079, "step": 780000 }, { "epoch": 11.77, - "learning_rate": 4.312897893144808e-06, - "loss": 0.0064, + "learning_rate": 6.469346839717211e-06, + "loss": 0.009, "step": 780500 }, { "epoch": 11.77, - "learning_rate": 4.302848500379366e-06, - "loss": 0.0057, + "learning_rate": 6.4542727505690476e-06, + "loss": 0.0077, "step": 781000 }, { "epoch": 11.78, - "learning_rate": 4.292799107613923e-06, - "loss": 0.0063, + "learning_rate": 6.439198661420884e-06, + "loss": 0.0092, "step": 781500 }, { "epoch": 11.79, - "learning_rate": 4.282749714848481e-06, - "loss": 0.005, + "learning_rate": 6.424124572272721e-06, + "loss": 0.0077, "step": 782000 }, { "epoch": 11.8, - "learning_rate": 4.272700322083039e-06, - "loss": 0.0058, + "learning_rate": 6.409050483124558e-06, + "loss": 0.0082, "step": 782500 }, { "epoch": 11.8, - "learning_rate": 4.262650929317597e-06, - "loss": 0.0058, + "learning_rate": 6.393976393976395e-06, + "loss": 0.0089, "step": 783000 }, { "epoch": 11.81, - "learning_rate": 4.252601536552154e-06, - "loss": 0.0061, + "learning_rate": 6.3789023048282304e-06, + "loss": 0.0087, "step": 783500 }, { "epoch": 11.82, - "learning_rate": 4.242552143786712e-06, - "loss": 0.0058, + "learning_rate": 6.363828215680067e-06, + "loss": 0.0087, "step": 784000 }, { "epoch": 11.83, - "learning_rate": 4.23250275102127e-06, - "loss": 0.006, + "learning_rate": 6.3487541265319044e-06, + "loss": 0.0083, "step": 784500 }, { "epoch": 11.83, - "learning_rate": 4.222453358255827e-06, - "loss": 0.0062, + "learning_rate": 6.333680037383741e-06, + "loss": 0.0083, "step": 785000 }, { "epoch": 11.84, - "learning_rate": 4.212403965490385e-06, - "loss": 0.0052, + "learning_rate": 6.3186059482355776e-06, + "loss": 0.0077, "step": 785500 }, { "epoch": 11.85, - "learning_rate": 4.202354572724943e-06, - "loss": 0.0055, + "learning_rate": 6.303531859087414e-06, + "loss": 0.0089, "step": 786000 }, { "epoch": 11.86, - "learning_rate": 4.192305179959501e-06, - "loss": 0.0058, + "learning_rate": 6.2884577699392516e-06, + "loss": 0.0078, "step": 786500 }, { "epoch": 11.86, - "learning_rate": 4.182255787194059e-06, - "loss": 0.0055, + "learning_rate": 6.273383680791088e-06, + "loss": 0.008, "step": 787000 }, { "epoch": 11.87, - "learning_rate": 4.172206394428617e-06, - "loss": 0.0055, + "learning_rate": 6.258309591642925e-06, + "loss": 0.0088, "step": 787500 }, { "epoch": 11.88, - "learning_rate": 4.162157001663174e-06, - "loss": 0.0051, + "learning_rate": 6.243235502494761e-06, + "loss": 0.0076, "step": 788000 }, { "epoch": 11.89, - "learning_rate": 4.152107608897732e-06, - "loss": 0.0058, + "learning_rate": 6.228161413346599e-06, + "loss": 0.0084, "step": 788500 }, { "epoch": 11.89, - "learning_rate": 4.14205821613229e-06, - "loss": 0.006, + "learning_rate": 6.213087324198435e-06, + "loss": 0.0086, "step": 789000 }, { "epoch": 11.9, - "learning_rate": 4.132008823366848e-06, - "loss": 0.0059, + "learning_rate": 6.198013235050272e-06, + "loss": 0.0094, "step": 789500 }, { "epoch": 11.91, - "learning_rate": 4.121959430601406e-06, - "loss": 0.006, + "learning_rate": 6.1829391459021084e-06, + "loss": 0.0085, "step": 790000 }, { "epoch": 11.92, - "learning_rate": 4.111910037835964e-06, - "loss": 0.0059, + "learning_rate": 6.167865056753946e-06, + "loss": 0.0088, "step": 790500 }, { "epoch": 11.92, - "learning_rate": 4.101860645070521e-06, - "loss": 0.0062, + "learning_rate": 6.1527909676057824e-06, + "loss": 0.0088, "step": 791000 }, { "epoch": 11.93, - "learning_rate": 4.091811252305079e-06, - "loss": 0.0062, + "learning_rate": 6.137716878457619e-06, + "loss": 0.0082, "step": 791500 }, { "epoch": 11.94, - "learning_rate": 4.081761859539637e-06, - "loss": 0.0053, + "learning_rate": 6.122642789309456e-06, + "loss": 0.0078, "step": 792000 }, { "epoch": 11.95, - "learning_rate": 4.071712466774195e-06, - "loss": 0.0056, + "learning_rate": 6.107568700161293e-06, + "loss": 0.0084, "step": 792500 }, { "epoch": 11.95, - "learning_rate": 4.061663074008753e-06, - "loss": 0.0061, + "learning_rate": 6.09249461101313e-06, + "loss": 0.0089, "step": 793000 }, { "epoch": 11.96, - "learning_rate": 4.051613681243311e-06, - "loss": 0.0058, + "learning_rate": 6.077420521864966e-06, + "loss": 0.0079, "step": 793500 }, { "epoch": 11.97, - "learning_rate": 4.0415642884778685e-06, - "loss": 0.0059, + "learning_rate": 6.062346432716803e-06, + "loss": 0.0084, "step": 794000 }, { "epoch": 11.98, - "learning_rate": 4.0315148957124265e-06, - "loss": 0.0058, + "learning_rate": 6.047272343568639e-06, + "loss": 0.0087, "step": 794500 }, { "epoch": 11.98, - "learning_rate": 4.0214655029469845e-06, - "loss": 0.0058, + "learning_rate": 6.032198254420477e-06, + "loss": 0.0084, "step": 795000 }, { "epoch": 11.99, - "learning_rate": 4.0114161101815425e-06, - "loss": 0.0059, + "learning_rate": 6.017124165272313e-06, + "loss": 0.0084, "step": 795500 }, { "epoch": 12.0, - "learning_rate": 4.0013667174161005e-06, - "loss": 0.006, + "learning_rate": 6.00205007612415e-06, + "loss": 0.009, "step": 796000 }, { "epoch": 12.0, - "eval_accuracy": 0.9914476764868161, - "eval_f1": 0.9513021461297323, - "eval_loss": 0.05014181137084961, - "eval_precision": 0.9396779648905509, - "eval_recall": 0.9632175208145127, - "eval_runtime": 230.8243, - "eval_samples_per_second": 510.882, - "eval_steps_per_second": 31.933, + "eval_accuracy": 0.9879435109067147, + "eval_f1": 0.9494733505974576, + "eval_loss": 0.06574396789073944, + "eval_precision": 0.9362427656485499, + "eval_recall": 0.963083233635276, + "eval_runtime": 225.9481, + "eval_samples_per_second": 521.907, + "eval_steps_per_second": 32.623, "step": 796068 }, { "epoch": 12.01, - "learning_rate": 3.9913173246506585e-06, - "loss": 0.0052, + "learning_rate": 5.9869759869759865e-06, + "loss": 0.0065, "step": 796500 }, { "epoch": 12.01, - "learning_rate": 3.981267931885216e-06, - "loss": 0.005, + "learning_rate": 5.971901897827824e-06, + "loss": 0.0072, "step": 797000 }, { "epoch": 12.02, - "learning_rate": 3.971218539119774e-06, - "loss": 0.0049, + "learning_rate": 5.9568278086796605e-06, + "loss": 0.0067, "step": 797500 }, { "epoch": 12.03, - "learning_rate": 3.961169146354332e-06, - "loss": 0.0049, + "learning_rate": 5.941753719531497e-06, + "loss": 0.0071, "step": 798000 }, { "epoch": 12.04, - "learning_rate": 3.95111975358889e-06, - "loss": 0.0049, + "learning_rate": 5.926679630383334e-06, + "loss": 0.007, "step": 798500 }, { "epoch": 12.04, - "learning_rate": 3.941070360823448e-06, - "loss": 0.0051, + "learning_rate": 5.911605541235171e-06, + "loss": 0.0073, "step": 799000 }, { "epoch": 12.05, - "learning_rate": 3.931020968058006e-06, - "loss": 0.0053, + "learning_rate": 5.896531452087008e-06, + "loss": 0.0076, "step": 799500 }, { "epoch": 12.06, - "learning_rate": 3.920971575292563e-06, - "loss": 0.0052, + "learning_rate": 5.881457362938844e-06, + "loss": 0.0075, "step": 800000 }, { "epoch": 12.07, - "learning_rate": 3.910922182527121e-06, - "loss": 0.0048, + "learning_rate": 5.866383273790681e-06, + "loss": 0.0065, "step": 800500 }, { "epoch": 12.07, - "learning_rate": 3.900872789761679e-06, - "loss": 0.0049, + "learning_rate": 5.851309184642518e-06, + "loss": 0.0072, "step": 801000 }, { "epoch": 12.08, - "learning_rate": 3.890823396996237e-06, - "loss": 0.005, + "learning_rate": 5.836235095494355e-06, + "loss": 0.0073, "step": 801500 }, { "epoch": 12.09, - "learning_rate": 3.880774004230795e-06, - "loss": 0.0049, + "learning_rate": 5.821161006346191e-06, + "loss": 0.0072, "step": 802000 }, { "epoch": 12.1, - "learning_rate": 3.870724611465353e-06, - "loss": 0.0053, + "learning_rate": 5.806086917198028e-06, + "loss": 0.0068, "step": 802500 }, { "epoch": 12.1, - "learning_rate": 3.86067521869991e-06, - "loss": 0.0048, + "learning_rate": 5.791012828049865e-06, + "loss": 0.0068, "step": 803000 }, { "epoch": 12.11, - "learning_rate": 3.850625825934468e-06, - "loss": 0.0052, + "learning_rate": 5.775938738901702e-06, + "loss": 0.0072, "step": 803500 }, { "epoch": 12.12, - "learning_rate": 3.840576433169026e-06, - "loss": 0.0053, + "learning_rate": 5.7608646497535385e-06, + "loss": 0.0081, "step": 804000 }, { "epoch": 12.13, - "learning_rate": 3.830527040403584e-06, - "loss": 0.0052, + "learning_rate": 5.745790560605375e-06, + "loss": 0.0075, "step": 804500 }, { "epoch": 12.13, - "learning_rate": 3.820477647638142e-06, - "loss": 0.0047, + "learning_rate": 5.7307164714572125e-06, + "loss": 0.0069, "step": 805000 }, { "epoch": 12.14, - "learning_rate": 3.8104282548726995e-06, - "loss": 0.0051, + "learning_rate": 5.715642382309049e-06, + "loss": 0.0073, "step": 805500 }, { "epoch": 12.15, - "learning_rate": 3.8003788621072575e-06, - "loss": 0.0051, + "learning_rate": 5.700568293160886e-06, + "loss": 0.0072, "step": 806000 }, { "epoch": 12.16, - "learning_rate": 3.790329469341815e-06, - "loss": 0.0051, + "learning_rate": 5.685494204012722e-06, + "loss": 0.007, "step": 806500 }, { "epoch": 12.16, - "learning_rate": 3.780280076576373e-06, - "loss": 0.0051, + "learning_rate": 5.67042011486456e-06, + "loss": 0.0071, "step": 807000 }, { "epoch": 12.17, - "learning_rate": 3.770230683810931e-06, - "loss": 0.0049, + "learning_rate": 5.655346025716396e-06, + "loss": 0.0071, "step": 807500 }, { "epoch": 12.18, - "learning_rate": 3.7601812910454886e-06, - "loss": 0.0054, + "learning_rate": 5.640271936568233e-06, + "loss": 0.007, "step": 808000 }, { "epoch": 12.19, - "learning_rate": 3.7501318982800466e-06, - "loss": 0.0053, + "learning_rate": 5.625197847420069e-06, + "loss": 0.0066, "step": 808500 }, { "epoch": 12.19, - "learning_rate": 3.7400825055146046e-06, - "loss": 0.0051, + "learning_rate": 5.610123758271907e-06, + "loss": 0.0073, "step": 809000 }, { "epoch": 12.2, - "learning_rate": 3.7300331127491622e-06, - "loss": 0.0048, + "learning_rate": 5.595049669123743e-06, + "loss": 0.0067, "step": 809500 }, { "epoch": 12.21, - "learning_rate": 3.7199837199837202e-06, - "loss": 0.0051, + "learning_rate": 5.57997557997558e-06, + "loss": 0.0073, "step": 810000 }, { "epoch": 12.22, - "learning_rate": 3.7099343272182782e-06, - "loss": 0.006, + "learning_rate": 5.5649014908274165e-06, + "loss": 0.0071, "step": 810500 }, { "epoch": 12.23, - "learning_rate": 3.699884934452836e-06, - "loss": 0.0047, + "learning_rate": 5.549827401679254e-06, + "loss": 0.0075, "step": 811000 }, { "epoch": 12.23, - "learning_rate": 3.689835541687394e-06, - "loss": 0.0051, + "learning_rate": 5.5347533125310905e-06, + "loss": 0.0074, "step": 811500 }, { "epoch": 12.24, - "learning_rate": 3.679786148921952e-06, - "loss": 0.0053, + "learning_rate": 5.519679223382927e-06, + "loss": 0.0069, "step": 812000 }, { "epoch": 12.25, - "learning_rate": 3.6697367561565094e-06, - "loss": 0.0049, + "learning_rate": 5.504605134234764e-06, + "loss": 0.0064, "step": 812500 }, { "epoch": 12.26, - "learning_rate": 3.6596873633910674e-06, - "loss": 0.0054, + "learning_rate": 5.489531045086601e-06, + "loss": 0.0067, "step": 813000 }, { "epoch": 12.26, - "learning_rate": 3.6496379706256254e-06, - "loss": 0.0047, + "learning_rate": 5.474456955938438e-06, + "loss": 0.0068, "step": 813500 }, { "epoch": 12.27, - "learning_rate": 3.639588577860183e-06, - "loss": 0.0046, + "learning_rate": 5.459382866790274e-06, + "loss": 0.0066, "step": 814000 }, { "epoch": 12.28, - "learning_rate": 3.629539185094741e-06, - "loss": 0.005, + "learning_rate": 5.444308777642111e-06, + "loss": 0.0069, "step": 814500 }, { "epoch": 12.29, - "learning_rate": 3.619489792329299e-06, - "loss": 0.0054, + "learning_rate": 5.429234688493948e-06, + "loss": 0.0075, "step": 815000 }, { "epoch": 12.29, - "learning_rate": 3.6094403995638565e-06, - "loss": 0.0053, + "learning_rate": 5.414160599345785e-06, + "loss": 0.0082, "step": 815500 }, { "epoch": 12.3, - "learning_rate": 3.5993910067984145e-06, - "loss": 0.0055, + "learning_rate": 5.399086510197621e-06, + "loss": 0.0078, "step": 816000 }, { "epoch": 12.31, - "learning_rate": 3.5893416140329725e-06, - "loss": 0.005, + "learning_rate": 5.384012421049458e-06, + "loss": 0.0071, "step": 816500 }, { "epoch": 12.32, - "learning_rate": 3.57929222126753e-06, - "loss": 0.0057, + "learning_rate": 5.368938331901295e-06, + "loss": 0.0076, "step": 817000 }, { "epoch": 12.32, - "learning_rate": 3.569242828502088e-06, - "loss": 0.0049, + "learning_rate": 5.353864242753132e-06, + "loss": 0.0071, "step": 817500 }, { "epoch": 12.33, - "learning_rate": 3.559193435736646e-06, - "loss": 0.005, + "learning_rate": 5.3387901536049685e-06, + "loss": 0.0069, "step": 818000 }, { "epoch": 12.34, - "learning_rate": 3.5491440429712037e-06, - "loss": 0.0051, + "learning_rate": 5.323716064456805e-06, + "loss": 0.0068, "step": 818500 }, { "epoch": 12.35, - "learning_rate": 3.5390946502057617e-06, - "loss": 0.0046, + "learning_rate": 5.3086419753086425e-06, + "loss": 0.0067, "step": 819000 }, { "epoch": 12.35, - "learning_rate": 3.5290452574403197e-06, - "loss": 0.0047, + "learning_rate": 5.293567886160479e-06, + "loss": 0.0067, "step": 819500 }, { "epoch": 12.36, - "learning_rate": 3.5189958646748772e-06, - "loss": 0.0049, + "learning_rate": 5.278493797012316e-06, + "loss": 0.0075, "step": 820000 }, { "epoch": 12.37, - "learning_rate": 3.5089464719094352e-06, - "loss": 0.0048, + "learning_rate": 5.263419707864152e-06, + "loss": 0.0067, "step": 820500 }, { "epoch": 12.38, - "learning_rate": 3.498897079143993e-06, - "loss": 0.0053, + "learning_rate": 5.24834561871599e-06, + "loss": 0.0072, "step": 821000 }, { "epoch": 12.38, - "learning_rate": 3.488847686378551e-06, - "loss": 0.005, + "learning_rate": 5.233271529567826e-06, + "loss": 0.0071, "step": 821500 }, { "epoch": 12.39, - "learning_rate": 3.478798293613109e-06, - "loss": 0.0049, + "learning_rate": 5.218197440419663e-06, + "loss": 0.0073, "step": 822000 }, { "epoch": 12.4, - "learning_rate": 3.4687489008476664e-06, - "loss": 0.0049, + "learning_rate": 5.203123351271499e-06, + "loss": 0.0068, "step": 822500 }, { "epoch": 12.41, - "learning_rate": 3.4586995080822244e-06, - "loss": 0.0052, + "learning_rate": 5.188049262123336e-06, + "loss": 0.0079, "step": 823000 }, { "epoch": 12.41, - "learning_rate": 3.4486501153167824e-06, - "loss": 0.0053, + "learning_rate": 5.172975172975173e-06, + "loss": 0.0076, "step": 823500 }, { "epoch": 12.42, - "learning_rate": 3.43860072255134e-06, - "loss": 0.005, + "learning_rate": 5.15790108382701e-06, + "loss": 0.0066, "step": 824000 }, { "epoch": 12.43, - "learning_rate": 3.428551329785898e-06, - "loss": 0.005, + "learning_rate": 5.1428269946788465e-06, + "loss": 0.0078, "step": 824500 }, { "epoch": 12.44, - "learning_rate": 3.418501937020456e-06, - "loss": 0.0051, + "learning_rate": 5.127752905530683e-06, + "loss": 0.0067, "step": 825000 }, { "epoch": 12.44, - "learning_rate": 3.4084525442550135e-06, - "loss": 0.0049, + "learning_rate": 5.1126788163825205e-06, + "loss": 0.0069, "step": 825500 }, { "epoch": 12.45, - "learning_rate": 3.3984031514895715e-06, - "loss": 0.0051, + "learning_rate": 5.097604727234357e-06, + "loss": 0.0077, "step": 826000 }, { "epoch": 12.46, - "learning_rate": 3.3883537587241295e-06, - "loss": 0.0048, + "learning_rate": 5.082530638086194e-06, + "loss": 0.0066, "step": 826500 }, { "epoch": 12.47, - "learning_rate": 3.378304365958687e-06, - "loss": 0.0055, + "learning_rate": 5.06745654893803e-06, + "loss": 0.0082, "step": 827000 }, { "epoch": 12.47, - "learning_rate": 3.368254973193245e-06, - "loss": 0.0047, + "learning_rate": 5.052382459789868e-06, + "loss": 0.0064, "step": 827500 }, { "epoch": 12.48, - "learning_rate": 3.358205580427803e-06, - "loss": 0.0055, + "learning_rate": 5.037308370641704e-06, + "loss": 0.007, "step": 828000 }, { "epoch": 12.49, - "learning_rate": 3.3481561876623607e-06, - "loss": 0.0057, + "learning_rate": 5.022234281493541e-06, + "loss": 0.0072, "step": 828500 }, { "epoch": 12.5, - "learning_rate": 3.3381067948969187e-06, - "loss": 0.0052, + "learning_rate": 5.007160192345377e-06, + "loss": 0.0076, "step": 829000 }, { "epoch": 12.5, - "learning_rate": 3.3280574021314767e-06, - "loss": 0.0049, + "learning_rate": 4.992086103197215e-06, + "loss": 0.0075, "step": 829500 }, { "epoch": 12.51, - "learning_rate": 3.3180080093660343e-06, - "loss": 0.0053, + "learning_rate": 4.977012014049051e-06, + "loss": 0.0072, "step": 830000 }, { "epoch": 12.52, - "learning_rate": 3.3079586166005922e-06, - "loss": 0.0053, + "learning_rate": 4.961937924900888e-06, + "loss": 0.0069, "step": 830500 }, { "epoch": 12.53, - "learning_rate": 3.2979092238351502e-06, - "loss": 0.0049, + "learning_rate": 4.9468638357527245e-06, + "loss": 0.0068, "step": 831000 }, { "epoch": 12.53, - "learning_rate": 3.287859831069708e-06, - "loss": 0.0053, + "learning_rate": 4.931789746604562e-06, + "loss": 0.0068, "step": 831500 }, { "epoch": 12.54, - "learning_rate": 3.277810438304266e-06, - "loss": 0.005, + "learning_rate": 4.9167156574563985e-06, + "loss": 0.0075, "step": 832000 }, { "epoch": 12.55, - "learning_rate": 3.267761045538824e-06, - "loss": 0.0054, + "learning_rate": 4.901641568308235e-06, + "loss": 0.0077, "step": 832500 }, { "epoch": 12.56, - "learning_rate": 3.2577116527733814e-06, - "loss": 0.0054, + "learning_rate": 4.886567479160072e-06, + "loss": 0.0072, "step": 833000 }, { "epoch": 12.56, - "learning_rate": 3.2476622600079394e-06, - "loss": 0.0048, + "learning_rate": 4.871493390011909e-06, + "loss": 0.0076, "step": 833500 }, { "epoch": 12.57, - "learning_rate": 3.2376128672424974e-06, - "loss": 0.0053, + "learning_rate": 4.856419300863746e-06, + "loss": 0.0067, "step": 834000 }, { "epoch": 12.58, - "learning_rate": 3.227563474477055e-06, - "loss": 0.0056, + "learning_rate": 4.841345211715582e-06, + "loss": 0.0075, "step": 834500 }, { "epoch": 12.59, - "learning_rate": 3.217514081711613e-06, - "loss": 0.005, + "learning_rate": 4.826271122567419e-06, + "loss": 0.0076, "step": 835000 }, { "epoch": 12.59, - "learning_rate": 3.207464688946171e-06, - "loss": 0.0048, + "learning_rate": 4.811197033419256e-06, + "loss": 0.0068, "step": 835500 }, { "epoch": 12.6, - "learning_rate": 3.1974152961807285e-06, - "loss": 0.0051, + "learning_rate": 4.796122944271093e-06, + "loss": 0.0071, "step": 836000 }, { "epoch": 12.61, - "learning_rate": 3.1873659034152865e-06, - "loss": 0.0048, + "learning_rate": 4.781048855122929e-06, + "loss": 0.0074, "step": 836500 }, { "epoch": 12.62, - "learning_rate": 3.1773165106498445e-06, - "loss": 0.0055, + "learning_rate": 4.765974765974766e-06, + "loss": 0.0083, "step": 837000 }, { "epoch": 12.62, - "learning_rate": 3.167267117884402e-06, - "loss": 0.0051, + "learning_rate": 4.750900676826603e-06, + "loss": 0.0069, "step": 837500 }, { "epoch": 12.63, - "learning_rate": 3.15721772511896e-06, - "loss": 0.0052, + "learning_rate": 4.73582658767844e-06, + "loss": 0.0077, "step": 838000 }, { "epoch": 12.64, - "learning_rate": 3.147168332353518e-06, - "loss": 0.0049, + "learning_rate": 4.7207524985302765e-06, + "loss": 0.0073, "step": 838500 }, { "epoch": 12.65, - "learning_rate": 3.1371189395880757e-06, - "loss": 0.0054, + "learning_rate": 4.705678409382113e-06, + "loss": 0.0068, "step": 839000 }, { "epoch": 12.65, - "learning_rate": 3.1270695468226337e-06, - "loss": 0.0055, + "learning_rate": 4.6906043202339505e-06, + "loss": 0.0075, "step": 839500 }, { "epoch": 12.66, - "learning_rate": 3.1170201540571917e-06, - "loss": 0.0048, + "learning_rate": 4.675530231085787e-06, + "loss": 0.0068, "step": 840000 }, { "epoch": 12.67, - "learning_rate": 3.1069707612917493e-06, - "loss": 0.0051, + "learning_rate": 4.660456141937624e-06, + "loss": 0.0067, "step": 840500 }, { "epoch": 12.68, - "learning_rate": 3.0969213685263073e-06, - "loss": 0.005, + "learning_rate": 4.64538205278946e-06, + "loss": 0.0067, "step": 841000 }, { "epoch": 12.68, - "learning_rate": 3.0868719757608653e-06, - "loss": 0.0049, + "learning_rate": 4.630307963641298e-06, + "loss": 0.0073, "step": 841500 }, { "epoch": 12.69, - "learning_rate": 3.076822582995423e-06, - "loss": 0.005, + "learning_rate": 4.615233874493134e-06, + "loss": 0.0068, "step": 842000 }, { "epoch": 12.7, - "learning_rate": 3.066773190229981e-06, - "loss": 0.0049, + "learning_rate": 4.600159785344971e-06, + "loss": 0.0072, "step": 842500 }, { "epoch": 12.71, - "learning_rate": 3.0567237974645384e-06, - "loss": 0.0053, + "learning_rate": 4.585085696196807e-06, + "loss": 0.0067, "step": 843000 }, { "epoch": 12.71, - "learning_rate": 3.0466744046990964e-06, - "loss": 0.0048, + "learning_rate": 4.570011607048645e-06, + "loss": 0.0063, "step": 843500 }, { "epoch": 12.72, - "learning_rate": 3.0366250119336544e-06, - "loss": 0.0054, + "learning_rate": 4.554937517900481e-06, + "loss": 0.0068, "step": 844000 }, { "epoch": 12.73, - "learning_rate": 3.026575619168212e-06, - "loss": 0.0051, + "learning_rate": 4.539863428752318e-06, + "loss": 0.008, "step": 844500 }, { "epoch": 12.74, - "learning_rate": 3.01652622640277e-06, - "loss": 0.0048, + "learning_rate": 4.5247893396041546e-06, + "loss": 0.0067, "step": 845000 }, { "epoch": 12.75, - "learning_rate": 3.006476833637328e-06, - "loss": 0.005, + "learning_rate": 4.509715250455992e-06, + "loss": 0.0075, "step": 845500 }, { "epoch": 12.75, - "learning_rate": 2.9964274408718856e-06, - "loss": 0.0054, + "learning_rate": 4.4946411613078285e-06, + "loss": 0.0071, "step": 846000 }, { "epoch": 12.76, - "learning_rate": 2.9863780481064436e-06, - "loss": 0.0053, + "learning_rate": 4.479567072159665e-06, + "loss": 0.0072, "step": 846500 }, { "epoch": 12.77, - "learning_rate": 2.9763286553410016e-06, - "loss": 0.0054, + "learning_rate": 4.464492983011502e-06, + "loss": 0.0074, "step": 847000 }, { "epoch": 12.78, - "learning_rate": 2.966279262575559e-06, - "loss": 0.0052, + "learning_rate": 4.449418893863339e-06, + "loss": 0.0067, "step": 847500 }, { "epoch": 12.78, - "learning_rate": 2.956229869810117e-06, - "loss": 0.0049, + "learning_rate": 4.434344804715176e-06, + "loss": 0.0069, "step": 848000 }, { "epoch": 12.79, - "learning_rate": 2.946180477044675e-06, - "loss": 0.0057, + "learning_rate": 4.419270715567012e-06, + "loss": 0.0073, "step": 848500 }, { "epoch": 12.8, - "learning_rate": 2.9361310842792327e-06, - "loss": 0.005, + "learning_rate": 4.404196626418849e-06, + "loss": 0.0071, "step": 849000 }, { "epoch": 12.81, - "learning_rate": 2.9260816915137907e-06, - "loss": 0.0054, + "learning_rate": 4.389122537270686e-06, + "loss": 0.0073, "step": 849500 }, { "epoch": 12.81, - "learning_rate": 2.9160322987483487e-06, - "loss": 0.0048, + "learning_rate": 4.374048448122523e-06, + "loss": 0.0069, "step": 850000 }, { "epoch": 12.82, - "learning_rate": 2.9059829059829063e-06, - "loss": 0.0052, + "learning_rate": 4.358974358974359e-06, + "loss": 0.0078, "step": 850500 }, { "epoch": 12.83, - "learning_rate": 2.8959335132174643e-06, - "loss": 0.0047, + "learning_rate": 4.343900269826196e-06, + "loss": 0.0066, "step": 851000 }, { "epoch": 12.84, - "learning_rate": 2.8858841204520223e-06, - "loss": 0.0047, + "learning_rate": 4.3288261806780326e-06, + "loss": 0.0074, "step": 851500 }, { "epoch": 12.84, - "learning_rate": 2.87583472768658e-06, - "loss": 0.0052, + "learning_rate": 4.31375209152987e-06, + "loss": 0.0075, "step": 852000 }, { "epoch": 12.85, - "learning_rate": 2.865785334921138e-06, - "loss": 0.0053, + "learning_rate": 4.2986780023817066e-06, + "loss": 0.0071, "step": 852500 }, { "epoch": 12.86, - "learning_rate": 2.855735942155696e-06, - "loss": 0.0051, + "learning_rate": 4.283603913233543e-06, + "loss": 0.0069, "step": 853000 }, { "epoch": 12.87, - "learning_rate": 2.8456865493902534e-06, - "loss": 0.005, + "learning_rate": 4.26852982408538e-06, + "loss": 0.0065, "step": 853500 }, { "epoch": 12.87, - "learning_rate": 2.8356371566248114e-06, - "loss": 0.0043, + "learning_rate": 4.253455734937217e-06, + "loss": 0.0064, "step": 854000 }, { "epoch": 12.88, - "learning_rate": 2.8255877638593694e-06, - "loss": 0.005, + "learning_rate": 4.238381645789054e-06, + "loss": 0.0068, "step": 854500 }, { "epoch": 12.89, - "learning_rate": 2.815538371093927e-06, - "loss": 0.0049, + "learning_rate": 4.22330755664089e-06, + "loss": 0.0069, "step": 855000 }, { "epoch": 12.9, - "learning_rate": 2.805488978328485e-06, - "loss": 0.0052, + "learning_rate": 4.208233467492727e-06, + "loss": 0.0072, "step": 855500 }, { "epoch": 12.9, - "learning_rate": 2.795439585563043e-06, - "loss": 0.0049, + "learning_rate": 4.193159378344564e-06, + "loss": 0.0071, "step": 856000 }, { "epoch": 12.91, - "learning_rate": 2.7853901927976006e-06, - "loss": 0.0053, + "learning_rate": 4.178085289196401e-06, + "loss": 0.0069, "step": 856500 }, { "epoch": 12.92, - "learning_rate": 2.7753408000321586e-06, - "loss": 0.0054, + "learning_rate": 4.1630112000482374e-06, + "loss": 0.0076, "step": 857000 }, { "epoch": 12.93, - "learning_rate": 2.7652914072667166e-06, - "loss": 0.0052, + "learning_rate": 4.147937110900074e-06, + "loss": 0.0082, "step": 857500 }, { "epoch": 12.93, - "learning_rate": 2.7552420145012737e-06, - "loss": 0.0051, + "learning_rate": 4.132863021751911e-06, + "loss": 0.0074, "step": 858000 }, { "epoch": 12.94, - "learning_rate": 2.7451926217358317e-06, - "loss": 0.0056, + "learning_rate": 4.117788932603747e-06, + "loss": 0.0079, "step": 858500 }, { "epoch": 12.95, - "learning_rate": 2.7351432289703893e-06, - "loss": 0.0054, + "learning_rate": 4.102714843455584e-06, + "loss": 0.0073, "step": 859000 }, { "epoch": 12.96, - "learning_rate": 2.7250938362049473e-06, - "loss": 0.0047, + "learning_rate": 4.08764075430742e-06, + "loss": 0.0066, "step": 859500 }, { "epoch": 12.96, - "learning_rate": 2.7150444434395053e-06, - "loss": 0.0051, + "learning_rate": 4.072566665159258e-06, + "loss": 0.0074, "step": 860000 }, { "epoch": 12.97, - "learning_rate": 2.704995050674063e-06, - "loss": 0.0046, + "learning_rate": 4.057492576011094e-06, + "loss": 0.0069, "step": 860500 }, { "epoch": 12.98, - "learning_rate": 2.694945657908621e-06, - "loss": 0.0052, + "learning_rate": 4.042418486862931e-06, + "loss": 0.0077, "step": 861000 }, { "epoch": 12.99, - "learning_rate": 2.684896265143179e-06, - "loss": 0.0054, + "learning_rate": 4.0273443977147675e-06, + "loss": 0.0073, "step": 861500 }, { "epoch": 12.99, - "learning_rate": 2.6748468723777364e-06, - "loss": 0.0054, + "learning_rate": 4.012270308566605e-06, + "loss": 0.0076, "step": 862000 }, { "epoch": 13.0, - "eval_accuracy": 0.9915232242420723, - "eval_f1": 0.9523242625825498, - "eval_loss": 0.05233670771121979, - "eval_precision": 0.9413598161679932, - "eval_recall": 0.963547134799912, - "eval_runtime": 231.452, - "eval_samples_per_second": 509.496, - "eval_steps_per_second": 31.847, + "eval_accuracy": 0.988018272621634, + "eval_f1": 0.9499993983080423, + "eval_loss": 0.07826410979032516, + "eval_precision": 0.9366427791751697, + "eval_recall": 0.9637424616060747, + "eval_runtime": 275.2269, + "eval_samples_per_second": 428.461, + "eval_steps_per_second": 26.782, "step": 862407 }, { "epoch": 13.0, - "learning_rate": 2.6647974796122944e-06, - "loss": 0.0047, + "learning_rate": 3.9971962194184415e-06, + "loss": 0.0065, "step": 862500 }, { "epoch": 13.01, - "learning_rate": 2.6547480868468524e-06, - "loss": 0.0043, + "learning_rate": 3.982122130270278e-06, + "loss": 0.0063, "step": 863000 }, { "epoch": 13.02, - "learning_rate": 2.64469869408141e-06, - "loss": 0.0044, + "learning_rate": 3.967048041122115e-06, + "loss": 0.0061, "step": 863500 }, { "epoch": 13.02, - "learning_rate": 2.634649301315968e-06, - "loss": 0.0046, + "learning_rate": 3.951973951973952e-06, + "loss": 0.0066, "step": 864000 }, { "epoch": 13.03, - "learning_rate": 2.624599908550526e-06, - "loss": 0.004, + "learning_rate": 3.936899862825789e-06, + "loss": 0.0057, "step": 864500 }, { "epoch": 13.04, - "learning_rate": 2.6145505157850836e-06, - "loss": 0.0041, + "learning_rate": 3.921825773677625e-06, + "loss": 0.0057, "step": 865000 }, { "epoch": 13.05, - "learning_rate": 2.6045011230196416e-06, - "loss": 0.0044, + "learning_rate": 3.906751684529462e-06, + "loss": 0.0069, "step": 865500 }, { "epoch": 13.05, - "learning_rate": 2.5944517302541996e-06, - "loss": 0.0044, + "learning_rate": 3.891677595381299e-06, + "loss": 0.0061, "step": 866000 }, { "epoch": 13.06, - "learning_rate": 2.584402337488757e-06, - "loss": 0.0048, + "learning_rate": 3.876603506233136e-06, + "loss": 0.0069, "step": 866500 }, { "epoch": 13.07, - "learning_rate": 2.574352944723315e-06, - "loss": 0.0048, + "learning_rate": 3.861529417084972e-06, + "loss": 0.0063, "step": 867000 }, { "epoch": 13.08, - "learning_rate": 2.564303551957873e-06, - "loss": 0.0049, + "learning_rate": 3.846455327936809e-06, + "loss": 0.0064, "step": 867500 }, { "epoch": 13.08, - "learning_rate": 2.5542541591924307e-06, - "loss": 0.0042, + "learning_rate": 3.831381238788646e-06, + "loss": 0.0062, "step": 868000 }, { "epoch": 13.09, - "learning_rate": 2.5442047664269887e-06, - "loss": 0.0044, + "learning_rate": 3.816307149640483e-06, + "loss": 0.0064, "step": 868500 }, { "epoch": 13.1, - "learning_rate": 2.5341553736615463e-06, - "loss": 0.0042, + "learning_rate": 3.8012330604923195e-06, + "loss": 0.0056, "step": 869000 }, { "epoch": 13.11, - "learning_rate": 2.5241059808961043e-06, - "loss": 0.0043, + "learning_rate": 3.7861589713441565e-06, + "loss": 0.0063, "step": 869500 }, { "epoch": 13.11, - "learning_rate": 2.5140565881306623e-06, - "loss": 0.0042, + "learning_rate": 3.771084882195993e-06, + "loss": 0.0064, "step": 870000 }, { "epoch": 13.12, - "learning_rate": 2.50400719536522e-06, - "loss": 0.0044, + "learning_rate": 3.75601079304783e-06, + "loss": 0.0055, "step": 870500 }, { "epoch": 13.13, - "learning_rate": 2.4939578025997783e-06, - "loss": 0.0045, + "learning_rate": 3.740936703899667e-06, + "loss": 0.0061, "step": 871000 }, { "epoch": 13.14, - "learning_rate": 2.483908409834336e-06, - "loss": 0.0042, + "learning_rate": 3.725862614751504e-06, + "loss": 0.006, "step": 871500 }, { "epoch": 13.14, - "learning_rate": 2.473859017068894e-06, - "loss": 0.0049, + "learning_rate": 3.7107885256033406e-06, + "loss": 0.0062, "step": 872000 }, { "epoch": 13.15, - "learning_rate": 2.463809624303452e-06, - "loss": 0.0044, + "learning_rate": 3.6957144364551776e-06, + "loss": 0.0061, "step": 872500 }, { "epoch": 13.16, - "learning_rate": 2.4537602315380095e-06, - "loss": 0.0043, + "learning_rate": 3.680640347307014e-06, + "loss": 0.0065, "step": 873000 }, { "epoch": 13.17, - "learning_rate": 2.4437108387725675e-06, - "loss": 0.0044, + "learning_rate": 3.665566258158851e-06, + "loss": 0.0067, "step": 873500 }, { "epoch": 13.17, - "learning_rate": 2.4336614460071255e-06, - "loss": 0.0042, + "learning_rate": 3.6504921690106878e-06, + "loss": 0.0053, "step": 874000 }, { "epoch": 13.18, - "learning_rate": 2.423612053241683e-06, - "loss": 0.0048, + "learning_rate": 3.6354180798625248e-06, + "loss": 0.0066, "step": 874500 }, { "epoch": 13.19, - "learning_rate": 2.413562660476241e-06, - "loss": 0.0046, + "learning_rate": 3.6203439907143613e-06, + "loss": 0.0065, "step": 875000 }, { "epoch": 13.2, - "learning_rate": 2.403513267710799e-06, - "loss": 0.0047, + "learning_rate": 3.6052699015661983e-06, + "loss": 0.0063, "step": 875500 }, { "epoch": 13.2, - "learning_rate": 2.3934638749453566e-06, - "loss": 0.0047, + "learning_rate": 3.590195812418035e-06, + "loss": 0.0059, "step": 876000 }, { "epoch": 13.21, - "learning_rate": 2.383414482179914e-06, - "loss": 0.0044, + "learning_rate": 3.5751217232698715e-06, + "loss": 0.0061, "step": 876500 }, { "epoch": 13.22, - "learning_rate": 2.373365089414472e-06, - "loss": 0.004, + "learning_rate": 3.560047634121708e-06, + "loss": 0.0064, "step": 877000 }, { "epoch": 13.23, - "learning_rate": 2.36331569664903e-06, - "loss": 0.0045, + "learning_rate": 3.544973544973545e-06, + "loss": 0.0059, "step": 877500 }, { "epoch": 13.24, - "learning_rate": 2.3532663038835878e-06, - "loss": 0.0042, + "learning_rate": 3.5298994558253816e-06, + "loss": 0.0062, "step": 878000 }, { "epoch": 13.24, - "learning_rate": 2.3432169111181457e-06, - "loss": 0.004, + "learning_rate": 3.5148253666772186e-06, + "loss": 0.006, "step": 878500 }, { "epoch": 13.25, - "learning_rate": 2.3331675183527037e-06, - "loss": 0.0043, + "learning_rate": 3.499751277529055e-06, + "loss": 0.0062, "step": 879000 }, { "epoch": 13.26, - "learning_rate": 2.3231181255872613e-06, - "loss": 0.0041, + "learning_rate": 3.484677188380892e-06, + "loss": 0.0057, "step": 879500 }, { "epoch": 13.27, - "learning_rate": 2.3130687328218193e-06, - "loss": 0.0045, + "learning_rate": 3.4696030992327288e-06, + "loss": 0.0069, "step": 880000 }, { "epoch": 13.27, - "learning_rate": 2.3030193400563773e-06, - "loss": 0.0048, + "learning_rate": 3.4545290100845658e-06, + "loss": 0.0066, "step": 880500 }, { "epoch": 13.28, - "learning_rate": 2.292969947290935e-06, - "loss": 0.0046, + "learning_rate": 3.4394549209364023e-06, + "loss": 0.0069, "step": 881000 }, { "epoch": 13.29, - "learning_rate": 2.282920554525493e-06, - "loss": 0.0047, + "learning_rate": 3.424380831788239e-06, + "loss": 0.0063, "step": 881500 }, { "epoch": 13.3, - "learning_rate": 2.272871161760051e-06, - "loss": 0.0046, + "learning_rate": 3.409306742640076e-06, + "loss": 0.0062, "step": 882000 }, { "epoch": 13.3, - "learning_rate": 2.2628217689946085e-06, - "loss": 0.0052, + "learning_rate": 3.3942326534919125e-06, + "loss": 0.0065, "step": 882500 }, { "epoch": 13.31, - "learning_rate": 2.2527723762291665e-06, - "loss": 0.0045, + "learning_rate": 3.3791585643437495e-06, + "loss": 0.0061, "step": 883000 }, { "epoch": 13.32, - "learning_rate": 2.2427229834637245e-06, - "loss": 0.0049, + "learning_rate": 3.364084475195586e-06, + "loss": 0.0065, "step": 883500 }, { "epoch": 13.33, - "learning_rate": 2.232673590698282e-06, - "loss": 0.0041, + "learning_rate": 3.349010386047423e-06, + "loss": 0.0064, "step": 884000 }, { "epoch": 13.33, - "learning_rate": 2.22262419793284e-06, - "loss": 0.0042, + "learning_rate": 3.3339362968992596e-06, + "loss": 0.006, "step": 884500 }, { "epoch": 13.34, - "learning_rate": 2.212574805167398e-06, - "loss": 0.0046, + "learning_rate": 3.3188622077510966e-06, + "loss": 0.0058, "step": 885000 }, { "epoch": 13.35, - "learning_rate": 2.2025254124019556e-06, - "loss": 0.0047, + "learning_rate": 3.3037881186029332e-06, + "loss": 0.0061, "step": 885500 }, { "epoch": 13.36, - "learning_rate": 2.1924760196365136e-06, - "loss": 0.0043, + "learning_rate": 3.2887140294547702e-06, + "loss": 0.0057, "step": 886000 }, { "epoch": 13.36, - "learning_rate": 2.1824266268710716e-06, - "loss": 0.0044, + "learning_rate": 3.2736399403066068e-06, + "loss": 0.0063, "step": 886500 }, { "epoch": 13.37, - "learning_rate": 2.172377234105629e-06, - "loss": 0.0049, + "learning_rate": 3.2585658511584438e-06, + "loss": 0.0065, "step": 887000 }, { "epoch": 13.38, - "learning_rate": 2.162327841340187e-06, - "loss": 0.0041, + "learning_rate": 3.2434917620102804e-06, + "loss": 0.0059, "step": 887500 }, { "epoch": 13.39, - "learning_rate": 2.152278448574745e-06, - "loss": 0.0047, + "learning_rate": 3.2284176728621174e-06, + "loss": 0.0054, "step": 888000 }, { "epoch": 13.39, - "learning_rate": 2.1422290558093028e-06, - "loss": 0.0044, + "learning_rate": 3.213343583713954e-06, + "loss": 0.0065, "step": 888500 }, { "epoch": 13.4, - "learning_rate": 2.1321796630438608e-06, - "loss": 0.0048, + "learning_rate": 3.198269494565791e-06, + "loss": 0.0059, "step": 889000 }, { "epoch": 13.41, - "learning_rate": 2.1221302702784188e-06, - "loss": 0.0039, + "learning_rate": 3.1831954054176275e-06, + "loss": 0.0056, "step": 889500 }, { "epoch": 13.42, - "learning_rate": 2.1120808775129763e-06, - "loss": 0.004, + "learning_rate": 3.1681213162694645e-06, + "loss": 0.0058, "step": 890000 }, { "epoch": 13.42, - "learning_rate": 2.1020314847475343e-06, - "loss": 0.0047, + "learning_rate": 3.153047227121301e-06, + "loss": 0.0061, "step": 890500 }, { "epoch": 13.43, - "learning_rate": 2.091982091982092e-06, - "loss": 0.0042, + "learning_rate": 3.137973137973138e-06, + "loss": 0.006, "step": 891000 }, { "epoch": 13.44, - "learning_rate": 2.08193269921665e-06, - "loss": 0.0044, + "learning_rate": 3.1228990488249747e-06, + "loss": 0.0066, "step": 891500 }, { "epoch": 13.45, - "learning_rate": 2.071883306451208e-06, - "loss": 0.0045, + "learning_rate": 3.1078249596768117e-06, + "loss": 0.0066, "step": 892000 }, { "epoch": 13.45, - "learning_rate": 2.0618339136857655e-06, - "loss": 0.0048, + "learning_rate": 3.0927508705286482e-06, + "loss": 0.0062, "step": 892500 }, { "epoch": 13.46, - "learning_rate": 2.0517845209203235e-06, - "loss": 0.0041, + "learning_rate": 3.0776767813804852e-06, + "loss": 0.0057, "step": 893000 }, { "epoch": 13.47, - "learning_rate": 2.0417351281548815e-06, - "loss": 0.005, + "learning_rate": 3.062602692232322e-06, + "loss": 0.0068, "step": 893500 }, { "epoch": 13.48, - "learning_rate": 2.031685735389439e-06, - "loss": 0.0047, + "learning_rate": 3.047528603084159e-06, + "loss": 0.0061, "step": 894000 }, { "epoch": 13.48, - "learning_rate": 2.021636342623997e-06, - "loss": 0.0045, + "learning_rate": 3.0324545139359954e-06, + "loss": 0.0061, "step": 894500 }, { "epoch": 13.49, - "learning_rate": 2.011586949858555e-06, - "loss": 0.0049, + "learning_rate": 3.0173804247878324e-06, + "loss": 0.0071, "step": 895000 }, { "epoch": 13.5, - "learning_rate": 2.0015375570931126e-06, - "loss": 0.0046, + "learning_rate": 3.002306335639669e-06, + "loss": 0.0063, "step": 895500 }, { "epoch": 13.51, - "learning_rate": 1.9914881643276706e-06, - "loss": 0.0045, + "learning_rate": 2.987232246491506e-06, + "loss": 0.0063, "step": 896000 }, { "epoch": 13.51, - "learning_rate": 1.9814387715622286e-06, - "loss": 0.0046, + "learning_rate": 2.9721581573433425e-06, + "loss": 0.0063, "step": 896500 }, { "epoch": 13.52, - "learning_rate": 1.971389378796786e-06, - "loss": 0.0046, + "learning_rate": 2.9570840681951795e-06, + "loss": 0.0065, "step": 897000 }, { "epoch": 13.53, - "learning_rate": 1.961339986031344e-06, - "loss": 0.0048, + "learning_rate": 2.942009979047016e-06, + "loss": 0.0071, "step": 897500 }, { "epoch": 13.54, - "learning_rate": 1.951290593265902e-06, - "loss": 0.0045, + "learning_rate": 2.926935889898853e-06, + "loss": 0.0064, "step": 898000 }, { "epoch": 13.54, - "learning_rate": 1.9412412005004598e-06, - "loss": 0.0042, + "learning_rate": 2.9118618007506897e-06, + "loss": 0.0056, "step": 898500 }, { "epoch": 13.55, - "learning_rate": 1.9311918077350178e-06, - "loss": 0.0051, + "learning_rate": 2.8967877116025267e-06, + "loss": 0.0072, "step": 899000 }, { "epoch": 13.56, - "learning_rate": 1.9211424149695758e-06, - "loss": 0.005, + "learning_rate": 2.8817136224543632e-06, + "loss": 0.0067, "step": 899500 }, { "epoch": 13.57, - "learning_rate": 1.9110930222041334e-06, - "loss": 0.0043, + "learning_rate": 2.8666395333062002e-06, + "loss": 0.0059, "step": 900000 }, { "epoch": 13.57, - "learning_rate": 1.9010436294386914e-06, - "loss": 0.0042, + "learning_rate": 2.851565444158037e-06, + "loss": 0.0062, "step": 900500 }, { "epoch": 13.58, - "learning_rate": 1.8909942366732491e-06, - "loss": 0.0045, + "learning_rate": 2.836491355009874e-06, + "loss": 0.006, "step": 901000 }, { "epoch": 13.59, - "learning_rate": 1.8809448439078071e-06, - "loss": 0.0044, + "learning_rate": 2.8214172658617104e-06, + "loss": 0.0063, "step": 901500 }, { "epoch": 13.6, - "learning_rate": 1.870895451142365e-06, - "loss": 0.0045, + "learning_rate": 2.8063431767135474e-06, + "loss": 0.006, "step": 902000 }, { "epoch": 13.6, - "learning_rate": 1.8608460583769227e-06, - "loss": 0.0049, + "learning_rate": 2.791269087565384e-06, + "loss": 0.0063, "step": 902500 }, { "epoch": 13.61, - "learning_rate": 1.8507966656114807e-06, - "loss": 0.0048, + "learning_rate": 2.776194998417221e-06, + "loss": 0.0066, "step": 903000 }, { "epoch": 13.62, - "learning_rate": 1.8407472728460385e-06, - "loss": 0.0049, + "learning_rate": 2.7611209092690575e-06, + "loss": 0.007, "step": 903500 }, { "epoch": 13.63, - "learning_rate": 1.8306978800805963e-06, - "loss": 0.0044, + "learning_rate": 2.7460468201208945e-06, + "loss": 0.0063, "step": 904000 }, { "epoch": 13.63, - "learning_rate": 1.8206484873151543e-06, - "loss": 0.0043, + "learning_rate": 2.730972730972731e-06, + "loss": 0.0062, "step": 904500 }, { "epoch": 13.64, - "learning_rate": 1.810599094549712e-06, - "loss": 0.0046, + "learning_rate": 2.715898641824568e-06, + "loss": 0.0061, "step": 905000 }, { "epoch": 13.65, - "learning_rate": 1.8005497017842699e-06, - "loss": 0.0046, + "learning_rate": 2.7008245526764047e-06, + "loss": 0.0061, "step": 905500 }, { "epoch": 13.66, - "learning_rate": 1.7905003090188279e-06, - "loss": 0.0051, + "learning_rate": 2.6857504635282417e-06, + "loss": 0.0067, "step": 906000 }, { "epoch": 13.66, - "learning_rate": 1.7804509162533856e-06, - "loss": 0.0044, + "learning_rate": 2.6706763743800783e-06, + "loss": 0.0063, "step": 906500 }, { "epoch": 13.67, - "learning_rate": 1.7704015234879434e-06, - "loss": 0.0045, + "learning_rate": 2.6556022852319153e-06, + "loss": 0.0061, "step": 907000 }, { "epoch": 13.68, - "learning_rate": 1.7603521307225014e-06, - "loss": 0.0045, + "learning_rate": 2.640528196083752e-06, + "loss": 0.0064, "step": 907500 }, { "epoch": 13.69, - "learning_rate": 1.7503027379570592e-06, - "loss": 0.0048, + "learning_rate": 2.625454106935589e-06, + "loss": 0.0065, "step": 908000 }, { "epoch": 13.69, - "learning_rate": 1.740253345191617e-06, - "loss": 0.0048, + "learning_rate": 2.6103800177874254e-06, + "loss": 0.0066, "step": 908500 }, { "epoch": 13.7, - "learning_rate": 1.7302039524261748e-06, - "loss": 0.004, + "learning_rate": 2.5953059286392624e-06, + "loss": 0.0058, "step": 909000 }, { "epoch": 13.71, - "learning_rate": 1.7201545596607328e-06, - "loss": 0.0047, + "learning_rate": 2.580231839491099e-06, + "loss": 0.0063, "step": 909500 }, { "epoch": 13.72, - "learning_rate": 1.7101051668952906e-06, - "loss": 0.0049, + "learning_rate": 2.5651577503429355e-06, + "loss": 0.0065, "step": 910000 }, { "epoch": 13.72, - "learning_rate": 1.7000557741298484e-06, - "loss": 0.0048, + "learning_rate": 2.5500836611947725e-06, + "loss": 0.0063, "step": 910500 }, { "epoch": 13.73, - "learning_rate": 1.6900063813644064e-06, - "loss": 0.0047, + "learning_rate": 2.535009572046609e-06, + "loss": 0.0064, "step": 911000 }, { "epoch": 13.74, - "learning_rate": 1.6799569885989642e-06, - "loss": 0.0047, + "learning_rate": 2.519935482898446e-06, + "loss": 0.0067, "step": 911500 }, { "epoch": 13.75, - "learning_rate": 1.669907595833522e-06, - "loss": 0.0045, + "learning_rate": 2.5048613937502827e-06, + "loss": 0.0065, "step": 912000 }, { "epoch": 13.76, - "learning_rate": 1.65985820306808e-06, - "loss": 0.0043, + "learning_rate": 2.4897873046021197e-06, + "loss": 0.0062, "step": 912500 }, { "epoch": 13.76, - "learning_rate": 1.6498088103026377e-06, - "loss": 0.0042, + "learning_rate": 2.4747132154539563e-06, + "loss": 0.0066, "step": 913000 }, { "epoch": 13.77, - "learning_rate": 1.6397594175371955e-06, - "loss": 0.0045, + "learning_rate": 2.4596391263057933e-06, + "loss": 0.0058, "step": 913500 }, { "epoch": 13.78, - "learning_rate": 1.629710024771753e-06, - "loss": 0.0044, + "learning_rate": 2.4445650371576294e-06, + "loss": 0.0057, "step": 914000 }, { "epoch": 13.79, - "learning_rate": 1.619660632006311e-06, - "loss": 0.0048, + "learning_rate": 2.4294909480094664e-06, + "loss": 0.007, "step": 914500 }, { "epoch": 13.79, - "learning_rate": 1.6096112392408689e-06, - "loss": 0.0042, + "learning_rate": 2.414416858861303e-06, + "loss": 0.0058, "step": 915000 }, { "epoch": 13.8, - "learning_rate": 1.5995618464754267e-06, - "loss": 0.0042, + "learning_rate": 2.39934276971314e-06, + "loss": 0.0054, "step": 915500 }, { "epoch": 13.81, - "learning_rate": 1.5895124537099847e-06, - "loss": 0.0045, + "learning_rate": 2.3842686805649766e-06, + "loss": 0.0066, "step": 916000 }, { "epoch": 13.82, - "learning_rate": 1.5794630609445424e-06, - "loss": 0.0043, + "learning_rate": 2.3691945914168136e-06, + "loss": 0.0057, "step": 916500 }, { "epoch": 13.82, - "learning_rate": 1.5694136681791002e-06, - "loss": 0.0046, + "learning_rate": 2.35412050226865e-06, + "loss": 0.0057, "step": 917000 }, { "epoch": 13.83, - "learning_rate": 1.5593642754136582e-06, - "loss": 0.0044, + "learning_rate": 2.339046413120487e-06, + "loss": 0.0062, "step": 917500 }, { "epoch": 13.84, - "learning_rate": 1.549314882648216e-06, - "loss": 0.0045, + "learning_rate": 2.3239723239723237e-06, + "loss": 0.0061, "step": 918000 }, { "epoch": 13.85, - "learning_rate": 1.5392654898827738e-06, - "loss": 0.0042, + "learning_rate": 2.3088982348241607e-06, + "loss": 0.0063, "step": 918500 }, { "epoch": 13.85, - "learning_rate": 1.5292160971173318e-06, - "loss": 0.0049, + "learning_rate": 2.2938241456759973e-06, + "loss": 0.0061, "step": 919000 }, { "epoch": 13.86, - "learning_rate": 1.5191667043518896e-06, - "loss": 0.0042, + "learning_rate": 2.2787500565278343e-06, + "loss": 0.0062, "step": 919500 }, { "epoch": 13.87, - "learning_rate": 1.5091173115864474e-06, - "loss": 0.0043, + "learning_rate": 2.263675967379671e-06, + "loss": 0.0061, "step": 920000 }, { "epoch": 13.88, - "learning_rate": 1.4990679188210054e-06, - "loss": 0.0044, + "learning_rate": 2.248601878231508e-06, + "loss": 0.0059, "step": 920500 }, { "epoch": 13.88, - "learning_rate": 1.4890185260555632e-06, - "loss": 0.005, + "learning_rate": 2.2335277890833444e-06, + "loss": 0.0061, "step": 921000 }, { "epoch": 13.89, - "learning_rate": 1.478969133290121e-06, - "loss": 0.0046, + "learning_rate": 2.2184536999351814e-06, + "loss": 0.006, "step": 921500 }, { "epoch": 13.9, - "learning_rate": 1.4689197405246787e-06, - "loss": 0.004, + "learning_rate": 2.203379610787018e-06, + "loss": 0.0054, "step": 922000 }, { "epoch": 13.91, - "learning_rate": 1.4588703477592367e-06, - "loss": 0.0045, + "learning_rate": 2.188305521638855e-06, + "loss": 0.0063, "step": 922500 }, { "epoch": 13.91, - "learning_rate": 1.4488209549937945e-06, - "loss": 0.0042, + "learning_rate": 2.1732314324906916e-06, + "loss": 0.0057, "step": 923000 }, { "epoch": 13.92, - "learning_rate": 1.4387715622283523e-06, - "loss": 0.0047, + "learning_rate": 2.1581573433425286e-06, + "loss": 0.0061, "step": 923500 }, { "epoch": 13.93, - "learning_rate": 1.4287221694629103e-06, - "loss": 0.0047, + "learning_rate": 2.143083254194365e-06, + "loss": 0.0061, "step": 924000 }, { "epoch": 13.94, - "learning_rate": 1.418672776697468e-06, - "loss": 0.0044, + "learning_rate": 2.128009165046202e-06, + "loss": 0.0059, "step": 924500 }, { "epoch": 13.94, - "learning_rate": 1.4086233839320259e-06, - "loss": 0.0043, + "learning_rate": 2.1129350758980387e-06, + "loss": 0.0066, "step": 925000 }, { "epoch": 13.95, - "learning_rate": 1.3985739911665839e-06, - "loss": 0.0045, + "learning_rate": 2.0978609867498757e-06, + "loss": 0.0057, "step": 925500 }, { "epoch": 13.96, - "learning_rate": 1.3885245984011417e-06, - "loss": 0.0044, + "learning_rate": 2.0827868976017123e-06, + "loss": 0.0063, "step": 926000 }, { "epoch": 13.97, - "learning_rate": 1.3784752056356995e-06, - "loss": 0.0042, + "learning_rate": 2.0677128084535493e-06, + "loss": 0.0058, "step": 926500 }, { "epoch": 13.97, - "learning_rate": 1.3684258128702575e-06, - "loss": 0.0047, + "learning_rate": 2.052638719305386e-06, + "loss": 0.0061, "step": 927000 }, { "epoch": 13.98, - "learning_rate": 1.3583764201048152e-06, - "loss": 0.0048, + "learning_rate": 2.037564630157223e-06, + "loss": 0.0063, "step": 927500 }, { "epoch": 13.99, - "learning_rate": 1.348327027339373e-06, - "loss": 0.0046, + "learning_rate": 2.0224905410090594e-06, + "loss": 0.0064, "step": 928000 }, { "epoch": 14.0, - "learning_rate": 1.338277634573931e-06, - "loss": 0.0042, + "learning_rate": 2.0074164518608964e-06, + "loss": 0.0054, "step": 928500 }, { "epoch": 14.0, - "eval_accuracy": 0.9916466415960041, - "eval_f1": 0.9527847129251044, - "eval_loss": 0.055658113211393356, - "eval_precision": 0.9414636665554337, - "eval_recall": 0.9643813430345647, - "eval_runtime": 232.0621, - "eval_samples_per_second": 508.157, - "eval_steps_per_second": 31.763, + "eval_accuracy": 0.9884409088234384, + "eval_f1": 0.9516207694515415, + "eval_loss": 0.08319615572690964, + "eval_precision": 0.938869505641383, + "eval_recall": 0.9647231649453492, + "eval_runtime": 244.0878, + "eval_samples_per_second": 483.121, + "eval_steps_per_second": 30.198, "step": 928746 - }, - { - "epoch": 14.0, - "learning_rate": 1.3282282418084888e-06, - "loss": 0.0047, - "step": 929000 - }, - { - "epoch": 14.01, - "learning_rate": 1.3181788490430466e-06, - "loss": 0.0045, - "step": 929500 - }, - { - "epoch": 14.02, - "learning_rate": 1.3081294562776046e-06, - "loss": 0.0042, - "step": 930000 - }, - { - "epoch": 14.03, - "learning_rate": 1.2980800635121624e-06, - "loss": 0.0043, - "step": 930500 - }, - { - "epoch": 14.03, - "learning_rate": 1.2880306707467202e-06, - "loss": 0.004, - "step": 931000 - }, - { - "epoch": 14.04, - "learning_rate": 1.2779812779812782e-06, - "loss": 0.0045, - "step": 931500 - }, - { - "epoch": 14.05, - "learning_rate": 1.267931885215836e-06, - "loss": 0.0038, - "step": 932000 - }, - { - "epoch": 14.06, - "learning_rate": 1.2578824924503938e-06, - "loss": 0.004, - "step": 932500 - }, - { - "epoch": 14.06, - "learning_rate": 1.2478330996849515e-06, - "loss": 0.0044, - "step": 933000 - }, - { - "epoch": 14.07, - "learning_rate": 1.2377837069195095e-06, - "loss": 0.0038, - "step": 933500 - }, - { - "epoch": 14.08, - "learning_rate": 1.2277343141540673e-06, - "loss": 0.0037, - "step": 934000 - }, - { - "epoch": 14.09, - "learning_rate": 1.2176849213886251e-06, - "loss": 0.0044, - "step": 934500 - }, - { - "epoch": 14.09, - "learning_rate": 1.2076355286231831e-06, - "loss": 0.0039, - "step": 935000 - }, - { - "epoch": 14.1, - "learning_rate": 1.197586135857741e-06, - "loss": 0.0041, - "step": 935500 - }, - { - "epoch": 14.11, - "learning_rate": 1.1875367430922987e-06, - "loss": 0.0044, - "step": 936000 - }, - { - "epoch": 14.12, - "learning_rate": 1.1774873503268567e-06, - "loss": 0.0037, - "step": 936500 - }, - { - "epoch": 14.12, - "learning_rate": 1.1674379575614145e-06, - "loss": 0.0041, - "step": 937000 - }, - { - "epoch": 14.13, - "learning_rate": 1.1573885647959723e-06, - "loss": 0.0048, - "step": 937500 - }, - { - "epoch": 14.14, - "learning_rate": 1.1473391720305303e-06, - "loss": 0.0043, - "step": 938000 - }, - { - "epoch": 14.15, - "learning_rate": 1.137289779265088e-06, - "loss": 0.0042, - "step": 938500 - }, - { - "epoch": 14.15, - "learning_rate": 1.1272403864996458e-06, - "loss": 0.0045, - "step": 939000 - }, - { - "epoch": 14.16, - "learning_rate": 1.1171909937342038e-06, - "loss": 0.0042, - "step": 939500 - }, - { - "epoch": 14.17, - "learning_rate": 1.1071416009687616e-06, - "loss": 0.0042, - "step": 940000 - }, - { - "epoch": 14.18, - "learning_rate": 1.0970922082033194e-06, - "loss": 0.0041, - "step": 940500 - }, - { - "epoch": 14.18, - "learning_rate": 1.0870428154378774e-06, - "loss": 0.0033, - "step": 941000 - }, - { - "epoch": 14.19, - "learning_rate": 1.0769934226724352e-06, - "loss": 0.004, - "step": 941500 - }, - { - "epoch": 14.2, - "learning_rate": 1.066944029906993e-06, - "loss": 0.0039, - "step": 942000 - }, - { - "epoch": 14.21, - "learning_rate": 1.0568946371415508e-06, - "loss": 0.0039, - "step": 942500 - }, - { - "epoch": 14.21, - "learning_rate": 1.0468452443761086e-06, - "loss": 0.0041, - "step": 943000 - }, - { - "epoch": 14.22, - "learning_rate": 1.0367958516106666e-06, - "loss": 0.0039, - "step": 943500 - }, - { - "epoch": 14.23, - "learning_rate": 1.0267464588452243e-06, - "loss": 0.0036, - "step": 944000 - }, - { - "epoch": 14.24, - "learning_rate": 1.0166970660797821e-06, - "loss": 0.0042, - "step": 944500 - }, - { - "epoch": 14.25, - "learning_rate": 1.00664767331434e-06, - "loss": 0.0043, - "step": 945000 - }, - { - "epoch": 14.25, - "learning_rate": 9.96598280548898e-07, - "loss": 0.0038, - "step": 945500 - }, - { - "epoch": 14.26, - "learning_rate": 9.865488877834557e-07, - "loss": 0.0043, - "step": 946000 - }, - { - "epoch": 14.27, - "learning_rate": 9.764994950180135e-07, - "loss": 0.0044, - "step": 946500 - }, - { - "epoch": 14.28, - "learning_rate": 9.664501022525715e-07, - "loss": 0.004, - "step": 947000 - }, - { - "epoch": 14.28, - "learning_rate": 9.564007094871293e-07, - "loss": 0.004, - "step": 947500 - }, - { - "epoch": 14.29, - "learning_rate": 9.463513167216872e-07, - "loss": 0.0043, - "step": 948000 - }, - { - "epoch": 14.3, - "learning_rate": 9.36301923956245e-07, - "loss": 0.004, - "step": 948500 - }, - { - "epoch": 14.31, - "learning_rate": 9.262525311908028e-07, - "loss": 0.0044, - "step": 949000 - }, - { - "epoch": 14.31, - "learning_rate": 9.162031384253607e-07, - "loss": 0.0043, - "step": 949500 - }, - { - "epoch": 14.32, - "learning_rate": 9.061537456599185e-07, - "loss": 0.0035, - "step": 950000 - }, - { - "epoch": 14.33, - "learning_rate": 8.961043528944764e-07, - "loss": 0.0043, - "step": 950500 - }, - { - "epoch": 14.34, - "learning_rate": 8.860549601290343e-07, - "loss": 0.0044, - "step": 951000 - }, - { - "epoch": 14.34, - "learning_rate": 8.760055673635921e-07, - "loss": 0.0042, - "step": 951500 - }, - { - "epoch": 14.35, - "learning_rate": 8.6595617459815e-07, - "loss": 0.0043, - "step": 952000 - }, - { - "epoch": 14.36, - "learning_rate": 8.559067818327079e-07, - "loss": 0.0044, - "step": 952500 - }, - { - "epoch": 14.37, - "learning_rate": 8.458573890672657e-07, - "loss": 0.0041, - "step": 953000 - }, - { - "epoch": 14.37, - "learning_rate": 8.358079963018236e-07, - "loss": 0.0043, - "step": 953500 - }, - { - "epoch": 14.38, - "learning_rate": 8.257586035363814e-07, - "loss": 0.0035, - "step": 954000 - }, - { - "epoch": 14.39, - "learning_rate": 8.157092107709392e-07, - "loss": 0.004, - "step": 954500 - }, - { - "epoch": 14.4, - "learning_rate": 8.056598180054971e-07, - "loss": 0.004, - "step": 955000 - }, - { - "epoch": 14.4, - "learning_rate": 7.956104252400549e-07, - "loss": 0.004, - "step": 955500 - }, - { - "epoch": 14.41, - "learning_rate": 7.855610324746128e-07, - "loss": 0.0042, - "step": 956000 - }, - { - "epoch": 14.42, - "learning_rate": 7.755116397091707e-07, - "loss": 0.0043, - "step": 956500 - }, - { - "epoch": 14.43, - "learning_rate": 7.654622469437285e-07, - "loss": 0.0043, - "step": 957000 - }, - { - "epoch": 14.43, - "learning_rate": 7.554128541782864e-07, - "loss": 0.0037, - "step": 957500 - }, - { - "epoch": 14.44, - "learning_rate": 7.453634614128443e-07, - "loss": 0.0042, - "step": 958000 - }, - { - "epoch": 14.45, - "learning_rate": 7.353140686474021e-07, - "loss": 0.0038, - "step": 958500 - }, - { - "epoch": 14.46, - "learning_rate": 7.2526467588196e-07, - "loss": 0.004, - "step": 959000 - }, - { - "epoch": 14.46, - "learning_rate": 7.152152831165178e-07, - "loss": 0.0035, - "step": 959500 - }, - { - "epoch": 14.47, - "learning_rate": 7.051658903510756e-07, - "loss": 0.0043, - "step": 960000 - }, - { - "epoch": 14.48, - "learning_rate": 6.951164975856333e-07, - "loss": 0.0045, - "step": 960500 - }, - { - "epoch": 14.49, - "learning_rate": 6.850671048201912e-07, - "loss": 0.0038, - "step": 961000 - }, - { - "epoch": 14.49, - "learning_rate": 6.750177120547491e-07, - "loss": 0.0037, - "step": 961500 - }, - { - "epoch": 14.5, - "learning_rate": 6.649683192893069e-07, - "loss": 0.0044, - "step": 962000 - }, - { - "epoch": 14.51, - "learning_rate": 6.549189265238648e-07, - "loss": 0.0043, - "step": 962500 - }, - { - "epoch": 14.52, - "learning_rate": 6.448695337584227e-07, - "loss": 0.0042, - "step": 963000 - }, - { - "epoch": 14.52, - "learning_rate": 6.348201409929805e-07, - "loss": 0.004, - "step": 963500 - }, - { - "epoch": 14.53, - "learning_rate": 6.247707482275385e-07, - "loss": 0.0041, - "step": 964000 - }, - { - "epoch": 14.54, - "learning_rate": 6.147213554620963e-07, - "loss": 0.0042, - "step": 964500 - }, - { - "epoch": 14.55, - "learning_rate": 6.046719626966542e-07, - "loss": 0.0038, - "step": 965000 - }, - { - "epoch": 14.55, - "learning_rate": 5.946225699312119e-07, - "loss": 0.0044, - "step": 965500 - }, - { - "epoch": 14.56, - "learning_rate": 5.845731771657697e-07, - "loss": 0.0043, - "step": 966000 - }, - { - "epoch": 14.57, - "learning_rate": 5.745237844003276e-07, - "loss": 0.0046, - "step": 966500 - }, - { - "epoch": 14.58, - "learning_rate": 5.644743916348855e-07, - "loss": 0.004, - "step": 967000 - }, - { - "epoch": 14.58, - "learning_rate": 5.544249988694433e-07, - "loss": 0.0039, - "step": 967500 - }, - { - "epoch": 14.59, - "learning_rate": 5.443756061040012e-07, - "loss": 0.0039, - "step": 968000 - }, - { - "epoch": 14.6, - "learning_rate": 5.343262133385591e-07, - "loss": 0.004, - "step": 968500 - }, - { - "epoch": 14.61, - "learning_rate": 5.242768205731169e-07, - "loss": 0.0044, - "step": 969000 - }, - { - "epoch": 14.61, - "learning_rate": 5.142274278076748e-07, - "loss": 0.0044, - "step": 969500 - }, - { - "epoch": 14.62, - "learning_rate": 5.041780350422327e-07, - "loss": 0.0043, - "step": 970000 - }, - { - "epoch": 14.63, - "learning_rate": 4.941286422767905e-07, - "loss": 0.004, - "step": 970500 - }, - { - "epoch": 14.64, - "learning_rate": 4.840792495113483e-07, - "loss": 0.0042, - "step": 971000 - }, - { - "epoch": 14.64, - "learning_rate": 4.740298567459062e-07, - "loss": 0.0041, - "step": 971500 - }, - { - "epoch": 14.65, - "learning_rate": 4.63980463980464e-07, - "loss": 0.0044, - "step": 972000 - }, - { - "epoch": 14.66, - "learning_rate": 4.5393107121502186e-07, - "loss": 0.0042, - "step": 972500 - }, - { - "epoch": 14.67, - "learning_rate": 4.4388167844957976e-07, - "loss": 0.0043, - "step": 973000 - }, - { - "epoch": 14.67, - "learning_rate": 4.338322856841376e-07, - "loss": 0.004, - "step": 973500 - }, - { - "epoch": 14.68, - "learning_rate": 4.2378289291869544e-07, - "loss": 0.0042, - "step": 974000 - }, - { - "epoch": 14.69, - "learning_rate": 4.137335001532532e-07, - "loss": 0.0041, - "step": 974500 - }, - { - "epoch": 14.7, - "learning_rate": 4.036841073878111e-07, - "loss": 0.0042, - "step": 975000 - }, - { - "epoch": 14.7, - "learning_rate": 3.9363471462236896e-07, - "loss": 0.0047, - "step": 975500 - }, - { - "epoch": 14.71, - "learning_rate": 3.835853218569268e-07, - "loss": 0.0039, - "step": 976000 - }, - { - "epoch": 14.72, - "learning_rate": 3.7353592909148464e-07, - "loss": 0.0041, - "step": 976500 - }, - { - "epoch": 14.73, - "learning_rate": 3.6348653632604253e-07, - "loss": 0.004, - "step": 977000 - }, - { - "epoch": 14.73, - "learning_rate": 3.5343714356060037e-07, - "loss": 0.0037, - "step": 977500 - }, - { - "epoch": 14.74, - "learning_rate": 3.433877507951582e-07, - "loss": 0.0044, - "step": 978000 - }, - { - "epoch": 14.75, - "learning_rate": 3.3333835802971605e-07, - "loss": 0.0045, - "step": 978500 - }, - { - "epoch": 14.76, - "learning_rate": 3.2328896526427394e-07, - "loss": 0.0041, - "step": 979000 - }, - { - "epoch": 14.77, - "learning_rate": 3.132395724988318e-07, - "loss": 0.0041, - "step": 979500 - }, - { - "epoch": 14.77, - "learning_rate": 3.031901797333896e-07, - "loss": 0.004, - "step": 980000 - }, - { - "epoch": 14.78, - "learning_rate": 2.931407869679475e-07, - "loss": 0.0043, - "step": 980500 - }, - { - "epoch": 14.79, - "learning_rate": 2.8309139420250536e-07, - "loss": 0.0041, - "step": 981000 - }, - { - "epoch": 14.8, - "learning_rate": 2.7304200143706315e-07, - "loss": 0.0045, - "step": 981500 - }, - { - "epoch": 14.8, - "learning_rate": 2.6299260867162104e-07, - "loss": 0.0043, - "step": 982000 - }, - { - "epoch": 14.81, - "learning_rate": 2.529432159061789e-07, - "loss": 0.0043, - "step": 982500 - }, - { - "epoch": 14.82, - "learning_rate": 2.428938231407367e-07, - "loss": 0.0037, - "step": 983000 - }, - { - "epoch": 14.83, - "learning_rate": 2.3284443037529459e-07, - "loss": 0.0043, - "step": 983500 - }, - { - "epoch": 14.83, - "learning_rate": 2.2279503760985245e-07, - "loss": 0.0037, - "step": 984000 - }, - { - "epoch": 14.84, - "learning_rate": 2.127456448444103e-07, - "loss": 0.0042, - "step": 984500 - }, - { - "epoch": 14.85, - "learning_rate": 2.0269625207896816e-07, - "loss": 0.0043, - "step": 985000 - }, - { - "epoch": 14.86, - "learning_rate": 1.92646859313526e-07, - "loss": 0.0043, - "step": 985500 - }, - { - "epoch": 14.86, - "learning_rate": 1.8259746654808387e-07, - "loss": 0.0047, - "step": 986000 - }, - { - "epoch": 14.87, - "learning_rate": 1.7254807378264168e-07, - "loss": 0.0038, - "step": 986500 - }, - { - "epoch": 14.88, - "learning_rate": 1.6249868101719955e-07, - "loss": 0.0039, - "step": 987000 - }, - { - "epoch": 14.89, - "learning_rate": 1.5244928825175739e-07, - "loss": 0.004, - "step": 987500 - }, - { - "epoch": 14.89, - "learning_rate": 1.4239989548631525e-07, - "loss": 0.0042, - "step": 988000 - }, - { - "epoch": 14.9, - "learning_rate": 1.323505027208731e-07, - "loss": 0.0043, - "step": 988500 - }, - { - "epoch": 14.91, - "learning_rate": 1.2230110995543096e-07, - "loss": 0.0044, - "step": 989000 - }, - { - "epoch": 14.92, - "learning_rate": 1.1225171718998881e-07, - "loss": 0.0042, - "step": 989500 - }, - { - "epoch": 14.92, - "learning_rate": 1.0220232442454665e-07, - "loss": 0.0038, - "step": 990000 - }, - { - "epoch": 14.93, - "learning_rate": 9.21529316591045e-08, - "loss": 0.0043, - "step": 990500 - }, - { - "epoch": 14.94, - "learning_rate": 8.210353889366236e-08, - "loss": 0.0043, - "step": 991000 - }, - { - "epoch": 14.95, - "learning_rate": 7.20541461282202e-08, - "loss": 0.0041, - "step": 991500 - }, - { - "epoch": 14.95, - "learning_rate": 6.200475336277807e-08, - "loss": 0.0043, - "step": 992000 - }, - { - "epoch": 14.96, - "learning_rate": 5.1955360597335906e-08, - "loss": 0.0039, - "step": 992500 - }, - { - "epoch": 14.97, - "learning_rate": 4.190596783189376e-08, - "loss": 0.004, - "step": 993000 - }, - { - "epoch": 14.98, - "learning_rate": 3.185657506645161e-08, - "loss": 0.0038, - "step": 993500 - }, - { - "epoch": 14.98, - "learning_rate": 2.1807182301009463e-08, - "loss": 0.0045, - "step": 994000 - }, - { - "epoch": 14.99, - "learning_rate": 1.1757789535567315e-08, - "loss": 0.0046, - "step": 994500 - }, - { - "epoch": 15.0, - "learning_rate": 1.7083967701251653e-09, - "loss": 0.0045, - "step": 995000 - }, - { - "epoch": 15.0, - "eval_accuracy": 0.9916262232837728, - "eval_f1": 0.9528990607968144, - "eval_loss": 0.05894870683550835, - "eval_precision": 0.9420131378037918, - "eval_recall": 0.9640395211237802, - "eval_runtime": 237.8401, - "eval_samples_per_second": 495.812, - "eval_steps_per_second": 30.991, - "step": 995085 - }, - { - "epoch": 15.0, - "step": 995085, - "total_flos": 4.160281266509316e+18, - "train_loss": 0.002076548602076231, - "train_runtime": 26627.0898, - "train_samples_per_second": 597.931, - "train_steps_per_second": 37.371 } ], "max_steps": 995085, "num_train_epochs": 15, - "total_flos": 4.160281266509316e+18, + "total_flos": 3.882929181746528e+18, "trial_name": null, "trial_params": null }