HBERTv1_48_L2_H128_A2 / trainer_state.json
gokuls's picture
End of training
8e04827
raw
history blame
127 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 14.288807166952386,
"eval_steps": 1000000,
"global_step": 516769,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 5.000000000000001e-07,
"loss": 10.3381,
"step": 500
},
{
"epoch": 0.03,
"learning_rate": 1.0000000000000002e-06,
"loss": 10.2616,
"step": 1000
},
{
"epoch": 0.04,
"learning_rate": 1.5e-06,
"loss": 10.1304,
"step": 1500
},
{
"epoch": 0.06,
"learning_rate": 2.0000000000000003e-06,
"loss": 9.9802,
"step": 2000
},
{
"epoch": 0.07,
"learning_rate": 2.5e-06,
"loss": 9.8125,
"step": 2500
},
{
"epoch": 0.08,
"learning_rate": 3e-06,
"loss": 9.6188,
"step": 3000
},
{
"epoch": 0.1,
"learning_rate": 3.5e-06,
"loss": 9.4019,
"step": 3500
},
{
"epoch": 0.11,
"learning_rate": 4.000000000000001e-06,
"loss": 9.1652,
"step": 4000
},
{
"epoch": 0.12,
"learning_rate": 4.5e-06,
"loss": 8.9147,
"step": 4500
},
{
"epoch": 0.14,
"learning_rate": 5e-06,
"loss": 8.6596,
"step": 5000
},
{
"epoch": 0.15,
"learning_rate": 5.500000000000001e-06,
"loss": 8.4054,
"step": 5500
},
{
"epoch": 0.17,
"learning_rate": 6e-06,
"loss": 8.1569,
"step": 6000
},
{
"epoch": 0.18,
"learning_rate": 6.5000000000000004e-06,
"loss": 7.9271,
"step": 6500
},
{
"epoch": 0.19,
"learning_rate": 7e-06,
"loss": 7.7144,
"step": 7000
},
{
"epoch": 0.21,
"learning_rate": 7.500000000000001e-06,
"loss": 7.5352,
"step": 7500
},
{
"epoch": 0.22,
"learning_rate": 8.000000000000001e-06,
"loss": 7.3919,
"step": 8000
},
{
"epoch": 0.24,
"learning_rate": 8.5e-06,
"loss": 7.2823,
"step": 8500
},
{
"epoch": 0.25,
"learning_rate": 9e-06,
"loss": 7.2106,
"step": 9000
},
{
"epoch": 0.26,
"learning_rate": 9.5e-06,
"loss": 7.1515,
"step": 9500
},
{
"epoch": 0.28,
"learning_rate": 1e-05,
"loss": 7.098,
"step": 10000
},
{
"epoch": 0.29,
"learning_rate": 9.998613652747741e-06,
"loss": 7.0565,
"step": 10500
},
{
"epoch": 0.3,
"learning_rate": 9.997227305495482e-06,
"loss": 7.0144,
"step": 11000
},
{
"epoch": 0.32,
"learning_rate": 9.995840958243221e-06,
"loss": 6.986,
"step": 11500
},
{
"epoch": 0.33,
"learning_rate": 9.994454610990962e-06,
"loss": 6.9545,
"step": 12000
},
{
"epoch": 0.35,
"learning_rate": 9.993068263738702e-06,
"loss": 6.931,
"step": 12500
},
{
"epoch": 0.36,
"learning_rate": 9.991681916486441e-06,
"loss": 6.9066,
"step": 13000
},
{
"epoch": 0.37,
"learning_rate": 9.990295569234184e-06,
"loss": 6.8805,
"step": 13500
},
{
"epoch": 0.39,
"learning_rate": 9.988909221981923e-06,
"loss": 6.8581,
"step": 14000
},
{
"epoch": 0.4,
"learning_rate": 9.987522874729663e-06,
"loss": 6.8405,
"step": 14500
},
{
"epoch": 0.41,
"learning_rate": 9.986136527477404e-06,
"loss": 6.8213,
"step": 15000
},
{
"epoch": 0.43,
"learning_rate": 9.984750180225143e-06,
"loss": 6.8017,
"step": 15500
},
{
"epoch": 0.44,
"learning_rate": 9.983363832972885e-06,
"loss": 6.7857,
"step": 16000
},
{
"epoch": 0.46,
"learning_rate": 9.981977485720624e-06,
"loss": 6.7668,
"step": 16500
},
{
"epoch": 0.47,
"learning_rate": 9.980591138468365e-06,
"loss": 6.7588,
"step": 17000
},
{
"epoch": 0.48,
"learning_rate": 9.979204791216105e-06,
"loss": 6.7387,
"step": 17500
},
{
"epoch": 0.5,
"learning_rate": 9.977818443963844e-06,
"loss": 6.7327,
"step": 18000
},
{
"epoch": 0.51,
"learning_rate": 9.976432096711585e-06,
"loss": 6.714,
"step": 18500
},
{
"epoch": 0.53,
"learning_rate": 9.975045749459326e-06,
"loss": 6.7006,
"step": 19000
},
{
"epoch": 0.54,
"learning_rate": 9.973659402207065e-06,
"loss": 6.6989,
"step": 19500
},
{
"epoch": 0.55,
"learning_rate": 9.972273054954807e-06,
"loss": 6.6843,
"step": 20000
},
{
"epoch": 0.57,
"learning_rate": 9.970886707702546e-06,
"loss": 6.6693,
"step": 20500
},
{
"epoch": 0.58,
"learning_rate": 9.969500360450287e-06,
"loss": 6.6629,
"step": 21000
},
{
"epoch": 0.59,
"learning_rate": 9.968114013198027e-06,
"loss": 6.654,
"step": 21500
},
{
"epoch": 0.61,
"learning_rate": 9.966727665945766e-06,
"loss": 6.6443,
"step": 22000
},
{
"epoch": 0.62,
"learning_rate": 9.965341318693507e-06,
"loss": 6.638,
"step": 22500
},
{
"epoch": 0.64,
"learning_rate": 9.963954971441247e-06,
"loss": 6.6275,
"step": 23000
},
{
"epoch": 0.65,
"learning_rate": 9.962568624188988e-06,
"loss": 6.6172,
"step": 23500
},
{
"epoch": 0.66,
"learning_rate": 9.961182276936729e-06,
"loss": 6.6165,
"step": 24000
},
{
"epoch": 0.68,
"learning_rate": 9.959795929684468e-06,
"loss": 6.6045,
"step": 24500
},
{
"epoch": 0.69,
"learning_rate": 9.958409582432208e-06,
"loss": 6.5938,
"step": 25000
},
{
"epoch": 0.71,
"learning_rate": 9.957023235179949e-06,
"loss": 6.5902,
"step": 25500
},
{
"epoch": 0.72,
"learning_rate": 9.955636887927688e-06,
"loss": 6.5789,
"step": 26000
},
{
"epoch": 0.73,
"learning_rate": 9.95425054067543e-06,
"loss": 6.5751,
"step": 26500
},
{
"epoch": 0.75,
"learning_rate": 9.95286419342317e-06,
"loss": 6.5694,
"step": 27000
},
{
"epoch": 0.76,
"learning_rate": 9.95147784617091e-06,
"loss": 6.5637,
"step": 27500
},
{
"epoch": 0.77,
"learning_rate": 9.95009149891865e-06,
"loss": 6.556,
"step": 28000
},
{
"epoch": 0.79,
"learning_rate": 9.94870515166639e-06,
"loss": 6.5486,
"step": 28500
},
{
"epoch": 0.8,
"learning_rate": 9.94731880441413e-06,
"loss": 6.5426,
"step": 29000
},
{
"epoch": 0.82,
"learning_rate": 9.94593245716187e-06,
"loss": 6.5411,
"step": 29500
},
{
"epoch": 0.83,
"learning_rate": 9.944546109909611e-06,
"loss": 6.5318,
"step": 30000
},
{
"epoch": 0.84,
"learning_rate": 9.943159762657352e-06,
"loss": 6.5263,
"step": 30500
},
{
"epoch": 0.86,
"learning_rate": 9.941773415405091e-06,
"loss": 6.5222,
"step": 31000
},
{
"epoch": 0.87,
"learning_rate": 9.940387068152832e-06,
"loss": 6.5154,
"step": 31500
},
{
"epoch": 0.88,
"learning_rate": 9.939000720900572e-06,
"loss": 6.5164,
"step": 32000
},
{
"epoch": 0.9,
"learning_rate": 9.937614373648313e-06,
"loss": 6.5088,
"step": 32500
},
{
"epoch": 0.91,
"learning_rate": 9.936228026396052e-06,
"loss": 6.5033,
"step": 33000
},
{
"epoch": 0.93,
"learning_rate": 9.934841679143793e-06,
"loss": 6.5037,
"step": 33500
},
{
"epoch": 0.94,
"learning_rate": 9.933455331891533e-06,
"loss": 6.4965,
"step": 34000
},
{
"epoch": 0.95,
"learning_rate": 9.932068984639274e-06,
"loss": 6.4929,
"step": 34500
},
{
"epoch": 0.97,
"learning_rate": 9.930682637387013e-06,
"loss": 6.4842,
"step": 35000
},
{
"epoch": 0.98,
"learning_rate": 9.929296290134753e-06,
"loss": 6.4838,
"step": 35500
},
{
"epoch": 1.0,
"learning_rate": 9.927909942882494e-06,
"loss": 6.477,
"step": 36000
},
{
"epoch": 1.01,
"learning_rate": 9.926523595630235e-06,
"loss": 6.4729,
"step": 36500
},
{
"epoch": 1.02,
"learning_rate": 9.925137248377974e-06,
"loss": 6.4718,
"step": 37000
},
{
"epoch": 1.04,
"learning_rate": 9.923750901125714e-06,
"loss": 6.4646,
"step": 37500
},
{
"epoch": 1.05,
"learning_rate": 9.922364553873455e-06,
"loss": 6.455,
"step": 38000
},
{
"epoch": 1.06,
"learning_rate": 9.920978206621196e-06,
"loss": 6.4577,
"step": 38500
},
{
"epoch": 1.08,
"learning_rate": 9.919591859368936e-06,
"loss": 6.4527,
"step": 39000
},
{
"epoch": 1.09,
"learning_rate": 9.918205512116675e-06,
"loss": 6.4509,
"step": 39500
},
{
"epoch": 1.11,
"learning_rate": 9.916819164864416e-06,
"loss": 6.4439,
"step": 40000
},
{
"epoch": 1.12,
"learning_rate": 9.915432817612157e-06,
"loss": 6.4433,
"step": 40500
},
{
"epoch": 1.13,
"learning_rate": 9.914046470359897e-06,
"loss": 6.4374,
"step": 41000
},
{
"epoch": 1.15,
"learning_rate": 9.912660123107636e-06,
"loss": 6.4368,
"step": 41500
},
{
"epoch": 1.16,
"learning_rate": 9.911273775855377e-06,
"loss": 6.429,
"step": 42000
},
{
"epoch": 1.18,
"learning_rate": 9.909887428603117e-06,
"loss": 6.4267,
"step": 42500
},
{
"epoch": 1.19,
"learning_rate": 9.908501081350858e-06,
"loss": 6.4202,
"step": 43000
},
{
"epoch": 1.2,
"learning_rate": 9.907114734098597e-06,
"loss": 6.4199,
"step": 43500
},
{
"epoch": 1.22,
"learning_rate": 9.905728386846338e-06,
"loss": 6.4171,
"step": 44000
},
{
"epoch": 1.23,
"learning_rate": 9.904342039594078e-06,
"loss": 6.4079,
"step": 44500
},
{
"epoch": 1.24,
"learning_rate": 9.902955692341819e-06,
"loss": 6.4066,
"step": 45000
},
{
"epoch": 1.26,
"learning_rate": 9.90156934508956e-06,
"loss": 6.41,
"step": 45500
},
{
"epoch": 1.27,
"learning_rate": 9.900182997837299e-06,
"loss": 6.4045,
"step": 46000
},
{
"epoch": 1.29,
"learning_rate": 9.89879665058504e-06,
"loss": 6.402,
"step": 46500
},
{
"epoch": 1.3,
"learning_rate": 9.89741030333278e-06,
"loss": 6.398,
"step": 47000
},
{
"epoch": 1.31,
"learning_rate": 9.896023956080519e-06,
"loss": 6.398,
"step": 47500
},
{
"epoch": 1.33,
"learning_rate": 9.89463760882826e-06,
"loss": 6.3931,
"step": 48000
},
{
"epoch": 1.34,
"learning_rate": 9.893251261576e-06,
"loss": 6.3866,
"step": 48500
},
{
"epoch": 1.35,
"learning_rate": 9.89186491432374e-06,
"loss": 6.3875,
"step": 49000
},
{
"epoch": 1.37,
"learning_rate": 9.890478567071481e-06,
"loss": 6.3859,
"step": 49500
},
{
"epoch": 1.38,
"learning_rate": 9.88909221981922e-06,
"loss": 6.3776,
"step": 50000
},
{
"epoch": 1.4,
"learning_rate": 9.887705872566961e-06,
"loss": 6.38,
"step": 50500
},
{
"epoch": 1.41,
"learning_rate": 9.886319525314702e-06,
"loss": 6.3779,
"step": 51000
},
{
"epoch": 1.42,
"learning_rate": 9.88493317806244e-06,
"loss": 6.3713,
"step": 51500
},
{
"epoch": 1.44,
"learning_rate": 9.883546830810183e-06,
"loss": 6.3715,
"step": 52000
},
{
"epoch": 1.45,
"learning_rate": 9.882160483557922e-06,
"loss": 6.3698,
"step": 52500
},
{
"epoch": 1.47,
"learning_rate": 9.880774136305663e-06,
"loss": 6.3641,
"step": 53000
},
{
"epoch": 1.48,
"learning_rate": 9.879387789053403e-06,
"loss": 6.3604,
"step": 53500
},
{
"epoch": 1.49,
"learning_rate": 9.878001441801142e-06,
"loss": 6.3553,
"step": 54000
},
{
"epoch": 1.51,
"learning_rate": 9.876615094548884e-06,
"loss": 6.3554,
"step": 54500
},
{
"epoch": 1.52,
"learning_rate": 9.875228747296623e-06,
"loss": 6.3515,
"step": 55000
},
{
"epoch": 1.53,
"learning_rate": 9.873842400044364e-06,
"loss": 6.3503,
"step": 55500
},
{
"epoch": 1.55,
"learning_rate": 9.872456052792105e-06,
"loss": 6.3526,
"step": 56000
},
{
"epoch": 1.56,
"learning_rate": 9.871069705539844e-06,
"loss": 6.3537,
"step": 56500
},
{
"epoch": 1.58,
"learning_rate": 9.869683358287584e-06,
"loss": 6.3438,
"step": 57000
},
{
"epoch": 1.59,
"learning_rate": 9.868297011035325e-06,
"loss": 6.3449,
"step": 57500
},
{
"epoch": 1.6,
"learning_rate": 9.866910663783064e-06,
"loss": 6.3424,
"step": 58000
},
{
"epoch": 1.62,
"learning_rate": 9.865524316530806e-06,
"loss": 6.3416,
"step": 58500
},
{
"epoch": 1.63,
"learning_rate": 9.864137969278545e-06,
"loss": 6.334,
"step": 59000
},
{
"epoch": 1.65,
"learning_rate": 9.862751622026286e-06,
"loss": 6.3384,
"step": 59500
},
{
"epoch": 1.66,
"learning_rate": 9.861365274774027e-06,
"loss": 6.3349,
"step": 60000
},
{
"epoch": 1.67,
"learning_rate": 9.859978927521765e-06,
"loss": 6.3318,
"step": 60500
},
{
"epoch": 1.69,
"learning_rate": 9.858592580269508e-06,
"loss": 6.3296,
"step": 61000
},
{
"epoch": 1.7,
"learning_rate": 9.857206233017247e-06,
"loss": 6.3261,
"step": 61500
},
{
"epoch": 1.71,
"learning_rate": 9.855819885764987e-06,
"loss": 6.3277,
"step": 62000
},
{
"epoch": 1.73,
"learning_rate": 9.854433538512728e-06,
"loss": 6.3218,
"step": 62500
},
{
"epoch": 1.74,
"learning_rate": 9.853047191260467e-06,
"loss": 6.3237,
"step": 63000
},
{
"epoch": 1.76,
"learning_rate": 9.851660844008208e-06,
"loss": 6.3203,
"step": 63500
},
{
"epoch": 1.77,
"learning_rate": 9.850274496755948e-06,
"loss": 6.3184,
"step": 64000
},
{
"epoch": 1.78,
"learning_rate": 9.848888149503689e-06,
"loss": 6.3114,
"step": 64500
},
{
"epoch": 1.8,
"learning_rate": 9.84750180225143e-06,
"loss": 6.311,
"step": 65000
},
{
"epoch": 1.81,
"learning_rate": 9.846115454999169e-06,
"loss": 6.3104,
"step": 65500
},
{
"epoch": 1.82,
"learning_rate": 9.84472910774691e-06,
"loss": 6.3108,
"step": 66000
},
{
"epoch": 1.84,
"learning_rate": 9.84334276049465e-06,
"loss": 6.3097,
"step": 66500
},
{
"epoch": 1.85,
"learning_rate": 9.841956413242389e-06,
"loss": 6.308,
"step": 67000
},
{
"epoch": 1.87,
"learning_rate": 9.840570065990131e-06,
"loss": 6.3075,
"step": 67500
},
{
"epoch": 1.88,
"learning_rate": 9.83918371873787e-06,
"loss": 6.3049,
"step": 68000
},
{
"epoch": 1.89,
"learning_rate": 9.83779737148561e-06,
"loss": 6.2991,
"step": 68500
},
{
"epoch": 1.91,
"learning_rate": 9.836411024233351e-06,
"loss": 6.3004,
"step": 69000
},
{
"epoch": 1.92,
"learning_rate": 9.83502467698109e-06,
"loss": 6.2954,
"step": 69500
},
{
"epoch": 1.94,
"learning_rate": 9.833638329728833e-06,
"loss": 6.296,
"step": 70000
},
{
"epoch": 1.95,
"learning_rate": 9.832251982476572e-06,
"loss": 6.2957,
"step": 70500
},
{
"epoch": 1.96,
"learning_rate": 9.830865635224312e-06,
"loss": 6.2927,
"step": 71000
},
{
"epoch": 1.98,
"learning_rate": 9.829479287972053e-06,
"loss": 6.2886,
"step": 71500
},
{
"epoch": 1.99,
"learning_rate": 9.828092940719792e-06,
"loss": 6.2889,
"step": 72000
},
{
"epoch": 2.0,
"learning_rate": 9.826706593467533e-06,
"loss": 6.2901,
"step": 72500
},
{
"epoch": 2.02,
"learning_rate": 9.825320246215273e-06,
"loss": 6.2874,
"step": 73000
},
{
"epoch": 2.03,
"learning_rate": 9.823933898963012e-06,
"loss": 6.2845,
"step": 73500
},
{
"epoch": 2.05,
"learning_rate": 9.822547551710754e-06,
"loss": 6.2846,
"step": 74000
},
{
"epoch": 2.06,
"learning_rate": 9.821161204458493e-06,
"loss": 6.2815,
"step": 74500
},
{
"epoch": 2.07,
"learning_rate": 9.819774857206234e-06,
"loss": 6.2814,
"step": 75000
},
{
"epoch": 2.09,
"learning_rate": 9.818388509953975e-06,
"loss": 6.2806,
"step": 75500
},
{
"epoch": 2.1,
"learning_rate": 9.817002162701714e-06,
"loss": 6.2773,
"step": 76000
},
{
"epoch": 2.12,
"learning_rate": 9.815615815449454e-06,
"loss": 6.2766,
"step": 76500
},
{
"epoch": 2.13,
"learning_rate": 9.814229468197195e-06,
"loss": 6.2737,
"step": 77000
},
{
"epoch": 2.14,
"learning_rate": 9.812843120944936e-06,
"loss": 6.2767,
"step": 77500
},
{
"epoch": 2.16,
"learning_rate": 9.811456773692676e-06,
"loss": 6.2733,
"step": 78000
},
{
"epoch": 2.17,
"learning_rate": 9.810070426440415e-06,
"loss": 6.27,
"step": 78500
},
{
"epoch": 2.18,
"learning_rate": 9.808684079188156e-06,
"loss": 6.2712,
"step": 79000
},
{
"epoch": 2.2,
"learning_rate": 9.807297731935896e-06,
"loss": 6.2709,
"step": 79500
},
{
"epoch": 2.21,
"learning_rate": 9.805911384683635e-06,
"loss": 6.2686,
"step": 80000
},
{
"epoch": 2.23,
"learning_rate": 9.804525037431376e-06,
"loss": 6.2674,
"step": 80500
},
{
"epoch": 2.24,
"learning_rate": 9.803138690179117e-06,
"loss": 6.2641,
"step": 81000
},
{
"epoch": 2.25,
"learning_rate": 9.801752342926857e-06,
"loss": 6.2609,
"step": 81500
},
{
"epoch": 2.27,
"learning_rate": 9.800365995674598e-06,
"loss": 6.2621,
"step": 82000
},
{
"epoch": 2.28,
"learning_rate": 9.798979648422337e-06,
"loss": 6.2635,
"step": 82500
},
{
"epoch": 2.29,
"learning_rate": 9.797593301170078e-06,
"loss": 6.2617,
"step": 83000
},
{
"epoch": 2.31,
"learning_rate": 9.796206953917818e-06,
"loss": 6.2595,
"step": 83500
},
{
"epoch": 2.32,
"learning_rate": 9.794820606665559e-06,
"loss": 6.2595,
"step": 84000
},
{
"epoch": 2.34,
"learning_rate": 9.7934342594133e-06,
"loss": 6.2614,
"step": 84500
},
{
"epoch": 2.35,
"learning_rate": 9.792047912161039e-06,
"loss": 6.2589,
"step": 85000
},
{
"epoch": 2.36,
"learning_rate": 9.790661564908779e-06,
"loss": 6.2529,
"step": 85500
},
{
"epoch": 2.38,
"learning_rate": 9.78927521765652e-06,
"loss": 6.2566,
"step": 86000
},
{
"epoch": 2.39,
"learning_rate": 9.78788887040426e-06,
"loss": 6.2534,
"step": 86500
},
{
"epoch": 2.41,
"learning_rate": 9.786502523152e-06,
"loss": 6.2499,
"step": 87000
},
{
"epoch": 2.42,
"learning_rate": 9.78511617589974e-06,
"loss": 6.2508,
"step": 87500
},
{
"epoch": 2.43,
"learning_rate": 9.78372982864748e-06,
"loss": 6.2462,
"step": 88000
},
{
"epoch": 2.45,
"learning_rate": 9.782343481395221e-06,
"loss": 6.2459,
"step": 88500
},
{
"epoch": 2.46,
"learning_rate": 9.78095713414296e-06,
"loss": 6.2461,
"step": 89000
},
{
"epoch": 2.47,
"learning_rate": 9.779570786890701e-06,
"loss": 6.2433,
"step": 89500
},
{
"epoch": 2.49,
"learning_rate": 9.778184439638442e-06,
"loss": 6.2437,
"step": 90000
},
{
"epoch": 2.5,
"learning_rate": 9.776798092386182e-06,
"loss": 6.2437,
"step": 90500
},
{
"epoch": 2.52,
"learning_rate": 9.775411745133921e-06,
"loss": 6.2422,
"step": 91000
},
{
"epoch": 2.53,
"learning_rate": 9.774025397881662e-06,
"loss": 6.2409,
"step": 91500
},
{
"epoch": 2.54,
"learning_rate": 9.772639050629402e-06,
"loss": 6.2422,
"step": 92000
},
{
"epoch": 2.56,
"learning_rate": 9.771252703377143e-06,
"loss": 6.2387,
"step": 92500
},
{
"epoch": 2.57,
"learning_rate": 9.769866356124884e-06,
"loss": 6.2411,
"step": 93000
},
{
"epoch": 2.59,
"learning_rate": 9.768480008872623e-06,
"loss": 6.2365,
"step": 93500
},
{
"epoch": 2.6,
"learning_rate": 9.767093661620363e-06,
"loss": 6.235,
"step": 94000
},
{
"epoch": 2.61,
"learning_rate": 9.765707314368104e-06,
"loss": 6.2352,
"step": 94500
},
{
"epoch": 2.63,
"learning_rate": 9.764320967115843e-06,
"loss": 6.2326,
"step": 95000
},
{
"epoch": 2.64,
"learning_rate": 9.762934619863584e-06,
"loss": 6.2318,
"step": 95500
},
{
"epoch": 2.65,
"learning_rate": 9.761548272611324e-06,
"loss": 6.2308,
"step": 96000
},
{
"epoch": 2.67,
"learning_rate": 9.760161925359065e-06,
"loss": 6.2275,
"step": 96500
},
{
"epoch": 2.68,
"learning_rate": 9.758775578106806e-06,
"loss": 6.2322,
"step": 97000
},
{
"epoch": 2.7,
"learning_rate": 9.757389230854544e-06,
"loss": 6.2291,
"step": 97500
},
{
"epoch": 2.71,
"learning_rate": 9.756002883602285e-06,
"loss": 6.2283,
"step": 98000
},
{
"epoch": 2.72,
"learning_rate": 9.754616536350026e-06,
"loss": 6.2276,
"step": 98500
},
{
"epoch": 2.74,
"learning_rate": 9.753230189097766e-06,
"loss": 6.2263,
"step": 99000
},
{
"epoch": 2.75,
"learning_rate": 9.751843841845507e-06,
"loss": 6.2272,
"step": 99500
},
{
"epoch": 2.77,
"learning_rate": 9.750457494593246e-06,
"loss": 6.2237,
"step": 100000
},
{
"epoch": 2.78,
"learning_rate": 9.749071147340987e-06,
"loss": 6.2273,
"step": 100500
},
{
"epoch": 2.79,
"learning_rate": 9.747684800088727e-06,
"loss": 6.2232,
"step": 101000
},
{
"epoch": 2.81,
"learning_rate": 9.746298452836466e-06,
"loss": 6.2197,
"step": 101500
},
{
"epoch": 2.82,
"learning_rate": 9.744912105584207e-06,
"loss": 6.2217,
"step": 102000
},
{
"epoch": 2.83,
"learning_rate": 9.743525758331948e-06,
"loss": 6.2223,
"step": 102500
},
{
"epoch": 2.85,
"learning_rate": 9.742139411079688e-06,
"loss": 6.2234,
"step": 103000
},
{
"epoch": 2.86,
"learning_rate": 9.740753063827429e-06,
"loss": 6.2169,
"step": 103500
},
{
"epoch": 2.88,
"learning_rate": 9.739366716575168e-06,
"loss": 6.2172,
"step": 104000
},
{
"epoch": 2.89,
"learning_rate": 9.737980369322908e-06,
"loss": 6.2164,
"step": 104500
},
{
"epoch": 2.9,
"learning_rate": 9.736594022070649e-06,
"loss": 6.2137,
"step": 105000
},
{
"epoch": 2.92,
"learning_rate": 9.735207674818388e-06,
"loss": 6.2122,
"step": 105500
},
{
"epoch": 2.93,
"learning_rate": 9.73382132756613e-06,
"loss": 6.2152,
"step": 106000
},
{
"epoch": 2.94,
"learning_rate": 9.73243498031387e-06,
"loss": 6.2132,
"step": 106500
},
{
"epoch": 2.96,
"learning_rate": 9.73104863306161e-06,
"loss": 6.2128,
"step": 107000
},
{
"epoch": 2.97,
"learning_rate": 9.72966228580935e-06,
"loss": 6.2112,
"step": 107500
},
{
"epoch": 2.99,
"learning_rate": 9.72827593855709e-06,
"loss": 6.2119,
"step": 108000
},
{
"epoch": 3.0,
"learning_rate": 9.726889591304832e-06,
"loss": 6.2066,
"step": 108500
},
{
"epoch": 3.01,
"learning_rate": 9.725503244052571e-06,
"loss": 6.2121,
"step": 109000
},
{
"epoch": 3.03,
"learning_rate": 9.724116896800312e-06,
"loss": 6.2058,
"step": 109500
},
{
"epoch": 3.04,
"learning_rate": 9.722730549548052e-06,
"loss": 6.2112,
"step": 110000
},
{
"epoch": 3.06,
"learning_rate": 9.721344202295791e-06,
"loss": 6.2069,
"step": 110500
},
{
"epoch": 3.07,
"learning_rate": 9.719957855043532e-06,
"loss": 6.2065,
"step": 111000
},
{
"epoch": 3.08,
"learning_rate": 9.718571507791272e-06,
"loss": 6.207,
"step": 111500
},
{
"epoch": 3.1,
"learning_rate": 9.717185160539011e-06,
"loss": 6.2061,
"step": 112000
},
{
"epoch": 3.11,
"learning_rate": 9.715798813286754e-06,
"loss": 6.2056,
"step": 112500
},
{
"epoch": 3.12,
"learning_rate": 9.714412466034493e-06,
"loss": 6.2062,
"step": 113000
},
{
"epoch": 3.14,
"learning_rate": 9.713026118782233e-06,
"loss": 6.1993,
"step": 113500
},
{
"epoch": 3.15,
"learning_rate": 9.711639771529974e-06,
"loss": 6.2011,
"step": 114000
},
{
"epoch": 3.17,
"learning_rate": 9.710253424277713e-06,
"loss": 6.2009,
"step": 114500
},
{
"epoch": 3.18,
"learning_rate": 9.708867077025455e-06,
"loss": 6.2019,
"step": 115000
},
{
"epoch": 3.19,
"learning_rate": 9.707480729773194e-06,
"loss": 6.1993,
"step": 115500
},
{
"epoch": 3.21,
"learning_rate": 9.706094382520935e-06,
"loss": 6.2048,
"step": 116000
},
{
"epoch": 3.22,
"learning_rate": 9.704708035268676e-06,
"loss": 6.1981,
"step": 116500
},
{
"epoch": 3.24,
"learning_rate": 9.703321688016414e-06,
"loss": 6.1946,
"step": 117000
},
{
"epoch": 3.25,
"learning_rate": 9.701935340764155e-06,
"loss": 6.2,
"step": 117500
},
{
"epoch": 3.26,
"learning_rate": 9.700548993511896e-06,
"loss": 6.1983,
"step": 118000
},
{
"epoch": 3.28,
"learning_rate": 9.699162646259635e-06,
"loss": 6.1921,
"step": 118500
},
{
"epoch": 3.29,
"learning_rate": 9.697776299007377e-06,
"loss": 6.1934,
"step": 119000
},
{
"epoch": 3.3,
"learning_rate": 9.696389951755116e-06,
"loss": 6.1959,
"step": 119500
},
{
"epoch": 3.32,
"learning_rate": 9.695003604502857e-06,
"loss": 6.195,
"step": 120000
},
{
"epoch": 3.33,
"learning_rate": 9.693617257250597e-06,
"loss": 6.1949,
"step": 120500
},
{
"epoch": 3.35,
"learning_rate": 9.692230909998336e-06,
"loss": 6.1922,
"step": 121000
},
{
"epoch": 3.36,
"learning_rate": 9.690844562746079e-06,
"loss": 6.1922,
"step": 121500
},
{
"epoch": 3.37,
"learning_rate": 9.689458215493818e-06,
"loss": 6.1916,
"step": 122000
},
{
"epoch": 3.39,
"learning_rate": 9.688071868241558e-06,
"loss": 6.1883,
"step": 122500
},
{
"epoch": 3.4,
"learning_rate": 9.686685520989299e-06,
"loss": 6.1929,
"step": 123000
},
{
"epoch": 3.41,
"learning_rate": 9.685299173737038e-06,
"loss": 6.1892,
"step": 123500
},
{
"epoch": 3.43,
"learning_rate": 9.683912826484778e-06,
"loss": 6.1857,
"step": 124000
},
{
"epoch": 3.44,
"learning_rate": 9.682526479232519e-06,
"loss": 6.1882,
"step": 124500
},
{
"epoch": 3.46,
"learning_rate": 9.68114013198026e-06,
"loss": 6.1898,
"step": 125000
},
{
"epoch": 3.47,
"learning_rate": 9.679753784728e-06,
"loss": 6.1831,
"step": 125500
},
{
"epoch": 3.48,
"learning_rate": 9.67836743747574e-06,
"loss": 6.1849,
"step": 126000
},
{
"epoch": 3.5,
"learning_rate": 9.67698109022348e-06,
"loss": 6.1848,
"step": 126500
},
{
"epoch": 3.51,
"learning_rate": 9.67559474297122e-06,
"loss": 6.1833,
"step": 127000
},
{
"epoch": 3.53,
"learning_rate": 9.67420839571896e-06,
"loss": 6.1851,
"step": 127500
},
{
"epoch": 3.54,
"learning_rate": 9.672822048466702e-06,
"loss": 6.1834,
"step": 128000
},
{
"epoch": 3.55,
"learning_rate": 9.671435701214441e-06,
"loss": 6.1821,
"step": 128500
},
{
"epoch": 3.57,
"learning_rate": 9.670049353962182e-06,
"loss": 6.1845,
"step": 129000
},
{
"epoch": 3.58,
"learning_rate": 9.668663006709922e-06,
"loss": 6.1815,
"step": 129500
},
{
"epoch": 3.59,
"learning_rate": 9.667276659457661e-06,
"loss": 6.1795,
"step": 130000
},
{
"epoch": 3.61,
"learning_rate": 9.665890312205402e-06,
"loss": 6.1826,
"step": 130500
},
{
"epoch": 3.62,
"learning_rate": 9.664503964953142e-06,
"loss": 6.1769,
"step": 131000
},
{
"epoch": 3.64,
"learning_rate": 9.663117617700883e-06,
"loss": 6.1838,
"step": 131500
},
{
"epoch": 3.65,
"learning_rate": 9.661731270448624e-06,
"loss": 6.1806,
"step": 132000
},
{
"epoch": 3.66,
"learning_rate": 9.660344923196363e-06,
"loss": 6.1767,
"step": 132500
},
{
"epoch": 3.68,
"learning_rate": 9.658958575944103e-06,
"loss": 6.1783,
"step": 133000
},
{
"epoch": 3.69,
"learning_rate": 9.657572228691844e-06,
"loss": 6.1757,
"step": 133500
},
{
"epoch": 3.71,
"learning_rate": 9.656185881439583e-06,
"loss": 6.177,
"step": 134000
},
{
"epoch": 3.72,
"learning_rate": 9.654799534187324e-06,
"loss": 6.1724,
"step": 134500
},
{
"epoch": 3.73,
"learning_rate": 9.653413186935064e-06,
"loss": 6.1782,
"step": 135000
},
{
"epoch": 3.75,
"learning_rate": 9.652026839682805e-06,
"loss": 6.1751,
"step": 135500
},
{
"epoch": 3.76,
"learning_rate": 9.650640492430545e-06,
"loss": 6.173,
"step": 136000
},
{
"epoch": 3.77,
"learning_rate": 9.649254145178284e-06,
"loss": 6.1737,
"step": 136500
},
{
"epoch": 3.79,
"learning_rate": 9.647867797926025e-06,
"loss": 6.1762,
"step": 137000
},
{
"epoch": 3.8,
"learning_rate": 9.646481450673766e-06,
"loss": 6.1703,
"step": 137500
},
{
"epoch": 3.82,
"learning_rate": 9.645095103421506e-06,
"loss": 6.1722,
"step": 138000
},
{
"epoch": 3.83,
"learning_rate": 9.643708756169245e-06,
"loss": 6.1711,
"step": 138500
},
{
"epoch": 3.84,
"learning_rate": 9.642322408916986e-06,
"loss": 6.1672,
"step": 139000
},
{
"epoch": 3.86,
"learning_rate": 9.640936061664727e-06,
"loss": 6.1731,
"step": 139500
},
{
"epoch": 3.87,
"learning_rate": 9.639549714412467e-06,
"loss": 6.1729,
"step": 140000
},
{
"epoch": 3.88,
"learning_rate": 9.638163367160206e-06,
"loss": 6.1678,
"step": 140500
},
{
"epoch": 3.9,
"learning_rate": 9.636777019907947e-06,
"loss": 6.17,
"step": 141000
},
{
"epoch": 3.91,
"learning_rate": 9.635390672655688e-06,
"loss": 6.1642,
"step": 141500
},
{
"epoch": 3.93,
"learning_rate": 9.634004325403428e-06,
"loss": 6.1669,
"step": 142000
},
{
"epoch": 3.94,
"learning_rate": 9.632617978151169e-06,
"loss": 6.169,
"step": 142500
},
{
"epoch": 3.95,
"learning_rate": 9.631231630898908e-06,
"loss": 6.1688,
"step": 143000
},
{
"epoch": 3.97,
"learning_rate": 9.629845283646648e-06,
"loss": 6.1681,
"step": 143500
},
{
"epoch": 3.98,
"learning_rate": 9.628458936394389e-06,
"loss": 6.1688,
"step": 144000
},
{
"epoch": 4.0,
"learning_rate": 9.62707258914213e-06,
"loss": 6.1618,
"step": 144500
},
{
"epoch": 4.01,
"learning_rate": 9.625686241889869e-06,
"loss": 6.1641,
"step": 145000
},
{
"epoch": 4.02,
"learning_rate": 9.62429989463761e-06,
"loss": 6.1615,
"step": 145500
},
{
"epoch": 4.04,
"learning_rate": 9.62291354738535e-06,
"loss": 6.1675,
"step": 146000
},
{
"epoch": 4.05,
"learning_rate": 9.62152720013309e-06,
"loss": 6.1619,
"step": 146500
},
{
"epoch": 4.06,
"learning_rate": 9.620140852880831e-06,
"loss": 6.1619,
"step": 147000
},
{
"epoch": 4.08,
"learning_rate": 9.61875450562857e-06,
"loss": 6.1601,
"step": 147500
},
{
"epoch": 4.09,
"learning_rate": 9.61736815837631e-06,
"loss": 6.162,
"step": 148000
},
{
"epoch": 4.11,
"learning_rate": 9.615981811124051e-06,
"loss": 6.1621,
"step": 148500
},
{
"epoch": 4.12,
"learning_rate": 9.61459546387179e-06,
"loss": 6.1604,
"step": 149000
},
{
"epoch": 4.13,
"learning_rate": 9.613209116619531e-06,
"loss": 6.1591,
"step": 149500
},
{
"epoch": 4.15,
"learning_rate": 9.611822769367272e-06,
"loss": 6.1592,
"step": 150000
},
{
"epoch": 4.16,
"learning_rate": 9.610436422115012e-06,
"loss": 6.1554,
"step": 150500
},
{
"epoch": 4.18,
"learning_rate": 9.609050074862753e-06,
"loss": 6.1591,
"step": 151000
},
{
"epoch": 4.19,
"learning_rate": 9.607663727610492e-06,
"loss": 6.1576,
"step": 151500
},
{
"epoch": 4.2,
"learning_rate": 9.606277380358233e-06,
"loss": 6.1569,
"step": 152000
},
{
"epoch": 4.22,
"learning_rate": 9.604891033105973e-06,
"loss": 6.1524,
"step": 152500
},
{
"epoch": 4.23,
"learning_rate": 9.603504685853712e-06,
"loss": 6.1581,
"step": 153000
},
{
"epoch": 4.24,
"learning_rate": 9.602118338601455e-06,
"loss": 6.1563,
"step": 153500
},
{
"epoch": 4.26,
"learning_rate": 9.600731991349194e-06,
"loss": 6.1551,
"step": 154000
},
{
"epoch": 4.27,
"learning_rate": 9.599345644096934e-06,
"loss": 6.1526,
"step": 154500
},
{
"epoch": 4.29,
"learning_rate": 9.597959296844675e-06,
"loss": 6.153,
"step": 155000
},
{
"epoch": 4.3,
"learning_rate": 9.596572949592414e-06,
"loss": 6.1549,
"step": 155500
},
{
"epoch": 4.31,
"learning_rate": 9.595186602340154e-06,
"loss": 6.1529,
"step": 156000
},
{
"epoch": 4.33,
"learning_rate": 9.593800255087895e-06,
"loss": 6.1531,
"step": 156500
},
{
"epoch": 4.34,
"learning_rate": 9.592413907835636e-06,
"loss": 6.1527,
"step": 157000
},
{
"epoch": 4.35,
"learning_rate": 9.591027560583376e-06,
"loss": 6.1501,
"step": 157500
},
{
"epoch": 4.37,
"learning_rate": 9.589641213331115e-06,
"loss": 6.1502,
"step": 158000
},
{
"epoch": 4.38,
"learning_rate": 9.588254866078856e-06,
"loss": 6.1537,
"step": 158500
},
{
"epoch": 4.4,
"learning_rate": 9.586868518826597e-06,
"loss": 6.1498,
"step": 159000
},
{
"epoch": 4.41,
"learning_rate": 9.585482171574336e-06,
"loss": 6.1516,
"step": 159500
},
{
"epoch": 4.42,
"learning_rate": 9.584095824322078e-06,
"loss": 6.1534,
"step": 160000
},
{
"epoch": 4.44,
"learning_rate": 9.582709477069817e-06,
"loss": 6.1468,
"step": 160500
},
{
"epoch": 4.45,
"learning_rate": 9.581323129817557e-06,
"loss": 6.1532,
"step": 161000
},
{
"epoch": 4.47,
"learning_rate": 9.579936782565298e-06,
"loss": 6.1484,
"step": 161500
},
{
"epoch": 4.48,
"learning_rate": 9.578550435313037e-06,
"loss": 6.1516,
"step": 162000
},
{
"epoch": 4.49,
"learning_rate": 9.57716408806078e-06,
"loss": 6.1466,
"step": 162500
},
{
"epoch": 4.51,
"learning_rate": 9.575777740808518e-06,
"loss": 6.1502,
"step": 163000
},
{
"epoch": 4.52,
"learning_rate": 9.574391393556259e-06,
"loss": 6.1447,
"step": 163500
},
{
"epoch": 4.53,
"learning_rate": 9.573005046304e-06,
"loss": 6.1462,
"step": 164000
},
{
"epoch": 4.55,
"learning_rate": 9.571618699051739e-06,
"loss": 6.1497,
"step": 164500
},
{
"epoch": 4.56,
"learning_rate": 9.57023235179948e-06,
"loss": 6.1405,
"step": 165000
},
{
"epoch": 4.58,
"learning_rate": 9.56884600454722e-06,
"loss": 6.1441,
"step": 165500
},
{
"epoch": 4.59,
"learning_rate": 9.567459657294959e-06,
"loss": 6.1469,
"step": 166000
},
{
"epoch": 4.6,
"learning_rate": 9.566073310042701e-06,
"loss": 6.1472,
"step": 166500
},
{
"epoch": 4.62,
"learning_rate": 9.56468696279044e-06,
"loss": 6.1413,
"step": 167000
},
{
"epoch": 4.63,
"learning_rate": 9.56330061553818e-06,
"loss": 6.1455,
"step": 167500
},
{
"epoch": 4.65,
"learning_rate": 9.561914268285921e-06,
"loss": 6.1469,
"step": 168000
},
{
"epoch": 4.66,
"learning_rate": 9.56052792103366e-06,
"loss": 6.1411,
"step": 168500
},
{
"epoch": 4.67,
"learning_rate": 9.559141573781403e-06,
"loss": 6.1425,
"step": 169000
},
{
"epoch": 4.69,
"learning_rate": 9.557755226529142e-06,
"loss": 6.142,
"step": 169500
},
{
"epoch": 4.7,
"learning_rate": 9.556368879276882e-06,
"loss": 6.138,
"step": 170000
},
{
"epoch": 4.71,
"learning_rate": 9.554982532024623e-06,
"loss": 6.1432,
"step": 170500
},
{
"epoch": 4.73,
"learning_rate": 9.553596184772362e-06,
"loss": 6.1415,
"step": 171000
},
{
"epoch": 4.74,
"learning_rate": 9.552209837520103e-06,
"loss": 6.1378,
"step": 171500
},
{
"epoch": 4.76,
"learning_rate": 9.550823490267843e-06,
"loss": 6.143,
"step": 172000
},
{
"epoch": 4.77,
"learning_rate": 9.549437143015582e-06,
"loss": 6.1417,
"step": 172500
},
{
"epoch": 4.78,
"learning_rate": 9.548050795763325e-06,
"loss": 6.1418,
"step": 173000
},
{
"epoch": 4.8,
"learning_rate": 9.546664448511063e-06,
"loss": 6.1426,
"step": 173500
},
{
"epoch": 4.81,
"learning_rate": 9.545278101258804e-06,
"loss": 6.1398,
"step": 174000
},
{
"epoch": 4.82,
"learning_rate": 9.543891754006545e-06,
"loss": 6.1375,
"step": 174500
},
{
"epoch": 4.84,
"learning_rate": 9.542505406754284e-06,
"loss": 6.1393,
"step": 175000
},
{
"epoch": 4.85,
"learning_rate": 9.541119059502026e-06,
"loss": 6.1393,
"step": 175500
},
{
"epoch": 4.87,
"learning_rate": 9.539732712249765e-06,
"loss": 6.1338,
"step": 176000
},
{
"epoch": 4.88,
"learning_rate": 9.538346364997506e-06,
"loss": 6.1359,
"step": 176500
},
{
"epoch": 4.89,
"learning_rate": 9.536960017745246e-06,
"loss": 6.1382,
"step": 177000
},
{
"epoch": 4.91,
"learning_rate": 9.535573670492985e-06,
"loss": 6.1371,
"step": 177500
},
{
"epoch": 4.92,
"learning_rate": 9.534187323240726e-06,
"loss": 6.1356,
"step": 178000
},
{
"epoch": 4.94,
"learning_rate": 9.532800975988467e-06,
"loss": 6.1342,
"step": 178500
},
{
"epoch": 4.95,
"learning_rate": 9.531414628736207e-06,
"loss": 6.1341,
"step": 179000
},
{
"epoch": 4.96,
"learning_rate": 9.530028281483948e-06,
"loss": 6.1353,
"step": 179500
},
{
"epoch": 4.98,
"learning_rate": 9.528641934231687e-06,
"loss": 6.1303,
"step": 180000
},
{
"epoch": 4.99,
"learning_rate": 9.527255586979427e-06,
"loss": 6.1354,
"step": 180500
},
{
"epoch": 5.0,
"learning_rate": 9.525869239727168e-06,
"loss": 6.1324,
"step": 181000
},
{
"epoch": 5.02,
"learning_rate": 9.524482892474907e-06,
"loss": 6.1328,
"step": 181500
},
{
"epoch": 5.03,
"learning_rate": 9.523096545222648e-06,
"loss": 6.1356,
"step": 182000
},
{
"epoch": 5.05,
"learning_rate": 9.521710197970388e-06,
"loss": 6.135,
"step": 182500
},
{
"epoch": 5.06,
"learning_rate": 9.520323850718129e-06,
"loss": 6.1319,
"step": 183000
},
{
"epoch": 5.07,
"learning_rate": 9.51893750346587e-06,
"loss": 6.1307,
"step": 183500
},
{
"epoch": 5.09,
"learning_rate": 9.517551156213609e-06,
"loss": 6.1287,
"step": 184000
},
{
"epoch": 5.1,
"learning_rate": 9.51616480896135e-06,
"loss": 6.1286,
"step": 184500
},
{
"epoch": 5.12,
"learning_rate": 9.51477846170909e-06,
"loss": 6.132,
"step": 185000
},
{
"epoch": 5.13,
"learning_rate": 9.51339211445683e-06,
"loss": 6.1296,
"step": 185500
},
{
"epoch": 5.14,
"learning_rate": 9.512005767204571e-06,
"loss": 6.1306,
"step": 186000
},
{
"epoch": 5.16,
"learning_rate": 9.51061941995231e-06,
"loss": 6.1314,
"step": 186500
},
{
"epoch": 5.17,
"learning_rate": 9.50923307270005e-06,
"loss": 6.1281,
"step": 187000
},
{
"epoch": 5.18,
"learning_rate": 9.507846725447791e-06,
"loss": 6.1276,
"step": 187500
},
{
"epoch": 5.2,
"learning_rate": 9.50646037819553e-06,
"loss": 6.1274,
"step": 188000
},
{
"epoch": 5.21,
"learning_rate": 9.505074030943271e-06,
"loss": 6.1301,
"step": 188500
},
{
"epoch": 5.23,
"learning_rate": 9.503687683691012e-06,
"loss": 6.1282,
"step": 189000
},
{
"epoch": 5.24,
"learning_rate": 9.502301336438752e-06,
"loss": 6.1261,
"step": 189500
},
{
"epoch": 5.25,
"learning_rate": 9.500914989186493e-06,
"loss": 6.1236,
"step": 190000
},
{
"epoch": 5.27,
"learning_rate": 9.499528641934232e-06,
"loss": 6.1256,
"step": 190500
},
{
"epoch": 5.28,
"learning_rate": 9.498142294681973e-06,
"loss": 6.1268,
"step": 191000
},
{
"epoch": 5.3,
"learning_rate": 9.496755947429713e-06,
"loss": 6.1253,
"step": 191500
},
{
"epoch": 5.31,
"learning_rate": 9.495369600177454e-06,
"loss": 6.1282,
"step": 192000
},
{
"epoch": 5.32,
"learning_rate": 9.493983252925193e-06,
"loss": 6.1232,
"step": 192500
},
{
"epoch": 5.34,
"learning_rate": 9.492596905672933e-06,
"loss": 6.1283,
"step": 193000
},
{
"epoch": 5.35,
"learning_rate": 9.491210558420674e-06,
"loss": 6.1278,
"step": 193500
},
{
"epoch": 5.36,
"learning_rate": 9.489824211168415e-06,
"loss": 6.1216,
"step": 194000
},
{
"epoch": 5.38,
"learning_rate": 9.488437863916154e-06,
"loss": 6.1209,
"step": 194500
},
{
"epoch": 5.39,
"learning_rate": 9.487051516663894e-06,
"loss": 6.1216,
"step": 195000
},
{
"epoch": 5.41,
"learning_rate": 9.485665169411635e-06,
"loss": 6.1259,
"step": 195500
},
{
"epoch": 5.42,
"learning_rate": 9.484278822159376e-06,
"loss": 6.1191,
"step": 196000
},
{
"epoch": 5.43,
"learning_rate": 9.482892474907116e-06,
"loss": 6.1222,
"step": 196500
},
{
"epoch": 5.45,
"learning_rate": 9.481506127654855e-06,
"loss": 6.1207,
"step": 197000
},
{
"epoch": 5.46,
"learning_rate": 9.480119780402596e-06,
"loss": 6.1171,
"step": 197500
},
{
"epoch": 5.47,
"learning_rate": 9.478733433150337e-06,
"loss": 6.1228,
"step": 198000
},
{
"epoch": 5.49,
"learning_rate": 9.477347085898077e-06,
"loss": 6.119,
"step": 198500
},
{
"epoch": 5.5,
"learning_rate": 9.475960738645816e-06,
"loss": 6.119,
"step": 199000
},
{
"epoch": 5.52,
"learning_rate": 9.474574391393557e-06,
"loss": 6.1198,
"step": 199500
},
{
"epoch": 5.53,
"learning_rate": 9.473188044141297e-06,
"loss": 6.1238,
"step": 200000
},
{
"epoch": 5.54,
"learning_rate": 9.471801696889038e-06,
"loss": 6.121,
"step": 200500
},
{
"epoch": 5.56,
"learning_rate": 9.470415349636779e-06,
"loss": 6.1171,
"step": 201000
},
{
"epoch": 5.57,
"learning_rate": 9.469029002384518e-06,
"loss": 6.1196,
"step": 201500
},
{
"epoch": 5.59,
"learning_rate": 9.467642655132258e-06,
"loss": 6.1172,
"step": 202000
},
{
"epoch": 5.6,
"learning_rate": 9.466256307879999e-06,
"loss": 6.1241,
"step": 202500
},
{
"epoch": 5.61,
"learning_rate": 9.464869960627738e-06,
"loss": 6.121,
"step": 203000
},
{
"epoch": 5.63,
"learning_rate": 9.463483613375479e-06,
"loss": 6.1186,
"step": 203500
},
{
"epoch": 5.64,
"learning_rate": 9.46209726612322e-06,
"loss": 6.1153,
"step": 204000
},
{
"epoch": 5.65,
"learning_rate": 9.46071091887096e-06,
"loss": 6.1186,
"step": 204500
},
{
"epoch": 5.67,
"learning_rate": 9.4593245716187e-06,
"loss": 6.1177,
"step": 205000
},
{
"epoch": 5.68,
"learning_rate": 9.45793822436644e-06,
"loss": 6.1165,
"step": 205500
},
{
"epoch": 5.7,
"learning_rate": 9.45655187711418e-06,
"loss": 6.1138,
"step": 206000
},
{
"epoch": 5.71,
"learning_rate": 9.45516552986192e-06,
"loss": 6.1177,
"step": 206500
},
{
"epoch": 5.72,
"learning_rate": 9.45377918260966e-06,
"loss": 6.1178,
"step": 207000
},
{
"epoch": 5.74,
"learning_rate": 9.452392835357402e-06,
"loss": 6.1124,
"step": 207500
},
{
"epoch": 5.75,
"learning_rate": 9.451006488105141e-06,
"loss": 6.1135,
"step": 208000
},
{
"epoch": 5.77,
"learning_rate": 9.449620140852882e-06,
"loss": 6.1192,
"step": 208500
},
{
"epoch": 5.78,
"learning_rate": 9.448233793600622e-06,
"loss": 6.1179,
"step": 209000
},
{
"epoch": 5.79,
"learning_rate": 9.446847446348361e-06,
"loss": 6.1107,
"step": 209500
},
{
"epoch": 5.81,
"learning_rate": 9.445461099096102e-06,
"loss": 6.1133,
"step": 210000
},
{
"epoch": 5.82,
"learning_rate": 9.444074751843843e-06,
"loss": 6.1123,
"step": 210500
},
{
"epoch": 5.83,
"learning_rate": 9.442688404591583e-06,
"loss": 6.1143,
"step": 211000
},
{
"epoch": 5.85,
"learning_rate": 9.441302057339324e-06,
"loss": 6.1132,
"step": 211500
},
{
"epoch": 5.86,
"learning_rate": 9.439915710087063e-06,
"loss": 6.1149,
"step": 212000
},
{
"epoch": 5.88,
"learning_rate": 9.438529362834803e-06,
"loss": 6.1126,
"step": 212500
},
{
"epoch": 5.89,
"learning_rate": 9.437143015582544e-06,
"loss": 6.1108,
"step": 213000
},
{
"epoch": 5.9,
"learning_rate": 9.435756668330283e-06,
"loss": 6.1119,
"step": 213500
},
{
"epoch": 5.92,
"learning_rate": 9.434370321078025e-06,
"loss": 6.1139,
"step": 214000
},
{
"epoch": 5.93,
"learning_rate": 9.432983973825764e-06,
"loss": 6.113,
"step": 214500
},
{
"epoch": 5.94,
"learning_rate": 9.431597626573505e-06,
"loss": 6.1073,
"step": 215000
},
{
"epoch": 5.96,
"learning_rate": 9.430211279321246e-06,
"loss": 6.1105,
"step": 215500
},
{
"epoch": 5.97,
"learning_rate": 9.428824932068985e-06,
"loss": 6.1072,
"step": 216000
},
{
"epoch": 5.99,
"learning_rate": 9.427438584816725e-06,
"loss": 6.1142,
"step": 216500
},
{
"epoch": 6.0,
"learning_rate": 9.426052237564466e-06,
"loss": 6.1105,
"step": 217000
},
{
"epoch": 6.01,
"learning_rate": 9.424665890312206e-06,
"loss": 6.1103,
"step": 217500
},
{
"epoch": 6.03,
"learning_rate": 9.423279543059947e-06,
"loss": 6.1079,
"step": 218000
},
{
"epoch": 6.04,
"learning_rate": 9.421893195807686e-06,
"loss": 6.1099,
"step": 218500
},
{
"epoch": 6.06,
"learning_rate": 9.420506848555427e-06,
"loss": 6.1098,
"step": 219000
},
{
"epoch": 6.07,
"learning_rate": 9.419120501303167e-06,
"loss": 6.1094,
"step": 219500
},
{
"epoch": 6.08,
"learning_rate": 9.417734154050906e-06,
"loss": 6.1081,
"step": 220000
},
{
"epoch": 6.1,
"learning_rate": 9.416347806798649e-06,
"loss": 6.1065,
"step": 220500
},
{
"epoch": 6.11,
"learning_rate": 9.414961459546388e-06,
"loss": 6.1096,
"step": 221000
},
{
"epoch": 6.12,
"learning_rate": 9.413575112294128e-06,
"loss": 6.1089,
"step": 221500
},
{
"epoch": 6.14,
"learning_rate": 9.412188765041869e-06,
"loss": 6.1069,
"step": 222000
},
{
"epoch": 6.15,
"learning_rate": 9.410802417789608e-06,
"loss": 6.1021,
"step": 222500
},
{
"epoch": 6.17,
"learning_rate": 9.40941607053735e-06,
"loss": 6.1075,
"step": 223000
},
{
"epoch": 6.18,
"learning_rate": 9.40802972328509e-06,
"loss": 6.1075,
"step": 223500
},
{
"epoch": 6.19,
"learning_rate": 9.40664337603283e-06,
"loss": 6.1056,
"step": 224000
},
{
"epoch": 6.21,
"learning_rate": 9.40525702878057e-06,
"loss": 6.1077,
"step": 224500
},
{
"epoch": 6.22,
"learning_rate": 9.40387068152831e-06,
"loss": 6.1079,
"step": 225000
},
{
"epoch": 6.24,
"learning_rate": 9.40248433427605e-06,
"loss": 6.1067,
"step": 225500
},
{
"epoch": 6.25,
"learning_rate": 9.40109798702379e-06,
"loss": 6.1048,
"step": 226000
},
{
"epoch": 6.26,
"learning_rate": 9.39971163977153e-06,
"loss": 6.1075,
"step": 226500
},
{
"epoch": 6.28,
"learning_rate": 9.398325292519272e-06,
"loss": 6.1044,
"step": 227000
},
{
"epoch": 6.29,
"learning_rate": 9.396938945267011e-06,
"loss": 6.1046,
"step": 227500
},
{
"epoch": 6.3,
"learning_rate": 9.395552598014752e-06,
"loss": 6.103,
"step": 228000
},
{
"epoch": 6.32,
"learning_rate": 9.394166250762492e-06,
"loss": 6.1046,
"step": 228500
},
{
"epoch": 6.33,
"learning_rate": 9.392779903510231e-06,
"loss": 6.1004,
"step": 229000
},
{
"epoch": 6.35,
"learning_rate": 9.391393556257974e-06,
"loss": 6.1034,
"step": 229500
},
{
"epoch": 6.36,
"learning_rate": 9.390007209005712e-06,
"loss": 6.1041,
"step": 230000
},
{
"epoch": 6.37,
"learning_rate": 9.388620861753453e-06,
"loss": 6.1043,
"step": 230500
},
{
"epoch": 6.39,
"learning_rate": 9.387234514501194e-06,
"loss": 6.102,
"step": 231000
},
{
"epoch": 6.4,
"learning_rate": 9.385848167248933e-06,
"loss": 6.0986,
"step": 231500
},
{
"epoch": 6.41,
"learning_rate": 9.384461819996673e-06,
"loss": 6.1023,
"step": 232000
},
{
"epoch": 6.43,
"learning_rate": 9.383075472744414e-06,
"loss": 6.1015,
"step": 232500
},
{
"epoch": 6.44,
"learning_rate": 9.381689125492155e-06,
"loss": 6.0996,
"step": 233000
},
{
"epoch": 6.46,
"learning_rate": 9.380302778239895e-06,
"loss": 6.1013,
"step": 233500
},
{
"epoch": 6.47,
"learning_rate": 9.378916430987634e-06,
"loss": 6.1024,
"step": 234000
},
{
"epoch": 6.48,
"learning_rate": 9.377530083735375e-06,
"loss": 6.1004,
"step": 234500
},
{
"epoch": 6.5,
"learning_rate": 9.376143736483116e-06,
"loss": 6.1033,
"step": 235000
},
{
"epoch": 6.51,
"learning_rate": 9.374757389230855e-06,
"loss": 6.1025,
"step": 235500
},
{
"epoch": 6.53,
"learning_rate": 9.373371041978595e-06,
"loss": 6.0993,
"step": 236000
},
{
"epoch": 6.54,
"learning_rate": 9.371984694726336e-06,
"loss": 6.0948,
"step": 236500
},
{
"epoch": 6.55,
"learning_rate": 9.370598347474076e-06,
"loss": 6.0956,
"step": 237000
},
{
"epoch": 6.57,
"learning_rate": 9.369212000221817e-06,
"loss": 6.1003,
"step": 237500
},
{
"epoch": 6.58,
"learning_rate": 9.367825652969556e-06,
"loss": 6.1016,
"step": 238000
},
{
"epoch": 6.59,
"learning_rate": 9.366439305717297e-06,
"loss": 6.0976,
"step": 238500
},
{
"epoch": 6.61,
"learning_rate": 9.365052958465037e-06,
"loss": 6.0974,
"step": 239000
},
{
"epoch": 6.62,
"learning_rate": 9.363666611212778e-06,
"loss": 6.0981,
"step": 239500
},
{
"epoch": 6.64,
"learning_rate": 9.362280263960519e-06,
"loss": 6.1003,
"step": 240000
},
{
"epoch": 6.65,
"learning_rate": 9.360893916708258e-06,
"loss": 6.0968,
"step": 240500
},
{
"epoch": 6.66,
"learning_rate": 9.359507569455998e-06,
"loss": 6.0988,
"step": 241000
},
{
"epoch": 6.68,
"learning_rate": 9.358121222203739e-06,
"loss": 6.1019,
"step": 241500
},
{
"epoch": 6.69,
"learning_rate": 9.356734874951478e-06,
"loss": 6.0987,
"step": 242000
},
{
"epoch": 6.71,
"learning_rate": 9.355348527699218e-06,
"loss": 6.0975,
"step": 242500
},
{
"epoch": 6.72,
"learning_rate": 9.353962180446959e-06,
"loss": 6.0961,
"step": 243000
},
{
"epoch": 6.73,
"learning_rate": 9.3525758331947e-06,
"loss": 6.0974,
"step": 243500
},
{
"epoch": 6.75,
"learning_rate": 9.35118948594244e-06,
"loss": 6.0963,
"step": 244000
},
{
"epoch": 6.76,
"learning_rate": 9.34980313869018e-06,
"loss": 6.0989,
"step": 244500
},
{
"epoch": 6.77,
"learning_rate": 9.34841679143792e-06,
"loss": 6.0979,
"step": 245000
},
{
"epoch": 6.79,
"learning_rate": 9.34703044418566e-06,
"loss": 6.0951,
"step": 245500
},
{
"epoch": 6.8,
"learning_rate": 9.345644096933401e-06,
"loss": 6.0965,
"step": 246000
},
{
"epoch": 6.82,
"learning_rate": 9.34425774968114e-06,
"loss": 6.0983,
"step": 246500
},
{
"epoch": 6.83,
"learning_rate": 9.342871402428881e-06,
"loss": 6.0937,
"step": 247000
},
{
"epoch": 6.84,
"learning_rate": 9.341485055176622e-06,
"loss": 6.0938,
"step": 247500
},
{
"epoch": 6.86,
"learning_rate": 9.340098707924362e-06,
"loss": 6.0956,
"step": 248000
},
{
"epoch": 6.87,
"learning_rate": 9.338712360672101e-06,
"loss": 6.0966,
"step": 248500
},
{
"epoch": 6.88,
"learning_rate": 9.337326013419842e-06,
"loss": 6.0919,
"step": 249000
},
{
"epoch": 6.9,
"learning_rate": 9.335939666167582e-06,
"loss": 6.0887,
"step": 249500
},
{
"epoch": 6.91,
"learning_rate": 9.334553318915323e-06,
"loss": 6.0937,
"step": 250000
},
{
"epoch": 6.93,
"learning_rate": 9.333166971663062e-06,
"loss": 6.0922,
"step": 250500
},
{
"epoch": 6.94,
"learning_rate": 9.331780624410803e-06,
"loss": 6.0927,
"step": 251000
},
{
"epoch": 6.95,
"learning_rate": 9.330394277158543e-06,
"loss": 6.0938,
"step": 251500
},
{
"epoch": 6.97,
"learning_rate": 9.329007929906284e-06,
"loss": 6.0944,
"step": 252000
},
{
"epoch": 6.98,
"learning_rate": 9.327621582654025e-06,
"loss": 6.0923,
"step": 252500
},
{
"epoch": 7.0,
"learning_rate": 9.326235235401764e-06,
"loss": 6.091,
"step": 253000
},
{
"epoch": 7.01,
"learning_rate": 9.324848888149504e-06,
"loss": 6.0916,
"step": 253500
},
{
"epoch": 7.02,
"learning_rate": 9.323462540897245e-06,
"loss": 6.0898,
"step": 254000
},
{
"epoch": 7.04,
"learning_rate": 9.322076193644986e-06,
"loss": 6.0913,
"step": 254500
},
{
"epoch": 7.05,
"learning_rate": 9.320689846392726e-06,
"loss": 6.0948,
"step": 255000
},
{
"epoch": 7.06,
"learning_rate": 9.319303499140465e-06,
"loss": 6.0911,
"step": 255500
},
{
"epoch": 7.08,
"learning_rate": 9.317917151888206e-06,
"loss": 6.0936,
"step": 256000
},
{
"epoch": 7.09,
"learning_rate": 9.316530804635946e-06,
"loss": 6.0893,
"step": 256500
},
{
"epoch": 7.11,
"learning_rate": 9.315144457383685e-06,
"loss": 6.0903,
"step": 257000
},
{
"epoch": 7.12,
"learning_rate": 9.313758110131426e-06,
"loss": 6.092,
"step": 257500
},
{
"epoch": 7.13,
"learning_rate": 9.312371762879167e-06,
"loss": 6.0877,
"step": 258000
},
{
"epoch": 7.15,
"learning_rate": 9.310985415626907e-06,
"loss": 6.0908,
"step": 258500
},
{
"epoch": 7.16,
"learning_rate": 9.309599068374648e-06,
"loss": 6.0916,
"step": 259000
},
{
"epoch": 7.18,
"learning_rate": 9.308212721122387e-06,
"loss": 6.0861,
"step": 259500
},
{
"epoch": 7.19,
"learning_rate": 9.306826373870128e-06,
"loss": 6.0921,
"step": 260000
},
{
"epoch": 7.2,
"learning_rate": 9.305440026617868e-06,
"loss": 6.0858,
"step": 260500
},
{
"epoch": 7.22,
"learning_rate": 9.304053679365607e-06,
"loss": 6.09,
"step": 261000
},
{
"epoch": 7.23,
"learning_rate": 9.30266733211335e-06,
"loss": 6.0855,
"step": 261500
},
{
"epoch": 7.24,
"learning_rate": 9.301280984861088e-06,
"loss": 6.0844,
"step": 262000
},
{
"epoch": 7.26,
"learning_rate": 9.299894637608829e-06,
"loss": 6.0885,
"step": 262500
},
{
"epoch": 7.27,
"learning_rate": 9.29850829035657e-06,
"loss": 6.0843,
"step": 263000
},
{
"epoch": 7.29,
"learning_rate": 9.297121943104309e-06,
"loss": 6.0872,
"step": 263500
},
{
"epoch": 7.3,
"learning_rate": 9.29573559585205e-06,
"loss": 6.086,
"step": 264000
},
{
"epoch": 7.31,
"learning_rate": 9.29434924859979e-06,
"loss": 6.0856,
"step": 264500
},
{
"epoch": 7.33,
"learning_rate": 9.292962901347529e-06,
"loss": 6.0868,
"step": 265000
},
{
"epoch": 7.34,
"learning_rate": 9.291576554095271e-06,
"loss": 6.088,
"step": 265500
},
{
"epoch": 7.35,
"learning_rate": 9.29019020684301e-06,
"loss": 6.0867,
"step": 266000
},
{
"epoch": 7.37,
"learning_rate": 9.288803859590751e-06,
"loss": 6.0869,
"step": 266500
},
{
"epoch": 7.38,
"learning_rate": 9.287417512338492e-06,
"loss": 6.0889,
"step": 267000
},
{
"epoch": 7.4,
"learning_rate": 9.28603116508623e-06,
"loss": 6.0832,
"step": 267500
},
{
"epoch": 7.41,
"learning_rate": 9.284644817833973e-06,
"loss": 6.0833,
"step": 268000
},
{
"epoch": 7.42,
"learning_rate": 9.283258470581712e-06,
"loss": 6.0827,
"step": 268500
},
{
"epoch": 7.44,
"learning_rate": 9.281872123329452e-06,
"loss": 6.0832,
"step": 269000
},
{
"epoch": 7.45,
"learning_rate": 9.280485776077193e-06,
"loss": 6.0888,
"step": 269500
},
{
"epoch": 7.47,
"learning_rate": 9.279099428824932e-06,
"loss": 6.0844,
"step": 270000
},
{
"epoch": 7.48,
"learning_rate": 9.277713081572673e-06,
"loss": 6.0889,
"step": 270500
},
{
"epoch": 7.49,
"learning_rate": 9.276326734320413e-06,
"loss": 6.0886,
"step": 271000
},
{
"epoch": 7.51,
"learning_rate": 9.274940387068154e-06,
"loss": 6.0842,
"step": 271500
},
{
"epoch": 7.52,
"learning_rate": 9.273554039815895e-06,
"loss": 6.086,
"step": 272000
},
{
"epoch": 7.53,
"learning_rate": 9.272167692563634e-06,
"loss": 6.084,
"step": 272500
},
{
"epoch": 7.55,
"learning_rate": 9.270781345311374e-06,
"loss": 6.0844,
"step": 273000
},
{
"epoch": 7.56,
"learning_rate": 9.269394998059115e-06,
"loss": 6.0871,
"step": 273500
},
{
"epoch": 7.58,
"learning_rate": 9.268008650806854e-06,
"loss": 6.0848,
"step": 274000
},
{
"epoch": 7.59,
"learning_rate": 9.266622303554596e-06,
"loss": 6.086,
"step": 274500
},
{
"epoch": 7.6,
"learning_rate": 9.265235956302335e-06,
"loss": 6.0839,
"step": 275000
},
{
"epoch": 7.62,
"learning_rate": 9.263849609050076e-06,
"loss": 6.0843,
"step": 275500
},
{
"epoch": 7.63,
"learning_rate": 9.262463261797816e-06,
"loss": 6.0824,
"step": 276000
},
{
"epoch": 7.65,
"learning_rate": 9.261076914545555e-06,
"loss": 6.0837,
"step": 276500
},
{
"epoch": 7.66,
"learning_rate": 9.259690567293298e-06,
"loss": 6.0819,
"step": 277000
},
{
"epoch": 7.67,
"learning_rate": 9.258304220041037e-06,
"loss": 6.078,
"step": 277500
},
{
"epoch": 7.69,
"learning_rate": 9.256917872788777e-06,
"loss": 6.0801,
"step": 278000
},
{
"epoch": 7.7,
"learning_rate": 9.255531525536518e-06,
"loss": 6.0812,
"step": 278500
},
{
"epoch": 7.71,
"learning_rate": 9.254145178284257e-06,
"loss": 6.0827,
"step": 279000
},
{
"epoch": 7.73,
"learning_rate": 9.252758831031998e-06,
"loss": 6.0806,
"step": 279500
},
{
"epoch": 7.74,
"learning_rate": 9.251372483779738e-06,
"loss": 6.0803,
"step": 280000
},
{
"epoch": 7.76,
"learning_rate": 9.249986136527477e-06,
"loss": 6.0819,
"step": 280500
},
{
"epoch": 7.77,
"learning_rate": 9.24859978927522e-06,
"loss": 6.0811,
"step": 281000
},
{
"epoch": 7.78,
"learning_rate": 9.247213442022958e-06,
"loss": 6.082,
"step": 281500
},
{
"epoch": 7.8,
"learning_rate": 9.245827094770699e-06,
"loss": 6.0796,
"step": 282000
},
{
"epoch": 7.81,
"learning_rate": 9.24444074751844e-06,
"loss": 6.0797,
"step": 282500
},
{
"epoch": 7.83,
"learning_rate": 9.243054400266179e-06,
"loss": 6.0774,
"step": 283000
},
{
"epoch": 7.84,
"learning_rate": 9.241668053013921e-06,
"loss": 6.082,
"step": 283500
},
{
"epoch": 7.85,
"learning_rate": 9.24028170576166e-06,
"loss": 6.0824,
"step": 284000
},
{
"epoch": 7.87,
"learning_rate": 9.2388953585094e-06,
"loss": 6.0756,
"step": 284500
},
{
"epoch": 7.88,
"learning_rate": 9.237509011257141e-06,
"loss": 6.0811,
"step": 285000
},
{
"epoch": 7.89,
"learning_rate": 9.23612266400488e-06,
"loss": 6.0766,
"step": 285500
},
{
"epoch": 7.91,
"learning_rate": 9.23473631675262e-06,
"loss": 6.0801,
"step": 286000
},
{
"epoch": 7.92,
"learning_rate": 9.233349969500361e-06,
"loss": 6.0765,
"step": 286500
},
{
"epoch": 7.94,
"learning_rate": 9.2319636222481e-06,
"loss": 6.0779,
"step": 287000
},
{
"epoch": 7.95,
"learning_rate": 9.230577274995843e-06,
"loss": 6.0784,
"step": 287500
},
{
"epoch": 7.96,
"learning_rate": 9.229190927743582e-06,
"loss": 6.0776,
"step": 288000
},
{
"epoch": 7.98,
"learning_rate": 9.227804580491322e-06,
"loss": 6.0798,
"step": 288500
},
{
"epoch": 7.99,
"learning_rate": 9.226418233239063e-06,
"loss": 6.0782,
"step": 289000
},
{
"epoch": 8.0,
"learning_rate": 9.225031885986802e-06,
"loss": 6.0787,
"step": 289500
},
{
"epoch": 8.02,
"learning_rate": 9.223645538734543e-06,
"loss": 6.0766,
"step": 290000
},
{
"epoch": 8.03,
"learning_rate": 9.222259191482283e-06,
"loss": 6.0781,
"step": 290500
},
{
"epoch": 8.05,
"learning_rate": 9.220872844230024e-06,
"loss": 6.0736,
"step": 291000
},
{
"epoch": 8.06,
"learning_rate": 9.219486496977765e-06,
"loss": 6.0756,
"step": 291500
},
{
"epoch": 8.07,
"learning_rate": 9.218100149725504e-06,
"loss": 6.0742,
"step": 292000
},
{
"epoch": 8.09,
"learning_rate": 9.216713802473244e-06,
"loss": 6.0791,
"step": 292500
},
{
"epoch": 8.1,
"learning_rate": 9.215327455220985e-06,
"loss": 6.0754,
"step": 293000
},
{
"epoch": 8.12,
"learning_rate": 9.213941107968725e-06,
"loss": 6.0739,
"step": 293500
},
{
"epoch": 8.13,
"learning_rate": 9.212554760716464e-06,
"loss": 6.0751,
"step": 294000
},
{
"epoch": 8.14,
"learning_rate": 9.211168413464205e-06,
"loss": 6.0737,
"step": 294500
},
{
"epoch": 8.16,
"learning_rate": 9.209782066211946e-06,
"loss": 6.0803,
"step": 295000
},
{
"epoch": 8.17,
"learning_rate": 9.208395718959686e-06,
"loss": 6.0766,
"step": 295500
},
{
"epoch": 8.18,
"learning_rate": 9.207009371707425e-06,
"loss": 6.0745,
"step": 296000
},
{
"epoch": 8.2,
"learning_rate": 9.205623024455166e-06,
"loss": 6.0743,
"step": 296500
},
{
"epoch": 8.21,
"learning_rate": 9.204236677202907e-06,
"loss": 6.0735,
"step": 297000
},
{
"epoch": 8.23,
"learning_rate": 9.202850329950647e-06,
"loss": 6.077,
"step": 297500
},
{
"epoch": 8.24,
"learning_rate": 9.201463982698388e-06,
"loss": 6.0748,
"step": 298000
},
{
"epoch": 8.25,
"learning_rate": 9.200077635446127e-06,
"loss": 6.0755,
"step": 298500
},
{
"epoch": 8.27,
"learning_rate": 9.198691288193867e-06,
"loss": 6.074,
"step": 299000
},
{
"epoch": 8.28,
"learning_rate": 9.197304940941608e-06,
"loss": 6.077,
"step": 299500
},
{
"epoch": 8.3,
"learning_rate": 9.195918593689349e-06,
"loss": 6.0762,
"step": 300000
},
{
"epoch": 8.31,
"learning_rate": 9.194532246437088e-06,
"loss": 6.0768,
"step": 300500
},
{
"epoch": 8.32,
"learning_rate": 9.193145899184828e-06,
"loss": 6.0716,
"step": 301000
},
{
"epoch": 8.34,
"learning_rate": 9.191759551932569e-06,
"loss": 6.0743,
"step": 301500
},
{
"epoch": 8.35,
"learning_rate": 9.19037320468031e-06,
"loss": 6.0744,
"step": 302000
},
{
"epoch": 8.36,
"learning_rate": 9.188986857428049e-06,
"loss": 6.0747,
"step": 302500
},
{
"epoch": 8.38,
"learning_rate": 9.18760051017579e-06,
"loss": 6.0729,
"step": 303000
},
{
"epoch": 8.39,
"learning_rate": 9.18621416292353e-06,
"loss": 6.0691,
"step": 303500
},
{
"epoch": 8.41,
"learning_rate": 9.18482781567127e-06,
"loss": 6.0739,
"step": 304000
},
{
"epoch": 8.42,
"learning_rate": 9.18344146841901e-06,
"loss": 6.0715,
"step": 304500
},
{
"epoch": 8.43,
"learning_rate": 9.18205512116675e-06,
"loss": 6.0702,
"step": 305000
},
{
"epoch": 8.45,
"learning_rate": 9.18066877391449e-06,
"loss": 6.0691,
"step": 305500
},
{
"epoch": 8.46,
"learning_rate": 9.179282426662231e-06,
"loss": 6.0745,
"step": 306000
},
{
"epoch": 8.47,
"learning_rate": 9.177896079409972e-06,
"loss": 6.0722,
"step": 306500
},
{
"epoch": 8.49,
"learning_rate": 9.176509732157711e-06,
"loss": 6.0749,
"step": 307000
},
{
"epoch": 8.5,
"learning_rate": 9.175123384905452e-06,
"loss": 6.0709,
"step": 307500
},
{
"epoch": 8.52,
"learning_rate": 9.173737037653192e-06,
"loss": 6.0693,
"step": 308000
},
{
"epoch": 8.53,
"learning_rate": 9.172350690400931e-06,
"loss": 6.0693,
"step": 308500
},
{
"epoch": 8.54,
"learning_rate": 9.170964343148672e-06,
"loss": 6.0728,
"step": 309000
},
{
"epoch": 8.56,
"learning_rate": 9.169577995896413e-06,
"loss": 6.0721,
"step": 309500
},
{
"epoch": 8.57,
"learning_rate": 9.168191648644153e-06,
"loss": 6.0721,
"step": 310000
},
{
"epoch": 8.59,
"learning_rate": 9.166805301391894e-06,
"loss": 6.0706,
"step": 310500
},
{
"epoch": 8.6,
"learning_rate": 9.165418954139633e-06,
"loss": 6.0695,
"step": 311000
},
{
"epoch": 8.61,
"learning_rate": 9.164032606887373e-06,
"loss": 6.0672,
"step": 311500
},
{
"epoch": 8.63,
"learning_rate": 9.162646259635114e-06,
"loss": 6.0714,
"step": 312000
},
{
"epoch": 8.64,
"learning_rate": 9.161259912382855e-06,
"loss": 6.0659,
"step": 312500
},
{
"epoch": 8.65,
"learning_rate": 9.159873565130595e-06,
"loss": 6.0718,
"step": 313000
},
{
"epoch": 8.67,
"learning_rate": 9.158487217878334e-06,
"loss": 6.0658,
"step": 313500
},
{
"epoch": 8.68,
"learning_rate": 9.157100870626075e-06,
"loss": 6.0716,
"step": 314000
},
{
"epoch": 8.7,
"learning_rate": 9.155714523373816e-06,
"loss": 6.0705,
"step": 314500
},
{
"epoch": 8.71,
"learning_rate": 9.154328176121555e-06,
"loss": 6.0689,
"step": 315000
},
{
"epoch": 8.72,
"learning_rate": 9.152941828869297e-06,
"loss": 6.0677,
"step": 315500
},
{
"epoch": 8.74,
"learning_rate": 9.151555481617036e-06,
"loss": 6.0683,
"step": 316000
},
{
"epoch": 8.75,
"learning_rate": 9.150169134364777e-06,
"loss": 6.0702,
"step": 316500
},
{
"epoch": 8.77,
"learning_rate": 9.148782787112517e-06,
"loss": 6.0724,
"step": 317000
},
{
"epoch": 8.78,
"learning_rate": 9.147396439860256e-06,
"loss": 6.066,
"step": 317500
},
{
"epoch": 8.79,
"learning_rate": 9.146010092607997e-06,
"loss": 6.0695,
"step": 318000
},
{
"epoch": 8.81,
"learning_rate": 9.144623745355737e-06,
"loss": 6.0696,
"step": 318500
},
{
"epoch": 8.82,
"learning_rate": 9.143237398103476e-06,
"loss": 6.0679,
"step": 319000
},
{
"epoch": 8.83,
"learning_rate": 9.141851050851219e-06,
"loss": 6.0666,
"step": 319500
},
{
"epoch": 8.85,
"learning_rate": 9.140464703598958e-06,
"loss": 6.0708,
"step": 320000
},
{
"epoch": 8.86,
"learning_rate": 9.139078356346698e-06,
"loss": 6.0677,
"step": 320500
},
{
"epoch": 8.88,
"learning_rate": 9.137692009094439e-06,
"loss": 6.0646,
"step": 321000
},
{
"epoch": 8.89,
"learning_rate": 9.136305661842178e-06,
"loss": 6.0643,
"step": 321500
},
{
"epoch": 8.9,
"learning_rate": 9.13491931458992e-06,
"loss": 6.0695,
"step": 322000
},
{
"epoch": 8.92,
"learning_rate": 9.13353296733766e-06,
"loss": 6.068,
"step": 322500
},
{
"epoch": 8.93,
"learning_rate": 9.1321466200854e-06,
"loss": 6.0688,
"step": 323000
},
{
"epoch": 8.94,
"learning_rate": 9.13076027283314e-06,
"loss": 6.0624,
"step": 323500
},
{
"epoch": 8.96,
"learning_rate": 9.12937392558088e-06,
"loss": 6.064,
"step": 324000
},
{
"epoch": 8.97,
"learning_rate": 9.12798757832862e-06,
"loss": 6.0657,
"step": 324500
},
{
"epoch": 8.99,
"learning_rate": 9.12660123107636e-06,
"loss": 6.0665,
"step": 325000
},
{
"epoch": 9.0,
"learning_rate": 9.125214883824101e-06,
"loss": 6.0649,
"step": 325500
},
{
"epoch": 9.01,
"learning_rate": 9.123828536571842e-06,
"loss": 6.0642,
"step": 326000
},
{
"epoch": 9.03,
"learning_rate": 9.122442189319581e-06,
"loss": 6.0629,
"step": 326500
},
{
"epoch": 9.04,
"learning_rate": 9.121055842067322e-06,
"loss": 6.0662,
"step": 327000
},
{
"epoch": 9.06,
"learning_rate": 9.119669494815062e-06,
"loss": 6.0694,
"step": 327500
},
{
"epoch": 9.07,
"learning_rate": 9.118283147562801e-06,
"loss": 6.0667,
"step": 328000
},
{
"epoch": 9.08,
"learning_rate": 9.116896800310544e-06,
"loss": 6.0655,
"step": 328500
},
{
"epoch": 9.1,
"learning_rate": 9.115510453058283e-06,
"loss": 6.0646,
"step": 329000
},
{
"epoch": 9.11,
"learning_rate": 9.114124105806023e-06,
"loss": 6.0648,
"step": 329500
},
{
"epoch": 9.12,
"learning_rate": 9.112737758553764e-06,
"loss": 6.062,
"step": 330000
},
{
"epoch": 9.14,
"learning_rate": 9.111351411301503e-06,
"loss": 6.0626,
"step": 330500
},
{
"epoch": 9.15,
"learning_rate": 9.109965064049243e-06,
"loss": 6.0657,
"step": 331000
},
{
"epoch": 9.17,
"learning_rate": 9.108578716796984e-06,
"loss": 6.0662,
"step": 331500
},
{
"epoch": 9.18,
"learning_rate": 9.107192369544725e-06,
"loss": 6.0646,
"step": 332000
},
{
"epoch": 9.19,
"learning_rate": 9.105806022292465e-06,
"loss": 6.0649,
"step": 332500
},
{
"epoch": 9.21,
"learning_rate": 9.104419675040204e-06,
"loss": 6.0623,
"step": 333000
},
{
"epoch": 9.22,
"learning_rate": 9.103033327787945e-06,
"loss": 6.0619,
"step": 333500
},
{
"epoch": 9.24,
"learning_rate": 9.101646980535686e-06,
"loss": 6.0666,
"step": 334000
},
{
"epoch": 9.25,
"learning_rate": 9.100260633283425e-06,
"loss": 6.0644,
"step": 334500
},
{
"epoch": 9.26,
"learning_rate": 9.098874286031167e-06,
"loss": 6.0616,
"step": 335000
},
{
"epoch": 9.28,
"learning_rate": 9.097487938778906e-06,
"loss": 6.0584,
"step": 335500
},
{
"epoch": 9.29,
"learning_rate": 9.096101591526647e-06,
"loss": 6.0644,
"step": 336000
},
{
"epoch": 9.3,
"learning_rate": 9.094715244274387e-06,
"loss": 6.0609,
"step": 336500
},
{
"epoch": 9.32,
"learning_rate": 9.093328897022126e-06,
"loss": 6.0627,
"step": 337000
},
{
"epoch": 9.33,
"learning_rate": 9.091942549769867e-06,
"loss": 6.0629,
"step": 337500
},
{
"epoch": 9.35,
"learning_rate": 9.090556202517607e-06,
"loss": 6.0601,
"step": 338000
},
{
"epoch": 9.36,
"learning_rate": 9.089169855265348e-06,
"loss": 6.0594,
"step": 338500
},
{
"epoch": 9.37,
"learning_rate": 9.087783508013089e-06,
"loss": 6.0596,
"step": 339000
},
{
"epoch": 9.39,
"learning_rate": 9.086397160760828e-06,
"loss": 6.0621,
"step": 339500
},
{
"epoch": 9.4,
"learning_rate": 9.085010813508568e-06,
"loss": 6.0599,
"step": 340000
},
{
"epoch": 9.41,
"learning_rate": 9.083624466256309e-06,
"loss": 6.0603,
"step": 340500
},
{
"epoch": 9.43,
"learning_rate": 9.082238119004048e-06,
"loss": 6.0615,
"step": 341000
},
{
"epoch": 9.44,
"learning_rate": 9.08085177175179e-06,
"loss": 6.0654,
"step": 341500
},
{
"epoch": 9.46,
"learning_rate": 9.07946542449953e-06,
"loss": 6.0619,
"step": 342000
},
{
"epoch": 9.47,
"learning_rate": 9.07807907724727e-06,
"loss": 6.0597,
"step": 342500
},
{
"epoch": 9.48,
"learning_rate": 9.07669272999501e-06,
"loss": 6.0616,
"step": 343000
},
{
"epoch": 9.5,
"learning_rate": 9.07530638274275e-06,
"loss": 6.0671,
"step": 343500
},
{
"epoch": 9.51,
"learning_rate": 9.07392003549049e-06,
"loss": 6.0616,
"step": 344000
},
{
"epoch": 9.53,
"learning_rate": 9.07253368823823e-06,
"loss": 6.0571,
"step": 344500
},
{
"epoch": 9.54,
"learning_rate": 9.071147340985971e-06,
"loss": 6.0598,
"step": 345000
},
{
"epoch": 9.55,
"learning_rate": 9.069760993733712e-06,
"loss": 6.0638,
"step": 345500
},
{
"epoch": 9.57,
"learning_rate": 9.068374646481451e-06,
"loss": 6.0576,
"step": 346000
},
{
"epoch": 9.58,
"learning_rate": 9.066988299229192e-06,
"loss": 6.0531,
"step": 346500
},
{
"epoch": 9.59,
"learning_rate": 9.065601951976932e-06,
"loss": 6.0627,
"step": 347000
},
{
"epoch": 9.61,
"learning_rate": 9.064215604724673e-06,
"loss": 6.0588,
"step": 347500
},
{
"epoch": 9.62,
"learning_rate": 9.062829257472412e-06,
"loss": 6.059,
"step": 348000
},
{
"epoch": 9.64,
"learning_rate": 9.061442910220153e-06,
"loss": 6.0609,
"step": 348500
},
{
"epoch": 9.65,
"learning_rate": 9.060056562967893e-06,
"loss": 6.0576,
"step": 349000
},
{
"epoch": 9.66,
"learning_rate": 9.058670215715634e-06,
"loss": 6.0531,
"step": 349500
},
{
"epoch": 9.68,
"learning_rate": 9.057283868463373e-06,
"loss": 6.0594,
"step": 350000
},
{
"epoch": 9.69,
"learning_rate": 9.055897521211113e-06,
"loss": 6.0612,
"step": 350500
},
{
"epoch": 9.71,
"learning_rate": 9.054511173958854e-06,
"loss": 6.0603,
"step": 351000
},
{
"epoch": 9.72,
"learning_rate": 9.053124826706595e-06,
"loss": 6.06,
"step": 351500
},
{
"epoch": 9.73,
"learning_rate": 9.051738479454334e-06,
"loss": 6.061,
"step": 352000
},
{
"epoch": 9.75,
"learning_rate": 9.050352132202074e-06,
"loss": 6.0582,
"step": 352500
},
{
"epoch": 9.76,
"learning_rate": 9.048965784949815e-06,
"loss": 6.0578,
"step": 353000
},
{
"epoch": 9.77,
"learning_rate": 9.047579437697556e-06,
"loss": 6.0565,
"step": 353500
},
{
"epoch": 9.79,
"learning_rate": 9.046193090445296e-06,
"loss": 6.0547,
"step": 354000
},
{
"epoch": 9.8,
"learning_rate": 9.044806743193035e-06,
"loss": 6.0581,
"step": 354500
},
{
"epoch": 9.82,
"learning_rate": 9.043420395940776e-06,
"loss": 6.0586,
"step": 355000
},
{
"epoch": 9.83,
"learning_rate": 9.042034048688516e-06,
"loss": 6.0572,
"step": 355500
},
{
"epoch": 9.84,
"learning_rate": 9.040647701436257e-06,
"loss": 6.0544,
"step": 356000
},
{
"epoch": 9.86,
"learning_rate": 9.039261354183996e-06,
"loss": 6.061,
"step": 356500
},
{
"epoch": 9.87,
"learning_rate": 9.037875006931737e-06,
"loss": 6.058,
"step": 357000
},
{
"epoch": 9.88,
"learning_rate": 9.036488659679477e-06,
"loss": 6.0573,
"step": 357500
},
{
"epoch": 9.9,
"learning_rate": 9.035102312427218e-06,
"loss": 6.0591,
"step": 358000
},
{
"epoch": 9.91,
"learning_rate": 9.033715965174957e-06,
"loss": 6.0574,
"step": 358500
},
{
"epoch": 9.93,
"learning_rate": 9.032329617922698e-06,
"loss": 6.0556,
"step": 359000
},
{
"epoch": 9.94,
"learning_rate": 9.030943270670438e-06,
"loss": 6.0612,
"step": 359500
},
{
"epoch": 9.95,
"learning_rate": 9.029556923418179e-06,
"loss": 6.0563,
"step": 360000
},
{
"epoch": 9.97,
"learning_rate": 9.02817057616592e-06,
"loss": 6.059,
"step": 360500
},
{
"epoch": 9.98,
"learning_rate": 9.026784228913659e-06,
"loss": 6.0538,
"step": 361000
},
{
"epoch": 10.0,
"learning_rate": 9.0253978816614e-06,
"loss": 6.0537,
"step": 361500
},
{
"epoch": 10.01,
"learning_rate": 9.02401153440914e-06,
"loss": 6.0551,
"step": 362000
},
{
"epoch": 10.02,
"learning_rate": 9.022625187156879e-06,
"loss": 6.0582,
"step": 362500
},
{
"epoch": 10.04,
"learning_rate": 9.02123883990462e-06,
"loss": 6.0527,
"step": 363000
},
{
"epoch": 10.05,
"learning_rate": 9.01985249265236e-06,
"loss": 6.0551,
"step": 363500
},
{
"epoch": 10.06,
"learning_rate": 9.0184661454001e-06,
"loss": 6.0528,
"step": 364000
},
{
"epoch": 10.08,
"learning_rate": 9.017079798147841e-06,
"loss": 6.0538,
"step": 364500
},
{
"epoch": 10.09,
"learning_rate": 9.01569345089558e-06,
"loss": 6.0527,
"step": 365000
},
{
"epoch": 10.11,
"learning_rate": 9.014307103643321e-06,
"loss": 6.0543,
"step": 365500
},
{
"epoch": 10.12,
"learning_rate": 9.012920756391062e-06,
"loss": 6.0532,
"step": 366000
},
{
"epoch": 10.13,
"learning_rate": 9.0115344091388e-06,
"loss": 6.0526,
"step": 366500
},
{
"epoch": 10.15,
"learning_rate": 9.010148061886543e-06,
"loss": 6.0578,
"step": 367000
},
{
"epoch": 10.16,
"learning_rate": 9.008761714634282e-06,
"loss": 6.0532,
"step": 367500
},
{
"epoch": 10.18,
"learning_rate": 9.007375367382022e-06,
"loss": 6.0524,
"step": 368000
},
{
"epoch": 10.19,
"learning_rate": 9.005989020129763e-06,
"loss": 6.0546,
"step": 368500
},
{
"epoch": 10.2,
"learning_rate": 9.004602672877502e-06,
"loss": 6.0534,
"step": 369000
},
{
"epoch": 10.22,
"learning_rate": 9.003216325625244e-06,
"loss": 6.0546,
"step": 369500
},
{
"epoch": 10.23,
"learning_rate": 9.001829978372983e-06,
"loss": 6.0556,
"step": 370000
},
{
"epoch": 10.24,
"learning_rate": 9.000443631120724e-06,
"loss": 6.0522,
"step": 370500
},
{
"epoch": 10.26,
"learning_rate": 8.999057283868465e-06,
"loss": 6.0553,
"step": 371000
},
{
"epoch": 10.27,
"learning_rate": 8.997670936616204e-06,
"loss": 6.0552,
"step": 371500
},
{
"epoch": 10.29,
"learning_rate": 8.996284589363944e-06,
"loss": 6.0567,
"step": 372000
},
{
"epoch": 10.3,
"learning_rate": 8.994898242111685e-06,
"loss": 6.054,
"step": 372500
},
{
"epoch": 10.31,
"learning_rate": 8.993511894859424e-06,
"loss": 6.0512,
"step": 373000
},
{
"epoch": 10.33,
"learning_rate": 8.992125547607166e-06,
"loss": 6.0519,
"step": 373500
},
{
"epoch": 10.34,
"learning_rate": 8.990739200354905e-06,
"loss": 6.0544,
"step": 374000
},
{
"epoch": 10.36,
"learning_rate": 8.989352853102646e-06,
"loss": 6.0505,
"step": 374500
},
{
"epoch": 10.37,
"learning_rate": 8.987966505850386e-06,
"loss": 6.052,
"step": 375000
},
{
"epoch": 10.38,
"learning_rate": 8.986580158598125e-06,
"loss": 6.0513,
"step": 375500
},
{
"epoch": 10.4,
"learning_rate": 8.985193811345868e-06,
"loss": 6.0509,
"step": 376000
},
{
"epoch": 10.41,
"learning_rate": 8.983807464093607e-06,
"loss": 6.0537,
"step": 376500
},
{
"epoch": 10.42,
"learning_rate": 8.982421116841347e-06,
"loss": 6.0544,
"step": 377000
},
{
"epoch": 10.44,
"learning_rate": 8.981034769589088e-06,
"loss": 6.0494,
"step": 377500
},
{
"epoch": 10.45,
"learning_rate": 8.979648422336827e-06,
"loss": 6.055,
"step": 378000
},
{
"epoch": 10.47,
"learning_rate": 8.978262075084568e-06,
"loss": 6.0528,
"step": 378500
},
{
"epoch": 10.48,
"learning_rate": 8.976875727832308e-06,
"loss": 6.052,
"step": 379000
},
{
"epoch": 10.49,
"learning_rate": 8.975489380580047e-06,
"loss": 6.0506,
"step": 379500
},
{
"epoch": 10.51,
"learning_rate": 8.97410303332779e-06,
"loss": 6.0501,
"step": 380000
},
{
"epoch": 10.52,
"learning_rate": 8.972716686075528e-06,
"loss": 6.0532,
"step": 380500
},
{
"epoch": 10.53,
"learning_rate": 8.971330338823269e-06,
"loss": 6.0537,
"step": 381000
},
{
"epoch": 10.55,
"learning_rate": 8.96994399157101e-06,
"loss": 6.0498,
"step": 381500
},
{
"epoch": 10.56,
"learning_rate": 8.968557644318749e-06,
"loss": 6.0497,
"step": 382000
},
{
"epoch": 10.58,
"learning_rate": 8.967171297066491e-06,
"loss": 6.0528,
"step": 382500
},
{
"epoch": 10.59,
"learning_rate": 8.96578494981423e-06,
"loss": 6.0474,
"step": 383000
},
{
"epoch": 10.6,
"learning_rate": 8.96439860256197e-06,
"loss": 6.0528,
"step": 383500
},
{
"epoch": 10.62,
"learning_rate": 8.963012255309711e-06,
"loss": 6.0564,
"step": 384000
},
{
"epoch": 10.63,
"learning_rate": 8.96162590805745e-06,
"loss": 6.0487,
"step": 384500
},
{
"epoch": 10.65,
"learning_rate": 8.960239560805191e-06,
"loss": 6.054,
"step": 385000
},
{
"epoch": 10.66,
"learning_rate": 8.958853213552932e-06,
"loss": 6.0529,
"step": 385500
},
{
"epoch": 10.67,
"learning_rate": 8.957466866300672e-06,
"loss": 6.051,
"step": 386000
},
{
"epoch": 10.69,
"learning_rate": 8.956080519048413e-06,
"loss": 6.0515,
"step": 386500
},
{
"epoch": 10.7,
"learning_rate": 8.954694171796152e-06,
"loss": 6.0496,
"step": 387000
},
{
"epoch": 10.71,
"learning_rate": 8.953307824543892e-06,
"loss": 6.0513,
"step": 387500
},
{
"epoch": 10.73,
"learning_rate": 8.951921477291633e-06,
"loss": 6.0495,
"step": 388000
},
{
"epoch": 10.74,
"learning_rate": 8.950535130039372e-06,
"loss": 6.0488,
"step": 388500
},
{
"epoch": 10.76,
"learning_rate": 8.949148782787114e-06,
"loss": 6.0464,
"step": 389000
},
{
"epoch": 10.77,
"learning_rate": 8.947762435534853e-06,
"loss": 6.048,
"step": 389500
},
{
"epoch": 10.78,
"learning_rate": 8.946376088282594e-06,
"loss": 6.0476,
"step": 390000
},
{
"epoch": 10.8,
"learning_rate": 8.944989741030335e-06,
"loss": 6.048,
"step": 390500
},
{
"epoch": 10.81,
"learning_rate": 8.943603393778074e-06,
"loss": 6.05,
"step": 391000
},
{
"epoch": 10.83,
"learning_rate": 8.942217046525814e-06,
"loss": 6.0464,
"step": 391500
},
{
"epoch": 10.84,
"learning_rate": 8.940830699273555e-06,
"loss": 6.0486,
"step": 392000
},
{
"epoch": 10.85,
"learning_rate": 8.939444352021296e-06,
"loss": 6.0453,
"step": 392500
},
{
"epoch": 10.87,
"learning_rate": 8.938058004769036e-06,
"loss": 6.0499,
"step": 393000
},
{
"epoch": 10.88,
"learning_rate": 8.936671657516775e-06,
"loss": 6.0468,
"step": 393500
},
{
"epoch": 10.89,
"learning_rate": 8.935285310264516e-06,
"loss": 6.049,
"step": 394000
},
{
"epoch": 10.91,
"learning_rate": 8.933898963012256e-06,
"loss": 6.0479,
"step": 394500
},
{
"epoch": 10.92,
"learning_rate": 8.932512615759995e-06,
"loss": 6.0482,
"step": 395000
},
{
"epoch": 10.94,
"learning_rate": 8.931126268507736e-06,
"loss": 6.0482,
"step": 395500
},
{
"epoch": 10.95,
"learning_rate": 8.929739921255477e-06,
"loss": 6.0494,
"step": 396000
},
{
"epoch": 10.96,
"learning_rate": 8.928353574003217e-06,
"loss": 6.0475,
"step": 396500
},
{
"epoch": 10.98,
"learning_rate": 8.926967226750958e-06,
"loss": 6.0473,
"step": 397000
},
{
"epoch": 10.99,
"learning_rate": 8.925580879498697e-06,
"loss": 6.048,
"step": 397500
},
{
"epoch": 11.0,
"learning_rate": 8.924194532246438e-06,
"loss": 6.0486,
"step": 398000
},
{
"epoch": 11.02,
"learning_rate": 8.922808184994178e-06,
"loss": 6.0467,
"step": 398500
},
{
"epoch": 11.03,
"learning_rate": 8.921421837741919e-06,
"loss": 6.0501,
"step": 399000
},
{
"epoch": 11.05,
"learning_rate": 8.92003549048966e-06,
"loss": 6.0474,
"step": 399500
},
{
"epoch": 11.06,
"learning_rate": 8.918649143237398e-06,
"loss": 6.0438,
"step": 400000
},
{
"epoch": 11.07,
"learning_rate": 8.917262795985139e-06,
"loss": 6.0472,
"step": 400500
},
{
"epoch": 11.09,
"learning_rate": 8.91587644873288e-06,
"loss": 6.0478,
"step": 401000
},
{
"epoch": 11.1,
"learning_rate": 8.914490101480619e-06,
"loss": 6.0477,
"step": 401500
},
{
"epoch": 11.12,
"learning_rate": 8.91310375422836e-06,
"loss": 6.0473,
"step": 402000
},
{
"epoch": 11.13,
"learning_rate": 8.9117174069761e-06,
"loss": 6.0444,
"step": 402500
},
{
"epoch": 11.14,
"learning_rate": 8.91033105972384e-06,
"loss": 6.0459,
"step": 403000
},
{
"epoch": 11.16,
"learning_rate": 8.908944712471581e-06,
"loss": 6.0442,
"step": 403500
},
{
"epoch": 11.17,
"learning_rate": 8.90755836521932e-06,
"loss": 6.0492,
"step": 404000
},
{
"epoch": 11.18,
"learning_rate": 8.906172017967061e-06,
"loss": 6.0465,
"step": 404500
},
{
"epoch": 11.2,
"learning_rate": 8.904785670714802e-06,
"loss": 6.0487,
"step": 405000
},
{
"epoch": 11.21,
"learning_rate": 8.903399323462542e-06,
"loss": 6.0453,
"step": 405500
},
{
"epoch": 11.23,
"learning_rate": 8.902012976210281e-06,
"loss": 6.043,
"step": 406000
},
{
"epoch": 11.24,
"learning_rate": 8.900626628958022e-06,
"loss": 6.0432,
"step": 406500
},
{
"epoch": 11.25,
"learning_rate": 8.899240281705762e-06,
"loss": 6.0426,
"step": 407000
},
{
"epoch": 11.27,
"learning_rate": 8.897853934453503e-06,
"loss": 6.0503,
"step": 407500
},
{
"epoch": 11.28,
"learning_rate": 8.896467587201244e-06,
"loss": 6.044,
"step": 408000
},
{
"epoch": 11.3,
"learning_rate": 8.895081239948983e-06,
"loss": 6.0497,
"step": 408500
},
{
"epoch": 11.31,
"learning_rate": 8.893694892696723e-06,
"loss": 6.0422,
"step": 409000
},
{
"epoch": 11.32,
"learning_rate": 8.892308545444464e-06,
"loss": 6.0455,
"step": 409500
},
{
"epoch": 11.34,
"learning_rate": 8.890922198192203e-06,
"loss": 6.0473,
"step": 410000
},
{
"epoch": 11.35,
"learning_rate": 8.889535850939944e-06,
"loss": 6.045,
"step": 410500
},
{
"epoch": 11.36,
"learning_rate": 8.888149503687684e-06,
"loss": 6.0494,
"step": 411000
},
{
"epoch": 11.38,
"learning_rate": 8.886763156435425e-06,
"loss": 6.0426,
"step": 411500
},
{
"epoch": 11.39,
"learning_rate": 8.885376809183166e-06,
"loss": 6.0423,
"step": 412000
},
{
"epoch": 11.41,
"learning_rate": 8.883990461930904e-06,
"loss": 6.0446,
"step": 412500
},
{
"epoch": 11.42,
"learning_rate": 8.882604114678645e-06,
"loss": 6.0445,
"step": 413000
},
{
"epoch": 11.43,
"learning_rate": 8.881217767426386e-06,
"loss": 6.0417,
"step": 413500
},
{
"epoch": 11.45,
"learning_rate": 8.879831420174126e-06,
"loss": 6.0419,
"step": 414000
},
{
"epoch": 11.46,
"learning_rate": 8.878445072921867e-06,
"loss": 6.0432,
"step": 414500
},
{
"epoch": 11.47,
"learning_rate": 8.877058725669606e-06,
"loss": 6.0437,
"step": 415000
},
{
"epoch": 11.49,
"learning_rate": 8.875672378417347e-06,
"loss": 6.0442,
"step": 415500
},
{
"epoch": 11.5,
"learning_rate": 8.874286031165087e-06,
"loss": 6.0428,
"step": 416000
},
{
"epoch": 11.52,
"learning_rate": 8.872899683912826e-06,
"loss": 6.0421,
"step": 416500
},
{
"epoch": 11.53,
"learning_rate": 8.871513336660567e-06,
"loss": 6.0449,
"step": 417000
},
{
"epoch": 11.54,
"learning_rate": 8.870126989408308e-06,
"loss": 6.0434,
"step": 417500
},
{
"epoch": 11.56,
"learning_rate": 8.868740642156048e-06,
"loss": 6.0429,
"step": 418000
},
{
"epoch": 11.57,
"learning_rate": 8.867354294903789e-06,
"loss": 6.0467,
"step": 418500
},
{
"epoch": 11.59,
"learning_rate": 8.865967947651528e-06,
"loss": 6.0418,
"step": 419000
},
{
"epoch": 11.6,
"learning_rate": 8.864581600399268e-06,
"loss": 6.0402,
"step": 419500
},
{
"epoch": 11.61,
"learning_rate": 8.863195253147009e-06,
"loss": 6.0449,
"step": 420000
},
{
"epoch": 11.63,
"learning_rate": 8.861808905894748e-06,
"loss": 6.044,
"step": 420500
},
{
"epoch": 11.64,
"learning_rate": 8.86042255864249e-06,
"loss": 6.0391,
"step": 421000
},
{
"epoch": 11.65,
"learning_rate": 8.85903621139023e-06,
"loss": 6.0415,
"step": 421500
},
{
"epoch": 11.67,
"learning_rate": 8.85764986413797e-06,
"loss": 6.0446,
"step": 422000
},
{
"epoch": 11.68,
"learning_rate": 8.85626351688571e-06,
"loss": 6.0415,
"step": 422500
},
{
"epoch": 11.7,
"learning_rate": 8.85487716963345e-06,
"loss": 6.0436,
"step": 423000
},
{
"epoch": 11.71,
"learning_rate": 8.853490822381192e-06,
"loss": 6.0429,
"step": 423500
},
{
"epoch": 11.72,
"learning_rate": 8.852104475128931e-06,
"loss": 6.0436,
"step": 424000
},
{
"epoch": 11.74,
"learning_rate": 8.850718127876672e-06,
"loss": 6.0445,
"step": 424500
},
{
"epoch": 11.75,
"learning_rate": 8.849331780624412e-06,
"loss": 6.0408,
"step": 425000
},
{
"epoch": 11.77,
"learning_rate": 8.847945433372151e-06,
"loss": 6.0451,
"step": 425500
},
{
"epoch": 11.78,
"learning_rate": 8.846559086119892e-06,
"loss": 6.0442,
"step": 426000
},
{
"epoch": 11.79,
"learning_rate": 8.845172738867632e-06,
"loss": 6.0403,
"step": 426500
},
{
"epoch": 11.81,
"learning_rate": 8.843786391615371e-06,
"loss": 6.0429,
"step": 427000
},
{
"epoch": 11.82,
"learning_rate": 8.842400044363114e-06,
"loss": 6.0404,
"step": 427500
},
{
"epoch": 11.83,
"learning_rate": 8.841013697110853e-06,
"loss": 6.0422,
"step": 428000
},
{
"epoch": 11.85,
"learning_rate": 8.839627349858593e-06,
"loss": 6.0392,
"step": 428500
},
{
"epoch": 11.86,
"learning_rate": 8.838241002606334e-06,
"loss": 6.0425,
"step": 429000
},
{
"epoch": 11.88,
"learning_rate": 8.836854655354073e-06,
"loss": 6.0433,
"step": 429500
},
{
"epoch": 11.89,
"learning_rate": 8.835468308101815e-06,
"loss": 6.0429,
"step": 430000
},
{
"epoch": 11.9,
"learning_rate": 8.834081960849554e-06,
"loss": 6.0378,
"step": 430500
},
{
"epoch": 11.92,
"learning_rate": 8.832695613597295e-06,
"loss": 6.0374,
"step": 431000
},
{
"epoch": 11.93,
"learning_rate": 8.831309266345035e-06,
"loss": 6.0374,
"step": 431500
},
{
"epoch": 11.94,
"learning_rate": 8.829922919092774e-06,
"loss": 6.0418,
"step": 432000
},
{
"epoch": 11.96,
"learning_rate": 8.828536571840515e-06,
"loss": 6.0385,
"step": 432500
},
{
"epoch": 11.97,
"learning_rate": 8.827150224588256e-06,
"loss": 6.0385,
"step": 433000
},
{
"epoch": 11.99,
"learning_rate": 8.825763877335995e-06,
"loss": 6.0418,
"step": 433500
},
{
"epoch": 12.0,
"learning_rate": 8.824377530083737e-06,
"loss": 6.0395,
"step": 434000
},
{
"epoch": 12.01,
"learning_rate": 8.822991182831476e-06,
"loss": 6.0406,
"step": 434500
},
{
"epoch": 12.03,
"learning_rate": 8.821604835579217e-06,
"loss": 6.0399,
"step": 435000
},
{
"epoch": 12.04,
"learning_rate": 8.820218488326957e-06,
"loss": 6.0367,
"step": 435500
},
{
"epoch": 12.06,
"learning_rate": 8.818832141074696e-06,
"loss": 6.037,
"step": 436000
},
{
"epoch": 12.07,
"learning_rate": 8.817445793822439e-06,
"loss": 6.0423,
"step": 436500
},
{
"epoch": 12.08,
"learning_rate": 8.816059446570178e-06,
"loss": 6.0416,
"step": 437000
},
{
"epoch": 12.1,
"learning_rate": 8.814673099317918e-06,
"loss": 6.0387,
"step": 437500
},
{
"epoch": 12.11,
"learning_rate": 8.813286752065659e-06,
"loss": 6.0426,
"step": 438000
},
{
"epoch": 12.12,
"learning_rate": 8.811900404813398e-06,
"loss": 6.0418,
"step": 438500
},
{
"epoch": 12.14,
"learning_rate": 8.810514057561138e-06,
"loss": 6.042,
"step": 439000
},
{
"epoch": 12.15,
"learning_rate": 8.809127710308879e-06,
"loss": 6.039,
"step": 439500
},
{
"epoch": 12.17,
"learning_rate": 8.80774136305662e-06,
"loss": 6.0397,
"step": 440000
},
{
"epoch": 12.18,
"learning_rate": 8.80635501580436e-06,
"loss": 6.0397,
"step": 440500
},
{
"epoch": 12.19,
"learning_rate": 8.8049686685521e-06,
"loss": 6.038,
"step": 441000
},
{
"epoch": 12.21,
"learning_rate": 8.80358232129984e-06,
"loss": 6.0376,
"step": 441500
},
{
"epoch": 12.22,
"learning_rate": 8.80219597404758e-06,
"loss": 6.0332,
"step": 442000
},
{
"epoch": 12.24,
"learning_rate": 8.80080962679532e-06,
"loss": 6.0365,
"step": 442500
},
{
"epoch": 12.25,
"learning_rate": 8.799423279543062e-06,
"loss": 6.0373,
"step": 443000
},
{
"epoch": 12.26,
"learning_rate": 8.7980369322908e-06,
"loss": 6.0412,
"step": 443500
},
{
"epoch": 12.28,
"learning_rate": 8.796650585038541e-06,
"loss": 6.0364,
"step": 444000
},
{
"epoch": 12.29,
"learning_rate": 8.795264237786282e-06,
"loss": 6.0353,
"step": 444500
},
{
"epoch": 12.3,
"learning_rate": 8.793877890534021e-06,
"loss": 6.0388,
"step": 445000
},
{
"epoch": 12.32,
"learning_rate": 8.792491543281762e-06,
"loss": 6.0396,
"step": 445500
},
{
"epoch": 12.33,
"learning_rate": 8.791105196029502e-06,
"loss": 6.0406,
"step": 446000
},
{
"epoch": 12.35,
"learning_rate": 8.789718848777243e-06,
"loss": 6.0382,
"step": 446500
},
{
"epoch": 12.36,
"learning_rate": 8.788332501524984e-06,
"loss": 6.039,
"step": 447000
},
{
"epoch": 12.37,
"learning_rate": 8.786946154272723e-06,
"loss": 6.0368,
"step": 447500
},
{
"epoch": 12.39,
"learning_rate": 8.785559807020463e-06,
"loss": 6.0387,
"step": 448000
},
{
"epoch": 12.4,
"learning_rate": 8.784173459768204e-06,
"loss": 6.0348,
"step": 448500
},
{
"epoch": 12.41,
"learning_rate": 8.782787112515943e-06,
"loss": 6.0353,
"step": 449000
},
{
"epoch": 12.43,
"learning_rate": 8.781400765263684e-06,
"loss": 6.0355,
"step": 449500
},
{
"epoch": 12.44,
"learning_rate": 8.780014418011424e-06,
"loss": 6.0349,
"step": 450000
},
{
"epoch": 12.46,
"learning_rate": 8.778628070759165e-06,
"loss": 6.0359,
"step": 450500
},
{
"epoch": 12.47,
"learning_rate": 8.777241723506905e-06,
"loss": 6.0355,
"step": 451000
},
{
"epoch": 12.48,
"learning_rate": 8.775855376254644e-06,
"loss": 6.0393,
"step": 451500
},
{
"epoch": 12.5,
"learning_rate": 8.774469029002385e-06,
"loss": 6.0354,
"step": 452000
},
{
"epoch": 12.51,
"learning_rate": 8.773082681750126e-06,
"loss": 6.0337,
"step": 452500
},
{
"epoch": 12.53,
"learning_rate": 8.771696334497866e-06,
"loss": 6.0389,
"step": 453000
},
{
"epoch": 12.54,
"learning_rate": 8.770309987245605e-06,
"loss": 6.0354,
"step": 453500
},
{
"epoch": 12.55,
"learning_rate": 8.768923639993346e-06,
"loss": 6.0368,
"step": 454000
},
{
"epoch": 12.57,
"learning_rate": 8.767537292741087e-06,
"loss": 6.0371,
"step": 454500
},
{
"epoch": 12.58,
"learning_rate": 8.766150945488827e-06,
"loss": 6.0374,
"step": 455000
},
{
"epoch": 12.59,
"learning_rate": 8.764764598236566e-06,
"loss": 6.0381,
"step": 455500
},
{
"epoch": 12.61,
"learning_rate": 8.763378250984307e-06,
"loss": 6.0355,
"step": 456000
},
{
"epoch": 12.62,
"learning_rate": 8.761991903732047e-06,
"loss": 6.0348,
"step": 456500
},
{
"epoch": 12.64,
"learning_rate": 8.760605556479788e-06,
"loss": 6.0361,
"step": 457000
},
{
"epoch": 12.65,
"learning_rate": 8.759219209227529e-06,
"loss": 6.0346,
"step": 457500
},
{
"epoch": 12.66,
"learning_rate": 8.757832861975268e-06,
"loss": 6.0368,
"step": 458000
},
{
"epoch": 12.68,
"learning_rate": 8.756446514723008e-06,
"loss": 6.0332,
"step": 458500
},
{
"epoch": 12.69,
"learning_rate": 8.755060167470749e-06,
"loss": 6.0363,
"step": 459000
},
{
"epoch": 12.71,
"learning_rate": 8.75367382021849e-06,
"loss": 6.0372,
"step": 459500
},
{
"epoch": 12.72,
"learning_rate": 8.752287472966229e-06,
"loss": 6.0344,
"step": 460000
},
{
"epoch": 12.73,
"learning_rate": 8.75090112571397e-06,
"loss": 6.037,
"step": 460500
},
{
"epoch": 12.75,
"learning_rate": 8.74951477846171e-06,
"loss": 6.0365,
"step": 461000
},
{
"epoch": 12.76,
"learning_rate": 8.74812843120945e-06,
"loss": 6.0357,
"step": 461500
},
{
"epoch": 12.77,
"learning_rate": 8.746742083957191e-06,
"loss": 6.0341,
"step": 462000
},
{
"epoch": 12.79,
"learning_rate": 8.74535573670493e-06,
"loss": 6.0372,
"step": 462500
},
{
"epoch": 12.8,
"learning_rate": 8.74396938945267e-06,
"loss": 6.037,
"step": 463000
},
{
"epoch": 12.82,
"learning_rate": 8.742583042200411e-06,
"loss": 6.0362,
"step": 463500
},
{
"epoch": 12.83,
"learning_rate": 8.74119669494815e-06,
"loss": 6.0354,
"step": 464000
},
{
"epoch": 12.84,
"learning_rate": 8.739810347695891e-06,
"loss": 6.0337,
"step": 464500
},
{
"epoch": 12.86,
"learning_rate": 8.738424000443632e-06,
"loss": 6.0322,
"step": 465000
},
{
"epoch": 12.87,
"learning_rate": 8.737037653191372e-06,
"loss": 6.0343,
"step": 465500
},
{
"epoch": 12.89,
"learning_rate": 8.735651305939113e-06,
"loss": 6.0346,
"step": 466000
},
{
"epoch": 12.9,
"learning_rate": 8.734264958686852e-06,
"loss": 6.0336,
"step": 466500
},
{
"epoch": 12.91,
"learning_rate": 8.732878611434593e-06,
"loss": 6.0343,
"step": 467000
},
{
"epoch": 12.93,
"learning_rate": 8.731492264182333e-06,
"loss": 6.0334,
"step": 467500
},
{
"epoch": 12.94,
"learning_rate": 8.730105916930072e-06,
"loss": 6.0339,
"step": 468000
},
{
"epoch": 12.95,
"learning_rate": 8.728719569677815e-06,
"loss": 6.0319,
"step": 468500
},
{
"epoch": 12.97,
"learning_rate": 8.727333222425553e-06,
"loss": 6.0341,
"step": 469000
},
{
"epoch": 12.98,
"learning_rate": 8.725946875173294e-06,
"loss": 6.0341,
"step": 469500
},
{
"epoch": 13.0,
"learning_rate": 8.724560527921035e-06,
"loss": 6.0323,
"step": 470000
},
{
"epoch": 13.01,
"learning_rate": 8.723174180668774e-06,
"loss": 6.0324,
"step": 470500
},
{
"epoch": 13.02,
"learning_rate": 8.721787833416514e-06,
"loss": 6.0323,
"step": 471000
},
{
"epoch": 13.04,
"learning_rate": 8.720401486164255e-06,
"loss": 6.0364,
"step": 471500
},
{
"epoch": 13.05,
"learning_rate": 8.719015138911996e-06,
"loss": 6.0301,
"step": 472000
},
{
"epoch": 13.06,
"learning_rate": 8.717628791659736e-06,
"loss": 6.0334,
"step": 472500
},
{
"epoch": 13.08,
"learning_rate": 8.716242444407475e-06,
"loss": 6.0358,
"step": 473000
},
{
"epoch": 13.09,
"learning_rate": 8.714856097155216e-06,
"loss": 6.0293,
"step": 473500
},
{
"epoch": 13.11,
"learning_rate": 8.713469749902957e-06,
"loss": 6.0352,
"step": 474000
},
{
"epoch": 13.12,
"learning_rate": 8.712083402650695e-06,
"loss": 6.0352,
"step": 474500
},
{
"epoch": 13.13,
"learning_rate": 8.710697055398438e-06,
"loss": 6.0328,
"step": 475000
},
{
"epoch": 13.15,
"learning_rate": 8.709310708146177e-06,
"loss": 6.0353,
"step": 475500
},
{
"epoch": 13.16,
"learning_rate": 8.707924360893917e-06,
"loss": 6.0314,
"step": 476000
},
{
"epoch": 13.18,
"learning_rate": 8.706538013641658e-06,
"loss": 6.0311,
"step": 476500
},
{
"epoch": 13.19,
"learning_rate": 8.705151666389397e-06,
"loss": 6.0348,
"step": 477000
},
{
"epoch": 13.2,
"learning_rate": 8.703765319137138e-06,
"loss": 6.0327,
"step": 477500
},
{
"epoch": 13.22,
"learning_rate": 8.702378971884878e-06,
"loss": 6.0325,
"step": 478000
},
{
"epoch": 13.23,
"learning_rate": 8.700992624632619e-06,
"loss": 6.0342,
"step": 478500
},
{
"epoch": 13.24,
"learning_rate": 8.69960627738036e-06,
"loss": 6.0313,
"step": 479000
},
{
"epoch": 13.26,
"learning_rate": 8.698219930128099e-06,
"loss": 6.0302,
"step": 479500
},
{
"epoch": 13.27,
"learning_rate": 8.69683358287584e-06,
"loss": 6.0313,
"step": 480000
},
{
"epoch": 13.29,
"learning_rate": 8.69544723562358e-06,
"loss": 6.0299,
"step": 480500
},
{
"epoch": 13.3,
"learning_rate": 8.694060888371319e-06,
"loss": 6.0321,
"step": 481000
},
{
"epoch": 13.31,
"learning_rate": 8.692674541119061e-06,
"loss": 6.0286,
"step": 481500
},
{
"epoch": 13.33,
"learning_rate": 8.6912881938668e-06,
"loss": 6.0296,
"step": 482000
},
{
"epoch": 13.34,
"learning_rate": 8.68990184661454e-06,
"loss": 6.0352,
"step": 482500
},
{
"epoch": 13.36,
"learning_rate": 8.688515499362281e-06,
"loss": 6.0306,
"step": 483000
},
{
"epoch": 13.37,
"learning_rate": 8.68712915211002e-06,
"loss": 6.0323,
"step": 483500
},
{
"epoch": 13.38,
"learning_rate": 8.685742804857763e-06,
"loss": 6.0303,
"step": 484000
},
{
"epoch": 13.4,
"learning_rate": 8.684356457605502e-06,
"loss": 6.0338,
"step": 484500
},
{
"epoch": 13.41,
"learning_rate": 8.682970110353242e-06,
"loss": 6.0341,
"step": 485000
},
{
"epoch": 13.42,
"learning_rate": 8.681583763100983e-06,
"loss": 6.0328,
"step": 485500
},
{
"epoch": 13.44,
"learning_rate": 8.680197415848722e-06,
"loss": 6.0328,
"step": 486000
},
{
"epoch": 13.45,
"learning_rate": 8.678811068596463e-06,
"loss": 6.0311,
"step": 486500
},
{
"epoch": 13.47,
"learning_rate": 8.677424721344203e-06,
"loss": 6.035,
"step": 487000
},
{
"epoch": 13.48,
"learning_rate": 8.676038374091942e-06,
"loss": 6.0337,
"step": 487500
},
{
"epoch": 13.49,
"learning_rate": 8.674652026839684e-06,
"loss": 6.031,
"step": 488000
},
{
"epoch": 13.51,
"learning_rate": 8.673265679587423e-06,
"loss": 6.0288,
"step": 488500
},
{
"epoch": 13.52,
"learning_rate": 8.671879332335164e-06,
"loss": 6.0333,
"step": 489000
},
{
"epoch": 13.53,
"learning_rate": 8.670492985082905e-06,
"loss": 6.0315,
"step": 489500
},
{
"epoch": 13.55,
"learning_rate": 8.669106637830644e-06,
"loss": 6.0295,
"step": 490000
},
{
"epoch": 13.56,
"learning_rate": 8.667720290578386e-06,
"loss": 6.0286,
"step": 490500
},
{
"epoch": 13.58,
"learning_rate": 8.666333943326125e-06,
"loss": 6.0334,
"step": 491000
},
{
"epoch": 13.59,
"learning_rate": 8.664947596073866e-06,
"loss": 6.0271,
"step": 491500
},
{
"epoch": 13.6,
"learning_rate": 8.663561248821606e-06,
"loss": 6.0305,
"step": 492000
},
{
"epoch": 13.62,
"learning_rate": 8.662174901569345e-06,
"loss": 6.0251,
"step": 492500
},
{
"epoch": 13.63,
"learning_rate": 8.660788554317086e-06,
"loss": 6.0235,
"step": 493000
},
{
"epoch": 13.65,
"learning_rate": 8.659402207064827e-06,
"loss": 6.0309,
"step": 493500
},
{
"epoch": 13.66,
"learning_rate": 8.658015859812567e-06,
"loss": 6.0292,
"step": 494000
},
{
"epoch": 13.67,
"learning_rate": 8.656629512560308e-06,
"loss": 6.0283,
"step": 494500
},
{
"epoch": 13.69,
"learning_rate": 8.655243165308047e-06,
"loss": 6.03,
"step": 495000
},
{
"epoch": 13.7,
"learning_rate": 8.653856818055787e-06,
"loss": 6.0331,
"step": 495500
},
{
"epoch": 13.71,
"learning_rate": 8.652470470803528e-06,
"loss": 6.029,
"step": 496000
},
{
"epoch": 13.73,
"learning_rate": 8.651084123551267e-06,
"loss": 6.0267,
"step": 496500
},
{
"epoch": 13.74,
"learning_rate": 8.649697776299008e-06,
"loss": 6.0315,
"step": 497000
},
{
"epoch": 13.76,
"learning_rate": 8.648311429046748e-06,
"loss": 6.0285,
"step": 497500
},
{
"epoch": 13.77,
"learning_rate": 8.646925081794489e-06,
"loss": 6.0304,
"step": 498000
},
{
"epoch": 13.78,
"learning_rate": 8.64553873454223e-06,
"loss": 6.0314,
"step": 498500
},
{
"epoch": 13.8,
"learning_rate": 8.644152387289969e-06,
"loss": 6.0293,
"step": 499000
},
{
"epoch": 13.81,
"learning_rate": 8.64276604003771e-06,
"loss": 6.0279,
"step": 499500
},
{
"epoch": 13.83,
"learning_rate": 8.64137969278545e-06,
"loss": 6.0314,
"step": 500000
},
{
"epoch": 13.84,
"learning_rate": 8.63999334553319e-06,
"loss": 6.0332,
"step": 500500
},
{
"epoch": 13.85,
"learning_rate": 8.638606998280931e-06,
"loss": 6.028,
"step": 501000
},
{
"epoch": 13.87,
"learning_rate": 8.63722065102867e-06,
"loss": 6.028,
"step": 501500
},
{
"epoch": 13.88,
"learning_rate": 8.63583430377641e-06,
"loss": 6.0295,
"step": 502000
},
{
"epoch": 13.89,
"learning_rate": 8.634447956524151e-06,
"loss": 6.0291,
"step": 502500
},
{
"epoch": 13.91,
"learning_rate": 8.63306160927189e-06,
"loss": 6.0304,
"step": 503000
},
{
"epoch": 13.92,
"learning_rate": 8.631675262019631e-06,
"loss": 6.0275,
"step": 503500
},
{
"epoch": 13.94,
"learning_rate": 8.630288914767372e-06,
"loss": 6.0305,
"step": 504000
},
{
"epoch": 13.95,
"learning_rate": 8.628902567515112e-06,
"loss": 6.0277,
"step": 504500
},
{
"epoch": 13.96,
"learning_rate": 8.627516220262853e-06,
"loss": 6.0273,
"step": 505000
},
{
"epoch": 13.98,
"learning_rate": 8.626129873010592e-06,
"loss": 6.0309,
"step": 505500
},
{
"epoch": 13.99,
"learning_rate": 8.624743525758333e-06,
"loss": 6.0277,
"step": 506000
},
{
"epoch": 14.0,
"learning_rate": 8.623357178506073e-06,
"loss": 6.0277,
"step": 506500
},
{
"epoch": 14.02,
"learning_rate": 8.621970831253814e-06,
"loss": 6.03,
"step": 507000
},
{
"epoch": 14.03,
"learning_rate": 8.620584484001553e-06,
"loss": 6.027,
"step": 507500
},
{
"epoch": 14.05,
"learning_rate": 8.619198136749293e-06,
"loss": 6.0255,
"step": 508000
},
{
"epoch": 14.06,
"learning_rate": 8.617811789497034e-06,
"loss": 6.0278,
"step": 508500
},
{
"epoch": 14.07,
"learning_rate": 8.616425442244775e-06,
"loss": 6.0248,
"step": 509000
},
{
"epoch": 14.09,
"learning_rate": 8.615039094992514e-06,
"loss": 6.0281,
"step": 509500
},
{
"epoch": 14.1,
"learning_rate": 8.613652747740254e-06,
"loss": 6.0281,
"step": 510000
},
{
"epoch": 14.12,
"learning_rate": 8.612266400487995e-06,
"loss": 6.0246,
"step": 510500
},
{
"epoch": 14.13,
"learning_rate": 8.610880053235736e-06,
"loss": 6.0302,
"step": 511000
},
{
"epoch": 14.14,
"learning_rate": 8.609493705983475e-06,
"loss": 6.0314,
"step": 511500
},
{
"epoch": 14.16,
"learning_rate": 8.608107358731215e-06,
"loss": 6.0285,
"step": 512000
},
{
"epoch": 14.17,
"learning_rate": 8.606721011478956e-06,
"loss": 6.025,
"step": 512500
},
{
"epoch": 14.18,
"learning_rate": 8.605334664226696e-06,
"loss": 6.0287,
"step": 513000
},
{
"epoch": 14.2,
"learning_rate": 8.603948316974437e-06,
"loss": 6.0264,
"step": 513500
},
{
"epoch": 14.21,
"learning_rate": 8.602561969722176e-06,
"loss": 6.0268,
"step": 514000
},
{
"epoch": 14.23,
"learning_rate": 8.601175622469917e-06,
"loss": 6.0288,
"step": 514500
},
{
"epoch": 14.24,
"learning_rate": 8.599789275217657e-06,
"loss": 6.0283,
"step": 515000
},
{
"epoch": 14.25,
"learning_rate": 8.598402927965398e-06,
"loss": 6.0278,
"step": 515500
},
{
"epoch": 14.27,
"learning_rate": 8.597016580713139e-06,
"loss": 6.0262,
"step": 516000
},
{
"epoch": 14.28,
"learning_rate": 8.595630233460878e-06,
"loss": 6.0258,
"step": 516500
},
{
"epoch": 14.29,
"step": 516769,
"total_flos": 1.272767802507264e+17,
"train_loss": 6.192780332760428,
"train_runtime": 197999.3322,
"train_samples_per_second": 2958.979,
"train_steps_per_second": 18.266
}
],
"logging_steps": 500,
"max_steps": 3616600,
"num_train_epochs": 100,
"save_steps": 1000000,
"total_flos": 1.272767802507264e+17,
"trial_name": null,
"trial_params": null
}