diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6226 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 14.288807166952386, + "eval_steps": 1000000, + "global_step": 516769, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 5.000000000000001e-07, + "loss": 10.3381, + "step": 500 + }, + { + "epoch": 0.03, + "learning_rate": 1.0000000000000002e-06, + "loss": 10.2616, + "step": 1000 + }, + { + "epoch": 0.04, + "learning_rate": 1.5e-06, + "loss": 10.1304, + "step": 1500 + }, + { + "epoch": 0.06, + "learning_rate": 2.0000000000000003e-06, + "loss": 9.9802, + "step": 2000 + }, + { + "epoch": 0.07, + "learning_rate": 2.5e-06, + "loss": 9.8125, + "step": 2500 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 9.6188, + "step": 3000 + }, + { + "epoch": 0.1, + "learning_rate": 3.5e-06, + "loss": 9.4019, + "step": 3500 + }, + { + "epoch": 0.11, + "learning_rate": 4.000000000000001e-06, + "loss": 9.1652, + "step": 4000 + }, + { + "epoch": 0.12, + "learning_rate": 4.5e-06, + "loss": 8.9147, + "step": 4500 + }, + { + "epoch": 0.14, + "learning_rate": 5e-06, + "loss": 8.6596, + "step": 5000 + }, + { + "epoch": 0.15, + "learning_rate": 5.500000000000001e-06, + "loss": 8.4054, + "step": 5500 + }, + { + "epoch": 0.17, + "learning_rate": 6e-06, + "loss": 8.1569, + "step": 6000 + }, + { + "epoch": 0.18, + "learning_rate": 6.5000000000000004e-06, + "loss": 7.9271, + "step": 6500 + }, + { + "epoch": 0.19, + "learning_rate": 7e-06, + "loss": 7.7144, + "step": 7000 + }, + { + "epoch": 0.21, + "learning_rate": 7.500000000000001e-06, + "loss": 7.5352, + "step": 7500 + }, + { + "epoch": 0.22, + "learning_rate": 8.000000000000001e-06, + "loss": 7.3919, + "step": 8000 + }, + { + "epoch": 0.24, + "learning_rate": 8.5e-06, + "loss": 7.2823, + "step": 8500 + }, + { + "epoch": 0.25, + "learning_rate": 9e-06, + "loss": 7.2106, + "step": 9000 + }, + { + "epoch": 0.26, + "learning_rate": 9.5e-06, + "loss": 7.1515, + "step": 9500 + }, + { + "epoch": 0.28, + "learning_rate": 1e-05, + "loss": 7.098, + "step": 10000 + }, + { + "epoch": 0.29, + "learning_rate": 9.998613652747741e-06, + "loss": 7.0565, + "step": 10500 + }, + { + "epoch": 0.3, + "learning_rate": 9.997227305495482e-06, + "loss": 7.0144, + "step": 11000 + }, + { + "epoch": 0.32, + "learning_rate": 9.995840958243221e-06, + "loss": 6.986, + "step": 11500 + }, + { + "epoch": 0.33, + "learning_rate": 9.994454610990962e-06, + "loss": 6.9545, + "step": 12000 + }, + { + "epoch": 0.35, + "learning_rate": 9.993068263738702e-06, + "loss": 6.931, + "step": 12500 + }, + { + "epoch": 0.36, + "learning_rate": 9.991681916486441e-06, + "loss": 6.9066, + "step": 13000 + }, + { + "epoch": 0.37, + "learning_rate": 9.990295569234184e-06, + "loss": 6.8805, + "step": 13500 + }, + { + "epoch": 0.39, + "learning_rate": 9.988909221981923e-06, + "loss": 6.8581, + "step": 14000 + }, + { + "epoch": 0.4, + "learning_rate": 9.987522874729663e-06, + "loss": 6.8405, + "step": 14500 + }, + { + "epoch": 0.41, + "learning_rate": 9.986136527477404e-06, + "loss": 6.8213, + "step": 15000 + }, + { + "epoch": 0.43, + "learning_rate": 9.984750180225143e-06, + "loss": 6.8017, + "step": 15500 + }, + { + "epoch": 0.44, + "learning_rate": 9.983363832972885e-06, + "loss": 6.7857, + "step": 16000 + }, + { + "epoch": 0.46, + "learning_rate": 9.981977485720624e-06, + "loss": 6.7668, + "step": 16500 + }, + { + "epoch": 0.47, + "learning_rate": 9.980591138468365e-06, + "loss": 6.7588, + "step": 17000 + }, + { + "epoch": 0.48, + "learning_rate": 9.979204791216105e-06, + "loss": 6.7387, + "step": 17500 + }, + { + "epoch": 0.5, + "learning_rate": 9.977818443963844e-06, + "loss": 6.7327, + "step": 18000 + }, + { + "epoch": 0.51, + "learning_rate": 9.976432096711585e-06, + "loss": 6.714, + "step": 18500 + }, + { + "epoch": 0.53, + "learning_rate": 9.975045749459326e-06, + "loss": 6.7006, + "step": 19000 + }, + { + "epoch": 0.54, + "learning_rate": 9.973659402207065e-06, + "loss": 6.6989, + "step": 19500 + }, + { + "epoch": 0.55, + "learning_rate": 9.972273054954807e-06, + "loss": 6.6843, + "step": 20000 + }, + { + "epoch": 0.57, + "learning_rate": 9.970886707702546e-06, + "loss": 6.6693, + "step": 20500 + }, + { + "epoch": 0.58, + "learning_rate": 9.969500360450287e-06, + "loss": 6.6629, + "step": 21000 + }, + { + "epoch": 0.59, + "learning_rate": 9.968114013198027e-06, + "loss": 6.654, + "step": 21500 + }, + { + "epoch": 0.61, + "learning_rate": 9.966727665945766e-06, + "loss": 6.6443, + "step": 22000 + }, + { + "epoch": 0.62, + "learning_rate": 9.965341318693507e-06, + "loss": 6.638, + "step": 22500 + }, + { + "epoch": 0.64, + "learning_rate": 9.963954971441247e-06, + "loss": 6.6275, + "step": 23000 + }, + { + "epoch": 0.65, + "learning_rate": 9.962568624188988e-06, + "loss": 6.6172, + "step": 23500 + }, + { + "epoch": 0.66, + "learning_rate": 9.961182276936729e-06, + "loss": 6.6165, + "step": 24000 + }, + { + "epoch": 0.68, + "learning_rate": 9.959795929684468e-06, + "loss": 6.6045, + "step": 24500 + }, + { + "epoch": 0.69, + "learning_rate": 9.958409582432208e-06, + "loss": 6.5938, + "step": 25000 + }, + { + "epoch": 0.71, + "learning_rate": 9.957023235179949e-06, + "loss": 6.5902, + "step": 25500 + }, + { + "epoch": 0.72, + "learning_rate": 9.955636887927688e-06, + "loss": 6.5789, + "step": 26000 + }, + { + "epoch": 0.73, + "learning_rate": 9.95425054067543e-06, + "loss": 6.5751, + "step": 26500 + }, + { + "epoch": 0.75, + "learning_rate": 9.95286419342317e-06, + "loss": 6.5694, + "step": 27000 + }, + { + "epoch": 0.76, + "learning_rate": 9.95147784617091e-06, + "loss": 6.5637, + "step": 27500 + }, + { + "epoch": 0.77, + "learning_rate": 9.95009149891865e-06, + "loss": 6.556, + "step": 28000 + }, + { + "epoch": 0.79, + "learning_rate": 9.94870515166639e-06, + "loss": 6.5486, + "step": 28500 + }, + { + "epoch": 0.8, + "learning_rate": 9.94731880441413e-06, + "loss": 6.5426, + "step": 29000 + }, + { + "epoch": 0.82, + "learning_rate": 9.94593245716187e-06, + "loss": 6.5411, + "step": 29500 + }, + { + "epoch": 0.83, + "learning_rate": 9.944546109909611e-06, + "loss": 6.5318, + "step": 30000 + }, + { + "epoch": 0.84, + "learning_rate": 9.943159762657352e-06, + "loss": 6.5263, + "step": 30500 + }, + { + "epoch": 0.86, + "learning_rate": 9.941773415405091e-06, + "loss": 6.5222, + "step": 31000 + }, + { + "epoch": 0.87, + "learning_rate": 9.940387068152832e-06, + "loss": 6.5154, + "step": 31500 + }, + { + "epoch": 0.88, + "learning_rate": 9.939000720900572e-06, + "loss": 6.5164, + "step": 32000 + }, + { + "epoch": 0.9, + "learning_rate": 9.937614373648313e-06, + "loss": 6.5088, + "step": 32500 + }, + { + "epoch": 0.91, + "learning_rate": 9.936228026396052e-06, + "loss": 6.5033, + "step": 33000 + }, + { + "epoch": 0.93, + "learning_rate": 9.934841679143793e-06, + "loss": 6.5037, + "step": 33500 + }, + { + "epoch": 0.94, + "learning_rate": 9.933455331891533e-06, + "loss": 6.4965, + "step": 34000 + }, + { + "epoch": 0.95, + "learning_rate": 9.932068984639274e-06, + "loss": 6.4929, + "step": 34500 + }, + { + "epoch": 0.97, + "learning_rate": 9.930682637387013e-06, + "loss": 6.4842, + "step": 35000 + }, + { + "epoch": 0.98, + "learning_rate": 9.929296290134753e-06, + "loss": 6.4838, + "step": 35500 + }, + { + "epoch": 1.0, + "learning_rate": 9.927909942882494e-06, + "loss": 6.477, + "step": 36000 + }, + { + "epoch": 1.01, + "learning_rate": 9.926523595630235e-06, + "loss": 6.4729, + "step": 36500 + }, + { + "epoch": 1.02, + "learning_rate": 9.925137248377974e-06, + "loss": 6.4718, + "step": 37000 + }, + { + "epoch": 1.04, + "learning_rate": 9.923750901125714e-06, + "loss": 6.4646, + "step": 37500 + }, + { + "epoch": 1.05, + "learning_rate": 9.922364553873455e-06, + "loss": 6.455, + "step": 38000 + }, + { + "epoch": 1.06, + "learning_rate": 9.920978206621196e-06, + "loss": 6.4577, + "step": 38500 + }, + { + "epoch": 1.08, + "learning_rate": 9.919591859368936e-06, + "loss": 6.4527, + "step": 39000 + }, + { + "epoch": 1.09, + "learning_rate": 9.918205512116675e-06, + "loss": 6.4509, + "step": 39500 + }, + { + "epoch": 1.11, + "learning_rate": 9.916819164864416e-06, + "loss": 6.4439, + "step": 40000 + }, + { + "epoch": 1.12, + "learning_rate": 9.915432817612157e-06, + "loss": 6.4433, + "step": 40500 + }, + { + "epoch": 1.13, + "learning_rate": 9.914046470359897e-06, + "loss": 6.4374, + "step": 41000 + }, + { + "epoch": 1.15, + "learning_rate": 9.912660123107636e-06, + "loss": 6.4368, + "step": 41500 + }, + { + "epoch": 1.16, + "learning_rate": 9.911273775855377e-06, + "loss": 6.429, + "step": 42000 + }, + { + "epoch": 1.18, + "learning_rate": 9.909887428603117e-06, + "loss": 6.4267, + "step": 42500 + }, + { + "epoch": 1.19, + "learning_rate": 9.908501081350858e-06, + "loss": 6.4202, + "step": 43000 + }, + { + "epoch": 1.2, + "learning_rate": 9.907114734098597e-06, + "loss": 6.4199, + "step": 43500 + }, + { + "epoch": 1.22, + "learning_rate": 9.905728386846338e-06, + "loss": 6.4171, + "step": 44000 + }, + { + "epoch": 1.23, + "learning_rate": 9.904342039594078e-06, + "loss": 6.4079, + "step": 44500 + }, + { + "epoch": 1.24, + "learning_rate": 9.902955692341819e-06, + "loss": 6.4066, + "step": 45000 + }, + { + "epoch": 1.26, + "learning_rate": 9.90156934508956e-06, + "loss": 6.41, + "step": 45500 + }, + { + "epoch": 1.27, + "learning_rate": 9.900182997837299e-06, + "loss": 6.4045, + "step": 46000 + }, + { + "epoch": 1.29, + "learning_rate": 9.89879665058504e-06, + "loss": 6.402, + "step": 46500 + }, + { + "epoch": 1.3, + "learning_rate": 9.89741030333278e-06, + "loss": 6.398, + "step": 47000 + }, + { + "epoch": 1.31, + "learning_rate": 9.896023956080519e-06, + "loss": 6.398, + "step": 47500 + }, + { + "epoch": 1.33, + "learning_rate": 9.89463760882826e-06, + "loss": 6.3931, + "step": 48000 + }, + { + "epoch": 1.34, + "learning_rate": 9.893251261576e-06, + "loss": 6.3866, + "step": 48500 + }, + { + "epoch": 1.35, + "learning_rate": 9.89186491432374e-06, + "loss": 6.3875, + "step": 49000 + }, + { + "epoch": 1.37, + "learning_rate": 9.890478567071481e-06, + "loss": 6.3859, + "step": 49500 + }, + { + "epoch": 1.38, + "learning_rate": 9.88909221981922e-06, + "loss": 6.3776, + "step": 50000 + }, + { + "epoch": 1.4, + "learning_rate": 9.887705872566961e-06, + "loss": 6.38, + "step": 50500 + }, + { + "epoch": 1.41, + "learning_rate": 9.886319525314702e-06, + "loss": 6.3779, + "step": 51000 + }, + { + "epoch": 1.42, + "learning_rate": 9.88493317806244e-06, + "loss": 6.3713, + "step": 51500 + }, + { + "epoch": 1.44, + "learning_rate": 9.883546830810183e-06, + "loss": 6.3715, + "step": 52000 + }, + { + "epoch": 1.45, + "learning_rate": 9.882160483557922e-06, + "loss": 6.3698, + "step": 52500 + }, + { + "epoch": 1.47, + "learning_rate": 9.880774136305663e-06, + "loss": 6.3641, + "step": 53000 + }, + { + "epoch": 1.48, + "learning_rate": 9.879387789053403e-06, + "loss": 6.3604, + "step": 53500 + }, + { + "epoch": 1.49, + "learning_rate": 9.878001441801142e-06, + "loss": 6.3553, + "step": 54000 + }, + { + "epoch": 1.51, + "learning_rate": 9.876615094548884e-06, + "loss": 6.3554, + "step": 54500 + }, + { + "epoch": 1.52, + "learning_rate": 9.875228747296623e-06, + "loss": 6.3515, + "step": 55000 + }, + { + "epoch": 1.53, + "learning_rate": 9.873842400044364e-06, + "loss": 6.3503, + "step": 55500 + }, + { + "epoch": 1.55, + "learning_rate": 9.872456052792105e-06, + "loss": 6.3526, + "step": 56000 + }, + { + "epoch": 1.56, + "learning_rate": 9.871069705539844e-06, + "loss": 6.3537, + "step": 56500 + }, + { + "epoch": 1.58, + "learning_rate": 9.869683358287584e-06, + "loss": 6.3438, + "step": 57000 + }, + { + "epoch": 1.59, + "learning_rate": 9.868297011035325e-06, + "loss": 6.3449, + "step": 57500 + }, + { + "epoch": 1.6, + "learning_rate": 9.866910663783064e-06, + "loss": 6.3424, + "step": 58000 + }, + { + "epoch": 1.62, + "learning_rate": 9.865524316530806e-06, + "loss": 6.3416, + "step": 58500 + }, + { + "epoch": 1.63, + "learning_rate": 9.864137969278545e-06, + "loss": 6.334, + "step": 59000 + }, + { + "epoch": 1.65, + "learning_rate": 9.862751622026286e-06, + "loss": 6.3384, + "step": 59500 + }, + { + "epoch": 1.66, + "learning_rate": 9.861365274774027e-06, + "loss": 6.3349, + "step": 60000 + }, + { + "epoch": 1.67, + "learning_rate": 9.859978927521765e-06, + "loss": 6.3318, + "step": 60500 + }, + { + "epoch": 1.69, + "learning_rate": 9.858592580269508e-06, + "loss": 6.3296, + "step": 61000 + }, + { + "epoch": 1.7, + "learning_rate": 9.857206233017247e-06, + "loss": 6.3261, + "step": 61500 + }, + { + "epoch": 1.71, + "learning_rate": 9.855819885764987e-06, + "loss": 6.3277, + "step": 62000 + }, + { + "epoch": 1.73, + "learning_rate": 9.854433538512728e-06, + "loss": 6.3218, + "step": 62500 + }, + { + "epoch": 1.74, + "learning_rate": 9.853047191260467e-06, + "loss": 6.3237, + "step": 63000 + }, + { + "epoch": 1.76, + "learning_rate": 9.851660844008208e-06, + "loss": 6.3203, + "step": 63500 + }, + { + "epoch": 1.77, + "learning_rate": 9.850274496755948e-06, + "loss": 6.3184, + "step": 64000 + }, + { + "epoch": 1.78, + "learning_rate": 9.848888149503689e-06, + "loss": 6.3114, + "step": 64500 + }, + { + "epoch": 1.8, + "learning_rate": 9.84750180225143e-06, + "loss": 6.311, + "step": 65000 + }, + { + "epoch": 1.81, + "learning_rate": 9.846115454999169e-06, + "loss": 6.3104, + "step": 65500 + }, + { + "epoch": 1.82, + "learning_rate": 9.84472910774691e-06, + "loss": 6.3108, + "step": 66000 + }, + { + "epoch": 1.84, + "learning_rate": 9.84334276049465e-06, + "loss": 6.3097, + "step": 66500 + }, + { + "epoch": 1.85, + "learning_rate": 9.841956413242389e-06, + "loss": 6.308, + "step": 67000 + }, + { + "epoch": 1.87, + "learning_rate": 9.840570065990131e-06, + "loss": 6.3075, + "step": 67500 + }, + { + "epoch": 1.88, + "learning_rate": 9.83918371873787e-06, + "loss": 6.3049, + "step": 68000 + }, + { + "epoch": 1.89, + "learning_rate": 9.83779737148561e-06, + "loss": 6.2991, + "step": 68500 + }, + { + "epoch": 1.91, + "learning_rate": 9.836411024233351e-06, + "loss": 6.3004, + "step": 69000 + }, + { + "epoch": 1.92, + "learning_rate": 9.83502467698109e-06, + "loss": 6.2954, + "step": 69500 + }, + { + "epoch": 1.94, + "learning_rate": 9.833638329728833e-06, + "loss": 6.296, + "step": 70000 + }, + { + "epoch": 1.95, + "learning_rate": 9.832251982476572e-06, + "loss": 6.2957, + "step": 70500 + }, + { + "epoch": 1.96, + "learning_rate": 9.830865635224312e-06, + "loss": 6.2927, + "step": 71000 + }, + { + "epoch": 1.98, + "learning_rate": 9.829479287972053e-06, + "loss": 6.2886, + "step": 71500 + }, + { + "epoch": 1.99, + "learning_rate": 9.828092940719792e-06, + "loss": 6.2889, + "step": 72000 + }, + { + "epoch": 2.0, + "learning_rate": 9.826706593467533e-06, + "loss": 6.2901, + "step": 72500 + }, + { + "epoch": 2.02, + "learning_rate": 9.825320246215273e-06, + "loss": 6.2874, + "step": 73000 + }, + { + "epoch": 2.03, + "learning_rate": 9.823933898963012e-06, + "loss": 6.2845, + "step": 73500 + }, + { + "epoch": 2.05, + "learning_rate": 9.822547551710754e-06, + "loss": 6.2846, + "step": 74000 + }, + { + "epoch": 2.06, + "learning_rate": 9.821161204458493e-06, + "loss": 6.2815, + "step": 74500 + }, + { + "epoch": 2.07, + "learning_rate": 9.819774857206234e-06, + "loss": 6.2814, + "step": 75000 + }, + { + "epoch": 2.09, + "learning_rate": 9.818388509953975e-06, + "loss": 6.2806, + "step": 75500 + }, + { + "epoch": 2.1, + "learning_rate": 9.817002162701714e-06, + "loss": 6.2773, + "step": 76000 + }, + { + "epoch": 2.12, + "learning_rate": 9.815615815449454e-06, + "loss": 6.2766, + "step": 76500 + }, + { + "epoch": 2.13, + "learning_rate": 9.814229468197195e-06, + "loss": 6.2737, + "step": 77000 + }, + { + "epoch": 2.14, + "learning_rate": 9.812843120944936e-06, + "loss": 6.2767, + "step": 77500 + }, + { + "epoch": 2.16, + "learning_rate": 9.811456773692676e-06, + "loss": 6.2733, + "step": 78000 + }, + { + "epoch": 2.17, + "learning_rate": 9.810070426440415e-06, + "loss": 6.27, + "step": 78500 + }, + { + "epoch": 2.18, + "learning_rate": 9.808684079188156e-06, + "loss": 6.2712, + "step": 79000 + }, + { + "epoch": 2.2, + "learning_rate": 9.807297731935896e-06, + "loss": 6.2709, + "step": 79500 + }, + { + "epoch": 2.21, + "learning_rate": 9.805911384683635e-06, + "loss": 6.2686, + "step": 80000 + }, + { + "epoch": 2.23, + "learning_rate": 9.804525037431376e-06, + "loss": 6.2674, + "step": 80500 + }, + { + "epoch": 2.24, + "learning_rate": 9.803138690179117e-06, + "loss": 6.2641, + "step": 81000 + }, + { + "epoch": 2.25, + "learning_rate": 9.801752342926857e-06, + "loss": 6.2609, + "step": 81500 + }, + { + "epoch": 2.27, + "learning_rate": 9.800365995674598e-06, + "loss": 6.2621, + "step": 82000 + }, + { + "epoch": 2.28, + "learning_rate": 9.798979648422337e-06, + "loss": 6.2635, + "step": 82500 + }, + { + "epoch": 2.29, + "learning_rate": 9.797593301170078e-06, + "loss": 6.2617, + "step": 83000 + }, + { + "epoch": 2.31, + "learning_rate": 9.796206953917818e-06, + "loss": 6.2595, + "step": 83500 + }, + { + "epoch": 2.32, + "learning_rate": 9.794820606665559e-06, + "loss": 6.2595, + "step": 84000 + }, + { + "epoch": 2.34, + "learning_rate": 9.7934342594133e-06, + "loss": 6.2614, + "step": 84500 + }, + { + "epoch": 2.35, + "learning_rate": 9.792047912161039e-06, + "loss": 6.2589, + "step": 85000 + }, + { + "epoch": 2.36, + "learning_rate": 9.790661564908779e-06, + "loss": 6.2529, + "step": 85500 + }, + { + "epoch": 2.38, + "learning_rate": 9.78927521765652e-06, + "loss": 6.2566, + "step": 86000 + }, + { + "epoch": 2.39, + "learning_rate": 9.78788887040426e-06, + "loss": 6.2534, + "step": 86500 + }, + { + "epoch": 2.41, + "learning_rate": 9.786502523152e-06, + "loss": 6.2499, + "step": 87000 + }, + { + "epoch": 2.42, + "learning_rate": 9.78511617589974e-06, + "loss": 6.2508, + "step": 87500 + }, + { + "epoch": 2.43, + "learning_rate": 9.78372982864748e-06, + "loss": 6.2462, + "step": 88000 + }, + { + "epoch": 2.45, + "learning_rate": 9.782343481395221e-06, + "loss": 6.2459, + "step": 88500 + }, + { + "epoch": 2.46, + "learning_rate": 9.78095713414296e-06, + "loss": 6.2461, + "step": 89000 + }, + { + "epoch": 2.47, + "learning_rate": 9.779570786890701e-06, + "loss": 6.2433, + "step": 89500 + }, + { + "epoch": 2.49, + "learning_rate": 9.778184439638442e-06, + "loss": 6.2437, + "step": 90000 + }, + { + "epoch": 2.5, + "learning_rate": 9.776798092386182e-06, + "loss": 6.2437, + "step": 90500 + }, + { + "epoch": 2.52, + "learning_rate": 9.775411745133921e-06, + "loss": 6.2422, + "step": 91000 + }, + { + "epoch": 2.53, + "learning_rate": 9.774025397881662e-06, + "loss": 6.2409, + "step": 91500 + }, + { + "epoch": 2.54, + "learning_rate": 9.772639050629402e-06, + "loss": 6.2422, + "step": 92000 + }, + { + "epoch": 2.56, + "learning_rate": 9.771252703377143e-06, + "loss": 6.2387, + "step": 92500 + }, + { + "epoch": 2.57, + "learning_rate": 9.769866356124884e-06, + "loss": 6.2411, + "step": 93000 + }, + { + "epoch": 2.59, + "learning_rate": 9.768480008872623e-06, + "loss": 6.2365, + "step": 93500 + }, + { + "epoch": 2.6, + "learning_rate": 9.767093661620363e-06, + "loss": 6.235, + "step": 94000 + }, + { + "epoch": 2.61, + "learning_rate": 9.765707314368104e-06, + "loss": 6.2352, + "step": 94500 + }, + { + "epoch": 2.63, + "learning_rate": 9.764320967115843e-06, + "loss": 6.2326, + "step": 95000 + }, + { + "epoch": 2.64, + "learning_rate": 9.762934619863584e-06, + "loss": 6.2318, + "step": 95500 + }, + { + "epoch": 2.65, + "learning_rate": 9.761548272611324e-06, + "loss": 6.2308, + "step": 96000 + }, + { + "epoch": 2.67, + "learning_rate": 9.760161925359065e-06, + "loss": 6.2275, + "step": 96500 + }, + { + "epoch": 2.68, + "learning_rate": 9.758775578106806e-06, + "loss": 6.2322, + "step": 97000 + }, + { + "epoch": 2.7, + "learning_rate": 9.757389230854544e-06, + "loss": 6.2291, + "step": 97500 + }, + { + "epoch": 2.71, + "learning_rate": 9.756002883602285e-06, + "loss": 6.2283, + "step": 98000 + }, + { + "epoch": 2.72, + "learning_rate": 9.754616536350026e-06, + "loss": 6.2276, + "step": 98500 + }, + { + "epoch": 2.74, + "learning_rate": 9.753230189097766e-06, + "loss": 6.2263, + "step": 99000 + }, + { + "epoch": 2.75, + "learning_rate": 9.751843841845507e-06, + "loss": 6.2272, + "step": 99500 + }, + { + "epoch": 2.77, + "learning_rate": 9.750457494593246e-06, + "loss": 6.2237, + "step": 100000 + }, + { + "epoch": 2.78, + "learning_rate": 9.749071147340987e-06, + "loss": 6.2273, + "step": 100500 + }, + { + "epoch": 2.79, + "learning_rate": 9.747684800088727e-06, + "loss": 6.2232, + "step": 101000 + }, + { + "epoch": 2.81, + "learning_rate": 9.746298452836466e-06, + "loss": 6.2197, + "step": 101500 + }, + { + "epoch": 2.82, + "learning_rate": 9.744912105584207e-06, + "loss": 6.2217, + "step": 102000 + }, + { + "epoch": 2.83, + "learning_rate": 9.743525758331948e-06, + "loss": 6.2223, + "step": 102500 + }, + { + "epoch": 2.85, + "learning_rate": 9.742139411079688e-06, + "loss": 6.2234, + "step": 103000 + }, + { + "epoch": 2.86, + "learning_rate": 9.740753063827429e-06, + "loss": 6.2169, + "step": 103500 + }, + { + "epoch": 2.88, + "learning_rate": 9.739366716575168e-06, + "loss": 6.2172, + "step": 104000 + }, + { + "epoch": 2.89, + "learning_rate": 9.737980369322908e-06, + "loss": 6.2164, + "step": 104500 + }, + { + "epoch": 2.9, + "learning_rate": 9.736594022070649e-06, + "loss": 6.2137, + "step": 105000 + }, + { + "epoch": 2.92, + "learning_rate": 9.735207674818388e-06, + "loss": 6.2122, + "step": 105500 + }, + { + "epoch": 2.93, + "learning_rate": 9.73382132756613e-06, + "loss": 6.2152, + "step": 106000 + }, + { + "epoch": 2.94, + "learning_rate": 9.73243498031387e-06, + "loss": 6.2132, + "step": 106500 + }, + { + "epoch": 2.96, + "learning_rate": 9.73104863306161e-06, + "loss": 6.2128, + "step": 107000 + }, + { + "epoch": 2.97, + "learning_rate": 9.72966228580935e-06, + "loss": 6.2112, + "step": 107500 + }, + { + "epoch": 2.99, + "learning_rate": 9.72827593855709e-06, + "loss": 6.2119, + "step": 108000 + }, + { + "epoch": 3.0, + "learning_rate": 9.726889591304832e-06, + "loss": 6.2066, + "step": 108500 + }, + { + "epoch": 3.01, + "learning_rate": 9.725503244052571e-06, + "loss": 6.2121, + "step": 109000 + }, + { + "epoch": 3.03, + "learning_rate": 9.724116896800312e-06, + "loss": 6.2058, + "step": 109500 + }, + { + "epoch": 3.04, + "learning_rate": 9.722730549548052e-06, + "loss": 6.2112, + "step": 110000 + }, + { + "epoch": 3.06, + "learning_rate": 9.721344202295791e-06, + "loss": 6.2069, + "step": 110500 + }, + { + "epoch": 3.07, + "learning_rate": 9.719957855043532e-06, + "loss": 6.2065, + "step": 111000 + }, + { + "epoch": 3.08, + "learning_rate": 9.718571507791272e-06, + "loss": 6.207, + "step": 111500 + }, + { + "epoch": 3.1, + "learning_rate": 9.717185160539011e-06, + "loss": 6.2061, + "step": 112000 + }, + { + "epoch": 3.11, + "learning_rate": 9.715798813286754e-06, + "loss": 6.2056, + "step": 112500 + }, + { + "epoch": 3.12, + "learning_rate": 9.714412466034493e-06, + "loss": 6.2062, + "step": 113000 + }, + { + "epoch": 3.14, + "learning_rate": 9.713026118782233e-06, + "loss": 6.1993, + "step": 113500 + }, + { + "epoch": 3.15, + "learning_rate": 9.711639771529974e-06, + "loss": 6.2011, + "step": 114000 + }, + { + "epoch": 3.17, + "learning_rate": 9.710253424277713e-06, + "loss": 6.2009, + "step": 114500 + }, + { + "epoch": 3.18, + "learning_rate": 9.708867077025455e-06, + "loss": 6.2019, + "step": 115000 + }, + { + "epoch": 3.19, + "learning_rate": 9.707480729773194e-06, + "loss": 6.1993, + "step": 115500 + }, + { + "epoch": 3.21, + "learning_rate": 9.706094382520935e-06, + "loss": 6.2048, + "step": 116000 + }, + { + "epoch": 3.22, + "learning_rate": 9.704708035268676e-06, + "loss": 6.1981, + "step": 116500 + }, + { + "epoch": 3.24, + "learning_rate": 9.703321688016414e-06, + "loss": 6.1946, + "step": 117000 + }, + { + "epoch": 3.25, + "learning_rate": 9.701935340764155e-06, + "loss": 6.2, + "step": 117500 + }, + { + "epoch": 3.26, + "learning_rate": 9.700548993511896e-06, + "loss": 6.1983, + "step": 118000 + }, + { + "epoch": 3.28, + "learning_rate": 9.699162646259635e-06, + "loss": 6.1921, + "step": 118500 + }, + { + "epoch": 3.29, + "learning_rate": 9.697776299007377e-06, + "loss": 6.1934, + "step": 119000 + }, + { + "epoch": 3.3, + "learning_rate": 9.696389951755116e-06, + "loss": 6.1959, + "step": 119500 + }, + { + "epoch": 3.32, + "learning_rate": 9.695003604502857e-06, + "loss": 6.195, + "step": 120000 + }, + { + "epoch": 3.33, + "learning_rate": 9.693617257250597e-06, + "loss": 6.1949, + "step": 120500 + }, + { + "epoch": 3.35, + "learning_rate": 9.692230909998336e-06, + "loss": 6.1922, + "step": 121000 + }, + { + "epoch": 3.36, + "learning_rate": 9.690844562746079e-06, + "loss": 6.1922, + "step": 121500 + }, + { + "epoch": 3.37, + "learning_rate": 9.689458215493818e-06, + "loss": 6.1916, + "step": 122000 + }, + { + "epoch": 3.39, + "learning_rate": 9.688071868241558e-06, + "loss": 6.1883, + "step": 122500 + }, + { + "epoch": 3.4, + "learning_rate": 9.686685520989299e-06, + "loss": 6.1929, + "step": 123000 + }, + { + "epoch": 3.41, + "learning_rate": 9.685299173737038e-06, + "loss": 6.1892, + "step": 123500 + }, + { + "epoch": 3.43, + "learning_rate": 9.683912826484778e-06, + "loss": 6.1857, + "step": 124000 + }, + { + "epoch": 3.44, + "learning_rate": 9.682526479232519e-06, + "loss": 6.1882, + "step": 124500 + }, + { + "epoch": 3.46, + "learning_rate": 9.68114013198026e-06, + "loss": 6.1898, + "step": 125000 + }, + { + "epoch": 3.47, + "learning_rate": 9.679753784728e-06, + "loss": 6.1831, + "step": 125500 + }, + { + "epoch": 3.48, + "learning_rate": 9.67836743747574e-06, + "loss": 6.1849, + "step": 126000 + }, + { + "epoch": 3.5, + "learning_rate": 9.67698109022348e-06, + "loss": 6.1848, + "step": 126500 + }, + { + "epoch": 3.51, + "learning_rate": 9.67559474297122e-06, + "loss": 6.1833, + "step": 127000 + }, + { + "epoch": 3.53, + "learning_rate": 9.67420839571896e-06, + "loss": 6.1851, + "step": 127500 + }, + { + "epoch": 3.54, + "learning_rate": 9.672822048466702e-06, + "loss": 6.1834, + "step": 128000 + }, + { + "epoch": 3.55, + "learning_rate": 9.671435701214441e-06, + "loss": 6.1821, + "step": 128500 + }, + { + "epoch": 3.57, + "learning_rate": 9.670049353962182e-06, + "loss": 6.1845, + "step": 129000 + }, + { + "epoch": 3.58, + "learning_rate": 9.668663006709922e-06, + "loss": 6.1815, + "step": 129500 + }, + { + "epoch": 3.59, + "learning_rate": 9.667276659457661e-06, + "loss": 6.1795, + "step": 130000 + }, + { + "epoch": 3.61, + "learning_rate": 9.665890312205402e-06, + "loss": 6.1826, + "step": 130500 + }, + { + "epoch": 3.62, + "learning_rate": 9.664503964953142e-06, + "loss": 6.1769, + "step": 131000 + }, + { + "epoch": 3.64, + "learning_rate": 9.663117617700883e-06, + "loss": 6.1838, + "step": 131500 + }, + { + "epoch": 3.65, + "learning_rate": 9.661731270448624e-06, + "loss": 6.1806, + "step": 132000 + }, + { + "epoch": 3.66, + "learning_rate": 9.660344923196363e-06, + "loss": 6.1767, + "step": 132500 + }, + { + "epoch": 3.68, + "learning_rate": 9.658958575944103e-06, + "loss": 6.1783, + "step": 133000 + }, + { + "epoch": 3.69, + "learning_rate": 9.657572228691844e-06, + "loss": 6.1757, + "step": 133500 + }, + { + "epoch": 3.71, + "learning_rate": 9.656185881439583e-06, + "loss": 6.177, + "step": 134000 + }, + { + "epoch": 3.72, + "learning_rate": 9.654799534187324e-06, + "loss": 6.1724, + "step": 134500 + }, + { + "epoch": 3.73, + "learning_rate": 9.653413186935064e-06, + "loss": 6.1782, + "step": 135000 + }, + { + "epoch": 3.75, + "learning_rate": 9.652026839682805e-06, + "loss": 6.1751, + "step": 135500 + }, + { + "epoch": 3.76, + "learning_rate": 9.650640492430545e-06, + "loss": 6.173, + "step": 136000 + }, + { + "epoch": 3.77, + "learning_rate": 9.649254145178284e-06, + "loss": 6.1737, + "step": 136500 + }, + { + "epoch": 3.79, + "learning_rate": 9.647867797926025e-06, + "loss": 6.1762, + "step": 137000 + }, + { + "epoch": 3.8, + "learning_rate": 9.646481450673766e-06, + "loss": 6.1703, + "step": 137500 + }, + { + "epoch": 3.82, + "learning_rate": 9.645095103421506e-06, + "loss": 6.1722, + "step": 138000 + }, + { + "epoch": 3.83, + "learning_rate": 9.643708756169245e-06, + "loss": 6.1711, + "step": 138500 + }, + { + "epoch": 3.84, + "learning_rate": 9.642322408916986e-06, + "loss": 6.1672, + "step": 139000 + }, + { + "epoch": 3.86, + "learning_rate": 9.640936061664727e-06, + "loss": 6.1731, + "step": 139500 + }, + { + "epoch": 3.87, + "learning_rate": 9.639549714412467e-06, + "loss": 6.1729, + "step": 140000 + }, + { + "epoch": 3.88, + "learning_rate": 9.638163367160206e-06, + "loss": 6.1678, + "step": 140500 + }, + { + "epoch": 3.9, + "learning_rate": 9.636777019907947e-06, + "loss": 6.17, + "step": 141000 + }, + { + "epoch": 3.91, + "learning_rate": 9.635390672655688e-06, + "loss": 6.1642, + "step": 141500 + }, + { + "epoch": 3.93, + "learning_rate": 9.634004325403428e-06, + "loss": 6.1669, + "step": 142000 + }, + { + "epoch": 3.94, + "learning_rate": 9.632617978151169e-06, + "loss": 6.169, + "step": 142500 + }, + { + "epoch": 3.95, + "learning_rate": 9.631231630898908e-06, + "loss": 6.1688, + "step": 143000 + }, + { + "epoch": 3.97, + "learning_rate": 9.629845283646648e-06, + "loss": 6.1681, + "step": 143500 + }, + { + "epoch": 3.98, + "learning_rate": 9.628458936394389e-06, + "loss": 6.1688, + "step": 144000 + }, + { + "epoch": 4.0, + "learning_rate": 9.62707258914213e-06, + "loss": 6.1618, + "step": 144500 + }, + { + "epoch": 4.01, + "learning_rate": 9.625686241889869e-06, + "loss": 6.1641, + "step": 145000 + }, + { + "epoch": 4.02, + "learning_rate": 9.62429989463761e-06, + "loss": 6.1615, + "step": 145500 + }, + { + "epoch": 4.04, + "learning_rate": 9.62291354738535e-06, + "loss": 6.1675, + "step": 146000 + }, + { + "epoch": 4.05, + "learning_rate": 9.62152720013309e-06, + "loss": 6.1619, + "step": 146500 + }, + { + "epoch": 4.06, + "learning_rate": 9.620140852880831e-06, + "loss": 6.1619, + "step": 147000 + }, + { + "epoch": 4.08, + "learning_rate": 9.61875450562857e-06, + "loss": 6.1601, + "step": 147500 + }, + { + "epoch": 4.09, + "learning_rate": 9.61736815837631e-06, + "loss": 6.162, + "step": 148000 + }, + { + "epoch": 4.11, + "learning_rate": 9.615981811124051e-06, + "loss": 6.1621, + "step": 148500 + }, + { + "epoch": 4.12, + "learning_rate": 9.61459546387179e-06, + "loss": 6.1604, + "step": 149000 + }, + { + "epoch": 4.13, + "learning_rate": 9.613209116619531e-06, + "loss": 6.1591, + "step": 149500 + }, + { + "epoch": 4.15, + "learning_rate": 9.611822769367272e-06, + "loss": 6.1592, + "step": 150000 + }, + { + "epoch": 4.16, + "learning_rate": 9.610436422115012e-06, + "loss": 6.1554, + "step": 150500 + }, + { + "epoch": 4.18, + "learning_rate": 9.609050074862753e-06, + "loss": 6.1591, + "step": 151000 + }, + { + "epoch": 4.19, + "learning_rate": 9.607663727610492e-06, + "loss": 6.1576, + "step": 151500 + }, + { + "epoch": 4.2, + "learning_rate": 9.606277380358233e-06, + "loss": 6.1569, + "step": 152000 + }, + { + "epoch": 4.22, + "learning_rate": 9.604891033105973e-06, + "loss": 6.1524, + "step": 152500 + }, + { + "epoch": 4.23, + "learning_rate": 9.603504685853712e-06, + "loss": 6.1581, + "step": 153000 + }, + { + "epoch": 4.24, + "learning_rate": 9.602118338601455e-06, + "loss": 6.1563, + "step": 153500 + }, + { + "epoch": 4.26, + "learning_rate": 9.600731991349194e-06, + "loss": 6.1551, + "step": 154000 + }, + { + "epoch": 4.27, + "learning_rate": 9.599345644096934e-06, + "loss": 6.1526, + "step": 154500 + }, + { + "epoch": 4.29, + "learning_rate": 9.597959296844675e-06, + "loss": 6.153, + "step": 155000 + }, + { + "epoch": 4.3, + "learning_rate": 9.596572949592414e-06, + "loss": 6.1549, + "step": 155500 + }, + { + "epoch": 4.31, + "learning_rate": 9.595186602340154e-06, + "loss": 6.1529, + "step": 156000 + }, + { + "epoch": 4.33, + "learning_rate": 9.593800255087895e-06, + "loss": 6.1531, + "step": 156500 + }, + { + "epoch": 4.34, + "learning_rate": 9.592413907835636e-06, + "loss": 6.1527, + "step": 157000 + }, + { + "epoch": 4.35, + "learning_rate": 9.591027560583376e-06, + "loss": 6.1501, + "step": 157500 + }, + { + "epoch": 4.37, + "learning_rate": 9.589641213331115e-06, + "loss": 6.1502, + "step": 158000 + }, + { + "epoch": 4.38, + "learning_rate": 9.588254866078856e-06, + "loss": 6.1537, + "step": 158500 + }, + { + "epoch": 4.4, + "learning_rate": 9.586868518826597e-06, + "loss": 6.1498, + "step": 159000 + }, + { + "epoch": 4.41, + "learning_rate": 9.585482171574336e-06, + "loss": 6.1516, + "step": 159500 + }, + { + "epoch": 4.42, + "learning_rate": 9.584095824322078e-06, + "loss": 6.1534, + "step": 160000 + }, + { + "epoch": 4.44, + "learning_rate": 9.582709477069817e-06, + "loss": 6.1468, + "step": 160500 + }, + { + "epoch": 4.45, + "learning_rate": 9.581323129817557e-06, + "loss": 6.1532, + "step": 161000 + }, + { + "epoch": 4.47, + "learning_rate": 9.579936782565298e-06, + "loss": 6.1484, + "step": 161500 + }, + { + "epoch": 4.48, + "learning_rate": 9.578550435313037e-06, + "loss": 6.1516, + "step": 162000 + }, + { + "epoch": 4.49, + "learning_rate": 9.57716408806078e-06, + "loss": 6.1466, + "step": 162500 + }, + { + "epoch": 4.51, + "learning_rate": 9.575777740808518e-06, + "loss": 6.1502, + "step": 163000 + }, + { + "epoch": 4.52, + "learning_rate": 9.574391393556259e-06, + "loss": 6.1447, + "step": 163500 + }, + { + "epoch": 4.53, + "learning_rate": 9.573005046304e-06, + "loss": 6.1462, + "step": 164000 + }, + { + "epoch": 4.55, + "learning_rate": 9.571618699051739e-06, + "loss": 6.1497, + "step": 164500 + }, + { + "epoch": 4.56, + "learning_rate": 9.57023235179948e-06, + "loss": 6.1405, + "step": 165000 + }, + { + "epoch": 4.58, + "learning_rate": 9.56884600454722e-06, + "loss": 6.1441, + "step": 165500 + }, + { + "epoch": 4.59, + "learning_rate": 9.567459657294959e-06, + "loss": 6.1469, + "step": 166000 + }, + { + "epoch": 4.6, + "learning_rate": 9.566073310042701e-06, + "loss": 6.1472, + "step": 166500 + }, + { + "epoch": 4.62, + "learning_rate": 9.56468696279044e-06, + "loss": 6.1413, + "step": 167000 + }, + { + "epoch": 4.63, + "learning_rate": 9.56330061553818e-06, + "loss": 6.1455, + "step": 167500 + }, + { + "epoch": 4.65, + "learning_rate": 9.561914268285921e-06, + "loss": 6.1469, + "step": 168000 + }, + { + "epoch": 4.66, + "learning_rate": 9.56052792103366e-06, + "loss": 6.1411, + "step": 168500 + }, + { + "epoch": 4.67, + "learning_rate": 9.559141573781403e-06, + "loss": 6.1425, + "step": 169000 + }, + { + "epoch": 4.69, + "learning_rate": 9.557755226529142e-06, + "loss": 6.142, + "step": 169500 + }, + { + "epoch": 4.7, + "learning_rate": 9.556368879276882e-06, + "loss": 6.138, + "step": 170000 + }, + { + "epoch": 4.71, + "learning_rate": 9.554982532024623e-06, + "loss": 6.1432, + "step": 170500 + }, + { + "epoch": 4.73, + "learning_rate": 9.553596184772362e-06, + "loss": 6.1415, + "step": 171000 + }, + { + "epoch": 4.74, + "learning_rate": 9.552209837520103e-06, + "loss": 6.1378, + "step": 171500 + }, + { + "epoch": 4.76, + "learning_rate": 9.550823490267843e-06, + "loss": 6.143, + "step": 172000 + }, + { + "epoch": 4.77, + "learning_rate": 9.549437143015582e-06, + "loss": 6.1417, + "step": 172500 + }, + { + "epoch": 4.78, + "learning_rate": 9.548050795763325e-06, + "loss": 6.1418, + "step": 173000 + }, + { + "epoch": 4.8, + "learning_rate": 9.546664448511063e-06, + "loss": 6.1426, + "step": 173500 + }, + { + "epoch": 4.81, + "learning_rate": 9.545278101258804e-06, + "loss": 6.1398, + "step": 174000 + }, + { + "epoch": 4.82, + "learning_rate": 9.543891754006545e-06, + "loss": 6.1375, + "step": 174500 + }, + { + "epoch": 4.84, + "learning_rate": 9.542505406754284e-06, + "loss": 6.1393, + "step": 175000 + }, + { + "epoch": 4.85, + "learning_rate": 9.541119059502026e-06, + "loss": 6.1393, + "step": 175500 + }, + { + "epoch": 4.87, + "learning_rate": 9.539732712249765e-06, + "loss": 6.1338, + "step": 176000 + }, + { + "epoch": 4.88, + "learning_rate": 9.538346364997506e-06, + "loss": 6.1359, + "step": 176500 + }, + { + "epoch": 4.89, + "learning_rate": 9.536960017745246e-06, + "loss": 6.1382, + "step": 177000 + }, + { + "epoch": 4.91, + "learning_rate": 9.535573670492985e-06, + "loss": 6.1371, + "step": 177500 + }, + { + "epoch": 4.92, + "learning_rate": 9.534187323240726e-06, + "loss": 6.1356, + "step": 178000 + }, + { + "epoch": 4.94, + "learning_rate": 9.532800975988467e-06, + "loss": 6.1342, + "step": 178500 + }, + { + "epoch": 4.95, + "learning_rate": 9.531414628736207e-06, + "loss": 6.1341, + "step": 179000 + }, + { + "epoch": 4.96, + "learning_rate": 9.530028281483948e-06, + "loss": 6.1353, + "step": 179500 + }, + { + "epoch": 4.98, + "learning_rate": 9.528641934231687e-06, + "loss": 6.1303, + "step": 180000 + }, + { + "epoch": 4.99, + "learning_rate": 9.527255586979427e-06, + "loss": 6.1354, + "step": 180500 + }, + { + "epoch": 5.0, + "learning_rate": 9.525869239727168e-06, + "loss": 6.1324, + "step": 181000 + }, + { + "epoch": 5.02, + "learning_rate": 9.524482892474907e-06, + "loss": 6.1328, + "step": 181500 + }, + { + "epoch": 5.03, + "learning_rate": 9.523096545222648e-06, + "loss": 6.1356, + "step": 182000 + }, + { + "epoch": 5.05, + "learning_rate": 9.521710197970388e-06, + "loss": 6.135, + "step": 182500 + }, + { + "epoch": 5.06, + "learning_rate": 9.520323850718129e-06, + "loss": 6.1319, + "step": 183000 + }, + { + "epoch": 5.07, + "learning_rate": 9.51893750346587e-06, + "loss": 6.1307, + "step": 183500 + }, + { + "epoch": 5.09, + "learning_rate": 9.517551156213609e-06, + "loss": 6.1287, + "step": 184000 + }, + { + "epoch": 5.1, + "learning_rate": 9.51616480896135e-06, + "loss": 6.1286, + "step": 184500 + }, + { + "epoch": 5.12, + "learning_rate": 9.51477846170909e-06, + "loss": 6.132, + "step": 185000 + }, + { + "epoch": 5.13, + "learning_rate": 9.51339211445683e-06, + "loss": 6.1296, + "step": 185500 + }, + { + "epoch": 5.14, + "learning_rate": 9.512005767204571e-06, + "loss": 6.1306, + "step": 186000 + }, + { + "epoch": 5.16, + "learning_rate": 9.51061941995231e-06, + "loss": 6.1314, + "step": 186500 + }, + { + "epoch": 5.17, + "learning_rate": 9.50923307270005e-06, + "loss": 6.1281, + "step": 187000 + }, + { + "epoch": 5.18, + "learning_rate": 9.507846725447791e-06, + "loss": 6.1276, + "step": 187500 + }, + { + "epoch": 5.2, + "learning_rate": 9.50646037819553e-06, + "loss": 6.1274, + "step": 188000 + }, + { + "epoch": 5.21, + "learning_rate": 9.505074030943271e-06, + "loss": 6.1301, + "step": 188500 + }, + { + "epoch": 5.23, + "learning_rate": 9.503687683691012e-06, + "loss": 6.1282, + "step": 189000 + }, + { + "epoch": 5.24, + "learning_rate": 9.502301336438752e-06, + "loss": 6.1261, + "step": 189500 + }, + { + "epoch": 5.25, + "learning_rate": 9.500914989186493e-06, + "loss": 6.1236, + "step": 190000 + }, + { + "epoch": 5.27, + "learning_rate": 9.499528641934232e-06, + "loss": 6.1256, + "step": 190500 + }, + { + "epoch": 5.28, + "learning_rate": 9.498142294681973e-06, + "loss": 6.1268, + "step": 191000 + }, + { + "epoch": 5.3, + "learning_rate": 9.496755947429713e-06, + "loss": 6.1253, + "step": 191500 + }, + { + "epoch": 5.31, + "learning_rate": 9.495369600177454e-06, + "loss": 6.1282, + "step": 192000 + }, + { + "epoch": 5.32, + "learning_rate": 9.493983252925193e-06, + "loss": 6.1232, + "step": 192500 + }, + { + "epoch": 5.34, + "learning_rate": 9.492596905672933e-06, + "loss": 6.1283, + "step": 193000 + }, + { + "epoch": 5.35, + "learning_rate": 9.491210558420674e-06, + "loss": 6.1278, + "step": 193500 + }, + { + "epoch": 5.36, + "learning_rate": 9.489824211168415e-06, + "loss": 6.1216, + "step": 194000 + }, + { + "epoch": 5.38, + "learning_rate": 9.488437863916154e-06, + "loss": 6.1209, + "step": 194500 + }, + { + "epoch": 5.39, + "learning_rate": 9.487051516663894e-06, + "loss": 6.1216, + "step": 195000 + }, + { + "epoch": 5.41, + "learning_rate": 9.485665169411635e-06, + "loss": 6.1259, + "step": 195500 + }, + { + "epoch": 5.42, + "learning_rate": 9.484278822159376e-06, + "loss": 6.1191, + "step": 196000 + }, + { + "epoch": 5.43, + "learning_rate": 9.482892474907116e-06, + "loss": 6.1222, + "step": 196500 + }, + { + "epoch": 5.45, + "learning_rate": 9.481506127654855e-06, + "loss": 6.1207, + "step": 197000 + }, + { + "epoch": 5.46, + "learning_rate": 9.480119780402596e-06, + "loss": 6.1171, + "step": 197500 + }, + { + "epoch": 5.47, + "learning_rate": 9.478733433150337e-06, + "loss": 6.1228, + "step": 198000 + }, + { + "epoch": 5.49, + "learning_rate": 9.477347085898077e-06, + "loss": 6.119, + "step": 198500 + }, + { + "epoch": 5.5, + "learning_rate": 9.475960738645816e-06, + "loss": 6.119, + "step": 199000 + }, + { + "epoch": 5.52, + "learning_rate": 9.474574391393557e-06, + "loss": 6.1198, + "step": 199500 + }, + { + "epoch": 5.53, + "learning_rate": 9.473188044141297e-06, + "loss": 6.1238, + "step": 200000 + }, + { + "epoch": 5.54, + "learning_rate": 9.471801696889038e-06, + "loss": 6.121, + "step": 200500 + }, + { + "epoch": 5.56, + "learning_rate": 9.470415349636779e-06, + "loss": 6.1171, + "step": 201000 + }, + { + "epoch": 5.57, + "learning_rate": 9.469029002384518e-06, + "loss": 6.1196, + "step": 201500 + }, + { + "epoch": 5.59, + "learning_rate": 9.467642655132258e-06, + "loss": 6.1172, + "step": 202000 + }, + { + "epoch": 5.6, + "learning_rate": 9.466256307879999e-06, + "loss": 6.1241, + "step": 202500 + }, + { + "epoch": 5.61, + "learning_rate": 9.464869960627738e-06, + "loss": 6.121, + "step": 203000 + }, + { + "epoch": 5.63, + "learning_rate": 9.463483613375479e-06, + "loss": 6.1186, + "step": 203500 + }, + { + "epoch": 5.64, + "learning_rate": 9.46209726612322e-06, + "loss": 6.1153, + "step": 204000 + }, + { + "epoch": 5.65, + "learning_rate": 9.46071091887096e-06, + "loss": 6.1186, + "step": 204500 + }, + { + "epoch": 5.67, + "learning_rate": 9.4593245716187e-06, + "loss": 6.1177, + "step": 205000 + }, + { + "epoch": 5.68, + "learning_rate": 9.45793822436644e-06, + "loss": 6.1165, + "step": 205500 + }, + { + "epoch": 5.7, + "learning_rate": 9.45655187711418e-06, + "loss": 6.1138, + "step": 206000 + }, + { + "epoch": 5.71, + "learning_rate": 9.45516552986192e-06, + "loss": 6.1177, + "step": 206500 + }, + { + "epoch": 5.72, + "learning_rate": 9.45377918260966e-06, + "loss": 6.1178, + "step": 207000 + }, + { + "epoch": 5.74, + "learning_rate": 9.452392835357402e-06, + "loss": 6.1124, + "step": 207500 + }, + { + "epoch": 5.75, + "learning_rate": 9.451006488105141e-06, + "loss": 6.1135, + "step": 208000 + }, + { + "epoch": 5.77, + "learning_rate": 9.449620140852882e-06, + "loss": 6.1192, + "step": 208500 + }, + { + "epoch": 5.78, + "learning_rate": 9.448233793600622e-06, + "loss": 6.1179, + "step": 209000 + }, + { + "epoch": 5.79, + "learning_rate": 9.446847446348361e-06, + "loss": 6.1107, + "step": 209500 + }, + { + "epoch": 5.81, + "learning_rate": 9.445461099096102e-06, + "loss": 6.1133, + "step": 210000 + }, + { + "epoch": 5.82, + "learning_rate": 9.444074751843843e-06, + "loss": 6.1123, + "step": 210500 + }, + { + "epoch": 5.83, + "learning_rate": 9.442688404591583e-06, + "loss": 6.1143, + "step": 211000 + }, + { + "epoch": 5.85, + "learning_rate": 9.441302057339324e-06, + "loss": 6.1132, + "step": 211500 + }, + { + "epoch": 5.86, + "learning_rate": 9.439915710087063e-06, + "loss": 6.1149, + "step": 212000 + }, + { + "epoch": 5.88, + "learning_rate": 9.438529362834803e-06, + "loss": 6.1126, + "step": 212500 + }, + { + "epoch": 5.89, + "learning_rate": 9.437143015582544e-06, + "loss": 6.1108, + "step": 213000 + }, + { + "epoch": 5.9, + "learning_rate": 9.435756668330283e-06, + "loss": 6.1119, + "step": 213500 + }, + { + "epoch": 5.92, + "learning_rate": 9.434370321078025e-06, + "loss": 6.1139, + "step": 214000 + }, + { + "epoch": 5.93, + "learning_rate": 9.432983973825764e-06, + "loss": 6.113, + "step": 214500 + }, + { + "epoch": 5.94, + "learning_rate": 9.431597626573505e-06, + "loss": 6.1073, + "step": 215000 + }, + { + "epoch": 5.96, + "learning_rate": 9.430211279321246e-06, + "loss": 6.1105, + "step": 215500 + }, + { + "epoch": 5.97, + "learning_rate": 9.428824932068985e-06, + "loss": 6.1072, + "step": 216000 + }, + { + "epoch": 5.99, + "learning_rate": 9.427438584816725e-06, + "loss": 6.1142, + "step": 216500 + }, + { + "epoch": 6.0, + "learning_rate": 9.426052237564466e-06, + "loss": 6.1105, + "step": 217000 + }, + { + "epoch": 6.01, + "learning_rate": 9.424665890312206e-06, + "loss": 6.1103, + "step": 217500 + }, + { + "epoch": 6.03, + "learning_rate": 9.423279543059947e-06, + "loss": 6.1079, + "step": 218000 + }, + { + "epoch": 6.04, + "learning_rate": 9.421893195807686e-06, + "loss": 6.1099, + "step": 218500 + }, + { + "epoch": 6.06, + "learning_rate": 9.420506848555427e-06, + "loss": 6.1098, + "step": 219000 + }, + { + "epoch": 6.07, + "learning_rate": 9.419120501303167e-06, + "loss": 6.1094, + "step": 219500 + }, + { + "epoch": 6.08, + "learning_rate": 9.417734154050906e-06, + "loss": 6.1081, + "step": 220000 + }, + { + "epoch": 6.1, + "learning_rate": 9.416347806798649e-06, + "loss": 6.1065, + "step": 220500 + }, + { + "epoch": 6.11, + "learning_rate": 9.414961459546388e-06, + "loss": 6.1096, + "step": 221000 + }, + { + "epoch": 6.12, + "learning_rate": 9.413575112294128e-06, + "loss": 6.1089, + "step": 221500 + }, + { + "epoch": 6.14, + "learning_rate": 9.412188765041869e-06, + "loss": 6.1069, + "step": 222000 + }, + { + "epoch": 6.15, + "learning_rate": 9.410802417789608e-06, + "loss": 6.1021, + "step": 222500 + }, + { + "epoch": 6.17, + "learning_rate": 9.40941607053735e-06, + "loss": 6.1075, + "step": 223000 + }, + { + "epoch": 6.18, + "learning_rate": 9.40802972328509e-06, + "loss": 6.1075, + "step": 223500 + }, + { + "epoch": 6.19, + "learning_rate": 9.40664337603283e-06, + "loss": 6.1056, + "step": 224000 + }, + { + "epoch": 6.21, + "learning_rate": 9.40525702878057e-06, + "loss": 6.1077, + "step": 224500 + }, + { + "epoch": 6.22, + "learning_rate": 9.40387068152831e-06, + "loss": 6.1079, + "step": 225000 + }, + { + "epoch": 6.24, + "learning_rate": 9.40248433427605e-06, + "loss": 6.1067, + "step": 225500 + }, + { + "epoch": 6.25, + "learning_rate": 9.40109798702379e-06, + "loss": 6.1048, + "step": 226000 + }, + { + "epoch": 6.26, + "learning_rate": 9.39971163977153e-06, + "loss": 6.1075, + "step": 226500 + }, + { + "epoch": 6.28, + "learning_rate": 9.398325292519272e-06, + "loss": 6.1044, + "step": 227000 + }, + { + "epoch": 6.29, + "learning_rate": 9.396938945267011e-06, + "loss": 6.1046, + "step": 227500 + }, + { + "epoch": 6.3, + "learning_rate": 9.395552598014752e-06, + "loss": 6.103, + "step": 228000 + }, + { + "epoch": 6.32, + "learning_rate": 9.394166250762492e-06, + "loss": 6.1046, + "step": 228500 + }, + { + "epoch": 6.33, + "learning_rate": 9.392779903510231e-06, + "loss": 6.1004, + "step": 229000 + }, + { + "epoch": 6.35, + "learning_rate": 9.391393556257974e-06, + "loss": 6.1034, + "step": 229500 + }, + { + "epoch": 6.36, + "learning_rate": 9.390007209005712e-06, + "loss": 6.1041, + "step": 230000 + }, + { + "epoch": 6.37, + "learning_rate": 9.388620861753453e-06, + "loss": 6.1043, + "step": 230500 + }, + { + "epoch": 6.39, + "learning_rate": 9.387234514501194e-06, + "loss": 6.102, + "step": 231000 + }, + { + "epoch": 6.4, + "learning_rate": 9.385848167248933e-06, + "loss": 6.0986, + "step": 231500 + }, + { + "epoch": 6.41, + "learning_rate": 9.384461819996673e-06, + "loss": 6.1023, + "step": 232000 + }, + { + "epoch": 6.43, + "learning_rate": 9.383075472744414e-06, + "loss": 6.1015, + "step": 232500 + }, + { + "epoch": 6.44, + "learning_rate": 9.381689125492155e-06, + "loss": 6.0996, + "step": 233000 + }, + { + "epoch": 6.46, + "learning_rate": 9.380302778239895e-06, + "loss": 6.1013, + "step": 233500 + }, + { + "epoch": 6.47, + "learning_rate": 9.378916430987634e-06, + "loss": 6.1024, + "step": 234000 + }, + { + "epoch": 6.48, + "learning_rate": 9.377530083735375e-06, + "loss": 6.1004, + "step": 234500 + }, + { + "epoch": 6.5, + "learning_rate": 9.376143736483116e-06, + "loss": 6.1033, + "step": 235000 + }, + { + "epoch": 6.51, + "learning_rate": 9.374757389230855e-06, + "loss": 6.1025, + "step": 235500 + }, + { + "epoch": 6.53, + "learning_rate": 9.373371041978595e-06, + "loss": 6.0993, + "step": 236000 + }, + { + "epoch": 6.54, + "learning_rate": 9.371984694726336e-06, + "loss": 6.0948, + "step": 236500 + }, + { + "epoch": 6.55, + "learning_rate": 9.370598347474076e-06, + "loss": 6.0956, + "step": 237000 + }, + { + "epoch": 6.57, + "learning_rate": 9.369212000221817e-06, + "loss": 6.1003, + "step": 237500 + }, + { + "epoch": 6.58, + "learning_rate": 9.367825652969556e-06, + "loss": 6.1016, + "step": 238000 + }, + { + "epoch": 6.59, + "learning_rate": 9.366439305717297e-06, + "loss": 6.0976, + "step": 238500 + }, + { + "epoch": 6.61, + "learning_rate": 9.365052958465037e-06, + "loss": 6.0974, + "step": 239000 + }, + { + "epoch": 6.62, + "learning_rate": 9.363666611212778e-06, + "loss": 6.0981, + "step": 239500 + }, + { + "epoch": 6.64, + "learning_rate": 9.362280263960519e-06, + "loss": 6.1003, + "step": 240000 + }, + { + "epoch": 6.65, + "learning_rate": 9.360893916708258e-06, + "loss": 6.0968, + "step": 240500 + }, + { + "epoch": 6.66, + "learning_rate": 9.359507569455998e-06, + "loss": 6.0988, + "step": 241000 + }, + { + "epoch": 6.68, + "learning_rate": 9.358121222203739e-06, + "loss": 6.1019, + "step": 241500 + }, + { + "epoch": 6.69, + "learning_rate": 9.356734874951478e-06, + "loss": 6.0987, + "step": 242000 + }, + { + "epoch": 6.71, + "learning_rate": 9.355348527699218e-06, + "loss": 6.0975, + "step": 242500 + }, + { + "epoch": 6.72, + "learning_rate": 9.353962180446959e-06, + "loss": 6.0961, + "step": 243000 + }, + { + "epoch": 6.73, + "learning_rate": 9.3525758331947e-06, + "loss": 6.0974, + "step": 243500 + }, + { + "epoch": 6.75, + "learning_rate": 9.35118948594244e-06, + "loss": 6.0963, + "step": 244000 + }, + { + "epoch": 6.76, + "learning_rate": 9.34980313869018e-06, + "loss": 6.0989, + "step": 244500 + }, + { + "epoch": 6.77, + "learning_rate": 9.34841679143792e-06, + "loss": 6.0979, + "step": 245000 + }, + { + "epoch": 6.79, + "learning_rate": 9.34703044418566e-06, + "loss": 6.0951, + "step": 245500 + }, + { + "epoch": 6.8, + "learning_rate": 9.345644096933401e-06, + "loss": 6.0965, + "step": 246000 + }, + { + "epoch": 6.82, + "learning_rate": 9.34425774968114e-06, + "loss": 6.0983, + "step": 246500 + }, + { + "epoch": 6.83, + "learning_rate": 9.342871402428881e-06, + "loss": 6.0937, + "step": 247000 + }, + { + "epoch": 6.84, + "learning_rate": 9.341485055176622e-06, + "loss": 6.0938, + "step": 247500 + }, + { + "epoch": 6.86, + "learning_rate": 9.340098707924362e-06, + "loss": 6.0956, + "step": 248000 + }, + { + "epoch": 6.87, + "learning_rate": 9.338712360672101e-06, + "loss": 6.0966, + "step": 248500 + }, + { + "epoch": 6.88, + "learning_rate": 9.337326013419842e-06, + "loss": 6.0919, + "step": 249000 + }, + { + "epoch": 6.9, + "learning_rate": 9.335939666167582e-06, + "loss": 6.0887, + "step": 249500 + }, + { + "epoch": 6.91, + "learning_rate": 9.334553318915323e-06, + "loss": 6.0937, + "step": 250000 + }, + { + "epoch": 6.93, + "learning_rate": 9.333166971663062e-06, + "loss": 6.0922, + "step": 250500 + }, + { + "epoch": 6.94, + "learning_rate": 9.331780624410803e-06, + "loss": 6.0927, + "step": 251000 + }, + { + "epoch": 6.95, + "learning_rate": 9.330394277158543e-06, + "loss": 6.0938, + "step": 251500 + }, + { + "epoch": 6.97, + "learning_rate": 9.329007929906284e-06, + "loss": 6.0944, + "step": 252000 + }, + { + "epoch": 6.98, + "learning_rate": 9.327621582654025e-06, + "loss": 6.0923, + "step": 252500 + }, + { + "epoch": 7.0, + "learning_rate": 9.326235235401764e-06, + "loss": 6.091, + "step": 253000 + }, + { + "epoch": 7.01, + "learning_rate": 9.324848888149504e-06, + "loss": 6.0916, + "step": 253500 + }, + { + "epoch": 7.02, + "learning_rate": 9.323462540897245e-06, + "loss": 6.0898, + "step": 254000 + }, + { + "epoch": 7.04, + "learning_rate": 9.322076193644986e-06, + "loss": 6.0913, + "step": 254500 + }, + { + "epoch": 7.05, + "learning_rate": 9.320689846392726e-06, + "loss": 6.0948, + "step": 255000 + }, + { + "epoch": 7.06, + "learning_rate": 9.319303499140465e-06, + "loss": 6.0911, + "step": 255500 + }, + { + "epoch": 7.08, + "learning_rate": 9.317917151888206e-06, + "loss": 6.0936, + "step": 256000 + }, + { + "epoch": 7.09, + "learning_rate": 9.316530804635946e-06, + "loss": 6.0893, + "step": 256500 + }, + { + "epoch": 7.11, + "learning_rate": 9.315144457383685e-06, + "loss": 6.0903, + "step": 257000 + }, + { + "epoch": 7.12, + "learning_rate": 9.313758110131426e-06, + "loss": 6.092, + "step": 257500 + }, + { + "epoch": 7.13, + "learning_rate": 9.312371762879167e-06, + "loss": 6.0877, + "step": 258000 + }, + { + "epoch": 7.15, + "learning_rate": 9.310985415626907e-06, + "loss": 6.0908, + "step": 258500 + }, + { + "epoch": 7.16, + "learning_rate": 9.309599068374648e-06, + "loss": 6.0916, + "step": 259000 + }, + { + "epoch": 7.18, + "learning_rate": 9.308212721122387e-06, + "loss": 6.0861, + "step": 259500 + }, + { + "epoch": 7.19, + "learning_rate": 9.306826373870128e-06, + "loss": 6.0921, + "step": 260000 + }, + { + "epoch": 7.2, + "learning_rate": 9.305440026617868e-06, + "loss": 6.0858, + "step": 260500 + }, + { + "epoch": 7.22, + "learning_rate": 9.304053679365607e-06, + "loss": 6.09, + "step": 261000 + }, + { + "epoch": 7.23, + "learning_rate": 9.30266733211335e-06, + "loss": 6.0855, + "step": 261500 + }, + { + "epoch": 7.24, + "learning_rate": 9.301280984861088e-06, + "loss": 6.0844, + "step": 262000 + }, + { + "epoch": 7.26, + "learning_rate": 9.299894637608829e-06, + "loss": 6.0885, + "step": 262500 + }, + { + "epoch": 7.27, + "learning_rate": 9.29850829035657e-06, + "loss": 6.0843, + "step": 263000 + }, + { + "epoch": 7.29, + "learning_rate": 9.297121943104309e-06, + "loss": 6.0872, + "step": 263500 + }, + { + "epoch": 7.3, + "learning_rate": 9.29573559585205e-06, + "loss": 6.086, + "step": 264000 + }, + { + "epoch": 7.31, + "learning_rate": 9.29434924859979e-06, + "loss": 6.0856, + "step": 264500 + }, + { + "epoch": 7.33, + "learning_rate": 9.292962901347529e-06, + "loss": 6.0868, + "step": 265000 + }, + { + "epoch": 7.34, + "learning_rate": 9.291576554095271e-06, + "loss": 6.088, + "step": 265500 + }, + { + "epoch": 7.35, + "learning_rate": 9.29019020684301e-06, + "loss": 6.0867, + "step": 266000 + }, + { + "epoch": 7.37, + "learning_rate": 9.288803859590751e-06, + "loss": 6.0869, + "step": 266500 + }, + { + "epoch": 7.38, + "learning_rate": 9.287417512338492e-06, + "loss": 6.0889, + "step": 267000 + }, + { + "epoch": 7.4, + "learning_rate": 9.28603116508623e-06, + "loss": 6.0832, + "step": 267500 + }, + { + "epoch": 7.41, + "learning_rate": 9.284644817833973e-06, + "loss": 6.0833, + "step": 268000 + }, + { + "epoch": 7.42, + "learning_rate": 9.283258470581712e-06, + "loss": 6.0827, + "step": 268500 + }, + { + "epoch": 7.44, + "learning_rate": 9.281872123329452e-06, + "loss": 6.0832, + "step": 269000 + }, + { + "epoch": 7.45, + "learning_rate": 9.280485776077193e-06, + "loss": 6.0888, + "step": 269500 + }, + { + "epoch": 7.47, + "learning_rate": 9.279099428824932e-06, + "loss": 6.0844, + "step": 270000 + }, + { + "epoch": 7.48, + "learning_rate": 9.277713081572673e-06, + "loss": 6.0889, + "step": 270500 + }, + { + "epoch": 7.49, + "learning_rate": 9.276326734320413e-06, + "loss": 6.0886, + "step": 271000 + }, + { + "epoch": 7.51, + "learning_rate": 9.274940387068154e-06, + "loss": 6.0842, + "step": 271500 + }, + { + "epoch": 7.52, + "learning_rate": 9.273554039815895e-06, + "loss": 6.086, + "step": 272000 + }, + { + "epoch": 7.53, + "learning_rate": 9.272167692563634e-06, + "loss": 6.084, + "step": 272500 + }, + { + "epoch": 7.55, + "learning_rate": 9.270781345311374e-06, + "loss": 6.0844, + "step": 273000 + }, + { + "epoch": 7.56, + "learning_rate": 9.269394998059115e-06, + "loss": 6.0871, + "step": 273500 + }, + { + "epoch": 7.58, + "learning_rate": 9.268008650806854e-06, + "loss": 6.0848, + "step": 274000 + }, + { + "epoch": 7.59, + "learning_rate": 9.266622303554596e-06, + "loss": 6.086, + "step": 274500 + }, + { + "epoch": 7.6, + "learning_rate": 9.265235956302335e-06, + "loss": 6.0839, + "step": 275000 + }, + { + "epoch": 7.62, + "learning_rate": 9.263849609050076e-06, + "loss": 6.0843, + "step": 275500 + }, + { + "epoch": 7.63, + "learning_rate": 9.262463261797816e-06, + "loss": 6.0824, + "step": 276000 + }, + { + "epoch": 7.65, + "learning_rate": 9.261076914545555e-06, + "loss": 6.0837, + "step": 276500 + }, + { + "epoch": 7.66, + "learning_rate": 9.259690567293298e-06, + "loss": 6.0819, + "step": 277000 + }, + { + "epoch": 7.67, + "learning_rate": 9.258304220041037e-06, + "loss": 6.078, + "step": 277500 + }, + { + "epoch": 7.69, + "learning_rate": 9.256917872788777e-06, + "loss": 6.0801, + "step": 278000 + }, + { + "epoch": 7.7, + "learning_rate": 9.255531525536518e-06, + "loss": 6.0812, + "step": 278500 + }, + { + "epoch": 7.71, + "learning_rate": 9.254145178284257e-06, + "loss": 6.0827, + "step": 279000 + }, + { + "epoch": 7.73, + "learning_rate": 9.252758831031998e-06, + "loss": 6.0806, + "step": 279500 + }, + { + "epoch": 7.74, + "learning_rate": 9.251372483779738e-06, + "loss": 6.0803, + "step": 280000 + }, + { + "epoch": 7.76, + "learning_rate": 9.249986136527477e-06, + "loss": 6.0819, + "step": 280500 + }, + { + "epoch": 7.77, + "learning_rate": 9.24859978927522e-06, + "loss": 6.0811, + "step": 281000 + }, + { + "epoch": 7.78, + "learning_rate": 9.247213442022958e-06, + "loss": 6.082, + "step": 281500 + }, + { + "epoch": 7.8, + "learning_rate": 9.245827094770699e-06, + "loss": 6.0796, + "step": 282000 + }, + { + "epoch": 7.81, + "learning_rate": 9.24444074751844e-06, + "loss": 6.0797, + "step": 282500 + }, + { + "epoch": 7.83, + "learning_rate": 9.243054400266179e-06, + "loss": 6.0774, + "step": 283000 + }, + { + "epoch": 7.84, + "learning_rate": 9.241668053013921e-06, + "loss": 6.082, + "step": 283500 + }, + { + "epoch": 7.85, + "learning_rate": 9.24028170576166e-06, + "loss": 6.0824, + "step": 284000 + }, + { + "epoch": 7.87, + "learning_rate": 9.2388953585094e-06, + "loss": 6.0756, + "step": 284500 + }, + { + "epoch": 7.88, + "learning_rate": 9.237509011257141e-06, + "loss": 6.0811, + "step": 285000 + }, + { + "epoch": 7.89, + "learning_rate": 9.23612266400488e-06, + "loss": 6.0766, + "step": 285500 + }, + { + "epoch": 7.91, + "learning_rate": 9.23473631675262e-06, + "loss": 6.0801, + "step": 286000 + }, + { + "epoch": 7.92, + "learning_rate": 9.233349969500361e-06, + "loss": 6.0765, + "step": 286500 + }, + { + "epoch": 7.94, + "learning_rate": 9.2319636222481e-06, + "loss": 6.0779, + "step": 287000 + }, + { + "epoch": 7.95, + "learning_rate": 9.230577274995843e-06, + "loss": 6.0784, + "step": 287500 + }, + { + "epoch": 7.96, + "learning_rate": 9.229190927743582e-06, + "loss": 6.0776, + "step": 288000 + }, + { + "epoch": 7.98, + "learning_rate": 9.227804580491322e-06, + "loss": 6.0798, + "step": 288500 + }, + { + "epoch": 7.99, + "learning_rate": 9.226418233239063e-06, + "loss": 6.0782, + "step": 289000 + }, + { + "epoch": 8.0, + "learning_rate": 9.225031885986802e-06, + "loss": 6.0787, + "step": 289500 + }, + { + "epoch": 8.02, + "learning_rate": 9.223645538734543e-06, + "loss": 6.0766, + "step": 290000 + }, + { + "epoch": 8.03, + "learning_rate": 9.222259191482283e-06, + "loss": 6.0781, + "step": 290500 + }, + { + "epoch": 8.05, + "learning_rate": 9.220872844230024e-06, + "loss": 6.0736, + "step": 291000 + }, + { + "epoch": 8.06, + "learning_rate": 9.219486496977765e-06, + "loss": 6.0756, + "step": 291500 + }, + { + "epoch": 8.07, + "learning_rate": 9.218100149725504e-06, + "loss": 6.0742, + "step": 292000 + }, + { + "epoch": 8.09, + "learning_rate": 9.216713802473244e-06, + "loss": 6.0791, + "step": 292500 + }, + { + "epoch": 8.1, + "learning_rate": 9.215327455220985e-06, + "loss": 6.0754, + "step": 293000 + }, + { + "epoch": 8.12, + "learning_rate": 9.213941107968725e-06, + "loss": 6.0739, + "step": 293500 + }, + { + "epoch": 8.13, + "learning_rate": 9.212554760716464e-06, + "loss": 6.0751, + "step": 294000 + }, + { + "epoch": 8.14, + "learning_rate": 9.211168413464205e-06, + "loss": 6.0737, + "step": 294500 + }, + { + "epoch": 8.16, + "learning_rate": 9.209782066211946e-06, + "loss": 6.0803, + "step": 295000 + }, + { + "epoch": 8.17, + "learning_rate": 9.208395718959686e-06, + "loss": 6.0766, + "step": 295500 + }, + { + "epoch": 8.18, + "learning_rate": 9.207009371707425e-06, + "loss": 6.0745, + "step": 296000 + }, + { + "epoch": 8.2, + "learning_rate": 9.205623024455166e-06, + "loss": 6.0743, + "step": 296500 + }, + { + "epoch": 8.21, + "learning_rate": 9.204236677202907e-06, + "loss": 6.0735, + "step": 297000 + }, + { + "epoch": 8.23, + "learning_rate": 9.202850329950647e-06, + "loss": 6.077, + "step": 297500 + }, + { + "epoch": 8.24, + "learning_rate": 9.201463982698388e-06, + "loss": 6.0748, + "step": 298000 + }, + { + "epoch": 8.25, + "learning_rate": 9.200077635446127e-06, + "loss": 6.0755, + "step": 298500 + }, + { + "epoch": 8.27, + "learning_rate": 9.198691288193867e-06, + "loss": 6.074, + "step": 299000 + }, + { + "epoch": 8.28, + "learning_rate": 9.197304940941608e-06, + "loss": 6.077, + "step": 299500 + }, + { + "epoch": 8.3, + "learning_rate": 9.195918593689349e-06, + "loss": 6.0762, + "step": 300000 + }, + { + "epoch": 8.31, + "learning_rate": 9.194532246437088e-06, + "loss": 6.0768, + "step": 300500 + }, + { + "epoch": 8.32, + "learning_rate": 9.193145899184828e-06, + "loss": 6.0716, + "step": 301000 + }, + { + "epoch": 8.34, + "learning_rate": 9.191759551932569e-06, + "loss": 6.0743, + "step": 301500 + }, + { + "epoch": 8.35, + "learning_rate": 9.19037320468031e-06, + "loss": 6.0744, + "step": 302000 + }, + { + "epoch": 8.36, + "learning_rate": 9.188986857428049e-06, + "loss": 6.0747, + "step": 302500 + }, + { + "epoch": 8.38, + "learning_rate": 9.18760051017579e-06, + "loss": 6.0729, + "step": 303000 + }, + { + "epoch": 8.39, + "learning_rate": 9.18621416292353e-06, + "loss": 6.0691, + "step": 303500 + }, + { + "epoch": 8.41, + "learning_rate": 9.18482781567127e-06, + "loss": 6.0739, + "step": 304000 + }, + { + "epoch": 8.42, + "learning_rate": 9.18344146841901e-06, + "loss": 6.0715, + "step": 304500 + }, + { + "epoch": 8.43, + "learning_rate": 9.18205512116675e-06, + "loss": 6.0702, + "step": 305000 + }, + { + "epoch": 8.45, + "learning_rate": 9.18066877391449e-06, + "loss": 6.0691, + "step": 305500 + }, + { + "epoch": 8.46, + "learning_rate": 9.179282426662231e-06, + "loss": 6.0745, + "step": 306000 + }, + { + "epoch": 8.47, + "learning_rate": 9.177896079409972e-06, + "loss": 6.0722, + "step": 306500 + }, + { + "epoch": 8.49, + "learning_rate": 9.176509732157711e-06, + "loss": 6.0749, + "step": 307000 + }, + { + "epoch": 8.5, + "learning_rate": 9.175123384905452e-06, + "loss": 6.0709, + "step": 307500 + }, + { + "epoch": 8.52, + "learning_rate": 9.173737037653192e-06, + "loss": 6.0693, + "step": 308000 + }, + { + "epoch": 8.53, + "learning_rate": 9.172350690400931e-06, + "loss": 6.0693, + "step": 308500 + }, + { + "epoch": 8.54, + "learning_rate": 9.170964343148672e-06, + "loss": 6.0728, + "step": 309000 + }, + { + "epoch": 8.56, + "learning_rate": 9.169577995896413e-06, + "loss": 6.0721, + "step": 309500 + }, + { + "epoch": 8.57, + "learning_rate": 9.168191648644153e-06, + "loss": 6.0721, + "step": 310000 + }, + { + "epoch": 8.59, + "learning_rate": 9.166805301391894e-06, + "loss": 6.0706, + "step": 310500 + }, + { + "epoch": 8.6, + "learning_rate": 9.165418954139633e-06, + "loss": 6.0695, + "step": 311000 + }, + { + "epoch": 8.61, + "learning_rate": 9.164032606887373e-06, + "loss": 6.0672, + "step": 311500 + }, + { + "epoch": 8.63, + "learning_rate": 9.162646259635114e-06, + "loss": 6.0714, + "step": 312000 + }, + { + "epoch": 8.64, + "learning_rate": 9.161259912382855e-06, + "loss": 6.0659, + "step": 312500 + }, + { + "epoch": 8.65, + "learning_rate": 9.159873565130595e-06, + "loss": 6.0718, + "step": 313000 + }, + { + "epoch": 8.67, + "learning_rate": 9.158487217878334e-06, + "loss": 6.0658, + "step": 313500 + }, + { + "epoch": 8.68, + "learning_rate": 9.157100870626075e-06, + "loss": 6.0716, + "step": 314000 + }, + { + "epoch": 8.7, + "learning_rate": 9.155714523373816e-06, + "loss": 6.0705, + "step": 314500 + }, + { + "epoch": 8.71, + "learning_rate": 9.154328176121555e-06, + "loss": 6.0689, + "step": 315000 + }, + { + "epoch": 8.72, + "learning_rate": 9.152941828869297e-06, + "loss": 6.0677, + "step": 315500 + }, + { + "epoch": 8.74, + "learning_rate": 9.151555481617036e-06, + "loss": 6.0683, + "step": 316000 + }, + { + "epoch": 8.75, + "learning_rate": 9.150169134364777e-06, + "loss": 6.0702, + "step": 316500 + }, + { + "epoch": 8.77, + "learning_rate": 9.148782787112517e-06, + "loss": 6.0724, + "step": 317000 + }, + { + "epoch": 8.78, + "learning_rate": 9.147396439860256e-06, + "loss": 6.066, + "step": 317500 + }, + { + "epoch": 8.79, + "learning_rate": 9.146010092607997e-06, + "loss": 6.0695, + "step": 318000 + }, + { + "epoch": 8.81, + "learning_rate": 9.144623745355737e-06, + "loss": 6.0696, + "step": 318500 + }, + { + "epoch": 8.82, + "learning_rate": 9.143237398103476e-06, + "loss": 6.0679, + "step": 319000 + }, + { + "epoch": 8.83, + "learning_rate": 9.141851050851219e-06, + "loss": 6.0666, + "step": 319500 + }, + { + "epoch": 8.85, + "learning_rate": 9.140464703598958e-06, + "loss": 6.0708, + "step": 320000 + }, + { + "epoch": 8.86, + "learning_rate": 9.139078356346698e-06, + "loss": 6.0677, + "step": 320500 + }, + { + "epoch": 8.88, + "learning_rate": 9.137692009094439e-06, + "loss": 6.0646, + "step": 321000 + }, + { + "epoch": 8.89, + "learning_rate": 9.136305661842178e-06, + "loss": 6.0643, + "step": 321500 + }, + { + "epoch": 8.9, + "learning_rate": 9.13491931458992e-06, + "loss": 6.0695, + "step": 322000 + }, + { + "epoch": 8.92, + "learning_rate": 9.13353296733766e-06, + "loss": 6.068, + "step": 322500 + }, + { + "epoch": 8.93, + "learning_rate": 9.1321466200854e-06, + "loss": 6.0688, + "step": 323000 + }, + { + "epoch": 8.94, + "learning_rate": 9.13076027283314e-06, + "loss": 6.0624, + "step": 323500 + }, + { + "epoch": 8.96, + "learning_rate": 9.12937392558088e-06, + "loss": 6.064, + "step": 324000 + }, + { + "epoch": 8.97, + "learning_rate": 9.12798757832862e-06, + "loss": 6.0657, + "step": 324500 + }, + { + "epoch": 8.99, + "learning_rate": 9.12660123107636e-06, + "loss": 6.0665, + "step": 325000 + }, + { + "epoch": 9.0, + "learning_rate": 9.125214883824101e-06, + "loss": 6.0649, + "step": 325500 + }, + { + "epoch": 9.01, + "learning_rate": 9.123828536571842e-06, + "loss": 6.0642, + "step": 326000 + }, + { + "epoch": 9.03, + "learning_rate": 9.122442189319581e-06, + "loss": 6.0629, + "step": 326500 + }, + { + "epoch": 9.04, + "learning_rate": 9.121055842067322e-06, + "loss": 6.0662, + "step": 327000 + }, + { + "epoch": 9.06, + "learning_rate": 9.119669494815062e-06, + "loss": 6.0694, + "step": 327500 + }, + { + "epoch": 9.07, + "learning_rate": 9.118283147562801e-06, + "loss": 6.0667, + "step": 328000 + }, + { + "epoch": 9.08, + "learning_rate": 9.116896800310544e-06, + "loss": 6.0655, + "step": 328500 + }, + { + "epoch": 9.1, + "learning_rate": 9.115510453058283e-06, + "loss": 6.0646, + "step": 329000 + }, + { + "epoch": 9.11, + "learning_rate": 9.114124105806023e-06, + "loss": 6.0648, + "step": 329500 + }, + { + "epoch": 9.12, + "learning_rate": 9.112737758553764e-06, + "loss": 6.062, + "step": 330000 + }, + { + "epoch": 9.14, + "learning_rate": 9.111351411301503e-06, + "loss": 6.0626, + "step": 330500 + }, + { + "epoch": 9.15, + "learning_rate": 9.109965064049243e-06, + "loss": 6.0657, + "step": 331000 + }, + { + "epoch": 9.17, + "learning_rate": 9.108578716796984e-06, + "loss": 6.0662, + "step": 331500 + }, + { + "epoch": 9.18, + "learning_rate": 9.107192369544725e-06, + "loss": 6.0646, + "step": 332000 + }, + { + "epoch": 9.19, + "learning_rate": 9.105806022292465e-06, + "loss": 6.0649, + "step": 332500 + }, + { + "epoch": 9.21, + "learning_rate": 9.104419675040204e-06, + "loss": 6.0623, + "step": 333000 + }, + { + "epoch": 9.22, + "learning_rate": 9.103033327787945e-06, + "loss": 6.0619, + "step": 333500 + }, + { + "epoch": 9.24, + "learning_rate": 9.101646980535686e-06, + "loss": 6.0666, + "step": 334000 + }, + { + "epoch": 9.25, + "learning_rate": 9.100260633283425e-06, + "loss": 6.0644, + "step": 334500 + }, + { + "epoch": 9.26, + "learning_rate": 9.098874286031167e-06, + "loss": 6.0616, + "step": 335000 + }, + { + "epoch": 9.28, + "learning_rate": 9.097487938778906e-06, + "loss": 6.0584, + "step": 335500 + }, + { + "epoch": 9.29, + "learning_rate": 9.096101591526647e-06, + "loss": 6.0644, + "step": 336000 + }, + { + "epoch": 9.3, + "learning_rate": 9.094715244274387e-06, + "loss": 6.0609, + "step": 336500 + }, + { + "epoch": 9.32, + "learning_rate": 9.093328897022126e-06, + "loss": 6.0627, + "step": 337000 + }, + { + "epoch": 9.33, + "learning_rate": 9.091942549769867e-06, + "loss": 6.0629, + "step": 337500 + }, + { + "epoch": 9.35, + "learning_rate": 9.090556202517607e-06, + "loss": 6.0601, + "step": 338000 + }, + { + "epoch": 9.36, + "learning_rate": 9.089169855265348e-06, + "loss": 6.0594, + "step": 338500 + }, + { + "epoch": 9.37, + "learning_rate": 9.087783508013089e-06, + "loss": 6.0596, + "step": 339000 + }, + { + "epoch": 9.39, + "learning_rate": 9.086397160760828e-06, + "loss": 6.0621, + "step": 339500 + }, + { + "epoch": 9.4, + "learning_rate": 9.085010813508568e-06, + "loss": 6.0599, + "step": 340000 + }, + { + "epoch": 9.41, + "learning_rate": 9.083624466256309e-06, + "loss": 6.0603, + "step": 340500 + }, + { + "epoch": 9.43, + "learning_rate": 9.082238119004048e-06, + "loss": 6.0615, + "step": 341000 + }, + { + "epoch": 9.44, + "learning_rate": 9.08085177175179e-06, + "loss": 6.0654, + "step": 341500 + }, + { + "epoch": 9.46, + "learning_rate": 9.07946542449953e-06, + "loss": 6.0619, + "step": 342000 + }, + { + "epoch": 9.47, + "learning_rate": 9.07807907724727e-06, + "loss": 6.0597, + "step": 342500 + }, + { + "epoch": 9.48, + "learning_rate": 9.07669272999501e-06, + "loss": 6.0616, + "step": 343000 + }, + { + "epoch": 9.5, + "learning_rate": 9.07530638274275e-06, + "loss": 6.0671, + "step": 343500 + }, + { + "epoch": 9.51, + "learning_rate": 9.07392003549049e-06, + "loss": 6.0616, + "step": 344000 + }, + { + "epoch": 9.53, + "learning_rate": 9.07253368823823e-06, + "loss": 6.0571, + "step": 344500 + }, + { + "epoch": 9.54, + "learning_rate": 9.071147340985971e-06, + "loss": 6.0598, + "step": 345000 + }, + { + "epoch": 9.55, + "learning_rate": 9.069760993733712e-06, + "loss": 6.0638, + "step": 345500 + }, + { + "epoch": 9.57, + "learning_rate": 9.068374646481451e-06, + "loss": 6.0576, + "step": 346000 + }, + { + "epoch": 9.58, + "learning_rate": 9.066988299229192e-06, + "loss": 6.0531, + "step": 346500 + }, + { + "epoch": 9.59, + "learning_rate": 9.065601951976932e-06, + "loss": 6.0627, + "step": 347000 + }, + { + "epoch": 9.61, + "learning_rate": 9.064215604724673e-06, + "loss": 6.0588, + "step": 347500 + }, + { + "epoch": 9.62, + "learning_rate": 9.062829257472412e-06, + "loss": 6.059, + "step": 348000 + }, + { + "epoch": 9.64, + "learning_rate": 9.061442910220153e-06, + "loss": 6.0609, + "step": 348500 + }, + { + "epoch": 9.65, + "learning_rate": 9.060056562967893e-06, + "loss": 6.0576, + "step": 349000 + }, + { + "epoch": 9.66, + "learning_rate": 9.058670215715634e-06, + "loss": 6.0531, + "step": 349500 + }, + { + "epoch": 9.68, + "learning_rate": 9.057283868463373e-06, + "loss": 6.0594, + "step": 350000 + }, + { + "epoch": 9.69, + "learning_rate": 9.055897521211113e-06, + "loss": 6.0612, + "step": 350500 + }, + { + "epoch": 9.71, + "learning_rate": 9.054511173958854e-06, + "loss": 6.0603, + "step": 351000 + }, + { + "epoch": 9.72, + "learning_rate": 9.053124826706595e-06, + "loss": 6.06, + "step": 351500 + }, + { + "epoch": 9.73, + "learning_rate": 9.051738479454334e-06, + "loss": 6.061, + "step": 352000 + }, + { + "epoch": 9.75, + "learning_rate": 9.050352132202074e-06, + "loss": 6.0582, + "step": 352500 + }, + { + "epoch": 9.76, + "learning_rate": 9.048965784949815e-06, + "loss": 6.0578, + "step": 353000 + }, + { + "epoch": 9.77, + "learning_rate": 9.047579437697556e-06, + "loss": 6.0565, + "step": 353500 + }, + { + "epoch": 9.79, + "learning_rate": 9.046193090445296e-06, + "loss": 6.0547, + "step": 354000 + }, + { + "epoch": 9.8, + "learning_rate": 9.044806743193035e-06, + "loss": 6.0581, + "step": 354500 + }, + { + "epoch": 9.82, + "learning_rate": 9.043420395940776e-06, + "loss": 6.0586, + "step": 355000 + }, + { + "epoch": 9.83, + "learning_rate": 9.042034048688516e-06, + "loss": 6.0572, + "step": 355500 + }, + { + "epoch": 9.84, + "learning_rate": 9.040647701436257e-06, + "loss": 6.0544, + "step": 356000 + }, + { + "epoch": 9.86, + "learning_rate": 9.039261354183996e-06, + "loss": 6.061, + "step": 356500 + }, + { + "epoch": 9.87, + "learning_rate": 9.037875006931737e-06, + "loss": 6.058, + "step": 357000 + }, + { + "epoch": 9.88, + "learning_rate": 9.036488659679477e-06, + "loss": 6.0573, + "step": 357500 + }, + { + "epoch": 9.9, + "learning_rate": 9.035102312427218e-06, + "loss": 6.0591, + "step": 358000 + }, + { + "epoch": 9.91, + "learning_rate": 9.033715965174957e-06, + "loss": 6.0574, + "step": 358500 + }, + { + "epoch": 9.93, + "learning_rate": 9.032329617922698e-06, + "loss": 6.0556, + "step": 359000 + }, + { + "epoch": 9.94, + "learning_rate": 9.030943270670438e-06, + "loss": 6.0612, + "step": 359500 + }, + { + "epoch": 9.95, + "learning_rate": 9.029556923418179e-06, + "loss": 6.0563, + "step": 360000 + }, + { + "epoch": 9.97, + "learning_rate": 9.02817057616592e-06, + "loss": 6.059, + "step": 360500 + }, + { + "epoch": 9.98, + "learning_rate": 9.026784228913659e-06, + "loss": 6.0538, + "step": 361000 + }, + { + "epoch": 10.0, + "learning_rate": 9.0253978816614e-06, + "loss": 6.0537, + "step": 361500 + }, + { + "epoch": 10.01, + "learning_rate": 9.02401153440914e-06, + "loss": 6.0551, + "step": 362000 + }, + { + "epoch": 10.02, + "learning_rate": 9.022625187156879e-06, + "loss": 6.0582, + "step": 362500 + }, + { + "epoch": 10.04, + "learning_rate": 9.02123883990462e-06, + "loss": 6.0527, + "step": 363000 + }, + { + "epoch": 10.05, + "learning_rate": 9.01985249265236e-06, + "loss": 6.0551, + "step": 363500 + }, + { + "epoch": 10.06, + "learning_rate": 9.0184661454001e-06, + "loss": 6.0528, + "step": 364000 + }, + { + "epoch": 10.08, + "learning_rate": 9.017079798147841e-06, + "loss": 6.0538, + "step": 364500 + }, + { + "epoch": 10.09, + "learning_rate": 9.01569345089558e-06, + "loss": 6.0527, + "step": 365000 + }, + { + "epoch": 10.11, + "learning_rate": 9.014307103643321e-06, + "loss": 6.0543, + "step": 365500 + }, + { + "epoch": 10.12, + "learning_rate": 9.012920756391062e-06, + "loss": 6.0532, + "step": 366000 + }, + { + "epoch": 10.13, + "learning_rate": 9.0115344091388e-06, + "loss": 6.0526, + "step": 366500 + }, + { + "epoch": 10.15, + "learning_rate": 9.010148061886543e-06, + "loss": 6.0578, + "step": 367000 + }, + { + "epoch": 10.16, + "learning_rate": 9.008761714634282e-06, + "loss": 6.0532, + "step": 367500 + }, + { + "epoch": 10.18, + "learning_rate": 9.007375367382022e-06, + "loss": 6.0524, + "step": 368000 + }, + { + "epoch": 10.19, + "learning_rate": 9.005989020129763e-06, + "loss": 6.0546, + "step": 368500 + }, + { + "epoch": 10.2, + "learning_rate": 9.004602672877502e-06, + "loss": 6.0534, + "step": 369000 + }, + { + "epoch": 10.22, + "learning_rate": 9.003216325625244e-06, + "loss": 6.0546, + "step": 369500 + }, + { + "epoch": 10.23, + "learning_rate": 9.001829978372983e-06, + "loss": 6.0556, + "step": 370000 + }, + { + "epoch": 10.24, + "learning_rate": 9.000443631120724e-06, + "loss": 6.0522, + "step": 370500 + }, + { + "epoch": 10.26, + "learning_rate": 8.999057283868465e-06, + "loss": 6.0553, + "step": 371000 + }, + { + "epoch": 10.27, + "learning_rate": 8.997670936616204e-06, + "loss": 6.0552, + "step": 371500 + }, + { + "epoch": 10.29, + "learning_rate": 8.996284589363944e-06, + "loss": 6.0567, + "step": 372000 + }, + { + "epoch": 10.3, + "learning_rate": 8.994898242111685e-06, + "loss": 6.054, + "step": 372500 + }, + { + "epoch": 10.31, + "learning_rate": 8.993511894859424e-06, + "loss": 6.0512, + "step": 373000 + }, + { + "epoch": 10.33, + "learning_rate": 8.992125547607166e-06, + "loss": 6.0519, + "step": 373500 + }, + { + "epoch": 10.34, + "learning_rate": 8.990739200354905e-06, + "loss": 6.0544, + "step": 374000 + }, + { + "epoch": 10.36, + "learning_rate": 8.989352853102646e-06, + "loss": 6.0505, + "step": 374500 + }, + { + "epoch": 10.37, + "learning_rate": 8.987966505850386e-06, + "loss": 6.052, + "step": 375000 + }, + { + "epoch": 10.38, + "learning_rate": 8.986580158598125e-06, + "loss": 6.0513, + "step": 375500 + }, + { + "epoch": 10.4, + "learning_rate": 8.985193811345868e-06, + "loss": 6.0509, + "step": 376000 + }, + { + "epoch": 10.41, + "learning_rate": 8.983807464093607e-06, + "loss": 6.0537, + "step": 376500 + }, + { + "epoch": 10.42, + "learning_rate": 8.982421116841347e-06, + "loss": 6.0544, + "step": 377000 + }, + { + "epoch": 10.44, + "learning_rate": 8.981034769589088e-06, + "loss": 6.0494, + "step": 377500 + }, + { + "epoch": 10.45, + "learning_rate": 8.979648422336827e-06, + "loss": 6.055, + "step": 378000 + }, + { + "epoch": 10.47, + "learning_rate": 8.978262075084568e-06, + "loss": 6.0528, + "step": 378500 + }, + { + "epoch": 10.48, + "learning_rate": 8.976875727832308e-06, + "loss": 6.052, + "step": 379000 + }, + { + "epoch": 10.49, + "learning_rate": 8.975489380580047e-06, + "loss": 6.0506, + "step": 379500 + }, + { + "epoch": 10.51, + "learning_rate": 8.97410303332779e-06, + "loss": 6.0501, + "step": 380000 + }, + { + "epoch": 10.52, + "learning_rate": 8.972716686075528e-06, + "loss": 6.0532, + "step": 380500 + }, + { + "epoch": 10.53, + "learning_rate": 8.971330338823269e-06, + "loss": 6.0537, + "step": 381000 + }, + { + "epoch": 10.55, + "learning_rate": 8.96994399157101e-06, + "loss": 6.0498, + "step": 381500 + }, + { + "epoch": 10.56, + "learning_rate": 8.968557644318749e-06, + "loss": 6.0497, + "step": 382000 + }, + { + "epoch": 10.58, + "learning_rate": 8.967171297066491e-06, + "loss": 6.0528, + "step": 382500 + }, + { + "epoch": 10.59, + "learning_rate": 8.96578494981423e-06, + "loss": 6.0474, + "step": 383000 + }, + { + "epoch": 10.6, + "learning_rate": 8.96439860256197e-06, + "loss": 6.0528, + "step": 383500 + }, + { + "epoch": 10.62, + "learning_rate": 8.963012255309711e-06, + "loss": 6.0564, + "step": 384000 + }, + { + "epoch": 10.63, + "learning_rate": 8.96162590805745e-06, + "loss": 6.0487, + "step": 384500 + }, + { + "epoch": 10.65, + "learning_rate": 8.960239560805191e-06, + "loss": 6.054, + "step": 385000 + }, + { + "epoch": 10.66, + "learning_rate": 8.958853213552932e-06, + "loss": 6.0529, + "step": 385500 + }, + { + "epoch": 10.67, + "learning_rate": 8.957466866300672e-06, + "loss": 6.051, + "step": 386000 + }, + { + "epoch": 10.69, + "learning_rate": 8.956080519048413e-06, + "loss": 6.0515, + "step": 386500 + }, + { + "epoch": 10.7, + "learning_rate": 8.954694171796152e-06, + "loss": 6.0496, + "step": 387000 + }, + { + "epoch": 10.71, + "learning_rate": 8.953307824543892e-06, + "loss": 6.0513, + "step": 387500 + }, + { + "epoch": 10.73, + "learning_rate": 8.951921477291633e-06, + "loss": 6.0495, + "step": 388000 + }, + { + "epoch": 10.74, + "learning_rate": 8.950535130039372e-06, + "loss": 6.0488, + "step": 388500 + }, + { + "epoch": 10.76, + "learning_rate": 8.949148782787114e-06, + "loss": 6.0464, + "step": 389000 + }, + { + "epoch": 10.77, + "learning_rate": 8.947762435534853e-06, + "loss": 6.048, + "step": 389500 + }, + { + "epoch": 10.78, + "learning_rate": 8.946376088282594e-06, + "loss": 6.0476, + "step": 390000 + }, + { + "epoch": 10.8, + "learning_rate": 8.944989741030335e-06, + "loss": 6.048, + "step": 390500 + }, + { + "epoch": 10.81, + "learning_rate": 8.943603393778074e-06, + "loss": 6.05, + "step": 391000 + }, + { + "epoch": 10.83, + "learning_rate": 8.942217046525814e-06, + "loss": 6.0464, + "step": 391500 + }, + { + "epoch": 10.84, + "learning_rate": 8.940830699273555e-06, + "loss": 6.0486, + "step": 392000 + }, + { + "epoch": 10.85, + "learning_rate": 8.939444352021296e-06, + "loss": 6.0453, + "step": 392500 + }, + { + "epoch": 10.87, + "learning_rate": 8.938058004769036e-06, + "loss": 6.0499, + "step": 393000 + }, + { + "epoch": 10.88, + "learning_rate": 8.936671657516775e-06, + "loss": 6.0468, + "step": 393500 + }, + { + "epoch": 10.89, + "learning_rate": 8.935285310264516e-06, + "loss": 6.049, + "step": 394000 + }, + { + "epoch": 10.91, + "learning_rate": 8.933898963012256e-06, + "loss": 6.0479, + "step": 394500 + }, + { + "epoch": 10.92, + "learning_rate": 8.932512615759995e-06, + "loss": 6.0482, + "step": 395000 + }, + { + "epoch": 10.94, + "learning_rate": 8.931126268507736e-06, + "loss": 6.0482, + "step": 395500 + }, + { + "epoch": 10.95, + "learning_rate": 8.929739921255477e-06, + "loss": 6.0494, + "step": 396000 + }, + { + "epoch": 10.96, + "learning_rate": 8.928353574003217e-06, + "loss": 6.0475, + "step": 396500 + }, + { + "epoch": 10.98, + "learning_rate": 8.926967226750958e-06, + "loss": 6.0473, + "step": 397000 + }, + { + "epoch": 10.99, + "learning_rate": 8.925580879498697e-06, + "loss": 6.048, + "step": 397500 + }, + { + "epoch": 11.0, + "learning_rate": 8.924194532246438e-06, + "loss": 6.0486, + "step": 398000 + }, + { + "epoch": 11.02, + "learning_rate": 8.922808184994178e-06, + "loss": 6.0467, + "step": 398500 + }, + { + "epoch": 11.03, + "learning_rate": 8.921421837741919e-06, + "loss": 6.0501, + "step": 399000 + }, + { + "epoch": 11.05, + "learning_rate": 8.92003549048966e-06, + "loss": 6.0474, + "step": 399500 + }, + { + "epoch": 11.06, + "learning_rate": 8.918649143237398e-06, + "loss": 6.0438, + "step": 400000 + }, + { + "epoch": 11.07, + "learning_rate": 8.917262795985139e-06, + "loss": 6.0472, + "step": 400500 + }, + { + "epoch": 11.09, + "learning_rate": 8.91587644873288e-06, + "loss": 6.0478, + "step": 401000 + }, + { + "epoch": 11.1, + "learning_rate": 8.914490101480619e-06, + "loss": 6.0477, + "step": 401500 + }, + { + "epoch": 11.12, + "learning_rate": 8.91310375422836e-06, + "loss": 6.0473, + "step": 402000 + }, + { + "epoch": 11.13, + "learning_rate": 8.9117174069761e-06, + "loss": 6.0444, + "step": 402500 + }, + { + "epoch": 11.14, + "learning_rate": 8.91033105972384e-06, + "loss": 6.0459, + "step": 403000 + }, + { + "epoch": 11.16, + "learning_rate": 8.908944712471581e-06, + "loss": 6.0442, + "step": 403500 + }, + { + "epoch": 11.17, + "learning_rate": 8.90755836521932e-06, + "loss": 6.0492, + "step": 404000 + }, + { + "epoch": 11.18, + "learning_rate": 8.906172017967061e-06, + "loss": 6.0465, + "step": 404500 + }, + { + "epoch": 11.2, + "learning_rate": 8.904785670714802e-06, + "loss": 6.0487, + "step": 405000 + }, + { + "epoch": 11.21, + "learning_rate": 8.903399323462542e-06, + "loss": 6.0453, + "step": 405500 + }, + { + "epoch": 11.23, + "learning_rate": 8.902012976210281e-06, + "loss": 6.043, + "step": 406000 + }, + { + "epoch": 11.24, + "learning_rate": 8.900626628958022e-06, + "loss": 6.0432, + "step": 406500 + }, + { + "epoch": 11.25, + "learning_rate": 8.899240281705762e-06, + "loss": 6.0426, + "step": 407000 + }, + { + "epoch": 11.27, + "learning_rate": 8.897853934453503e-06, + "loss": 6.0503, + "step": 407500 + }, + { + "epoch": 11.28, + "learning_rate": 8.896467587201244e-06, + "loss": 6.044, + "step": 408000 + }, + { + "epoch": 11.3, + "learning_rate": 8.895081239948983e-06, + "loss": 6.0497, + "step": 408500 + }, + { + "epoch": 11.31, + "learning_rate": 8.893694892696723e-06, + "loss": 6.0422, + "step": 409000 + }, + { + "epoch": 11.32, + "learning_rate": 8.892308545444464e-06, + "loss": 6.0455, + "step": 409500 + }, + { + "epoch": 11.34, + "learning_rate": 8.890922198192203e-06, + "loss": 6.0473, + "step": 410000 + }, + { + "epoch": 11.35, + "learning_rate": 8.889535850939944e-06, + "loss": 6.045, + "step": 410500 + }, + { + "epoch": 11.36, + "learning_rate": 8.888149503687684e-06, + "loss": 6.0494, + "step": 411000 + }, + { + "epoch": 11.38, + "learning_rate": 8.886763156435425e-06, + "loss": 6.0426, + "step": 411500 + }, + { + "epoch": 11.39, + "learning_rate": 8.885376809183166e-06, + "loss": 6.0423, + "step": 412000 + }, + { + "epoch": 11.41, + "learning_rate": 8.883990461930904e-06, + "loss": 6.0446, + "step": 412500 + }, + { + "epoch": 11.42, + "learning_rate": 8.882604114678645e-06, + "loss": 6.0445, + "step": 413000 + }, + { + "epoch": 11.43, + "learning_rate": 8.881217767426386e-06, + "loss": 6.0417, + "step": 413500 + }, + { + "epoch": 11.45, + "learning_rate": 8.879831420174126e-06, + "loss": 6.0419, + "step": 414000 + }, + { + "epoch": 11.46, + "learning_rate": 8.878445072921867e-06, + "loss": 6.0432, + "step": 414500 + }, + { + "epoch": 11.47, + "learning_rate": 8.877058725669606e-06, + "loss": 6.0437, + "step": 415000 + }, + { + "epoch": 11.49, + "learning_rate": 8.875672378417347e-06, + "loss": 6.0442, + "step": 415500 + }, + { + "epoch": 11.5, + "learning_rate": 8.874286031165087e-06, + "loss": 6.0428, + "step": 416000 + }, + { + "epoch": 11.52, + "learning_rate": 8.872899683912826e-06, + "loss": 6.0421, + "step": 416500 + }, + { + "epoch": 11.53, + "learning_rate": 8.871513336660567e-06, + "loss": 6.0449, + "step": 417000 + }, + { + "epoch": 11.54, + "learning_rate": 8.870126989408308e-06, + "loss": 6.0434, + "step": 417500 + }, + { + "epoch": 11.56, + "learning_rate": 8.868740642156048e-06, + "loss": 6.0429, + "step": 418000 + }, + { + "epoch": 11.57, + "learning_rate": 8.867354294903789e-06, + "loss": 6.0467, + "step": 418500 + }, + { + "epoch": 11.59, + "learning_rate": 8.865967947651528e-06, + "loss": 6.0418, + "step": 419000 + }, + { + "epoch": 11.6, + "learning_rate": 8.864581600399268e-06, + "loss": 6.0402, + "step": 419500 + }, + { + "epoch": 11.61, + "learning_rate": 8.863195253147009e-06, + "loss": 6.0449, + "step": 420000 + }, + { + "epoch": 11.63, + "learning_rate": 8.861808905894748e-06, + "loss": 6.044, + "step": 420500 + }, + { + "epoch": 11.64, + "learning_rate": 8.86042255864249e-06, + "loss": 6.0391, + "step": 421000 + }, + { + "epoch": 11.65, + "learning_rate": 8.85903621139023e-06, + "loss": 6.0415, + "step": 421500 + }, + { + "epoch": 11.67, + "learning_rate": 8.85764986413797e-06, + "loss": 6.0446, + "step": 422000 + }, + { + "epoch": 11.68, + "learning_rate": 8.85626351688571e-06, + "loss": 6.0415, + "step": 422500 + }, + { + "epoch": 11.7, + "learning_rate": 8.85487716963345e-06, + "loss": 6.0436, + "step": 423000 + }, + { + "epoch": 11.71, + "learning_rate": 8.853490822381192e-06, + "loss": 6.0429, + "step": 423500 + }, + { + "epoch": 11.72, + "learning_rate": 8.852104475128931e-06, + "loss": 6.0436, + "step": 424000 + }, + { + "epoch": 11.74, + "learning_rate": 8.850718127876672e-06, + "loss": 6.0445, + "step": 424500 + }, + { + "epoch": 11.75, + "learning_rate": 8.849331780624412e-06, + "loss": 6.0408, + "step": 425000 + }, + { + "epoch": 11.77, + "learning_rate": 8.847945433372151e-06, + "loss": 6.0451, + "step": 425500 + }, + { + "epoch": 11.78, + "learning_rate": 8.846559086119892e-06, + "loss": 6.0442, + "step": 426000 + }, + { + "epoch": 11.79, + "learning_rate": 8.845172738867632e-06, + "loss": 6.0403, + "step": 426500 + }, + { + "epoch": 11.81, + "learning_rate": 8.843786391615371e-06, + "loss": 6.0429, + "step": 427000 + }, + { + "epoch": 11.82, + "learning_rate": 8.842400044363114e-06, + "loss": 6.0404, + "step": 427500 + }, + { + "epoch": 11.83, + "learning_rate": 8.841013697110853e-06, + "loss": 6.0422, + "step": 428000 + }, + { + "epoch": 11.85, + "learning_rate": 8.839627349858593e-06, + "loss": 6.0392, + "step": 428500 + }, + { + "epoch": 11.86, + "learning_rate": 8.838241002606334e-06, + "loss": 6.0425, + "step": 429000 + }, + { + "epoch": 11.88, + "learning_rate": 8.836854655354073e-06, + "loss": 6.0433, + "step": 429500 + }, + { + "epoch": 11.89, + "learning_rate": 8.835468308101815e-06, + "loss": 6.0429, + "step": 430000 + }, + { + "epoch": 11.9, + "learning_rate": 8.834081960849554e-06, + "loss": 6.0378, + "step": 430500 + }, + { + "epoch": 11.92, + "learning_rate": 8.832695613597295e-06, + "loss": 6.0374, + "step": 431000 + }, + { + "epoch": 11.93, + "learning_rate": 8.831309266345035e-06, + "loss": 6.0374, + "step": 431500 + }, + { + "epoch": 11.94, + "learning_rate": 8.829922919092774e-06, + "loss": 6.0418, + "step": 432000 + }, + { + "epoch": 11.96, + "learning_rate": 8.828536571840515e-06, + "loss": 6.0385, + "step": 432500 + }, + { + "epoch": 11.97, + "learning_rate": 8.827150224588256e-06, + "loss": 6.0385, + "step": 433000 + }, + { + "epoch": 11.99, + "learning_rate": 8.825763877335995e-06, + "loss": 6.0418, + "step": 433500 + }, + { + "epoch": 12.0, + "learning_rate": 8.824377530083737e-06, + "loss": 6.0395, + "step": 434000 + }, + { + "epoch": 12.01, + "learning_rate": 8.822991182831476e-06, + "loss": 6.0406, + "step": 434500 + }, + { + "epoch": 12.03, + "learning_rate": 8.821604835579217e-06, + "loss": 6.0399, + "step": 435000 + }, + { + "epoch": 12.04, + "learning_rate": 8.820218488326957e-06, + "loss": 6.0367, + "step": 435500 + }, + { + "epoch": 12.06, + "learning_rate": 8.818832141074696e-06, + "loss": 6.037, + "step": 436000 + }, + { + "epoch": 12.07, + "learning_rate": 8.817445793822439e-06, + "loss": 6.0423, + "step": 436500 + }, + { + "epoch": 12.08, + "learning_rate": 8.816059446570178e-06, + "loss": 6.0416, + "step": 437000 + }, + { + "epoch": 12.1, + "learning_rate": 8.814673099317918e-06, + "loss": 6.0387, + "step": 437500 + }, + { + "epoch": 12.11, + "learning_rate": 8.813286752065659e-06, + "loss": 6.0426, + "step": 438000 + }, + { + "epoch": 12.12, + "learning_rate": 8.811900404813398e-06, + "loss": 6.0418, + "step": 438500 + }, + { + "epoch": 12.14, + "learning_rate": 8.810514057561138e-06, + "loss": 6.042, + "step": 439000 + }, + { + "epoch": 12.15, + "learning_rate": 8.809127710308879e-06, + "loss": 6.039, + "step": 439500 + }, + { + "epoch": 12.17, + "learning_rate": 8.80774136305662e-06, + "loss": 6.0397, + "step": 440000 + }, + { + "epoch": 12.18, + "learning_rate": 8.80635501580436e-06, + "loss": 6.0397, + "step": 440500 + }, + { + "epoch": 12.19, + "learning_rate": 8.8049686685521e-06, + "loss": 6.038, + "step": 441000 + }, + { + "epoch": 12.21, + "learning_rate": 8.80358232129984e-06, + "loss": 6.0376, + "step": 441500 + }, + { + "epoch": 12.22, + "learning_rate": 8.80219597404758e-06, + "loss": 6.0332, + "step": 442000 + }, + { + "epoch": 12.24, + "learning_rate": 8.80080962679532e-06, + "loss": 6.0365, + "step": 442500 + }, + { + "epoch": 12.25, + "learning_rate": 8.799423279543062e-06, + "loss": 6.0373, + "step": 443000 + }, + { + "epoch": 12.26, + "learning_rate": 8.7980369322908e-06, + "loss": 6.0412, + "step": 443500 + }, + { + "epoch": 12.28, + "learning_rate": 8.796650585038541e-06, + "loss": 6.0364, + "step": 444000 + }, + { + "epoch": 12.29, + "learning_rate": 8.795264237786282e-06, + "loss": 6.0353, + "step": 444500 + }, + { + "epoch": 12.3, + "learning_rate": 8.793877890534021e-06, + "loss": 6.0388, + "step": 445000 + }, + { + "epoch": 12.32, + "learning_rate": 8.792491543281762e-06, + "loss": 6.0396, + "step": 445500 + }, + { + "epoch": 12.33, + "learning_rate": 8.791105196029502e-06, + "loss": 6.0406, + "step": 446000 + }, + { + "epoch": 12.35, + "learning_rate": 8.789718848777243e-06, + "loss": 6.0382, + "step": 446500 + }, + { + "epoch": 12.36, + "learning_rate": 8.788332501524984e-06, + "loss": 6.039, + "step": 447000 + }, + { + "epoch": 12.37, + "learning_rate": 8.786946154272723e-06, + "loss": 6.0368, + "step": 447500 + }, + { + "epoch": 12.39, + "learning_rate": 8.785559807020463e-06, + "loss": 6.0387, + "step": 448000 + }, + { + "epoch": 12.4, + "learning_rate": 8.784173459768204e-06, + "loss": 6.0348, + "step": 448500 + }, + { + "epoch": 12.41, + "learning_rate": 8.782787112515943e-06, + "loss": 6.0353, + "step": 449000 + }, + { + "epoch": 12.43, + "learning_rate": 8.781400765263684e-06, + "loss": 6.0355, + "step": 449500 + }, + { + "epoch": 12.44, + "learning_rate": 8.780014418011424e-06, + "loss": 6.0349, + "step": 450000 + }, + { + "epoch": 12.46, + "learning_rate": 8.778628070759165e-06, + "loss": 6.0359, + "step": 450500 + }, + { + "epoch": 12.47, + "learning_rate": 8.777241723506905e-06, + "loss": 6.0355, + "step": 451000 + }, + { + "epoch": 12.48, + "learning_rate": 8.775855376254644e-06, + "loss": 6.0393, + "step": 451500 + }, + { + "epoch": 12.5, + "learning_rate": 8.774469029002385e-06, + "loss": 6.0354, + "step": 452000 + }, + { + "epoch": 12.51, + "learning_rate": 8.773082681750126e-06, + "loss": 6.0337, + "step": 452500 + }, + { + "epoch": 12.53, + "learning_rate": 8.771696334497866e-06, + "loss": 6.0389, + "step": 453000 + }, + { + "epoch": 12.54, + "learning_rate": 8.770309987245605e-06, + "loss": 6.0354, + "step": 453500 + }, + { + "epoch": 12.55, + "learning_rate": 8.768923639993346e-06, + "loss": 6.0368, + "step": 454000 + }, + { + "epoch": 12.57, + "learning_rate": 8.767537292741087e-06, + "loss": 6.0371, + "step": 454500 + }, + { + "epoch": 12.58, + "learning_rate": 8.766150945488827e-06, + "loss": 6.0374, + "step": 455000 + }, + { + "epoch": 12.59, + "learning_rate": 8.764764598236566e-06, + "loss": 6.0381, + "step": 455500 + }, + { + "epoch": 12.61, + "learning_rate": 8.763378250984307e-06, + "loss": 6.0355, + "step": 456000 + }, + { + "epoch": 12.62, + "learning_rate": 8.761991903732047e-06, + "loss": 6.0348, + "step": 456500 + }, + { + "epoch": 12.64, + "learning_rate": 8.760605556479788e-06, + "loss": 6.0361, + "step": 457000 + }, + { + "epoch": 12.65, + "learning_rate": 8.759219209227529e-06, + "loss": 6.0346, + "step": 457500 + }, + { + "epoch": 12.66, + "learning_rate": 8.757832861975268e-06, + "loss": 6.0368, + "step": 458000 + }, + { + "epoch": 12.68, + "learning_rate": 8.756446514723008e-06, + "loss": 6.0332, + "step": 458500 + }, + { + "epoch": 12.69, + "learning_rate": 8.755060167470749e-06, + "loss": 6.0363, + "step": 459000 + }, + { + "epoch": 12.71, + "learning_rate": 8.75367382021849e-06, + "loss": 6.0372, + "step": 459500 + }, + { + "epoch": 12.72, + "learning_rate": 8.752287472966229e-06, + "loss": 6.0344, + "step": 460000 + }, + { + "epoch": 12.73, + "learning_rate": 8.75090112571397e-06, + "loss": 6.037, + "step": 460500 + }, + { + "epoch": 12.75, + "learning_rate": 8.74951477846171e-06, + "loss": 6.0365, + "step": 461000 + }, + { + "epoch": 12.76, + "learning_rate": 8.74812843120945e-06, + "loss": 6.0357, + "step": 461500 + }, + { + "epoch": 12.77, + "learning_rate": 8.746742083957191e-06, + "loss": 6.0341, + "step": 462000 + }, + { + "epoch": 12.79, + "learning_rate": 8.74535573670493e-06, + "loss": 6.0372, + "step": 462500 + }, + { + "epoch": 12.8, + "learning_rate": 8.74396938945267e-06, + "loss": 6.037, + "step": 463000 + }, + { + "epoch": 12.82, + "learning_rate": 8.742583042200411e-06, + "loss": 6.0362, + "step": 463500 + }, + { + "epoch": 12.83, + "learning_rate": 8.74119669494815e-06, + "loss": 6.0354, + "step": 464000 + }, + { + "epoch": 12.84, + "learning_rate": 8.739810347695891e-06, + "loss": 6.0337, + "step": 464500 + }, + { + "epoch": 12.86, + "learning_rate": 8.738424000443632e-06, + "loss": 6.0322, + "step": 465000 + }, + { + "epoch": 12.87, + "learning_rate": 8.737037653191372e-06, + "loss": 6.0343, + "step": 465500 + }, + { + "epoch": 12.89, + "learning_rate": 8.735651305939113e-06, + "loss": 6.0346, + "step": 466000 + }, + { + "epoch": 12.9, + "learning_rate": 8.734264958686852e-06, + "loss": 6.0336, + "step": 466500 + }, + { + "epoch": 12.91, + "learning_rate": 8.732878611434593e-06, + "loss": 6.0343, + "step": 467000 + }, + { + "epoch": 12.93, + "learning_rate": 8.731492264182333e-06, + "loss": 6.0334, + "step": 467500 + }, + { + "epoch": 12.94, + "learning_rate": 8.730105916930072e-06, + "loss": 6.0339, + "step": 468000 + }, + { + "epoch": 12.95, + "learning_rate": 8.728719569677815e-06, + "loss": 6.0319, + "step": 468500 + }, + { + "epoch": 12.97, + "learning_rate": 8.727333222425553e-06, + "loss": 6.0341, + "step": 469000 + }, + { + "epoch": 12.98, + "learning_rate": 8.725946875173294e-06, + "loss": 6.0341, + "step": 469500 + }, + { + "epoch": 13.0, + "learning_rate": 8.724560527921035e-06, + "loss": 6.0323, + "step": 470000 + }, + { + "epoch": 13.01, + "learning_rate": 8.723174180668774e-06, + "loss": 6.0324, + "step": 470500 + }, + { + "epoch": 13.02, + "learning_rate": 8.721787833416514e-06, + "loss": 6.0323, + "step": 471000 + }, + { + "epoch": 13.04, + "learning_rate": 8.720401486164255e-06, + "loss": 6.0364, + "step": 471500 + }, + { + "epoch": 13.05, + "learning_rate": 8.719015138911996e-06, + "loss": 6.0301, + "step": 472000 + }, + { + "epoch": 13.06, + "learning_rate": 8.717628791659736e-06, + "loss": 6.0334, + "step": 472500 + }, + { + "epoch": 13.08, + "learning_rate": 8.716242444407475e-06, + "loss": 6.0358, + "step": 473000 + }, + { + "epoch": 13.09, + "learning_rate": 8.714856097155216e-06, + "loss": 6.0293, + "step": 473500 + }, + { + "epoch": 13.11, + "learning_rate": 8.713469749902957e-06, + "loss": 6.0352, + "step": 474000 + }, + { + "epoch": 13.12, + "learning_rate": 8.712083402650695e-06, + "loss": 6.0352, + "step": 474500 + }, + { + "epoch": 13.13, + "learning_rate": 8.710697055398438e-06, + "loss": 6.0328, + "step": 475000 + }, + { + "epoch": 13.15, + "learning_rate": 8.709310708146177e-06, + "loss": 6.0353, + "step": 475500 + }, + { + "epoch": 13.16, + "learning_rate": 8.707924360893917e-06, + "loss": 6.0314, + "step": 476000 + }, + { + "epoch": 13.18, + "learning_rate": 8.706538013641658e-06, + "loss": 6.0311, + "step": 476500 + }, + { + "epoch": 13.19, + "learning_rate": 8.705151666389397e-06, + "loss": 6.0348, + "step": 477000 + }, + { + "epoch": 13.2, + "learning_rate": 8.703765319137138e-06, + "loss": 6.0327, + "step": 477500 + }, + { + "epoch": 13.22, + "learning_rate": 8.702378971884878e-06, + "loss": 6.0325, + "step": 478000 + }, + { + "epoch": 13.23, + "learning_rate": 8.700992624632619e-06, + "loss": 6.0342, + "step": 478500 + }, + { + "epoch": 13.24, + "learning_rate": 8.69960627738036e-06, + "loss": 6.0313, + "step": 479000 + }, + { + "epoch": 13.26, + "learning_rate": 8.698219930128099e-06, + "loss": 6.0302, + "step": 479500 + }, + { + "epoch": 13.27, + "learning_rate": 8.69683358287584e-06, + "loss": 6.0313, + "step": 480000 + }, + { + "epoch": 13.29, + "learning_rate": 8.69544723562358e-06, + "loss": 6.0299, + "step": 480500 + }, + { + "epoch": 13.3, + "learning_rate": 8.694060888371319e-06, + "loss": 6.0321, + "step": 481000 + }, + { + "epoch": 13.31, + "learning_rate": 8.692674541119061e-06, + "loss": 6.0286, + "step": 481500 + }, + { + "epoch": 13.33, + "learning_rate": 8.6912881938668e-06, + "loss": 6.0296, + "step": 482000 + }, + { + "epoch": 13.34, + "learning_rate": 8.68990184661454e-06, + "loss": 6.0352, + "step": 482500 + }, + { + "epoch": 13.36, + "learning_rate": 8.688515499362281e-06, + "loss": 6.0306, + "step": 483000 + }, + { + "epoch": 13.37, + "learning_rate": 8.68712915211002e-06, + "loss": 6.0323, + "step": 483500 + }, + { + "epoch": 13.38, + "learning_rate": 8.685742804857763e-06, + "loss": 6.0303, + "step": 484000 + }, + { + "epoch": 13.4, + "learning_rate": 8.684356457605502e-06, + "loss": 6.0338, + "step": 484500 + }, + { + "epoch": 13.41, + "learning_rate": 8.682970110353242e-06, + "loss": 6.0341, + "step": 485000 + }, + { + "epoch": 13.42, + "learning_rate": 8.681583763100983e-06, + "loss": 6.0328, + "step": 485500 + }, + { + "epoch": 13.44, + "learning_rate": 8.680197415848722e-06, + "loss": 6.0328, + "step": 486000 + }, + { + "epoch": 13.45, + "learning_rate": 8.678811068596463e-06, + "loss": 6.0311, + "step": 486500 + }, + { + "epoch": 13.47, + "learning_rate": 8.677424721344203e-06, + "loss": 6.035, + "step": 487000 + }, + { + "epoch": 13.48, + "learning_rate": 8.676038374091942e-06, + "loss": 6.0337, + "step": 487500 + }, + { + "epoch": 13.49, + "learning_rate": 8.674652026839684e-06, + "loss": 6.031, + "step": 488000 + }, + { + "epoch": 13.51, + "learning_rate": 8.673265679587423e-06, + "loss": 6.0288, + "step": 488500 + }, + { + "epoch": 13.52, + "learning_rate": 8.671879332335164e-06, + "loss": 6.0333, + "step": 489000 + }, + { + "epoch": 13.53, + "learning_rate": 8.670492985082905e-06, + "loss": 6.0315, + "step": 489500 + }, + { + "epoch": 13.55, + "learning_rate": 8.669106637830644e-06, + "loss": 6.0295, + "step": 490000 + }, + { + "epoch": 13.56, + "learning_rate": 8.667720290578386e-06, + "loss": 6.0286, + "step": 490500 + }, + { + "epoch": 13.58, + "learning_rate": 8.666333943326125e-06, + "loss": 6.0334, + "step": 491000 + }, + { + "epoch": 13.59, + "learning_rate": 8.664947596073866e-06, + "loss": 6.0271, + "step": 491500 + }, + { + "epoch": 13.6, + "learning_rate": 8.663561248821606e-06, + "loss": 6.0305, + "step": 492000 + }, + { + "epoch": 13.62, + "learning_rate": 8.662174901569345e-06, + "loss": 6.0251, + "step": 492500 + }, + { + "epoch": 13.63, + "learning_rate": 8.660788554317086e-06, + "loss": 6.0235, + "step": 493000 + }, + { + "epoch": 13.65, + "learning_rate": 8.659402207064827e-06, + "loss": 6.0309, + "step": 493500 + }, + { + "epoch": 13.66, + "learning_rate": 8.658015859812567e-06, + "loss": 6.0292, + "step": 494000 + }, + { + "epoch": 13.67, + "learning_rate": 8.656629512560308e-06, + "loss": 6.0283, + "step": 494500 + }, + { + "epoch": 13.69, + "learning_rate": 8.655243165308047e-06, + "loss": 6.03, + "step": 495000 + }, + { + "epoch": 13.7, + "learning_rate": 8.653856818055787e-06, + "loss": 6.0331, + "step": 495500 + }, + { + "epoch": 13.71, + "learning_rate": 8.652470470803528e-06, + "loss": 6.029, + "step": 496000 + }, + { + "epoch": 13.73, + "learning_rate": 8.651084123551267e-06, + "loss": 6.0267, + "step": 496500 + }, + { + "epoch": 13.74, + "learning_rate": 8.649697776299008e-06, + "loss": 6.0315, + "step": 497000 + }, + { + "epoch": 13.76, + "learning_rate": 8.648311429046748e-06, + "loss": 6.0285, + "step": 497500 + }, + { + "epoch": 13.77, + "learning_rate": 8.646925081794489e-06, + "loss": 6.0304, + "step": 498000 + }, + { + "epoch": 13.78, + "learning_rate": 8.64553873454223e-06, + "loss": 6.0314, + "step": 498500 + }, + { + "epoch": 13.8, + "learning_rate": 8.644152387289969e-06, + "loss": 6.0293, + "step": 499000 + }, + { + "epoch": 13.81, + "learning_rate": 8.64276604003771e-06, + "loss": 6.0279, + "step": 499500 + }, + { + "epoch": 13.83, + "learning_rate": 8.64137969278545e-06, + "loss": 6.0314, + "step": 500000 + }, + { + "epoch": 13.84, + "learning_rate": 8.63999334553319e-06, + "loss": 6.0332, + "step": 500500 + }, + { + "epoch": 13.85, + "learning_rate": 8.638606998280931e-06, + "loss": 6.028, + "step": 501000 + }, + { + "epoch": 13.87, + "learning_rate": 8.63722065102867e-06, + "loss": 6.028, + "step": 501500 + }, + { + "epoch": 13.88, + "learning_rate": 8.63583430377641e-06, + "loss": 6.0295, + "step": 502000 + }, + { + "epoch": 13.89, + "learning_rate": 8.634447956524151e-06, + "loss": 6.0291, + "step": 502500 + }, + { + "epoch": 13.91, + "learning_rate": 8.63306160927189e-06, + "loss": 6.0304, + "step": 503000 + }, + { + "epoch": 13.92, + "learning_rate": 8.631675262019631e-06, + "loss": 6.0275, + "step": 503500 + }, + { + "epoch": 13.94, + "learning_rate": 8.630288914767372e-06, + "loss": 6.0305, + "step": 504000 + }, + { + "epoch": 13.95, + "learning_rate": 8.628902567515112e-06, + "loss": 6.0277, + "step": 504500 + }, + { + "epoch": 13.96, + "learning_rate": 8.627516220262853e-06, + "loss": 6.0273, + "step": 505000 + }, + { + "epoch": 13.98, + "learning_rate": 8.626129873010592e-06, + "loss": 6.0309, + "step": 505500 + }, + { + "epoch": 13.99, + "learning_rate": 8.624743525758333e-06, + "loss": 6.0277, + "step": 506000 + }, + { + "epoch": 14.0, + "learning_rate": 8.623357178506073e-06, + "loss": 6.0277, + "step": 506500 + }, + { + "epoch": 14.02, + "learning_rate": 8.621970831253814e-06, + "loss": 6.03, + "step": 507000 + }, + { + "epoch": 14.03, + "learning_rate": 8.620584484001553e-06, + "loss": 6.027, + "step": 507500 + }, + { + "epoch": 14.05, + "learning_rate": 8.619198136749293e-06, + "loss": 6.0255, + "step": 508000 + }, + { + "epoch": 14.06, + "learning_rate": 8.617811789497034e-06, + "loss": 6.0278, + "step": 508500 + }, + { + "epoch": 14.07, + "learning_rate": 8.616425442244775e-06, + "loss": 6.0248, + "step": 509000 + }, + { + "epoch": 14.09, + "learning_rate": 8.615039094992514e-06, + "loss": 6.0281, + "step": 509500 + }, + { + "epoch": 14.1, + "learning_rate": 8.613652747740254e-06, + "loss": 6.0281, + "step": 510000 + }, + { + "epoch": 14.12, + "learning_rate": 8.612266400487995e-06, + "loss": 6.0246, + "step": 510500 + }, + { + "epoch": 14.13, + "learning_rate": 8.610880053235736e-06, + "loss": 6.0302, + "step": 511000 + }, + { + "epoch": 14.14, + "learning_rate": 8.609493705983475e-06, + "loss": 6.0314, + "step": 511500 + }, + { + "epoch": 14.16, + "learning_rate": 8.608107358731215e-06, + "loss": 6.0285, + "step": 512000 + }, + { + "epoch": 14.17, + "learning_rate": 8.606721011478956e-06, + "loss": 6.025, + "step": 512500 + }, + { + "epoch": 14.18, + "learning_rate": 8.605334664226696e-06, + "loss": 6.0287, + "step": 513000 + }, + { + "epoch": 14.2, + "learning_rate": 8.603948316974437e-06, + "loss": 6.0264, + "step": 513500 + }, + { + "epoch": 14.21, + "learning_rate": 8.602561969722176e-06, + "loss": 6.0268, + "step": 514000 + }, + { + "epoch": 14.23, + "learning_rate": 8.601175622469917e-06, + "loss": 6.0288, + "step": 514500 + }, + { + "epoch": 14.24, + "learning_rate": 8.599789275217657e-06, + "loss": 6.0283, + "step": 515000 + }, + { + "epoch": 14.25, + "learning_rate": 8.598402927965398e-06, + "loss": 6.0278, + "step": 515500 + }, + { + "epoch": 14.27, + "learning_rate": 8.597016580713139e-06, + "loss": 6.0262, + "step": 516000 + }, + { + "epoch": 14.28, + "learning_rate": 8.595630233460878e-06, + "loss": 6.0258, + "step": 516500 + }, + { + "epoch": 14.29, + "step": 516769, + "total_flos": 1.272767802507264e+17, + "train_loss": 6.192780332760428, + "train_runtime": 197999.3322, + "train_samples_per_second": 2958.979, + "train_steps_per_second": 18.266 + } + ], + "logging_steps": 500, + "max_steps": 3616600, + "num_train_epochs": 100, + "save_steps": 1000000, + "total_flos": 1.272767802507264e+17, + "trial_name": null, + "trial_params": null +}