{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 32850, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.997199391171994e-05, "loss": 2.9757, "step": 25 }, { "epoch": 0.02, "learning_rate": 3.994155251141553e-05, "loss": 2.7383, "step": 50 }, { "epoch": 0.02, "learning_rate": 3.9911111111111114e-05, "loss": 2.5865, "step": 75 }, { "epoch": 0.03, "learning_rate": 3.98806697108067e-05, "loss": 2.6042, "step": 100 }, { "epoch": 0.04, "learning_rate": 3.9850228310502285e-05, "loss": 2.7077, "step": 125 }, { "epoch": 0.05, "learning_rate": 3.982100456621005e-05, "loss": 2.8069, "step": 150 }, { "epoch": 0.05, "learning_rate": 3.9790563165905637e-05, "loss": 2.5831, "step": 175 }, { "epoch": 0.06, "learning_rate": 3.976012176560122e-05, "loss": 2.5526, "step": 200 }, { "epoch": 0.07, "learning_rate": 3.972968036529681e-05, "loss": 2.5865, "step": 225 }, { "epoch": 0.08, "learning_rate": 3.9699238964992394e-05, "loss": 2.4963, "step": 250 }, { "epoch": 0.08, "learning_rate": 3.966879756468798e-05, "loss": 2.5408, "step": 275 }, { "epoch": 0.09, "learning_rate": 3.9638356164383565e-05, "loss": 2.7108, "step": 300 }, { "epoch": 0.1, "learning_rate": 3.960791476407915e-05, "loss": 2.6353, "step": 325 }, { "epoch": 0.11, "learning_rate": 3.9577473363774736e-05, "loss": 2.538, "step": 350 }, { "epoch": 0.11, "learning_rate": 3.954703196347032e-05, "loss": 2.6099, "step": 375 }, { "epoch": 0.12, "learning_rate": 3.951659056316591e-05, "loss": 2.569, "step": 400 }, { "epoch": 0.13, "learning_rate": 3.948614916286149e-05, "loss": 2.4251, "step": 425 }, { "epoch": 0.14, "learning_rate": 3.945570776255708e-05, "loss": 2.4954, "step": 450 }, { "epoch": 0.14, "learning_rate": 3.9425266362252665e-05, "loss": 2.3763, "step": 475 }, { "epoch": 0.15, "learning_rate": 3.939482496194826e-05, "loss": 2.5156, "step": 500 }, { "epoch": 0.16, "learning_rate": 3.936438356164384e-05, "loss": 2.4705, "step": 525 }, { "epoch": 0.17, "learning_rate": 3.933394216133942e-05, "loss": 2.5698, "step": 550 }, { "epoch": 0.18, "learning_rate": 3.930350076103501e-05, "loss": 2.497, "step": 575 }, { "epoch": 0.18, "learning_rate": 3.92730593607306e-05, "loss": 2.3479, "step": 600 }, { "epoch": 0.19, "learning_rate": 3.9242617960426186e-05, "loss": 2.4028, "step": 625 }, { "epoch": 0.2, "learning_rate": 3.921217656012177e-05, "loss": 2.4164, "step": 650 }, { "epoch": 0.21, "learning_rate": 3.918173515981735e-05, "loss": 2.5796, "step": 675 }, { "epoch": 0.21, "learning_rate": 3.915129375951294e-05, "loss": 2.5134, "step": 700 }, { "epoch": 0.22, "learning_rate": 3.912085235920853e-05, "loss": 2.4413, "step": 725 }, { "epoch": 0.23, "learning_rate": 3.9090410958904114e-05, "loss": 2.4925, "step": 750 }, { "epoch": 0.24, "learning_rate": 3.90599695585997e-05, "loss": 2.4394, "step": 775 }, { "epoch": 0.24, "learning_rate": 3.9029528158295285e-05, "loss": 2.4334, "step": 800 }, { "epoch": 0.25, "learning_rate": 3.899908675799087e-05, "loss": 2.3809, "step": 825 }, { "epoch": 0.26, "learning_rate": 3.896864535768646e-05, "loss": 2.4713, "step": 850 }, { "epoch": 0.27, "learning_rate": 3.893820395738204e-05, "loss": 2.3322, "step": 875 }, { "epoch": 0.27, "learning_rate": 3.890776255707763e-05, "loss": 2.3121, "step": 900 }, { "epoch": 0.28, "learning_rate": 3.8877321156773214e-05, "loss": 2.3795, "step": 925 }, { "epoch": 0.29, "learning_rate": 3.88468797564688e-05, "loss": 2.377, "step": 950 }, { "epoch": 0.3, "learning_rate": 3.8816438356164385e-05, "loss": 2.4454, "step": 975 }, { "epoch": 0.3, "learning_rate": 3.878599695585998e-05, "loss": 2.4502, "step": 1000 }, { "epoch": 0.31, "learning_rate": 3.8755555555555556e-05, "loss": 2.423, "step": 1025 }, { "epoch": 0.32, "learning_rate": 3.872511415525114e-05, "loss": 2.3717, "step": 1050 }, { "epoch": 0.33, "learning_rate": 3.869467275494673e-05, "loss": 2.4881, "step": 1075 }, { "epoch": 0.33, "learning_rate": 3.866423135464232e-05, "loss": 2.3898, "step": 1100 }, { "epoch": 0.34, "learning_rate": 3.8633789954337906e-05, "loss": 2.4626, "step": 1125 }, { "epoch": 0.35, "learning_rate": 3.860334855403349e-05, "loss": 2.3674, "step": 1150 }, { "epoch": 0.36, "learning_rate": 3.857290715372907e-05, "loss": 2.3929, "step": 1175 }, { "epoch": 0.37, "learning_rate": 3.854246575342466e-05, "loss": 2.3222, "step": 1200 }, { "epoch": 0.37, "learning_rate": 3.851202435312025e-05, "loss": 2.453, "step": 1225 }, { "epoch": 0.38, "learning_rate": 3.8481582952815834e-05, "loss": 2.4782, "step": 1250 }, { "epoch": 0.39, "learning_rate": 3.845114155251142e-05, "loss": 2.5002, "step": 1275 }, { "epoch": 0.4, "learning_rate": 3.8420700152207006e-05, "loss": 2.3431, "step": 1300 }, { "epoch": 0.4, "learning_rate": 3.839025875190259e-05, "loss": 2.3825, "step": 1325 }, { "epoch": 0.41, "learning_rate": 3.835981735159818e-05, "loss": 2.4681, "step": 1350 }, { "epoch": 0.42, "learning_rate": 3.832937595129376e-05, "loss": 2.4481, "step": 1375 }, { "epoch": 0.43, "learning_rate": 3.829893455098935e-05, "loss": 2.3781, "step": 1400 }, { "epoch": 0.43, "learning_rate": 3.8268493150684934e-05, "loss": 2.309, "step": 1425 }, { "epoch": 0.44, "learning_rate": 3.823805175038052e-05, "loss": 2.4879, "step": 1450 }, { "epoch": 0.45, "learning_rate": 3.8207610350076105e-05, "loss": 2.3736, "step": 1475 }, { "epoch": 0.46, "learning_rate": 3.81771689497717e-05, "loss": 2.5061, "step": 1500 }, { "epoch": 0.46, "learning_rate": 3.814672754946728e-05, "loss": 2.3825, "step": 1525 }, { "epoch": 0.47, "learning_rate": 3.811628614916286e-05, "loss": 2.3851, "step": 1550 }, { "epoch": 0.48, "learning_rate": 3.808584474885845e-05, "loss": 2.3361, "step": 1575 }, { "epoch": 0.49, "learning_rate": 3.805540334855404e-05, "loss": 2.4237, "step": 1600 }, { "epoch": 0.49, "learning_rate": 3.8024961948249626e-05, "loss": 2.3025, "step": 1625 }, { "epoch": 0.5, "learning_rate": 3.7994520547945205e-05, "loss": 2.3862, "step": 1650 }, { "epoch": 0.51, "learning_rate": 3.796407914764079e-05, "loss": 2.3268, "step": 1675 }, { "epoch": 0.52, "learning_rate": 3.793363774733638e-05, "loss": 2.3134, "step": 1700 }, { "epoch": 0.53, "learning_rate": 3.790319634703197e-05, "loss": 2.3043, "step": 1725 }, { "epoch": 0.53, "learning_rate": 3.7872754946727555e-05, "loss": 2.2921, "step": 1750 }, { "epoch": 0.54, "learning_rate": 3.784231354642314e-05, "loss": 2.3658, "step": 1775 }, { "epoch": 0.55, "learning_rate": 3.7811872146118726e-05, "loss": 2.3773, "step": 1800 }, { "epoch": 0.56, "learning_rate": 3.778143074581431e-05, "loss": 2.3391, "step": 1825 }, { "epoch": 0.56, "learning_rate": 3.77509893455099e-05, "loss": 2.3664, "step": 1850 }, { "epoch": 0.57, "learning_rate": 3.772054794520548e-05, "loss": 2.3339, "step": 1875 }, { "epoch": 0.58, "learning_rate": 3.769010654490107e-05, "loss": 2.2858, "step": 1900 }, { "epoch": 0.59, "learning_rate": 3.7659665144596654e-05, "loss": 2.344, "step": 1925 }, { "epoch": 0.59, "learning_rate": 3.762922374429224e-05, "loss": 2.2802, "step": 1950 }, { "epoch": 0.6, "learning_rate": 3.7598782343987826e-05, "loss": 2.2779, "step": 1975 }, { "epoch": 0.61, "learning_rate": 3.756834094368341e-05, "loss": 2.3558, "step": 2000 }, { "epoch": 0.62, "learning_rate": 3.7537899543379e-05, "loss": 2.3082, "step": 2025 }, { "epoch": 0.62, "learning_rate": 3.750745814307458e-05, "loss": 2.3199, "step": 2050 }, { "epoch": 0.63, "learning_rate": 3.747701674277017e-05, "loss": 2.4683, "step": 2075 }, { "epoch": 0.64, "learning_rate": 3.744657534246576e-05, "loss": 2.4058, "step": 2100 }, { "epoch": 0.65, "learning_rate": 3.741613394216135e-05, "loss": 2.3419, "step": 2125 }, { "epoch": 0.65, "learning_rate": 3.7385692541856926e-05, "loss": 2.2594, "step": 2150 }, { "epoch": 0.66, "learning_rate": 3.735525114155251e-05, "loss": 2.224, "step": 2175 }, { "epoch": 0.67, "learning_rate": 3.7324809741248104e-05, "loss": 2.4244, "step": 2200 }, { "epoch": 0.68, "learning_rate": 3.729436834094369e-05, "loss": 2.2928, "step": 2225 }, { "epoch": 0.68, "learning_rate": 3.7263926940639275e-05, "loss": 2.2732, "step": 2250 }, { "epoch": 0.69, "learning_rate": 3.7233485540334854e-05, "loss": 2.4067, "step": 2275 }, { "epoch": 0.7, "learning_rate": 3.7203044140030446e-05, "loss": 2.3029, "step": 2300 }, { "epoch": 0.71, "learning_rate": 3.717260273972603e-05, "loss": 2.3482, "step": 2325 }, { "epoch": 0.72, "learning_rate": 3.714216133942162e-05, "loss": 2.2777, "step": 2350 }, { "epoch": 0.72, "learning_rate": 3.7111719939117203e-05, "loss": 2.2948, "step": 2375 }, { "epoch": 0.73, "learning_rate": 3.708127853881279e-05, "loss": 2.3709, "step": 2400 }, { "epoch": 0.74, "learning_rate": 3.7050837138508375e-05, "loss": 2.3924, "step": 2425 }, { "epoch": 0.75, "learning_rate": 3.702039573820396e-05, "loss": 2.3185, "step": 2450 }, { "epoch": 0.75, "learning_rate": 3.6989954337899546e-05, "loss": 2.3042, "step": 2475 }, { "epoch": 0.76, "learning_rate": 3.695951293759513e-05, "loss": 2.3062, "step": 2500 }, { "epoch": 0.77, "learning_rate": 3.692907153729072e-05, "loss": 2.2792, "step": 2525 }, { "epoch": 0.78, "learning_rate": 3.68986301369863e-05, "loss": 2.2511, "step": 2550 }, { "epoch": 0.78, "learning_rate": 3.686818873668189e-05, "loss": 2.2938, "step": 2575 }, { "epoch": 0.79, "learning_rate": 3.683774733637748e-05, "loss": 2.3167, "step": 2600 }, { "epoch": 0.8, "learning_rate": 3.680730593607306e-05, "loss": 2.3586, "step": 2625 }, { "epoch": 0.81, "learning_rate": 3.6776864535768646e-05, "loss": 2.2705, "step": 2650 }, { "epoch": 0.81, "learning_rate": 3.674642313546423e-05, "loss": 2.1603, "step": 2675 }, { "epoch": 0.82, "learning_rate": 3.6715981735159824e-05, "loss": 2.3594, "step": 2700 }, { "epoch": 0.83, "learning_rate": 3.668554033485541e-05, "loss": 2.308, "step": 2725 }, { "epoch": 0.84, "learning_rate": 3.6655098934550995e-05, "loss": 2.3469, "step": 2750 }, { "epoch": 0.84, "learning_rate": 3.6624657534246574e-05, "loss": 2.3153, "step": 2775 }, { "epoch": 0.85, "learning_rate": 3.659421613394217e-05, "loss": 2.3836, "step": 2800 }, { "epoch": 0.86, "learning_rate": 3.656377473363775e-05, "loss": 2.2617, "step": 2825 }, { "epoch": 0.87, "learning_rate": 3.653333333333334e-05, "loss": 2.3834, "step": 2850 }, { "epoch": 0.88, "learning_rate": 3.6502891933028924e-05, "loss": 2.2582, "step": 2875 }, { "epoch": 0.88, "learning_rate": 3.647245053272451e-05, "loss": 2.2326, "step": 2900 }, { "epoch": 0.89, "learning_rate": 3.6442009132420095e-05, "loss": 2.2157, "step": 2925 }, { "epoch": 0.9, "learning_rate": 3.641156773211568e-05, "loss": 2.2988, "step": 2950 }, { "epoch": 0.91, "learning_rate": 3.6381126331811267e-05, "loss": 2.2304, "step": 2975 }, { "epoch": 0.91, "learning_rate": 3.635068493150685e-05, "loss": 2.1554, "step": 3000 }, { "epoch": 0.92, "learning_rate": 3.632024353120244e-05, "loss": 2.2901, "step": 3025 }, { "epoch": 0.93, "learning_rate": 3.6289802130898024e-05, "loss": 2.1346, "step": 3050 }, { "epoch": 0.94, "learning_rate": 3.625936073059361e-05, "loss": 2.2658, "step": 3075 }, { "epoch": 0.94, "learning_rate": 3.62289193302892e-05, "loss": 2.2892, "step": 3100 }, { "epoch": 0.95, "learning_rate": 3.619847792998478e-05, "loss": 2.4276, "step": 3125 }, { "epoch": 0.96, "learning_rate": 3.6168036529680366e-05, "loss": 2.3805, "step": 3150 }, { "epoch": 0.97, "learning_rate": 3.613759512937595e-05, "loss": 2.3277, "step": 3175 }, { "epoch": 0.97, "learning_rate": 3.6107153729071544e-05, "loss": 2.253, "step": 3200 }, { "epoch": 0.98, "learning_rate": 3.607671232876713e-05, "loss": 2.2018, "step": 3225 }, { "epoch": 0.99, "learning_rate": 3.604627092846271e-05, "loss": 2.3574, "step": 3250 }, { "epoch": 1.0, "learning_rate": 3.6015829528158295e-05, "loss": 2.3147, "step": 3275 }, { "epoch": 1.0, "learning_rate": 3.598538812785389e-05, "loss": 2.247, "step": 3300 }, { "epoch": 1.01, "learning_rate": 3.595494672754947e-05, "loss": 1.9512, "step": 3325 }, { "epoch": 1.02, "learning_rate": 3.592450532724506e-05, "loss": 2.0597, "step": 3350 }, { "epoch": 1.03, "learning_rate": 3.5894063926940644e-05, "loss": 2.0195, "step": 3375 }, { "epoch": 1.04, "learning_rate": 3.586362252663623e-05, "loss": 1.9563, "step": 3400 }, { "epoch": 1.04, "learning_rate": 3.5833181126331816e-05, "loss": 1.9845, "step": 3425 }, { "epoch": 1.05, "learning_rate": 3.58027397260274e-05, "loss": 2.0274, "step": 3450 }, { "epoch": 1.06, "learning_rate": 3.577229832572299e-05, "loss": 2.051, "step": 3475 }, { "epoch": 1.07, "learning_rate": 3.574185692541857e-05, "loss": 1.9961, "step": 3500 }, { "epoch": 1.07, "learning_rate": 3.571141552511416e-05, "loss": 1.9761, "step": 3525 }, { "epoch": 1.08, "learning_rate": 3.5680974124809744e-05, "loss": 2.0424, "step": 3550 }, { "epoch": 1.09, "learning_rate": 3.565053272450533e-05, "loss": 1.9622, "step": 3575 }, { "epoch": 1.1, "learning_rate": 3.5620091324200915e-05, "loss": 1.992, "step": 3600 }, { "epoch": 1.1, "learning_rate": 3.55896499238965e-05, "loss": 2.0852, "step": 3625 }, { "epoch": 1.11, "learning_rate": 3.555920852359209e-05, "loss": 2.1086, "step": 3650 }, { "epoch": 1.12, "learning_rate": 3.552876712328767e-05, "loss": 2.0231, "step": 3675 }, { "epoch": 1.13, "learning_rate": 3.5498325722983265e-05, "loss": 1.9472, "step": 3700 }, { "epoch": 1.13, "learning_rate": 3.546788432267885e-05, "loss": 1.9655, "step": 3725 }, { "epoch": 1.14, "learning_rate": 3.543744292237443e-05, "loss": 2.04, "step": 3750 }, { "epoch": 1.15, "learning_rate": 3.5407001522070015e-05, "loss": 2.1005, "step": 3775 }, { "epoch": 1.16, "learning_rate": 3.537656012176561e-05, "loss": 2.1219, "step": 3800 }, { "epoch": 1.16, "learning_rate": 3.534611872146119e-05, "loss": 2.046, "step": 3825 }, { "epoch": 1.17, "learning_rate": 3.531567732115678e-05, "loss": 2.0985, "step": 3850 }, { "epoch": 1.18, "learning_rate": 3.528523592085236e-05, "loss": 2.0795, "step": 3875 }, { "epoch": 1.19, "learning_rate": 3.525479452054795e-05, "loss": 2.0962, "step": 3900 }, { "epoch": 1.19, "learning_rate": 3.5224353120243536e-05, "loss": 2.052, "step": 3925 }, { "epoch": 1.2, "learning_rate": 3.519391171993912e-05, "loss": 2.1188, "step": 3950 }, { "epoch": 1.21, "learning_rate": 3.516347031963471e-05, "loss": 2.0075, "step": 3975 }, { "epoch": 1.22, "learning_rate": 3.513302891933029e-05, "loss": 2.0194, "step": 4000 }, { "epoch": 1.23, "learning_rate": 3.510258751902588e-05, "loss": 2.0319, "step": 4025 }, { "epoch": 1.23, "learning_rate": 3.5072146118721464e-05, "loss": 1.9817, "step": 4050 }, { "epoch": 1.24, "learning_rate": 3.504170471841705e-05, "loss": 2.0719, "step": 4075 }, { "epoch": 1.25, "learning_rate": 3.5011263318112636e-05, "loss": 1.9931, "step": 4100 }, { "epoch": 1.26, "learning_rate": 3.498082191780822e-05, "loss": 1.9983, "step": 4125 }, { "epoch": 1.26, "learning_rate": 3.495038051750381e-05, "loss": 2.1237, "step": 4150 }, { "epoch": 1.27, "learning_rate": 3.491993911719939e-05, "loss": 2.011, "step": 4175 }, { "epoch": 1.28, "learning_rate": 3.4889497716894985e-05, "loss": 2.0294, "step": 4200 }, { "epoch": 1.29, "learning_rate": 3.4859056316590564e-05, "loss": 2.063, "step": 4225 }, { "epoch": 1.29, "learning_rate": 3.482861491628615e-05, "loss": 2.0753, "step": 4250 }, { "epoch": 1.3, "learning_rate": 3.4798173515981735e-05, "loss": 2.0859, "step": 4275 }, { "epoch": 1.31, "learning_rate": 3.476773211567733e-05, "loss": 1.9657, "step": 4300 }, { "epoch": 1.32, "learning_rate": 3.4737290715372914e-05, "loss": 2.0901, "step": 4325 }, { "epoch": 1.32, "learning_rate": 3.47068493150685e-05, "loss": 2.0066, "step": 4350 }, { "epoch": 1.33, "learning_rate": 3.467640791476408e-05, "loss": 2.0134, "step": 4375 }, { "epoch": 1.34, "learning_rate": 3.464596651445967e-05, "loss": 2.0645, "step": 4400 }, { "epoch": 1.35, "learning_rate": 3.4615525114155256e-05, "loss": 2.0559, "step": 4425 }, { "epoch": 1.35, "learning_rate": 3.458508371385084e-05, "loss": 2.1435, "step": 4450 }, { "epoch": 1.36, "learning_rate": 3.455464231354643e-05, "loss": 1.9392, "step": 4475 }, { "epoch": 1.37, "learning_rate": 3.452420091324201e-05, "loss": 2.0839, "step": 4500 }, { "epoch": 1.38, "learning_rate": 3.44937595129376e-05, "loss": 2.1076, "step": 4525 }, { "epoch": 1.39, "learning_rate": 3.4463318112633185e-05, "loss": 2.0119, "step": 4550 }, { "epoch": 1.39, "learning_rate": 3.443287671232877e-05, "loss": 2.1092, "step": 4575 }, { "epoch": 1.4, "learning_rate": 3.4402435312024356e-05, "loss": 2.0383, "step": 4600 }, { "epoch": 1.41, "learning_rate": 3.437199391171994e-05, "loss": 2.1265, "step": 4625 }, { "epoch": 1.42, "learning_rate": 3.434155251141553e-05, "loss": 2.07, "step": 4650 }, { "epoch": 1.42, "learning_rate": 3.431111111111111e-05, "loss": 2.0152, "step": 4675 }, { "epoch": 1.43, "learning_rate": 3.42806697108067e-05, "loss": 2.008, "step": 4700 }, { "epoch": 1.44, "learning_rate": 3.4250228310502284e-05, "loss": 1.9992, "step": 4725 }, { "epoch": 1.45, "learning_rate": 3.421978691019787e-05, "loss": 2.0191, "step": 4750 }, { "epoch": 1.45, "learning_rate": 3.4189345509893456e-05, "loss": 2.0505, "step": 4775 }, { "epoch": 1.46, "learning_rate": 3.415890410958904e-05, "loss": 2.0542, "step": 4800 }, { "epoch": 1.47, "learning_rate": 3.4128462709284634e-05, "loss": 2.1455, "step": 4825 }, { "epoch": 1.48, "learning_rate": 3.409802130898021e-05, "loss": 1.9989, "step": 4850 }, { "epoch": 1.48, "learning_rate": 3.40675799086758e-05, "loss": 2.0734, "step": 4875 }, { "epoch": 1.49, "learning_rate": 3.4037138508371384e-05, "loss": 1.9921, "step": 4900 }, { "epoch": 1.5, "learning_rate": 3.400669710806698e-05, "loss": 2.1307, "step": 4925 }, { "epoch": 1.51, "learning_rate": 3.397625570776256e-05, "loss": 2.0824, "step": 4950 }, { "epoch": 1.51, "learning_rate": 3.394581430745815e-05, "loss": 1.9928, "step": 4975 }, { "epoch": 1.52, "learning_rate": 3.391537290715373e-05, "loss": 2.0448, "step": 5000 }, { "epoch": 1.53, "learning_rate": 3.388493150684932e-05, "loss": 2.0779, "step": 5025 }, { "epoch": 1.54, "learning_rate": 3.3854490106544905e-05, "loss": 2.0343, "step": 5050 }, { "epoch": 1.54, "learning_rate": 3.382404870624049e-05, "loss": 1.9332, "step": 5075 }, { "epoch": 1.55, "learning_rate": 3.3793607305936076e-05, "loss": 2.1341, "step": 5100 }, { "epoch": 1.56, "learning_rate": 3.376316590563166e-05, "loss": 1.978, "step": 5125 }, { "epoch": 1.57, "learning_rate": 3.373272450532725e-05, "loss": 1.9498, "step": 5150 }, { "epoch": 1.58, "learning_rate": 3.3702283105022834e-05, "loss": 1.9063, "step": 5175 }, { "epoch": 1.58, "learning_rate": 3.367184170471842e-05, "loss": 2.0495, "step": 5200 }, { "epoch": 1.59, "learning_rate": 3.3641400304414005e-05, "loss": 2.02, "step": 5225 }, { "epoch": 1.6, "learning_rate": 3.361095890410959e-05, "loss": 1.973, "step": 5250 }, { "epoch": 1.61, "learning_rate": 3.3580517503805176e-05, "loss": 1.96, "step": 5275 }, { "epoch": 1.61, "learning_rate": 3.355007610350076e-05, "loss": 2.0725, "step": 5300 }, { "epoch": 1.62, "learning_rate": 3.351963470319635e-05, "loss": 2.1469, "step": 5325 }, { "epoch": 1.63, "learning_rate": 3.348919330289193e-05, "loss": 2.0721, "step": 5350 }, { "epoch": 1.64, "learning_rate": 3.345875190258752e-05, "loss": 2.1235, "step": 5375 }, { "epoch": 1.64, "learning_rate": 3.3428310502283105e-05, "loss": 1.9994, "step": 5400 }, { "epoch": 1.65, "learning_rate": 3.33978691019787e-05, "loss": 2.0654, "step": 5425 }, { "epoch": 1.66, "learning_rate": 3.336742770167428e-05, "loss": 2.0628, "step": 5450 }, { "epoch": 1.67, "learning_rate": 3.333698630136986e-05, "loss": 2.0541, "step": 5475 }, { "epoch": 1.67, "learning_rate": 3.330654490106545e-05, "loss": 2.0388, "step": 5500 }, { "epoch": 1.68, "learning_rate": 3.327610350076104e-05, "loss": 1.9874, "step": 5525 }, { "epoch": 1.69, "learning_rate": 3.3245662100456625e-05, "loss": 2.0574, "step": 5550 }, { "epoch": 1.7, "learning_rate": 3.321522070015221e-05, "loss": 1.9692, "step": 5575 }, { "epoch": 1.7, "learning_rate": 3.31847792998478e-05, "loss": 1.932, "step": 5600 }, { "epoch": 1.71, "learning_rate": 3.315433789954338e-05, "loss": 1.8747, "step": 5625 }, { "epoch": 1.72, "learning_rate": 3.312389649923897e-05, "loss": 2.0928, "step": 5650 }, { "epoch": 1.73, "learning_rate": 3.3093455098934554e-05, "loss": 2.1114, "step": 5675 }, { "epoch": 1.74, "learning_rate": 3.306301369863014e-05, "loss": 2.0353, "step": 5700 }, { "epoch": 1.74, "learning_rate": 3.3032572298325725e-05, "loss": 1.9485, "step": 5725 }, { "epoch": 1.75, "learning_rate": 3.300213089802131e-05, "loss": 2.0028, "step": 5750 }, { "epoch": 1.76, "learning_rate": 3.2971689497716897e-05, "loss": 2.026, "step": 5775 }, { "epoch": 1.77, "learning_rate": 3.294124809741248e-05, "loss": 2.1633, "step": 5800 }, { "epoch": 1.77, "learning_rate": 3.291080669710807e-05, "loss": 2.0141, "step": 5825 }, { "epoch": 1.78, "learning_rate": 3.2880365296803654e-05, "loss": 1.9247, "step": 5850 }, { "epoch": 1.79, "learning_rate": 3.284992389649924e-05, "loss": 2.005, "step": 5875 }, { "epoch": 1.8, "learning_rate": 3.2819482496194825e-05, "loss": 2.0607, "step": 5900 }, { "epoch": 1.8, "learning_rate": 3.278904109589042e-05, "loss": 2.0251, "step": 5925 }, { "epoch": 1.81, "learning_rate": 3.2758599695585996e-05, "loss": 2.1067, "step": 5950 }, { "epoch": 1.82, "learning_rate": 3.272815829528158e-05, "loss": 1.97, "step": 5975 }, { "epoch": 1.83, "learning_rate": 3.269771689497717e-05, "loss": 2.0772, "step": 6000 }, { "epoch": 1.83, "learning_rate": 3.266727549467276e-05, "loss": 2.0554, "step": 6025 }, { "epoch": 1.84, "learning_rate": 3.2636834094368346e-05, "loss": 1.9819, "step": 6050 }, { "epoch": 1.85, "learning_rate": 3.260639269406393e-05, "loss": 2.0674, "step": 6075 }, { "epoch": 1.86, "learning_rate": 3.257595129375951e-05, "loss": 2.0516, "step": 6100 }, { "epoch": 1.86, "learning_rate": 3.25455098934551e-05, "loss": 2.0866, "step": 6125 }, { "epoch": 1.87, "learning_rate": 3.251506849315069e-05, "loss": 1.9859, "step": 6150 }, { "epoch": 1.88, "learning_rate": 3.2484627092846274e-05, "loss": 1.9645, "step": 6175 }, { "epoch": 1.89, "learning_rate": 3.245418569254186e-05, "loss": 2.0103, "step": 6200 }, { "epoch": 1.89, "learning_rate": 3.2423744292237446e-05, "loss": 2.052, "step": 6225 }, { "epoch": 1.9, "learning_rate": 3.239330289193303e-05, "loss": 2.0134, "step": 6250 }, { "epoch": 1.91, "learning_rate": 3.236286149162862e-05, "loss": 2.025, "step": 6275 }, { "epoch": 1.92, "learning_rate": 3.23324200913242e-05, "loss": 2.0338, "step": 6300 }, { "epoch": 1.93, "learning_rate": 3.230197869101979e-05, "loss": 2.0614, "step": 6325 }, { "epoch": 1.93, "learning_rate": 3.2271537290715374e-05, "loss": 1.9784, "step": 6350 }, { "epoch": 1.94, "learning_rate": 3.224109589041096e-05, "loss": 1.9976, "step": 6375 }, { "epoch": 1.95, "learning_rate": 3.2210654490106545e-05, "loss": 2.0695, "step": 6400 }, { "epoch": 1.96, "learning_rate": 3.218021308980214e-05, "loss": 1.8607, "step": 6425 }, { "epoch": 1.96, "learning_rate": 3.214977168949772e-05, "loss": 2.0003, "step": 6450 }, { "epoch": 1.97, "learning_rate": 3.21193302891933e-05, "loss": 1.9529, "step": 6475 }, { "epoch": 1.98, "learning_rate": 3.208888888888889e-05, "loss": 1.9775, "step": 6500 }, { "epoch": 1.99, "learning_rate": 3.205844748858448e-05, "loss": 2.1202, "step": 6525 }, { "epoch": 1.99, "learning_rate": 3.2028006088280066e-05, "loss": 2.0416, "step": 6550 }, { "epoch": 2.0, "learning_rate": 3.1997564687975645e-05, "loss": 1.939, "step": 6575 }, { "epoch": 2.01, "learning_rate": 3.196712328767123e-05, "loss": 1.8248, "step": 6600 }, { "epoch": 2.02, "learning_rate": 3.193668188736682e-05, "loss": 1.768, "step": 6625 }, { "epoch": 2.02, "learning_rate": 3.190624048706241e-05, "loss": 1.7417, "step": 6650 }, { "epoch": 2.03, "learning_rate": 3.1875799086757995e-05, "loss": 1.787, "step": 6675 }, { "epoch": 2.04, "learning_rate": 3.184657534246576e-05, "loss": 1.8256, "step": 6700 }, { "epoch": 2.05, "learning_rate": 3.181613394216134e-05, "loss": 1.7764, "step": 6725 }, { "epoch": 2.05, "learning_rate": 3.1785692541856925e-05, "loss": 1.7467, "step": 6750 }, { "epoch": 2.06, "learning_rate": 3.175525114155251e-05, "loss": 1.7261, "step": 6775 }, { "epoch": 2.07, "learning_rate": 3.17248097412481e-05, "loss": 1.7825, "step": 6800 }, { "epoch": 2.08, "learning_rate": 3.169436834094369e-05, "loss": 1.7536, "step": 6825 }, { "epoch": 2.09, "learning_rate": 3.1663926940639274e-05, "loss": 1.7507, "step": 6850 }, { "epoch": 2.09, "learning_rate": 3.1633485540334853e-05, "loss": 1.7534, "step": 6875 }, { "epoch": 2.1, "learning_rate": 3.1603044140030446e-05, "loss": 1.6879, "step": 6900 }, { "epoch": 2.11, "learning_rate": 3.157260273972603e-05, "loss": 1.6767, "step": 6925 }, { "epoch": 2.12, "learning_rate": 3.154216133942162e-05, "loss": 1.7718, "step": 6950 }, { "epoch": 2.12, "learning_rate": 3.15117199391172e-05, "loss": 1.7115, "step": 6975 }, { "epoch": 2.13, "learning_rate": 3.148127853881279e-05, "loss": 1.7024, "step": 7000 }, { "epoch": 2.14, "learning_rate": 3.1450837138508374e-05, "loss": 1.7389, "step": 7025 }, { "epoch": 2.15, "learning_rate": 3.142039573820396e-05, "loss": 1.7123, "step": 7050 }, { "epoch": 2.15, "learning_rate": 3.1389954337899546e-05, "loss": 1.835, "step": 7075 }, { "epoch": 2.16, "learning_rate": 3.135951293759513e-05, "loss": 1.7925, "step": 7100 }, { "epoch": 2.17, "learning_rate": 3.132907153729072e-05, "loss": 1.7361, "step": 7125 }, { "epoch": 2.18, "learning_rate": 3.12986301369863e-05, "loss": 1.775, "step": 7150 }, { "epoch": 2.18, "learning_rate": 3.126818873668189e-05, "loss": 1.699, "step": 7175 }, { "epoch": 2.19, "learning_rate": 3.123774733637748e-05, "loss": 1.8377, "step": 7200 }, { "epoch": 2.2, "learning_rate": 3.120730593607306e-05, "loss": 1.6677, "step": 7225 }, { "epoch": 2.21, "learning_rate": 3.1176864535768645e-05, "loss": 1.7617, "step": 7250 }, { "epoch": 2.21, "learning_rate": 3.114642313546423e-05, "loss": 1.7213, "step": 7275 }, { "epoch": 2.22, "learning_rate": 3.1115981735159824e-05, "loss": 1.8376, "step": 7300 }, { "epoch": 2.23, "learning_rate": 3.108554033485541e-05, "loss": 1.792, "step": 7325 }, { "epoch": 2.24, "learning_rate": 3.105509893455099e-05, "loss": 1.8017, "step": 7350 }, { "epoch": 2.25, "learning_rate": 3.1024657534246574e-05, "loss": 1.7817, "step": 7375 }, { "epoch": 2.25, "learning_rate": 3.0994216133942166e-05, "loss": 1.8186, "step": 7400 }, { "epoch": 2.26, "learning_rate": 3.096377473363775e-05, "loss": 1.7081, "step": 7425 }, { "epoch": 2.27, "learning_rate": 3.093333333333334e-05, "loss": 1.7725, "step": 7450 }, { "epoch": 2.28, "learning_rate": 3.090289193302892e-05, "loss": 1.8943, "step": 7475 }, { "epoch": 2.28, "learning_rate": 3.087245053272451e-05, "loss": 1.8723, "step": 7500 }, { "epoch": 2.29, "learning_rate": 3.0842009132420095e-05, "loss": 1.8089, "step": 7525 }, { "epoch": 2.3, "learning_rate": 3.081156773211568e-05, "loss": 1.7635, "step": 7550 }, { "epoch": 2.31, "learning_rate": 3.0781126331811266e-05, "loss": 1.7497, "step": 7575 }, { "epoch": 2.31, "learning_rate": 3.075068493150685e-05, "loss": 1.738, "step": 7600 }, { "epoch": 2.32, "learning_rate": 3.072024353120244e-05, "loss": 1.7198, "step": 7625 }, { "epoch": 2.33, "learning_rate": 3.068980213089802e-05, "loss": 1.7462, "step": 7650 }, { "epoch": 2.34, "learning_rate": 3.065936073059361e-05, "loss": 1.8347, "step": 7675 }, { "epoch": 2.34, "learning_rate": 3.0628919330289194e-05, "loss": 1.8402, "step": 7700 }, { "epoch": 2.35, "learning_rate": 3.059847792998478e-05, "loss": 1.8079, "step": 7725 }, { "epoch": 2.36, "learning_rate": 3.0568036529680366e-05, "loss": 1.7424, "step": 7750 }, { "epoch": 2.37, "learning_rate": 3.053759512937595e-05, "loss": 1.7832, "step": 7775 }, { "epoch": 2.37, "learning_rate": 3.050715372907154e-05, "loss": 1.7963, "step": 7800 }, { "epoch": 2.38, "learning_rate": 3.0477929984779303e-05, "loss": 1.8091, "step": 7825 }, { "epoch": 2.39, "learning_rate": 3.044748858447489e-05, "loss": 1.6953, "step": 7850 }, { "epoch": 2.4, "learning_rate": 3.0417047184170478e-05, "loss": 1.8256, "step": 7875 }, { "epoch": 2.4, "learning_rate": 3.038660578386606e-05, "loss": 1.8461, "step": 7900 }, { "epoch": 2.41, "learning_rate": 3.0356164383561646e-05, "loss": 1.6912, "step": 7925 }, { "epoch": 2.42, "learning_rate": 3.032572298325723e-05, "loss": 1.7326, "step": 7950 }, { "epoch": 2.43, "learning_rate": 3.029528158295282e-05, "loss": 1.7778, "step": 7975 }, { "epoch": 2.44, "learning_rate": 3.0264840182648406e-05, "loss": 1.7841, "step": 8000 }, { "epoch": 2.44, "learning_rate": 3.023439878234399e-05, "loss": 1.8748, "step": 8025 }, { "epoch": 2.45, "learning_rate": 3.0203957382039574e-05, "loss": 1.7683, "step": 8050 }, { "epoch": 2.46, "learning_rate": 3.0173515981735163e-05, "loss": 1.8536, "step": 8075 }, { "epoch": 2.47, "learning_rate": 3.014307458143075e-05, "loss": 1.865, "step": 8100 }, { "epoch": 2.47, "learning_rate": 3.0112633181126334e-05, "loss": 1.7282, "step": 8125 }, { "epoch": 2.48, "learning_rate": 3.008219178082192e-05, "loss": 1.8071, "step": 8150 }, { "epoch": 2.49, "learning_rate": 3.005175038051751e-05, "loss": 1.7984, "step": 8175 }, { "epoch": 2.5, "learning_rate": 3.002130898021309e-05, "loss": 1.8904, "step": 8200 }, { "epoch": 2.5, "learning_rate": 2.9990867579908677e-05, "loss": 1.8645, "step": 8225 }, { "epoch": 2.51, "learning_rate": 2.9960426179604263e-05, "loss": 1.8624, "step": 8250 }, { "epoch": 2.52, "learning_rate": 2.9929984779299852e-05, "loss": 1.7069, "step": 8275 }, { "epoch": 2.53, "learning_rate": 2.9899543378995438e-05, "loss": 1.8513, "step": 8300 }, { "epoch": 2.53, "learning_rate": 2.9869101978691023e-05, "loss": 1.695, "step": 8325 }, { "epoch": 2.54, "learning_rate": 2.9838660578386606e-05, "loss": 1.789, "step": 8350 }, { "epoch": 2.55, "learning_rate": 2.9808219178082195e-05, "loss": 1.8332, "step": 8375 }, { "epoch": 2.56, "learning_rate": 2.977777777777778e-05, "loss": 1.781, "step": 8400 }, { "epoch": 2.56, "learning_rate": 2.9747336377473366e-05, "loss": 1.7331, "step": 8425 }, { "epoch": 2.57, "learning_rate": 2.971689497716895e-05, "loss": 1.7355, "step": 8450 }, { "epoch": 2.58, "learning_rate": 2.968645357686454e-05, "loss": 1.7736, "step": 8475 }, { "epoch": 2.59, "learning_rate": 2.9656012176560126e-05, "loss": 1.8316, "step": 8500 }, { "epoch": 2.6, "learning_rate": 2.962557077625571e-05, "loss": 1.7823, "step": 8525 }, { "epoch": 2.6, "learning_rate": 2.9595129375951294e-05, "loss": 1.8136, "step": 8550 }, { "epoch": 2.61, "learning_rate": 2.9564687975646883e-05, "loss": 1.8344, "step": 8575 }, { "epoch": 2.62, "learning_rate": 2.953424657534247e-05, "loss": 1.8676, "step": 8600 }, { "epoch": 2.63, "learning_rate": 2.9503805175038055e-05, "loss": 1.8041, "step": 8625 }, { "epoch": 2.63, "learning_rate": 2.9473363774733637e-05, "loss": 1.7538, "step": 8650 }, { "epoch": 2.64, "learning_rate": 2.944292237442923e-05, "loss": 1.7551, "step": 8675 }, { "epoch": 2.65, "learning_rate": 2.9412480974124812e-05, "loss": 1.8191, "step": 8700 }, { "epoch": 2.66, "learning_rate": 2.9382039573820398e-05, "loss": 1.6856, "step": 8725 }, { "epoch": 2.66, "learning_rate": 2.9351598173515983e-05, "loss": 1.8142, "step": 8750 }, { "epoch": 2.67, "learning_rate": 2.9321156773211572e-05, "loss": 1.7193, "step": 8775 }, { "epoch": 2.68, "learning_rate": 2.9290715372907158e-05, "loss": 1.8532, "step": 8800 }, { "epoch": 2.69, "learning_rate": 2.926027397260274e-05, "loss": 1.7633, "step": 8825 }, { "epoch": 2.69, "learning_rate": 2.9229832572298326e-05, "loss": 1.8355, "step": 8850 }, { "epoch": 2.7, "learning_rate": 2.9199391171993915e-05, "loss": 1.7773, "step": 8875 }, { "epoch": 2.71, "learning_rate": 2.91689497716895e-05, "loss": 1.8063, "step": 8900 }, { "epoch": 2.72, "learning_rate": 2.9138508371385086e-05, "loss": 1.8132, "step": 8925 }, { "epoch": 2.72, "learning_rate": 2.9108066971080672e-05, "loss": 1.8179, "step": 8950 }, { "epoch": 2.73, "learning_rate": 2.907762557077626e-05, "loss": 1.8025, "step": 8975 }, { "epoch": 2.74, "learning_rate": 2.9047184170471843e-05, "loss": 1.9039, "step": 9000 }, { "epoch": 2.75, "learning_rate": 2.901674277016743e-05, "loss": 1.8192, "step": 9025 }, { "epoch": 2.75, "learning_rate": 2.8986301369863015e-05, "loss": 1.7935, "step": 9050 }, { "epoch": 2.76, "learning_rate": 2.8955859969558604e-05, "loss": 1.8275, "step": 9075 }, { "epoch": 2.77, "learning_rate": 2.892541856925419e-05, "loss": 1.818, "step": 9100 }, { "epoch": 2.78, "learning_rate": 2.8894977168949775e-05, "loss": 1.8142, "step": 9125 }, { "epoch": 2.79, "learning_rate": 2.8864535768645357e-05, "loss": 1.8139, "step": 9150 }, { "epoch": 2.79, "learning_rate": 2.8834094368340947e-05, "loss": 1.8355, "step": 9175 }, { "epoch": 2.8, "learning_rate": 2.8803652968036532e-05, "loss": 1.7334, "step": 9200 }, { "epoch": 2.81, "learning_rate": 2.8773211567732118e-05, "loss": 1.7533, "step": 9225 }, { "epoch": 2.82, "learning_rate": 2.8742770167427704e-05, "loss": 1.7938, "step": 9250 }, { "epoch": 2.82, "learning_rate": 2.8712328767123293e-05, "loss": 1.7383, "step": 9275 }, { "epoch": 2.83, "learning_rate": 2.868188736681888e-05, "loss": 1.7569, "step": 9300 }, { "epoch": 2.84, "learning_rate": 2.865144596651446e-05, "loss": 1.891, "step": 9325 }, { "epoch": 2.85, "learning_rate": 2.8621004566210046e-05, "loss": 1.8103, "step": 9350 }, { "epoch": 2.85, "learning_rate": 2.8590563165905635e-05, "loss": 1.8334, "step": 9375 }, { "epoch": 2.86, "learning_rate": 2.856012176560122e-05, "loss": 1.7458, "step": 9400 }, { "epoch": 2.87, "learning_rate": 2.8529680365296807e-05, "loss": 1.8501, "step": 9425 }, { "epoch": 2.88, "learning_rate": 2.849923896499239e-05, "loss": 1.7494, "step": 9450 }, { "epoch": 2.88, "learning_rate": 2.846879756468798e-05, "loss": 1.7331, "step": 9475 }, { "epoch": 2.89, "learning_rate": 2.8438356164383564e-05, "loss": 1.8252, "step": 9500 }, { "epoch": 2.9, "learning_rate": 2.840791476407915e-05, "loss": 1.808, "step": 9525 }, { "epoch": 2.91, "learning_rate": 2.8377473363774735e-05, "loss": 1.7964, "step": 9550 }, { "epoch": 2.91, "learning_rate": 2.8347031963470324e-05, "loss": 1.8049, "step": 9575 }, { "epoch": 2.92, "learning_rate": 2.831659056316591e-05, "loss": 1.8175, "step": 9600 }, { "epoch": 2.93, "learning_rate": 2.8286149162861492e-05, "loss": 1.768, "step": 9625 }, { "epoch": 2.94, "learning_rate": 2.8255707762557078e-05, "loss": 1.8176, "step": 9650 }, { "epoch": 2.95, "learning_rate": 2.8225266362252667e-05, "loss": 1.7983, "step": 9675 }, { "epoch": 2.95, "learning_rate": 2.8194824961948253e-05, "loss": 1.8383, "step": 9700 }, { "epoch": 2.96, "learning_rate": 2.8164383561643838e-05, "loss": 1.77, "step": 9725 }, { "epoch": 2.97, "learning_rate": 2.8133942161339424e-05, "loss": 1.8007, "step": 9750 }, { "epoch": 2.98, "learning_rate": 2.8103500761035013e-05, "loss": 1.7941, "step": 9775 }, { "epoch": 2.98, "learning_rate": 2.8073059360730595e-05, "loss": 1.8826, "step": 9800 }, { "epoch": 2.99, "learning_rate": 2.804261796042618e-05, "loss": 1.8164, "step": 9825 }, { "epoch": 3.0, "learning_rate": 2.8012176560121767e-05, "loss": 1.8422, "step": 9850 }, { "epoch": 3.01, "learning_rate": 2.7981735159817356e-05, "loss": 1.607, "step": 9875 }, { "epoch": 3.01, "learning_rate": 2.795129375951294e-05, "loss": 1.5504, "step": 9900 }, { "epoch": 3.02, "learning_rate": 2.7920852359208527e-05, "loss": 1.5181, "step": 9925 }, { "epoch": 3.03, "learning_rate": 2.789041095890411e-05, "loss": 1.5856, "step": 9950 }, { "epoch": 3.04, "learning_rate": 2.78599695585997e-05, "loss": 1.4975, "step": 9975 }, { "epoch": 3.04, "learning_rate": 2.7829528158295284e-05, "loss": 1.5684, "step": 10000 }, { "epoch": 3.05, "learning_rate": 2.779908675799087e-05, "loss": 1.6031, "step": 10025 }, { "epoch": 3.06, "learning_rate": 2.7768645357686455e-05, "loss": 1.5398, "step": 10050 }, { "epoch": 3.07, "learning_rate": 2.7738203957382045e-05, "loss": 1.5564, "step": 10075 }, { "epoch": 3.07, "learning_rate": 2.770776255707763e-05, "loss": 1.5395, "step": 10100 }, { "epoch": 3.08, "learning_rate": 2.7677321156773213e-05, "loss": 1.5022, "step": 10125 }, { "epoch": 3.09, "learning_rate": 2.7646879756468798e-05, "loss": 1.4697, "step": 10150 }, { "epoch": 3.1, "learning_rate": 2.7616438356164387e-05, "loss": 1.5704, "step": 10175 }, { "epoch": 3.11, "learning_rate": 2.7585996955859973e-05, "loss": 1.5571, "step": 10200 }, { "epoch": 3.11, "learning_rate": 2.755555555555556e-05, "loss": 1.5742, "step": 10225 }, { "epoch": 3.12, "learning_rate": 2.752511415525114e-05, "loss": 1.5312, "step": 10250 }, { "epoch": 3.13, "learning_rate": 2.7494672754946733e-05, "loss": 1.4847, "step": 10275 }, { "epoch": 3.14, "learning_rate": 2.7464231354642316e-05, "loss": 1.5724, "step": 10300 }, { "epoch": 3.14, "learning_rate": 2.74337899543379e-05, "loss": 1.5509, "step": 10325 }, { "epoch": 3.15, "learning_rate": 2.7403348554033487e-05, "loss": 1.5007, "step": 10350 }, { "epoch": 3.16, "learning_rate": 2.7372907153729076e-05, "loss": 1.5213, "step": 10375 }, { "epoch": 3.17, "learning_rate": 2.7342465753424662e-05, "loss": 1.5834, "step": 10400 }, { "epoch": 3.17, "learning_rate": 2.7312024353120244e-05, "loss": 1.5101, "step": 10425 }, { "epoch": 3.18, "learning_rate": 2.728158295281583e-05, "loss": 1.5733, "step": 10450 }, { "epoch": 3.19, "learning_rate": 2.725114155251142e-05, "loss": 1.5903, "step": 10475 }, { "epoch": 3.2, "learning_rate": 2.7220700152207005e-05, "loss": 1.5721, "step": 10500 }, { "epoch": 3.2, "learning_rate": 2.719025875190259e-05, "loss": 1.5293, "step": 10525 }, { "epoch": 3.21, "learning_rate": 2.7159817351598176e-05, "loss": 1.6016, "step": 10550 }, { "epoch": 3.22, "learning_rate": 2.7129375951293765e-05, "loss": 1.4873, "step": 10575 }, { "epoch": 3.23, "learning_rate": 2.7098934550989347e-05, "loss": 1.6021, "step": 10600 }, { "epoch": 3.23, "learning_rate": 2.7068493150684933e-05, "loss": 1.6195, "step": 10625 }, { "epoch": 3.24, "learning_rate": 2.703805175038052e-05, "loss": 1.6156, "step": 10650 }, { "epoch": 3.25, "learning_rate": 2.7007610350076108e-05, "loss": 1.5061, "step": 10675 }, { "epoch": 3.26, "learning_rate": 2.6977168949771693e-05, "loss": 1.5679, "step": 10700 }, { "epoch": 3.26, "learning_rate": 2.694672754946728e-05, "loss": 1.6087, "step": 10725 }, { "epoch": 3.27, "learning_rate": 2.691628614916286e-05, "loss": 1.5767, "step": 10750 }, { "epoch": 3.28, "learning_rate": 2.688584474885845e-05, "loss": 1.5589, "step": 10775 }, { "epoch": 3.29, "learning_rate": 2.6855403348554036e-05, "loss": 1.5384, "step": 10800 }, { "epoch": 3.3, "learning_rate": 2.6824961948249622e-05, "loss": 1.5174, "step": 10825 }, { "epoch": 3.3, "learning_rate": 2.6794520547945207e-05, "loss": 1.585, "step": 10850 }, { "epoch": 3.31, "learning_rate": 2.6764079147640796e-05, "loss": 1.5766, "step": 10875 }, { "epoch": 3.32, "learning_rate": 2.6733637747336382e-05, "loss": 1.5724, "step": 10900 }, { "epoch": 3.33, "learning_rate": 2.6703196347031964e-05, "loss": 1.5789, "step": 10925 }, { "epoch": 3.33, "learning_rate": 2.667275494672755e-05, "loss": 1.566, "step": 10950 }, { "epoch": 3.34, "learning_rate": 2.664231354642314e-05, "loss": 1.5702, "step": 10975 }, { "epoch": 3.35, "learning_rate": 2.6611872146118725e-05, "loss": 1.5913, "step": 11000 }, { "epoch": 3.36, "learning_rate": 2.658143074581431e-05, "loss": 1.5544, "step": 11025 }, { "epoch": 3.36, "learning_rate": 2.6550989345509893e-05, "loss": 1.4748, "step": 11050 }, { "epoch": 3.37, "learning_rate": 2.6520547945205485e-05, "loss": 1.5538, "step": 11075 }, { "epoch": 3.38, "learning_rate": 2.6490106544901068e-05, "loss": 1.5493, "step": 11100 }, { "epoch": 3.39, "learning_rate": 2.6459665144596653e-05, "loss": 1.5631, "step": 11125 }, { "epoch": 3.39, "learning_rate": 2.642922374429224e-05, "loss": 1.6231, "step": 11150 }, { "epoch": 3.4, "learning_rate": 2.6398782343987828e-05, "loss": 1.5592, "step": 11175 }, { "epoch": 3.41, "learning_rate": 2.6368340943683414e-05, "loss": 1.4599, "step": 11200 }, { "epoch": 3.42, "learning_rate": 2.6337899543378996e-05, "loss": 1.5991, "step": 11225 }, { "epoch": 3.42, "learning_rate": 2.630745814307458e-05, "loss": 1.5998, "step": 11250 }, { "epoch": 3.43, "learning_rate": 2.627701674277017e-05, "loss": 1.5227, "step": 11275 }, { "epoch": 3.44, "learning_rate": 2.6246575342465756e-05, "loss": 1.5894, "step": 11300 }, { "epoch": 3.45, "learning_rate": 2.6216133942161342e-05, "loss": 1.5064, "step": 11325 }, { "epoch": 3.46, "learning_rate": 2.6185692541856928e-05, "loss": 1.5878, "step": 11350 }, { "epoch": 3.46, "learning_rate": 2.6155251141552517e-05, "loss": 1.608, "step": 11375 }, { "epoch": 3.47, "learning_rate": 2.61248097412481e-05, "loss": 1.6658, "step": 11400 }, { "epoch": 3.48, "learning_rate": 2.6094368340943685e-05, "loss": 1.5905, "step": 11425 }, { "epoch": 3.49, "learning_rate": 2.606392694063927e-05, "loss": 1.6711, "step": 11450 }, { "epoch": 3.49, "learning_rate": 2.603348554033486e-05, "loss": 1.5699, "step": 11475 }, { "epoch": 3.5, "learning_rate": 2.6003044140030445e-05, "loss": 1.6112, "step": 11500 }, { "epoch": 3.51, "learning_rate": 2.597260273972603e-05, "loss": 1.5901, "step": 11525 }, { "epoch": 3.52, "learning_rate": 2.5942161339421613e-05, "loss": 1.5447, "step": 11550 }, { "epoch": 3.52, "learning_rate": 2.5911719939117202e-05, "loss": 1.5636, "step": 11575 }, { "epoch": 3.53, "learning_rate": 2.5881278538812788e-05, "loss": 1.6046, "step": 11600 }, { "epoch": 3.54, "learning_rate": 2.5850837138508374e-05, "loss": 1.5229, "step": 11625 }, { "epoch": 3.55, "learning_rate": 2.582039573820396e-05, "loss": 1.6017, "step": 11650 }, { "epoch": 3.55, "learning_rate": 2.578995433789955e-05, "loss": 1.5262, "step": 11675 }, { "epoch": 3.56, "learning_rate": 2.5759512937595134e-05, "loss": 1.519, "step": 11700 }, { "epoch": 3.57, "learning_rate": 2.5729071537290716e-05, "loss": 1.5743, "step": 11725 }, { "epoch": 3.58, "learning_rate": 2.5698630136986302e-05, "loss": 1.566, "step": 11750 }, { "epoch": 3.58, "learning_rate": 2.566818873668189e-05, "loss": 1.5854, "step": 11775 }, { "epoch": 3.59, "learning_rate": 2.5637747336377477e-05, "loss": 1.6292, "step": 11800 }, { "epoch": 3.6, "learning_rate": 2.5607305936073062e-05, "loss": 1.6367, "step": 11825 }, { "epoch": 3.61, "learning_rate": 2.5576864535768645e-05, "loss": 1.6502, "step": 11850 }, { "epoch": 3.61, "learning_rate": 2.5546423135464237e-05, "loss": 1.6127, "step": 11875 }, { "epoch": 3.62, "learning_rate": 2.551598173515982e-05, "loss": 1.5351, "step": 11900 }, { "epoch": 3.63, "learning_rate": 2.5485540334855405e-05, "loss": 1.5574, "step": 11925 }, { "epoch": 3.64, "learning_rate": 2.545509893455099e-05, "loss": 1.6821, "step": 11950 }, { "epoch": 3.65, "learning_rate": 2.542465753424658e-05, "loss": 1.4367, "step": 11975 }, { "epoch": 3.65, "learning_rate": 2.5394216133942166e-05, "loss": 1.6209, "step": 12000 }, { "epoch": 3.66, "learning_rate": 2.5363774733637748e-05, "loss": 1.5987, "step": 12025 }, { "epoch": 3.67, "learning_rate": 2.5333333333333334e-05, "loss": 1.5591, "step": 12050 }, { "epoch": 3.68, "learning_rate": 2.5302891933028923e-05, "loss": 1.6609, "step": 12075 }, { "epoch": 3.68, "learning_rate": 2.527245053272451e-05, "loss": 1.5506, "step": 12100 }, { "epoch": 3.69, "learning_rate": 2.5242009132420094e-05, "loss": 1.6036, "step": 12125 }, { "epoch": 3.7, "learning_rate": 2.521156773211568e-05, "loss": 1.6555, "step": 12150 }, { "epoch": 3.71, "learning_rate": 2.518112633181127e-05, "loss": 1.6392, "step": 12175 }, { "epoch": 3.71, "learning_rate": 2.515068493150685e-05, "loss": 1.6013, "step": 12200 }, { "epoch": 3.72, "learning_rate": 2.5120243531202437e-05, "loss": 1.6, "step": 12225 }, { "epoch": 3.73, "learning_rate": 2.5089802130898022e-05, "loss": 1.5195, "step": 12250 }, { "epoch": 3.74, "learning_rate": 2.505936073059361e-05, "loss": 1.5349, "step": 12275 }, { "epoch": 3.74, "learning_rate": 2.5028919330289197e-05, "loss": 1.6064, "step": 12300 }, { "epoch": 3.75, "learning_rate": 2.4998477929984783e-05, "loss": 1.5887, "step": 12325 }, { "epoch": 3.76, "learning_rate": 2.4969254185692542e-05, "loss": 1.6195, "step": 12350 }, { "epoch": 3.77, "learning_rate": 2.493881278538813e-05, "loss": 1.614, "step": 12375 }, { "epoch": 3.77, "learning_rate": 2.4908371385083717e-05, "loss": 1.6406, "step": 12400 }, { "epoch": 3.78, "learning_rate": 2.4877929984779302e-05, "loss": 1.5978, "step": 12425 }, { "epoch": 3.79, "learning_rate": 2.4847488584474885e-05, "loss": 1.662, "step": 12450 }, { "epoch": 3.8, "learning_rate": 2.4817047184170474e-05, "loss": 1.5427, "step": 12475 }, { "epoch": 3.81, "learning_rate": 2.478660578386606e-05, "loss": 1.6268, "step": 12500 }, { "epoch": 3.81, "learning_rate": 2.4756164383561645e-05, "loss": 1.6355, "step": 12525 }, { "epoch": 3.82, "learning_rate": 2.472572298325723e-05, "loss": 1.5728, "step": 12550 }, { "epoch": 3.83, "learning_rate": 2.469528158295282e-05, "loss": 1.6309, "step": 12575 }, { "epoch": 3.84, "learning_rate": 2.4664840182648405e-05, "loss": 1.5316, "step": 12600 }, { "epoch": 3.84, "learning_rate": 2.4634398782343988e-05, "loss": 1.55, "step": 12625 }, { "epoch": 3.85, "learning_rate": 2.4603957382039573e-05, "loss": 1.6023, "step": 12650 }, { "epoch": 3.86, "learning_rate": 2.4573515981735162e-05, "loss": 1.5844, "step": 12675 }, { "epoch": 3.87, "learning_rate": 2.4543074581430748e-05, "loss": 1.6453, "step": 12700 }, { "epoch": 3.87, "learning_rate": 2.4512633181126334e-05, "loss": 1.509, "step": 12725 }, { "epoch": 3.88, "learning_rate": 2.448219178082192e-05, "loss": 1.5422, "step": 12750 }, { "epoch": 3.89, "learning_rate": 2.445175038051751e-05, "loss": 1.6115, "step": 12775 }, { "epoch": 3.9, "learning_rate": 2.442130898021309e-05, "loss": 1.5477, "step": 12800 }, { "epoch": 3.9, "learning_rate": 2.4390867579908677e-05, "loss": 1.6476, "step": 12825 }, { "epoch": 3.91, "learning_rate": 2.4360426179604262e-05, "loss": 1.6505, "step": 12850 }, { "epoch": 3.92, "learning_rate": 2.432998477929985e-05, "loss": 1.5546, "step": 12875 }, { "epoch": 3.93, "learning_rate": 2.4299543378995437e-05, "loss": 1.5796, "step": 12900 }, { "epoch": 3.93, "learning_rate": 2.426910197869102e-05, "loss": 1.5756, "step": 12925 }, { "epoch": 3.94, "learning_rate": 2.4238660578386605e-05, "loss": 1.5754, "step": 12950 }, { "epoch": 3.95, "learning_rate": 2.4208219178082194e-05, "loss": 1.5819, "step": 12975 }, { "epoch": 3.96, "learning_rate": 2.417777777777778e-05, "loss": 1.5826, "step": 13000 }, { "epoch": 3.96, "learning_rate": 2.4147336377473365e-05, "loss": 1.6289, "step": 13025 }, { "epoch": 3.97, "learning_rate": 2.411689497716895e-05, "loss": 1.5559, "step": 13050 }, { "epoch": 3.98, "learning_rate": 2.408645357686454e-05, "loss": 1.6191, "step": 13075 }, { "epoch": 3.99, "learning_rate": 2.4056012176560122e-05, "loss": 1.5491, "step": 13100 }, { "epoch": 4.0, "learning_rate": 2.4025570776255708e-05, "loss": 1.6345, "step": 13125 }, { "epoch": 4.0, "learning_rate": 2.3995129375951294e-05, "loss": 1.4829, "step": 13150 }, { "epoch": 4.01, "learning_rate": 2.3964687975646883e-05, "loss": 1.4067, "step": 13175 }, { "epoch": 4.02, "learning_rate": 2.393424657534247e-05, "loss": 1.3637, "step": 13200 }, { "epoch": 4.03, "learning_rate": 2.3903805175038054e-05, "loss": 1.2731, "step": 13225 }, { "epoch": 4.03, "learning_rate": 2.3873363774733636e-05, "loss": 1.3727, "step": 13250 }, { "epoch": 4.04, "learning_rate": 2.3842922374429226e-05, "loss": 1.2957, "step": 13275 }, { "epoch": 4.05, "learning_rate": 2.381248097412481e-05, "loss": 1.3658, "step": 13300 }, { "epoch": 4.06, "learning_rate": 2.3782039573820397e-05, "loss": 1.4263, "step": 13325 }, { "epoch": 4.06, "learning_rate": 2.3751598173515983e-05, "loss": 1.36, "step": 13350 }, { "epoch": 4.07, "learning_rate": 2.372115677321157e-05, "loss": 1.2446, "step": 13375 }, { "epoch": 4.08, "learning_rate": 2.3690715372907157e-05, "loss": 1.3264, "step": 13400 }, { "epoch": 4.09, "learning_rate": 2.366027397260274e-05, "loss": 1.3395, "step": 13425 }, { "epoch": 4.09, "learning_rate": 2.3629832572298325e-05, "loss": 1.3421, "step": 13450 }, { "epoch": 4.1, "learning_rate": 2.3599391171993914e-05, "loss": 1.4379, "step": 13475 }, { "epoch": 4.11, "learning_rate": 2.35689497716895e-05, "loss": 1.3355, "step": 13500 }, { "epoch": 4.12, "learning_rate": 2.3538508371385086e-05, "loss": 1.4226, "step": 13525 }, { "epoch": 4.12, "learning_rate": 2.3508066971080668e-05, "loss": 1.4034, "step": 13550 }, { "epoch": 4.13, "learning_rate": 2.347762557077626e-05, "loss": 1.3015, "step": 13575 }, { "epoch": 4.14, "learning_rate": 2.3447184170471843e-05, "loss": 1.3953, "step": 13600 }, { "epoch": 4.15, "learning_rate": 2.341674277016743e-05, "loss": 1.3534, "step": 13625 }, { "epoch": 4.16, "learning_rate": 2.3386301369863014e-05, "loss": 1.3378, "step": 13650 }, { "epoch": 4.16, "learning_rate": 2.3355859969558603e-05, "loss": 1.345, "step": 13675 }, { "epoch": 4.17, "learning_rate": 2.332541856925419e-05, "loss": 1.4098, "step": 13700 }, { "epoch": 4.18, "learning_rate": 2.329497716894977e-05, "loss": 1.3938, "step": 13725 }, { "epoch": 4.19, "learning_rate": 2.3264535768645357e-05, "loss": 1.4573, "step": 13750 }, { "epoch": 4.19, "learning_rate": 2.3234094368340946e-05, "loss": 1.396, "step": 13775 }, { "epoch": 4.2, "learning_rate": 2.320365296803653e-05, "loss": 1.3915, "step": 13800 }, { "epoch": 4.21, "learning_rate": 2.3173211567732117e-05, "loss": 1.4149, "step": 13825 }, { "epoch": 4.22, "learning_rate": 2.3142770167427703e-05, "loss": 1.4299, "step": 13850 }, { "epoch": 4.22, "learning_rate": 2.3112328767123292e-05, "loss": 1.388, "step": 13875 }, { "epoch": 4.23, "learning_rate": 2.3081887366818874e-05, "loss": 1.4334, "step": 13900 }, { "epoch": 4.24, "learning_rate": 2.305144596651446e-05, "loss": 1.358, "step": 13925 }, { "epoch": 4.25, "learning_rate": 2.3021004566210046e-05, "loss": 1.3849, "step": 13950 }, { "epoch": 4.25, "learning_rate": 2.2990563165905635e-05, "loss": 1.2529, "step": 13975 }, { "epoch": 4.26, "learning_rate": 2.296012176560122e-05, "loss": 1.406, "step": 14000 }, { "epoch": 4.27, "learning_rate": 2.2929680365296806e-05, "loss": 1.3589, "step": 14025 }, { "epoch": 4.28, "learning_rate": 2.289923896499239e-05, "loss": 1.3382, "step": 14050 }, { "epoch": 4.28, "learning_rate": 2.2868797564687977e-05, "loss": 1.4394, "step": 14075 }, { "epoch": 4.29, "learning_rate": 2.2838356164383563e-05, "loss": 1.4327, "step": 14100 }, { "epoch": 4.3, "learning_rate": 2.280791476407915e-05, "loss": 1.3955, "step": 14125 }, { "epoch": 4.31, "learning_rate": 2.2777473363774735e-05, "loss": 1.38, "step": 14150 }, { "epoch": 4.32, "learning_rate": 2.2747031963470324e-05, "loss": 1.3289, "step": 14175 }, { "epoch": 4.32, "learning_rate": 2.271659056316591e-05, "loss": 1.3282, "step": 14200 }, { "epoch": 4.33, "learning_rate": 2.268614916286149e-05, "loss": 1.4225, "step": 14225 }, { "epoch": 4.34, "learning_rate": 2.2655707762557077e-05, "loss": 1.358, "step": 14250 }, { "epoch": 4.35, "learning_rate": 2.2625266362252666e-05, "loss": 1.3493, "step": 14275 }, { "epoch": 4.35, "learning_rate": 2.2594824961948252e-05, "loss": 1.4606, "step": 14300 }, { "epoch": 4.36, "learning_rate": 2.2564383561643838e-05, "loss": 1.3846, "step": 14325 }, { "epoch": 4.37, "learning_rate": 2.253394216133942e-05, "loss": 1.3729, "step": 14350 }, { "epoch": 4.38, "learning_rate": 2.2503500761035012e-05, "loss": 1.3396, "step": 14375 }, { "epoch": 4.38, "learning_rate": 2.2473059360730595e-05, "loss": 1.3955, "step": 14400 }, { "epoch": 4.39, "learning_rate": 2.244261796042618e-05, "loss": 1.3225, "step": 14425 }, { "epoch": 4.4, "learning_rate": 2.2412176560121766e-05, "loss": 1.3583, "step": 14450 }, { "epoch": 4.41, "learning_rate": 2.2381735159817355e-05, "loss": 1.3146, "step": 14475 }, { "epoch": 4.41, "learning_rate": 2.235129375951294e-05, "loss": 1.4239, "step": 14500 }, { "epoch": 4.42, "learning_rate": 2.2320852359208523e-05, "loss": 1.3779, "step": 14525 }, { "epoch": 4.43, "learning_rate": 2.229041095890411e-05, "loss": 1.4361, "step": 14550 }, { "epoch": 4.44, "learning_rate": 2.2259969558599698e-05, "loss": 1.3766, "step": 14575 }, { "epoch": 4.44, "learning_rate": 2.2229528158295284e-05, "loss": 1.3609, "step": 14600 }, { "epoch": 4.45, "learning_rate": 2.219908675799087e-05, "loss": 1.4332, "step": 14625 }, { "epoch": 4.46, "learning_rate": 2.2168645357686455e-05, "loss": 1.4219, "step": 14650 }, { "epoch": 4.47, "learning_rate": 2.2138203957382044e-05, "loss": 1.4023, "step": 14675 }, { "epoch": 4.47, "learning_rate": 2.2107762557077626e-05, "loss": 1.4445, "step": 14700 }, { "epoch": 4.48, "learning_rate": 2.2077321156773212e-05, "loss": 1.3996, "step": 14725 }, { "epoch": 4.49, "learning_rate": 2.2048097412480978e-05, "loss": 1.3752, "step": 14750 }, { "epoch": 4.5, "learning_rate": 2.2017656012176563e-05, "loss": 1.3732, "step": 14775 }, { "epoch": 4.51, "learning_rate": 2.198721461187215e-05, "loss": 1.4234, "step": 14800 }, { "epoch": 4.51, "learning_rate": 2.195677321156773e-05, "loss": 1.3971, "step": 14825 }, { "epoch": 4.52, "learning_rate": 2.192633181126332e-05, "loss": 1.3729, "step": 14850 }, { "epoch": 4.53, "learning_rate": 2.1895890410958906e-05, "loss": 1.3405, "step": 14875 }, { "epoch": 4.54, "learning_rate": 2.1865449010654492e-05, "loss": 1.3572, "step": 14900 }, { "epoch": 4.54, "learning_rate": 2.1835007610350077e-05, "loss": 1.3331, "step": 14925 }, { "epoch": 4.55, "learning_rate": 2.1804566210045667e-05, "loss": 1.368, "step": 14950 }, { "epoch": 4.56, "learning_rate": 2.1774124809741252e-05, "loss": 1.3649, "step": 14975 }, { "epoch": 4.57, "learning_rate": 2.1743683409436835e-05, "loss": 1.389, "step": 15000 }, { "epoch": 4.57, "learning_rate": 2.171324200913242e-05, "loss": 1.4163, "step": 15025 }, { "epoch": 4.58, "learning_rate": 2.168280060882801e-05, "loss": 1.3442, "step": 15050 }, { "epoch": 4.59, "learning_rate": 2.1652359208523595e-05, "loss": 1.3446, "step": 15075 }, { "epoch": 4.6, "learning_rate": 2.162191780821918e-05, "loss": 1.3772, "step": 15100 }, { "epoch": 4.6, "learning_rate": 2.1591476407914763e-05, "loss": 1.4261, "step": 15125 }, { "epoch": 4.61, "learning_rate": 2.1561035007610352e-05, "loss": 1.4058, "step": 15150 }, { "epoch": 4.62, "learning_rate": 2.1530593607305938e-05, "loss": 1.3876, "step": 15175 }, { "epoch": 4.63, "learning_rate": 2.1500152207001523e-05, "loss": 1.3427, "step": 15200 }, { "epoch": 4.63, "learning_rate": 2.146971080669711e-05, "loss": 1.3778, "step": 15225 }, { "epoch": 4.64, "learning_rate": 2.1439269406392698e-05, "loss": 1.3358, "step": 15250 }, { "epoch": 4.65, "learning_rate": 2.1408828006088284e-05, "loss": 1.4508, "step": 15275 }, { "epoch": 4.66, "learning_rate": 2.1378386605783866e-05, "loss": 1.3823, "step": 15300 }, { "epoch": 4.67, "learning_rate": 2.1347945205479452e-05, "loss": 1.4207, "step": 15325 }, { "epoch": 4.67, "learning_rate": 2.131750380517504e-05, "loss": 1.3759, "step": 15350 }, { "epoch": 4.68, "learning_rate": 2.1287062404870626e-05, "loss": 1.4415, "step": 15375 }, { "epoch": 4.69, "learning_rate": 2.1256621004566212e-05, "loss": 1.3673, "step": 15400 }, { "epoch": 4.7, "learning_rate": 2.1226179604261798e-05, "loss": 1.4189, "step": 15425 }, { "epoch": 4.7, "learning_rate": 2.1195738203957387e-05, "loss": 1.3827, "step": 15450 }, { "epoch": 4.71, "learning_rate": 2.116529680365297e-05, "loss": 1.3898, "step": 15475 }, { "epoch": 4.72, "learning_rate": 2.1134855403348555e-05, "loss": 1.444, "step": 15500 }, { "epoch": 4.73, "learning_rate": 2.110441400304414e-05, "loss": 1.3941, "step": 15525 }, { "epoch": 4.73, "learning_rate": 2.107397260273973e-05, "loss": 1.4336, "step": 15550 }, { "epoch": 4.74, "learning_rate": 2.1043531202435315e-05, "loss": 1.4159, "step": 15575 }, { "epoch": 4.75, "learning_rate": 2.10130898021309e-05, "loss": 1.351, "step": 15600 }, { "epoch": 4.76, "learning_rate": 2.0982648401826483e-05, "loss": 1.336, "step": 15625 }, { "epoch": 4.76, "learning_rate": 2.0952207001522072e-05, "loss": 1.4279, "step": 15650 }, { "epoch": 4.77, "learning_rate": 2.0921765601217658e-05, "loss": 1.3384, "step": 15675 }, { "epoch": 4.78, "learning_rate": 2.0891324200913244e-05, "loss": 1.3338, "step": 15700 }, { "epoch": 4.79, "learning_rate": 2.086088280060883e-05, "loss": 1.3422, "step": 15725 }, { "epoch": 4.79, "learning_rate": 2.083044140030442e-05, "loss": 1.4765, "step": 15750 }, { "epoch": 4.8, "learning_rate": 2.08e-05, "loss": 1.4493, "step": 15775 }, { "epoch": 4.81, "learning_rate": 2.0769558599695586e-05, "loss": 1.4215, "step": 15800 }, { "epoch": 4.82, "learning_rate": 2.0739117199391172e-05, "loss": 1.4274, "step": 15825 }, { "epoch": 4.82, "learning_rate": 2.070867579908676e-05, "loss": 1.4245, "step": 15850 }, { "epoch": 4.83, "learning_rate": 2.0678234398782347e-05, "loss": 1.4564, "step": 15875 }, { "epoch": 4.84, "learning_rate": 2.0647792998477933e-05, "loss": 1.3672, "step": 15900 }, { "epoch": 4.85, "learning_rate": 2.0617351598173515e-05, "loss": 1.4063, "step": 15925 }, { "epoch": 4.86, "learning_rate": 2.0586910197869104e-05, "loss": 1.4454, "step": 15950 }, { "epoch": 4.86, "learning_rate": 2.055646879756469e-05, "loss": 1.4042, "step": 15975 }, { "epoch": 4.87, "learning_rate": 2.0526027397260275e-05, "loss": 1.3176, "step": 16000 }, { "epoch": 4.88, "learning_rate": 2.049558599695586e-05, "loss": 1.3937, "step": 16025 }, { "epoch": 4.89, "learning_rate": 2.046514459665145e-05, "loss": 1.4184, "step": 16050 }, { "epoch": 4.89, "learning_rate": 2.0434703196347036e-05, "loss": 1.3941, "step": 16075 }, { "epoch": 4.9, "learning_rate": 2.0404261796042618e-05, "loss": 1.3824, "step": 16100 }, { "epoch": 4.91, "learning_rate": 2.0373820395738204e-05, "loss": 1.3031, "step": 16125 }, { "epoch": 4.92, "learning_rate": 2.0343378995433793e-05, "loss": 1.3937, "step": 16150 }, { "epoch": 4.92, "learning_rate": 2.031293759512938e-05, "loss": 1.3045, "step": 16175 }, { "epoch": 4.93, "learning_rate": 2.0282496194824964e-05, "loss": 1.4681, "step": 16200 }, { "epoch": 4.94, "learning_rate": 2.025205479452055e-05, "loss": 1.4129, "step": 16225 }, { "epoch": 4.95, "learning_rate": 2.022161339421614e-05, "loss": 1.4236, "step": 16250 }, { "epoch": 4.95, "learning_rate": 2.019117199391172e-05, "loss": 1.4638, "step": 16275 }, { "epoch": 4.96, "learning_rate": 2.0160730593607307e-05, "loss": 1.4065, "step": 16300 }, { "epoch": 4.97, "learning_rate": 2.0130289193302892e-05, "loss": 1.395, "step": 16325 }, { "epoch": 4.98, "learning_rate": 2.009984779299848e-05, "loss": 1.441, "step": 16350 }, { "epoch": 4.98, "learning_rate": 2.0069406392694067e-05, "loss": 1.4944, "step": 16375 }, { "epoch": 4.99, "learning_rate": 2.003896499238965e-05, "loss": 1.4018, "step": 16400 }, { "epoch": 5.0, "learning_rate": 2.0008523592085235e-05, "loss": 1.4352, "step": 16425 }, { "epoch": 5.01, "learning_rate": 1.9978082191780824e-05, "loss": 1.1718, "step": 16450 }, { "epoch": 5.02, "learning_rate": 1.994764079147641e-05, "loss": 1.2079, "step": 16475 }, { "epoch": 5.02, "learning_rate": 1.9917199391171996e-05, "loss": 1.1671, "step": 16500 }, { "epoch": 5.03, "learning_rate": 1.988675799086758e-05, "loss": 1.1194, "step": 16525 }, { "epoch": 5.04, "learning_rate": 1.9856316590563167e-05, "loss": 1.2303, "step": 16550 }, { "epoch": 5.05, "learning_rate": 1.9825875190258753e-05, "loss": 1.1932, "step": 16575 }, { "epoch": 5.05, "learning_rate": 1.979543378995434e-05, "loss": 1.2365, "step": 16600 }, { "epoch": 5.06, "learning_rate": 1.9764992389649927e-05, "loss": 1.1352, "step": 16625 }, { "epoch": 5.07, "learning_rate": 1.973455098934551e-05, "loss": 1.205, "step": 16650 }, { "epoch": 5.08, "learning_rate": 1.97041095890411e-05, "loss": 1.2005, "step": 16675 }, { "epoch": 5.08, "learning_rate": 1.9673668188736684e-05, "loss": 1.2812, "step": 16700 }, { "epoch": 5.09, "learning_rate": 1.964322678843227e-05, "loss": 1.1773, "step": 16725 }, { "epoch": 5.1, "learning_rate": 1.9612785388127856e-05, "loss": 1.1904, "step": 16750 }, { "epoch": 5.11, "learning_rate": 1.958234398782344e-05, "loss": 1.2193, "step": 16775 }, { "epoch": 5.11, "learning_rate": 1.9551902587519027e-05, "loss": 1.1983, "step": 16800 }, { "epoch": 5.12, "learning_rate": 1.9521461187214613e-05, "loss": 1.1261, "step": 16825 }, { "epoch": 5.13, "learning_rate": 1.94910197869102e-05, "loss": 1.2459, "step": 16850 }, { "epoch": 5.14, "learning_rate": 1.9460578386605788e-05, "loss": 1.2696, "step": 16875 }, { "epoch": 5.14, "learning_rate": 1.943013698630137e-05, "loss": 1.1993, "step": 16900 }, { "epoch": 5.15, "learning_rate": 1.939969558599696e-05, "loss": 1.1595, "step": 16925 }, { "epoch": 5.16, "learning_rate": 1.9369254185692545e-05, "loss": 1.1438, "step": 16950 }, { "epoch": 5.17, "learning_rate": 1.933881278538813e-05, "loss": 1.2023, "step": 16975 }, { "epoch": 5.18, "learning_rate": 1.9308371385083716e-05, "loss": 1.2327, "step": 17000 }, { "epoch": 5.18, "learning_rate": 1.92779299847793e-05, "loss": 1.2655, "step": 17025 }, { "epoch": 5.19, "learning_rate": 1.9247488584474887e-05, "loss": 1.1495, "step": 17050 }, { "epoch": 5.2, "learning_rate": 1.9217047184170473e-05, "loss": 1.2121, "step": 17075 }, { "epoch": 5.21, "learning_rate": 1.918660578386606e-05, "loss": 1.2187, "step": 17100 }, { "epoch": 5.21, "learning_rate": 1.9156164383561648e-05, "loss": 1.2401, "step": 17125 }, { "epoch": 5.22, "learning_rate": 1.912572298325723e-05, "loss": 1.1772, "step": 17150 }, { "epoch": 5.23, "learning_rate": 1.909528158295282e-05, "loss": 1.2103, "step": 17175 }, { "epoch": 5.24, "learning_rate": 1.90648401826484e-05, "loss": 1.2021, "step": 17200 }, { "epoch": 5.24, "learning_rate": 1.9035616438356167e-05, "loss": 1.1839, "step": 17225 }, { "epoch": 5.25, "learning_rate": 1.9005175038051753e-05, "loss": 1.2107, "step": 17250 }, { "epoch": 5.26, "learning_rate": 1.897473363774734e-05, "loss": 1.2437, "step": 17275 }, { "epoch": 5.27, "learning_rate": 1.8944292237442924e-05, "loss": 1.2521, "step": 17300 }, { "epoch": 5.27, "learning_rate": 1.8915068493150687e-05, "loss": 1.1999, "step": 17325 }, { "epoch": 5.28, "learning_rate": 1.8884627092846272e-05, "loss": 1.1996, "step": 17350 }, { "epoch": 5.29, "learning_rate": 1.8854185692541858e-05, "loss": 1.245, "step": 17375 }, { "epoch": 5.3, "learning_rate": 1.8823744292237444e-05, "loss": 1.2139, "step": 17400 }, { "epoch": 5.3, "learning_rate": 1.879330289193303e-05, "loss": 1.196, "step": 17425 }, { "epoch": 5.31, "learning_rate": 1.8762861491628615e-05, "loss": 1.267, "step": 17450 }, { "epoch": 5.32, "learning_rate": 1.87324200913242e-05, "loss": 1.2152, "step": 17475 }, { "epoch": 5.33, "learning_rate": 1.870197869101979e-05, "loss": 1.2098, "step": 17500 }, { "epoch": 5.33, "learning_rate": 1.8671537290715372e-05, "loss": 1.2526, "step": 17525 }, { "epoch": 5.34, "learning_rate": 1.864109589041096e-05, "loss": 1.1557, "step": 17550 }, { "epoch": 5.35, "learning_rate": 1.8610654490106547e-05, "loss": 1.2998, "step": 17575 }, { "epoch": 5.36, "learning_rate": 1.8580213089802133e-05, "loss": 1.192, "step": 17600 }, { "epoch": 5.37, "learning_rate": 1.8549771689497718e-05, "loss": 1.1579, "step": 17625 }, { "epoch": 5.37, "learning_rate": 1.8519330289193304e-05, "loss": 1.2424, "step": 17650 }, { "epoch": 5.38, "learning_rate": 1.848888888888889e-05, "loss": 1.2178, "step": 17675 }, { "epoch": 5.39, "learning_rate": 1.8458447488584475e-05, "loss": 1.2272, "step": 17700 }, { "epoch": 5.4, "learning_rate": 1.842800608828006e-05, "loss": 1.2794, "step": 17725 }, { "epoch": 5.4, "learning_rate": 1.839756468797565e-05, "loss": 1.1844, "step": 17750 }, { "epoch": 5.41, "learning_rate": 1.8367123287671232e-05, "loss": 1.2341, "step": 17775 }, { "epoch": 5.42, "learning_rate": 1.833668188736682e-05, "loss": 1.1533, "step": 17800 }, { "epoch": 5.43, "learning_rate": 1.8306240487062407e-05, "loss": 1.2264, "step": 17825 }, { "epoch": 5.43, "learning_rate": 1.8275799086757993e-05, "loss": 1.222, "step": 17850 }, { "epoch": 5.44, "learning_rate": 1.824535768645358e-05, "loss": 1.1864, "step": 17875 }, { "epoch": 5.45, "learning_rate": 1.8214916286149164e-05, "loss": 1.1615, "step": 17900 }, { "epoch": 5.46, "learning_rate": 1.818447488584475e-05, "loss": 1.2101, "step": 17925 }, { "epoch": 5.46, "learning_rate": 1.8154033485540335e-05, "loss": 1.2291, "step": 17950 }, { "epoch": 5.47, "learning_rate": 1.812359208523592e-05, "loss": 1.1934, "step": 17975 }, { "epoch": 5.48, "learning_rate": 1.809315068493151e-05, "loss": 1.1713, "step": 18000 }, { "epoch": 5.49, "learning_rate": 1.8062709284627092e-05, "loss": 1.1374, "step": 18025 }, { "epoch": 5.49, "learning_rate": 1.803226788432268e-05, "loss": 1.1841, "step": 18050 }, { "epoch": 5.5, "learning_rate": 1.8001826484018264e-05, "loss": 1.2757, "step": 18075 }, { "epoch": 5.51, "learning_rate": 1.7971385083713853e-05, "loss": 1.225, "step": 18100 }, { "epoch": 5.52, "learning_rate": 1.794094368340944e-05, "loss": 1.1889, "step": 18125 }, { "epoch": 5.53, "learning_rate": 1.7910502283105024e-05, "loss": 1.2067, "step": 18150 }, { "epoch": 5.53, "learning_rate": 1.788006088280061e-05, "loss": 1.2683, "step": 18175 }, { "epoch": 5.54, "learning_rate": 1.7849619482496196e-05, "loss": 1.194, "step": 18200 }, { "epoch": 5.55, "learning_rate": 1.781917808219178e-05, "loss": 1.2623, "step": 18225 }, { "epoch": 5.56, "learning_rate": 1.7788736681887367e-05, "loss": 1.2462, "step": 18250 }, { "epoch": 5.56, "learning_rate": 1.7758295281582953e-05, "loss": 1.2488, "step": 18275 }, { "epoch": 5.57, "learning_rate": 1.7727853881278542e-05, "loss": 1.1575, "step": 18300 }, { "epoch": 5.58, "learning_rate": 1.7697412480974124e-05, "loss": 1.3261, "step": 18325 }, { "epoch": 5.59, "learning_rate": 1.7666971080669713e-05, "loss": 1.1545, "step": 18350 }, { "epoch": 5.59, "learning_rate": 1.76365296803653e-05, "loss": 1.2144, "step": 18375 }, { "epoch": 5.6, "learning_rate": 1.7606088280060884e-05, "loss": 1.2061, "step": 18400 }, { "epoch": 5.61, "learning_rate": 1.757564687975647e-05, "loss": 1.2538, "step": 18425 }, { "epoch": 5.62, "learning_rate": 1.7545205479452056e-05, "loss": 1.238, "step": 18450 }, { "epoch": 5.62, "learning_rate": 1.751476407914764e-05, "loss": 1.1711, "step": 18475 }, { "epoch": 5.63, "learning_rate": 1.7484322678843227e-05, "loss": 1.2815, "step": 18500 }, { "epoch": 5.64, "learning_rate": 1.7453881278538813e-05, "loss": 1.2561, "step": 18525 }, { "epoch": 5.65, "learning_rate": 1.7423439878234402e-05, "loss": 1.2703, "step": 18550 }, { "epoch": 5.65, "learning_rate": 1.7392998477929984e-05, "loss": 1.2548, "step": 18575 }, { "epoch": 5.66, "learning_rate": 1.7362557077625573e-05, "loss": 1.213, "step": 18600 }, { "epoch": 5.67, "learning_rate": 1.733211567732116e-05, "loss": 1.2203, "step": 18625 }, { "epoch": 5.68, "learning_rate": 1.7301674277016745e-05, "loss": 1.2282, "step": 18650 }, { "epoch": 5.68, "learning_rate": 1.727123287671233e-05, "loss": 1.1951, "step": 18675 }, { "epoch": 5.69, "learning_rate": 1.7240791476407916e-05, "loss": 1.2652, "step": 18700 }, { "epoch": 5.7, "learning_rate": 1.72103500761035e-05, "loss": 1.1692, "step": 18725 }, { "epoch": 5.71, "learning_rate": 1.7179908675799087e-05, "loss": 1.2708, "step": 18750 }, { "epoch": 5.72, "learning_rate": 1.7149467275494673e-05, "loss": 1.1725, "step": 18775 }, { "epoch": 5.72, "learning_rate": 1.7119025875190262e-05, "loss": 1.1644, "step": 18800 }, { "epoch": 5.73, "learning_rate": 1.7088584474885844e-05, "loss": 1.2263, "step": 18825 }, { "epoch": 5.74, "learning_rate": 1.7058143074581433e-05, "loss": 1.2848, "step": 18850 }, { "epoch": 5.75, "learning_rate": 1.7027701674277016e-05, "loss": 1.2118, "step": 18875 }, { "epoch": 5.75, "learning_rate": 1.6997260273972605e-05, "loss": 1.2332, "step": 18900 }, { "epoch": 5.76, "learning_rate": 1.696681887366819e-05, "loss": 1.1837, "step": 18925 }, { "epoch": 5.77, "learning_rate": 1.6936377473363776e-05, "loss": 1.2719, "step": 18950 }, { "epoch": 5.78, "learning_rate": 1.6905936073059362e-05, "loss": 1.1825, "step": 18975 }, { "epoch": 5.78, "learning_rate": 1.6875494672754948e-05, "loss": 1.2161, "step": 19000 }, { "epoch": 5.79, "learning_rate": 1.6845053272450533e-05, "loss": 1.2578, "step": 19025 }, { "epoch": 5.8, "learning_rate": 1.681461187214612e-05, "loss": 1.2, "step": 19050 }, { "epoch": 5.81, "learning_rate": 1.6784170471841705e-05, "loss": 1.2705, "step": 19075 }, { "epoch": 5.81, "learning_rate": 1.6753729071537294e-05, "loss": 1.2378, "step": 19100 }, { "epoch": 5.82, "learning_rate": 1.6723287671232876e-05, "loss": 1.2475, "step": 19125 }, { "epoch": 5.83, "learning_rate": 1.6692846270928465e-05, "loss": 1.2566, "step": 19150 }, { "epoch": 5.84, "learning_rate": 1.666240487062405e-05, "loss": 1.3268, "step": 19175 }, { "epoch": 5.84, "learning_rate": 1.6631963470319636e-05, "loss": 1.244, "step": 19200 }, { "epoch": 5.85, "learning_rate": 1.6601522070015222e-05, "loss": 1.2251, "step": 19225 }, { "epoch": 5.86, "learning_rate": 1.6571080669710808e-05, "loss": 1.1907, "step": 19250 }, { "epoch": 5.87, "learning_rate": 1.6540639269406393e-05, "loss": 1.1661, "step": 19275 }, { "epoch": 5.88, "learning_rate": 1.651019786910198e-05, "loss": 1.1985, "step": 19300 }, { "epoch": 5.88, "learning_rate": 1.6479756468797565e-05, "loss": 1.1985, "step": 19325 }, { "epoch": 5.89, "learning_rate": 1.6449315068493154e-05, "loss": 1.2457, "step": 19350 }, { "epoch": 5.9, "learning_rate": 1.6418873668188736e-05, "loss": 1.1869, "step": 19375 }, { "epoch": 5.91, "learning_rate": 1.6388432267884325e-05, "loss": 1.1581, "step": 19400 }, { "epoch": 5.91, "learning_rate": 1.635799086757991e-05, "loss": 1.2958, "step": 19425 }, { "epoch": 5.92, "learning_rate": 1.6327549467275497e-05, "loss": 1.1531, "step": 19450 }, { "epoch": 5.93, "learning_rate": 1.6297108066971082e-05, "loss": 1.2319, "step": 19475 }, { "epoch": 5.94, "learning_rate": 1.6266666666666668e-05, "loss": 1.2317, "step": 19500 }, { "epoch": 5.94, "learning_rate": 1.6236225266362254e-05, "loss": 1.1822, "step": 19525 }, { "epoch": 5.95, "learning_rate": 1.620578386605784e-05, "loss": 1.2323, "step": 19550 }, { "epoch": 5.96, "learning_rate": 1.6175342465753425e-05, "loss": 1.2421, "step": 19575 }, { "epoch": 5.97, "learning_rate": 1.6144901065449014e-05, "loss": 1.1909, "step": 19600 }, { "epoch": 5.97, "learning_rate": 1.6114459665144596e-05, "loss": 1.1956, "step": 19625 }, { "epoch": 5.98, "learning_rate": 1.6084018264840185e-05, "loss": 1.2423, "step": 19650 }, { "epoch": 5.99, "learning_rate": 1.6053576864535768e-05, "loss": 1.1934, "step": 19675 }, { "epoch": 6.0, "learning_rate": 1.6023135464231357e-05, "loss": 1.2855, "step": 19700 }, { "epoch": 6.0, "learning_rate": 1.5992694063926942e-05, "loss": 1.1549, "step": 19725 }, { "epoch": 6.01, "learning_rate": 1.5962252663622528e-05, "loss": 1.0158, "step": 19750 }, { "epoch": 6.02, "learning_rate": 1.5931811263318114e-05, "loss": 1.1329, "step": 19775 }, { "epoch": 6.03, "learning_rate": 1.59013698630137e-05, "loss": 1.0144, "step": 19800 }, { "epoch": 6.04, "learning_rate": 1.5870928462709285e-05, "loss": 1.0526, "step": 19825 }, { "epoch": 6.04, "learning_rate": 1.584048706240487e-05, "loss": 1.0858, "step": 19850 }, { "epoch": 6.05, "learning_rate": 1.5810045662100456e-05, "loss": 1.0668, "step": 19875 }, { "epoch": 6.06, "learning_rate": 1.5779604261796046e-05, "loss": 1.0202, "step": 19900 }, { "epoch": 6.07, "learning_rate": 1.5749162861491628e-05, "loss": 1.0321, "step": 19925 }, { "epoch": 6.07, "learning_rate": 1.5718721461187217e-05, "loss": 1.0682, "step": 19950 }, { "epoch": 6.08, "learning_rate": 1.5688280060882803e-05, "loss": 1.0644, "step": 19975 }, { "epoch": 6.09, "learning_rate": 1.5657838660578388e-05, "loss": 1.1266, "step": 20000 }, { "epoch": 6.1, "learning_rate": 1.5627397260273974e-05, "loss": 1.0861, "step": 20025 }, { "epoch": 6.1, "learning_rate": 1.559695585996956e-05, "loss": 1.0446, "step": 20050 }, { "epoch": 6.11, "learning_rate": 1.5566514459665145e-05, "loss": 1.0408, "step": 20075 }, { "epoch": 6.12, "learning_rate": 1.553607305936073e-05, "loss": 1.0181, "step": 20100 }, { "epoch": 6.13, "learning_rate": 1.5505631659056317e-05, "loss": 1.0721, "step": 20125 }, { "epoch": 6.13, "learning_rate": 1.5475190258751906e-05, "loss": 1.08, "step": 20150 }, { "epoch": 6.14, "learning_rate": 1.5444748858447488e-05, "loss": 1.1263, "step": 20175 }, { "epoch": 6.15, "learning_rate": 1.5414307458143077e-05, "loss": 1.0331, "step": 20200 }, { "epoch": 6.16, "learning_rate": 1.5383866057838663e-05, "loss": 1.0047, "step": 20225 }, { "epoch": 6.16, "learning_rate": 1.535342465753425e-05, "loss": 1.0146, "step": 20250 }, { "epoch": 6.17, "learning_rate": 1.5322983257229834e-05, "loss": 1.0828, "step": 20275 }, { "epoch": 6.18, "learning_rate": 1.529254185692542e-05, "loss": 1.0656, "step": 20300 }, { "epoch": 6.19, "learning_rate": 1.5262100456621006e-05, "loss": 1.0661, "step": 20325 }, { "epoch": 6.19, "learning_rate": 1.5231659056316593e-05, "loss": 1.0858, "step": 20350 }, { "epoch": 6.2, "learning_rate": 1.5201217656012177e-05, "loss": 1.0975, "step": 20375 }, { "epoch": 6.21, "learning_rate": 1.5170776255707764e-05, "loss": 1.0874, "step": 20400 }, { "epoch": 6.22, "learning_rate": 1.514033485540335e-05, "loss": 1.061, "step": 20425 }, { "epoch": 6.23, "learning_rate": 1.5109893455098936e-05, "loss": 1.0563, "step": 20450 }, { "epoch": 6.23, "learning_rate": 1.5079452054794521e-05, "loss": 1.0779, "step": 20475 }, { "epoch": 6.24, "learning_rate": 1.5049010654490109e-05, "loss": 1.0346, "step": 20500 }, { "epoch": 6.25, "learning_rate": 1.5018569254185693e-05, "loss": 1.0741, "step": 20525 }, { "epoch": 6.26, "learning_rate": 1.498812785388128e-05, "loss": 1.0446, "step": 20550 }, { "epoch": 6.26, "learning_rate": 1.4957686453576866e-05, "loss": 1.0447, "step": 20575 }, { "epoch": 6.27, "learning_rate": 1.4927245053272451e-05, "loss": 1.0887, "step": 20600 }, { "epoch": 6.28, "learning_rate": 1.4896803652968037e-05, "loss": 1.0644, "step": 20625 }, { "epoch": 6.29, "learning_rate": 1.4866362252663624e-05, "loss": 1.0262, "step": 20650 }, { "epoch": 6.29, "learning_rate": 1.4835920852359208e-05, "loss": 1.0814, "step": 20675 }, { "epoch": 6.3, "learning_rate": 1.4805479452054796e-05, "loss": 1.0796, "step": 20700 }, { "epoch": 6.31, "learning_rate": 1.4775038051750381e-05, "loss": 1.0907, "step": 20725 }, { "epoch": 6.32, "learning_rate": 1.4744596651445969e-05, "loss": 1.0584, "step": 20750 }, { "epoch": 6.32, "learning_rate": 1.4714155251141553e-05, "loss": 1.0504, "step": 20775 }, { "epoch": 6.33, "learning_rate": 1.468371385083714e-05, "loss": 1.0383, "step": 20800 }, { "epoch": 6.34, "learning_rate": 1.4653272450532726e-05, "loss": 1.0387, "step": 20825 }, { "epoch": 6.35, "learning_rate": 1.4622831050228312e-05, "loss": 1.0644, "step": 20850 }, { "epoch": 6.35, "learning_rate": 1.4592389649923897e-05, "loss": 1.008, "step": 20875 }, { "epoch": 6.36, "learning_rate": 1.4561948249619485e-05, "loss": 1.086, "step": 20900 }, { "epoch": 6.37, "learning_rate": 1.4531506849315069e-05, "loss": 1.0496, "step": 20925 }, { "epoch": 6.38, "learning_rate": 1.4501065449010656e-05, "loss": 0.9918, "step": 20950 }, { "epoch": 6.39, "learning_rate": 1.4470624048706242e-05, "loss": 1.0559, "step": 20975 }, { "epoch": 6.39, "learning_rate": 1.4440182648401827e-05, "loss": 1.0467, "step": 21000 }, { "epoch": 6.4, "learning_rate": 1.4409741248097413e-05, "loss": 1.0557, "step": 21025 }, { "epoch": 6.41, "learning_rate": 1.4379299847793e-05, "loss": 1.0846, "step": 21050 }, { "epoch": 6.42, "learning_rate": 1.4348858447488584e-05, "loss": 1.0836, "step": 21075 }, { "epoch": 6.42, "learning_rate": 1.4318417047184172e-05, "loss": 1.082, "step": 21100 }, { "epoch": 6.43, "learning_rate": 1.4287975646879757e-05, "loss": 1.0596, "step": 21125 }, { "epoch": 6.44, "learning_rate": 1.4257534246575345e-05, "loss": 1.0321, "step": 21150 }, { "epoch": 6.45, "learning_rate": 1.4227092846270929e-05, "loss": 1.0805, "step": 21175 }, { "epoch": 6.45, "learning_rate": 1.4196651445966516e-05, "loss": 1.1013, "step": 21200 }, { "epoch": 6.46, "learning_rate": 1.4166210045662102e-05, "loss": 1.0287, "step": 21225 }, { "epoch": 6.47, "learning_rate": 1.4135768645357688e-05, "loss": 1.0995, "step": 21250 }, { "epoch": 6.48, "learning_rate": 1.4105327245053273e-05, "loss": 1.0834, "step": 21275 }, { "epoch": 6.48, "learning_rate": 1.407488584474886e-05, "loss": 1.0702, "step": 21300 }, { "epoch": 6.49, "learning_rate": 1.4045662100456623e-05, "loss": 1.0709, "step": 21325 }, { "epoch": 6.5, "learning_rate": 1.4015220700152209e-05, "loss": 1.126, "step": 21350 }, { "epoch": 6.51, "learning_rate": 1.3984779299847794e-05, "loss": 1.0661, "step": 21375 }, { "epoch": 6.51, "learning_rate": 1.395433789954338e-05, "loss": 1.0154, "step": 21400 }, { "epoch": 6.52, "learning_rate": 1.3923896499238967e-05, "loss": 1.0203, "step": 21425 }, { "epoch": 6.53, "learning_rate": 1.3893455098934551e-05, "loss": 1.0997, "step": 21450 }, { "epoch": 6.54, "learning_rate": 1.3863013698630139e-05, "loss": 1.1286, "step": 21475 }, { "epoch": 6.54, "learning_rate": 1.3832572298325724e-05, "loss": 1.0367, "step": 21500 }, { "epoch": 6.55, "learning_rate": 1.380213089802131e-05, "loss": 1.0712, "step": 21525 }, { "epoch": 6.56, "learning_rate": 1.3771689497716896e-05, "loss": 1.0636, "step": 21550 }, { "epoch": 6.57, "learning_rate": 1.3741248097412483e-05, "loss": 1.056, "step": 21575 }, { "epoch": 6.58, "learning_rate": 1.3710806697108067e-05, "loss": 1.107, "step": 21600 }, { "epoch": 6.58, "learning_rate": 1.3680365296803655e-05, "loss": 1.1084, "step": 21625 }, { "epoch": 6.59, "learning_rate": 1.364992389649924e-05, "loss": 1.0469, "step": 21650 }, { "epoch": 6.6, "learning_rate": 1.3619482496194828e-05, "loss": 1.0499, "step": 21675 }, { "epoch": 6.61, "learning_rate": 1.3589041095890412e-05, "loss": 1.0193, "step": 21700 }, { "epoch": 6.61, "learning_rate": 1.3558599695585999e-05, "loss": 1.0213, "step": 21725 }, { "epoch": 6.62, "learning_rate": 1.3528158295281583e-05, "loss": 1.0763, "step": 21750 }, { "epoch": 6.63, "learning_rate": 1.349771689497717e-05, "loss": 1.0602, "step": 21775 }, { "epoch": 6.64, "learning_rate": 1.3467275494672756e-05, "loss": 1.0075, "step": 21800 }, { "epoch": 6.64, "learning_rate": 1.3436834094368343e-05, "loss": 1.0394, "step": 21825 }, { "epoch": 6.65, "learning_rate": 1.3406392694063927e-05, "loss": 1.1018, "step": 21850 }, { "epoch": 6.66, "learning_rate": 1.3375951293759515e-05, "loss": 1.1407, "step": 21875 }, { "epoch": 6.67, "learning_rate": 1.33455098934551e-05, "loss": 1.0651, "step": 21900 }, { "epoch": 6.67, "learning_rate": 1.3316286149162863e-05, "loss": 1.1539, "step": 21925 }, { "epoch": 6.68, "learning_rate": 1.328584474885845e-05, "loss": 1.1164, "step": 21950 }, { "epoch": 6.69, "learning_rate": 1.3255403348554034e-05, "loss": 1.0859, "step": 21975 }, { "epoch": 6.7, "learning_rate": 1.3224961948249621e-05, "loss": 1.0339, "step": 22000 }, { "epoch": 6.7, "learning_rate": 1.3194520547945207e-05, "loss": 1.0881, "step": 22025 }, { "epoch": 6.71, "learning_rate": 1.3164079147640793e-05, "loss": 1.0617, "step": 22050 }, { "epoch": 6.72, "learning_rate": 1.3133637747336379e-05, "loss": 1.0946, "step": 22075 }, { "epoch": 6.73, "learning_rate": 1.3103196347031966e-05, "loss": 1.0516, "step": 22100 }, { "epoch": 6.74, "learning_rate": 1.307275494672755e-05, "loss": 1.0097, "step": 22125 }, { "epoch": 6.74, "learning_rate": 1.3042313546423137e-05, "loss": 0.9982, "step": 22150 }, { "epoch": 6.75, "learning_rate": 1.3011872146118723e-05, "loss": 1.0149, "step": 22175 }, { "epoch": 6.76, "learning_rate": 1.298143074581431e-05, "loss": 1.0674, "step": 22200 }, { "epoch": 6.77, "learning_rate": 1.2950989345509894e-05, "loss": 1.0485, "step": 22225 }, { "epoch": 6.77, "learning_rate": 1.2920547945205482e-05, "loss": 1.0641, "step": 22250 }, { "epoch": 6.78, "learning_rate": 1.2890106544901066e-05, "loss": 1.0306, "step": 22275 }, { "epoch": 6.79, "learning_rate": 1.2859665144596653e-05, "loss": 1.0526, "step": 22300 }, { "epoch": 6.8, "learning_rate": 1.2829223744292239e-05, "loss": 1.1053, "step": 22325 }, { "epoch": 6.8, "learning_rate": 1.2798782343987823e-05, "loss": 1.069, "step": 22350 }, { "epoch": 6.81, "learning_rate": 1.276834094368341e-05, "loss": 1.0654, "step": 22375 }, { "epoch": 6.82, "learning_rate": 1.2737899543378996e-05, "loss": 1.0341, "step": 22400 }, { "epoch": 6.83, "learning_rate": 1.2707458143074583e-05, "loss": 1.0856, "step": 22425 }, { "epoch": 6.83, "learning_rate": 1.2677016742770167e-05, "loss": 1.0456, "step": 22450 }, { "epoch": 6.84, "learning_rate": 1.2646575342465755e-05, "loss": 1.0782, "step": 22475 }, { "epoch": 6.85, "learning_rate": 1.261613394216134e-05, "loss": 1.0911, "step": 22500 }, { "epoch": 6.86, "learning_rate": 1.2585692541856926e-05, "loss": 1.0151, "step": 22525 }, { "epoch": 6.86, "learning_rate": 1.2555251141552512e-05, "loss": 1.1133, "step": 22550 }, { "epoch": 6.87, "learning_rate": 1.2524809741248099e-05, "loss": 1.098, "step": 22575 }, { "epoch": 6.88, "learning_rate": 1.2494368340943683e-05, "loss": 1.0943, "step": 22600 }, { "epoch": 6.89, "learning_rate": 1.246392694063927e-05, "loss": 1.0622, "step": 22625 }, { "epoch": 6.89, "learning_rate": 1.2433485540334856e-05, "loss": 1.0801, "step": 22650 }, { "epoch": 6.9, "learning_rate": 1.2403044140030442e-05, "loss": 1.0496, "step": 22675 }, { "epoch": 6.91, "learning_rate": 1.2372602739726027e-05, "loss": 1.1017, "step": 22700 }, { "epoch": 6.92, "learning_rate": 1.2342161339421615e-05, "loss": 1.045, "step": 22725 }, { "epoch": 6.93, "learning_rate": 1.2311719939117199e-05, "loss": 1.1071, "step": 22750 }, { "epoch": 6.93, "learning_rate": 1.2281278538812786e-05, "loss": 1.0852, "step": 22775 }, { "epoch": 6.94, "learning_rate": 1.2250837138508372e-05, "loss": 1.1576, "step": 22800 }, { "epoch": 6.95, "learning_rate": 1.2220395738203959e-05, "loss": 1.079, "step": 22825 }, { "epoch": 6.96, "learning_rate": 1.2189954337899543e-05, "loss": 1.1521, "step": 22850 }, { "epoch": 6.96, "learning_rate": 1.215951293759513e-05, "loss": 1.0449, "step": 22875 }, { "epoch": 6.97, "learning_rate": 1.2129071537290714e-05, "loss": 1.1417, "step": 22900 }, { "epoch": 6.98, "learning_rate": 1.2098630136986302e-05, "loss": 1.0472, "step": 22925 }, { "epoch": 6.99, "learning_rate": 1.2068188736681888e-05, "loss": 1.0093, "step": 22950 }, { "epoch": 6.99, "learning_rate": 1.2037747336377475e-05, "loss": 1.0782, "step": 22975 }, { "epoch": 7.0, "learning_rate": 1.2007305936073059e-05, "loss": 1.0471, "step": 23000 }, { "epoch": 7.01, "learning_rate": 1.1976864535768646e-05, "loss": 0.9486, "step": 23025 }, { "epoch": 7.02, "learning_rate": 1.1946423135464232e-05, "loss": 0.974, "step": 23050 }, { "epoch": 7.02, "learning_rate": 1.1915981735159818e-05, "loss": 0.9259, "step": 23075 }, { "epoch": 7.03, "learning_rate": 1.1885540334855403e-05, "loss": 0.928, "step": 23100 }, { "epoch": 7.04, "learning_rate": 1.185509893455099e-05, "loss": 0.9293, "step": 23125 }, { "epoch": 7.05, "learning_rate": 1.1824657534246575e-05, "loss": 0.9387, "step": 23150 }, { "epoch": 7.05, "learning_rate": 1.1794216133942162e-05, "loss": 0.8687, "step": 23175 }, { "epoch": 7.06, "learning_rate": 1.1763774733637748e-05, "loss": 0.9467, "step": 23200 }, { "epoch": 7.07, "learning_rate": 1.1733333333333335e-05, "loss": 0.8817, "step": 23225 }, { "epoch": 7.08, "learning_rate": 1.1702891933028919e-05, "loss": 0.9304, "step": 23250 }, { "epoch": 7.09, "learning_rate": 1.1672450532724506e-05, "loss": 0.9319, "step": 23275 }, { "epoch": 7.09, "learning_rate": 1.164200913242009e-05, "loss": 0.9488, "step": 23300 }, { "epoch": 7.1, "learning_rate": 1.1611567732115678e-05, "loss": 0.9054, "step": 23325 }, { "epoch": 7.11, "learning_rate": 1.1581126331811263e-05, "loss": 0.9246, "step": 23350 }, { "epoch": 7.12, "learning_rate": 1.155068493150685e-05, "loss": 0.898, "step": 23375 }, { "epoch": 7.12, "learning_rate": 1.1520243531202435e-05, "loss": 0.947, "step": 23400 }, { "epoch": 7.13, "learning_rate": 1.1489802130898022e-05, "loss": 0.9288, "step": 23425 }, { "epoch": 7.14, "learning_rate": 1.1459360730593608e-05, "loss": 0.9544, "step": 23450 }, { "epoch": 7.15, "learning_rate": 1.1428919330289194e-05, "loss": 0.9711, "step": 23475 }, { "epoch": 7.15, "learning_rate": 1.139847792998478e-05, "loss": 0.9133, "step": 23500 }, { "epoch": 7.16, "learning_rate": 1.1368036529680367e-05, "loss": 0.9182, "step": 23525 }, { "epoch": 7.17, "learning_rate": 1.133759512937595e-05, "loss": 0.8882, "step": 23550 }, { "epoch": 7.18, "learning_rate": 1.1307153729071538e-05, "loss": 0.9431, "step": 23575 }, { "epoch": 7.18, "learning_rate": 1.1276712328767124e-05, "loss": 0.9347, "step": 23600 }, { "epoch": 7.19, "learning_rate": 1.1246270928462711e-05, "loss": 0.9397, "step": 23625 }, { "epoch": 7.2, "learning_rate": 1.1215829528158295e-05, "loss": 0.8936, "step": 23650 }, { "epoch": 7.21, "learning_rate": 1.1185388127853882e-05, "loss": 0.9422, "step": 23675 }, { "epoch": 7.21, "learning_rate": 1.1154946727549466e-05, "loss": 0.994, "step": 23700 }, { "epoch": 7.22, "learning_rate": 1.1124505327245054e-05, "loss": 0.9458, "step": 23725 }, { "epoch": 7.23, "learning_rate": 1.109406392694064e-05, "loss": 0.9833, "step": 23750 }, { "epoch": 7.24, "learning_rate": 1.1063622526636227e-05, "loss": 0.9156, "step": 23775 }, { "epoch": 7.25, "learning_rate": 1.103318112633181e-05, "loss": 0.952, "step": 23800 }, { "epoch": 7.25, "learning_rate": 1.1002739726027398e-05, "loss": 0.9329, "step": 23825 }, { "epoch": 7.26, "learning_rate": 1.0972298325722984e-05, "loss": 0.969, "step": 23850 }, { "epoch": 7.27, "learning_rate": 1.094185692541857e-05, "loss": 0.9036, "step": 23875 }, { "epoch": 7.28, "learning_rate": 1.0911415525114155e-05, "loss": 0.893, "step": 23900 }, { "epoch": 7.28, "learning_rate": 1.0880974124809743e-05, "loss": 1.0085, "step": 23925 }, { "epoch": 7.29, "learning_rate": 1.0850532724505327e-05, "loss": 0.92, "step": 23950 }, { "epoch": 7.3, "learning_rate": 1.0820091324200914e-05, "loss": 0.928, "step": 23975 }, { "epoch": 7.31, "learning_rate": 1.07896499238965e-05, "loss": 0.9017, "step": 24000 }, { "epoch": 7.31, "learning_rate": 1.0759208523592087e-05, "loss": 0.9032, "step": 24025 }, { "epoch": 7.32, "learning_rate": 1.0728767123287671e-05, "loss": 0.9073, "step": 24050 }, { "epoch": 7.33, "learning_rate": 1.0698325722983258e-05, "loss": 0.9447, "step": 24075 }, { "epoch": 7.34, "learning_rate": 1.0667884322678842e-05, "loss": 0.9341, "step": 24100 }, { "epoch": 7.34, "learning_rate": 1.0638660578386606e-05, "loss": 0.9369, "step": 24125 }, { "epoch": 7.35, "learning_rate": 1.0608219178082194e-05, "loss": 0.9721, "step": 24150 }, { "epoch": 7.36, "learning_rate": 1.0577777777777778e-05, "loss": 0.9367, "step": 24175 }, { "epoch": 7.37, "learning_rate": 1.0547336377473365e-05, "loss": 0.9746, "step": 24200 }, { "epoch": 7.37, "learning_rate": 1.0516894977168949e-05, "loss": 0.914, "step": 24225 }, { "epoch": 7.38, "learning_rate": 1.0486453576864537e-05, "loss": 0.8861, "step": 24250 }, { "epoch": 7.39, "learning_rate": 1.0456012176560122e-05, "loss": 0.9416, "step": 24275 }, { "epoch": 7.4, "learning_rate": 1.042557077625571e-05, "loss": 0.9378, "step": 24300 }, { "epoch": 7.4, "learning_rate": 1.0395129375951294e-05, "loss": 0.9149, "step": 24325 }, { "epoch": 7.41, "learning_rate": 1.0364687975646881e-05, "loss": 0.9661, "step": 24350 }, { "epoch": 7.42, "learning_rate": 1.0334246575342467e-05, "loss": 0.9946, "step": 24375 }, { "epoch": 7.43, "learning_rate": 1.0303805175038052e-05, "loss": 0.9153, "step": 24400 }, { "epoch": 7.44, "learning_rate": 1.0273363774733638e-05, "loss": 1.0122, "step": 24425 }, { "epoch": 7.44, "learning_rate": 1.0242922374429225e-05, "loss": 0.9563, "step": 24450 }, { "epoch": 7.45, "learning_rate": 1.021248097412481e-05, "loss": 0.9451, "step": 24475 }, { "epoch": 7.46, "learning_rate": 1.0182039573820397e-05, "loss": 0.9543, "step": 24500 }, { "epoch": 7.47, "learning_rate": 1.0151598173515982e-05, "loss": 0.9336, "step": 24525 }, { "epoch": 7.47, "learning_rate": 1.012115677321157e-05, "loss": 0.9474, "step": 24550 }, { "epoch": 7.48, "learning_rate": 1.0090715372907154e-05, "loss": 0.9503, "step": 24575 }, { "epoch": 7.49, "learning_rate": 1.0060273972602741e-05, "loss": 0.9395, "step": 24600 }, { "epoch": 7.5, "learning_rate": 1.0029832572298325e-05, "loss": 0.9076, "step": 24625 }, { "epoch": 7.5, "learning_rate": 9.999391171993912e-06, "loss": 0.9026, "step": 24650 }, { "epoch": 7.51, "learning_rate": 9.968949771689498e-06, "loss": 0.9039, "step": 24675 }, { "epoch": 7.52, "learning_rate": 9.938508371385086e-06, "loss": 0.9744, "step": 24700 }, { "epoch": 7.53, "learning_rate": 9.908066971080671e-06, "loss": 0.9818, "step": 24725 }, { "epoch": 7.53, "learning_rate": 9.877625570776257e-06, "loss": 0.8859, "step": 24750 }, { "epoch": 7.54, "learning_rate": 9.847184170471843e-06, "loss": 0.9602, "step": 24775 }, { "epoch": 7.55, "learning_rate": 9.816742770167428e-06, "loss": 0.9405, "step": 24800 }, { "epoch": 7.56, "learning_rate": 9.786301369863016e-06, "loss": 0.9669, "step": 24825 }, { "epoch": 7.56, "learning_rate": 9.755859969558601e-06, "loss": 0.9106, "step": 24850 }, { "epoch": 7.57, "learning_rate": 9.725418569254187e-06, "loss": 0.9118, "step": 24875 }, { "epoch": 7.58, "learning_rate": 9.694977168949773e-06, "loss": 0.981, "step": 24900 }, { "epoch": 7.59, "learning_rate": 9.664535768645358e-06, "loss": 0.974, "step": 24925 }, { "epoch": 7.6, "learning_rate": 9.634094368340946e-06, "loss": 0.8843, "step": 24950 }, { "epoch": 7.6, "learning_rate": 9.603652968036531e-06, "loss": 0.9714, "step": 24975 }, { "epoch": 7.61, "learning_rate": 9.573211567732117e-06, "loss": 0.9101, "step": 25000 }, { "epoch": 7.62, "learning_rate": 9.542770167427703e-06, "loss": 0.9264, "step": 25025 }, { "epoch": 7.63, "learning_rate": 9.512328767123288e-06, "loss": 0.9459, "step": 25050 }, { "epoch": 7.63, "learning_rate": 9.481887366818874e-06, "loss": 0.9391, "step": 25075 }, { "epoch": 7.64, "learning_rate": 9.451445966514461e-06, "loss": 0.9473, "step": 25100 }, { "epoch": 7.65, "learning_rate": 9.421004566210047e-06, "loss": 0.897, "step": 25125 }, { "epoch": 7.66, "learning_rate": 9.390563165905633e-06, "loss": 0.9351, "step": 25150 }, { "epoch": 7.66, "learning_rate": 9.360121765601219e-06, "loss": 0.9249, "step": 25175 }, { "epoch": 7.67, "learning_rate": 9.329680365296804e-06, "loss": 0.9407, "step": 25200 }, { "epoch": 7.68, "learning_rate": 9.299238964992392e-06, "loss": 0.9331, "step": 25225 }, { "epoch": 7.69, "learning_rate": 9.268797564687977e-06, "loss": 0.9749, "step": 25250 }, { "epoch": 7.69, "learning_rate": 9.238356164383563e-06, "loss": 0.981, "step": 25275 }, { "epoch": 7.7, "learning_rate": 9.207914764079149e-06, "loss": 0.9334, "step": 25300 }, { "epoch": 7.71, "learning_rate": 9.177473363774734e-06, "loss": 0.928, "step": 25325 }, { "epoch": 7.72, "learning_rate": 9.147031963470322e-06, "loss": 0.8981, "step": 25350 }, { "epoch": 7.72, "learning_rate": 9.116590563165907e-06, "loss": 0.9735, "step": 25375 }, { "epoch": 7.73, "learning_rate": 9.086149162861493e-06, "loss": 0.9042, "step": 25400 }, { "epoch": 7.74, "learning_rate": 9.055707762557079e-06, "loss": 0.9443, "step": 25425 }, { "epoch": 7.75, "learning_rate": 9.025266362252664e-06, "loss": 0.9571, "step": 25450 }, { "epoch": 7.75, "learning_rate": 8.99482496194825e-06, "loss": 0.9154, "step": 25475 }, { "epoch": 7.76, "learning_rate": 8.964383561643837e-06, "loss": 0.9131, "step": 25500 }, { "epoch": 7.77, "learning_rate": 8.933942161339423e-06, "loss": 0.9096, "step": 25525 }, { "epoch": 7.78, "learning_rate": 8.903500761035009e-06, "loss": 1.0131, "step": 25550 }, { "epoch": 7.79, "learning_rate": 8.873059360730594e-06, "loss": 0.9257, "step": 25575 }, { "epoch": 7.79, "learning_rate": 8.84261796042618e-06, "loss": 0.9627, "step": 25600 }, { "epoch": 7.8, "learning_rate": 8.812176560121768e-06, "loss": 0.9599, "step": 25625 }, { "epoch": 7.81, "learning_rate": 8.781735159817353e-06, "loss": 1.0309, "step": 25650 }, { "epoch": 7.82, "learning_rate": 8.751293759512939e-06, "loss": 0.9223, "step": 25675 }, { "epoch": 7.82, "learning_rate": 8.720852359208525e-06, "loss": 0.9609, "step": 25700 }, { "epoch": 7.83, "learning_rate": 8.69041095890411e-06, "loss": 0.9848, "step": 25725 }, { "epoch": 7.84, "learning_rate": 8.659969558599698e-06, "loss": 0.9744, "step": 25750 }, { "epoch": 7.85, "learning_rate": 8.629528158295283e-06, "loss": 0.9079, "step": 25775 }, { "epoch": 7.85, "learning_rate": 8.599086757990869e-06, "loss": 0.9524, "step": 25800 }, { "epoch": 7.86, "learning_rate": 8.568645357686455e-06, "loss": 0.8825, "step": 25825 }, { "epoch": 7.87, "learning_rate": 8.53820395738204e-06, "loss": 0.9648, "step": 25850 }, { "epoch": 7.88, "learning_rate": 8.507762557077626e-06, "loss": 0.9268, "step": 25875 }, { "epoch": 7.88, "learning_rate": 8.477321156773213e-06, "loss": 1.0055, "step": 25900 }, { "epoch": 7.89, "learning_rate": 8.446879756468799e-06, "loss": 0.9859, "step": 25925 }, { "epoch": 7.9, "learning_rate": 8.416438356164385e-06, "loss": 0.876, "step": 25950 }, { "epoch": 7.91, "learning_rate": 8.38599695585997e-06, "loss": 0.9929, "step": 25975 }, { "epoch": 7.91, "learning_rate": 8.355555555555556e-06, "loss": 0.937, "step": 26000 }, { "epoch": 7.92, "learning_rate": 8.325114155251143e-06, "loss": 0.9504, "step": 26025 }, { "epoch": 7.93, "learning_rate": 8.29467275494673e-06, "loss": 0.9475, "step": 26050 }, { "epoch": 7.94, "learning_rate": 8.264231354642315e-06, "loss": 0.8728, "step": 26075 }, { "epoch": 7.95, "learning_rate": 8.2337899543379e-06, "loss": 0.9089, "step": 26100 }, { "epoch": 7.95, "learning_rate": 8.203348554033486e-06, "loss": 0.963, "step": 26125 }, { "epoch": 7.96, "learning_rate": 8.172907153729072e-06, "loss": 0.9398, "step": 26150 }, { "epoch": 7.97, "learning_rate": 8.14246575342466e-06, "loss": 0.9664, "step": 26175 }, { "epoch": 7.98, "learning_rate": 8.112024353120245e-06, "loss": 0.9756, "step": 26200 }, { "epoch": 7.98, "learning_rate": 8.08158295281583e-06, "loss": 0.9686, "step": 26225 }, { "epoch": 7.99, "learning_rate": 8.051141552511416e-06, "loss": 0.9906, "step": 26250 }, { "epoch": 8.0, "learning_rate": 8.020700152207002e-06, "loss": 0.9644, "step": 26275 }, { "epoch": 8.01, "learning_rate": 7.99025875190259e-06, "loss": 0.8352, "step": 26300 }, { "epoch": 8.01, "learning_rate": 7.959817351598175e-06, "loss": 0.7675, "step": 26325 }, { "epoch": 8.02, "learning_rate": 7.92937595129376e-06, "loss": 0.8487, "step": 26350 }, { "epoch": 8.03, "learning_rate": 7.898934550989346e-06, "loss": 0.8241, "step": 26375 }, { "epoch": 8.04, "learning_rate": 7.868493150684932e-06, "loss": 0.8417, "step": 26400 }, { "epoch": 8.04, "learning_rate": 7.83805175038052e-06, "loss": 0.8277, "step": 26425 }, { "epoch": 8.05, "learning_rate": 7.807610350076105e-06, "loss": 0.8032, "step": 26450 }, { "epoch": 8.06, "learning_rate": 7.77716894977169e-06, "loss": 0.8111, "step": 26475 }, { "epoch": 8.07, "learning_rate": 7.746727549467276e-06, "loss": 0.8319, "step": 26500 }, { "epoch": 8.07, "learning_rate": 7.716286149162862e-06, "loss": 0.8437, "step": 26525 }, { "epoch": 8.08, "learning_rate": 7.685844748858448e-06, "loss": 0.8167, "step": 26550 }, { "epoch": 8.09, "learning_rate": 7.655403348554035e-06, "loss": 0.798, "step": 26575 }, { "epoch": 8.1, "learning_rate": 7.624961948249621e-06, "loss": 0.8465, "step": 26600 }, { "epoch": 8.11, "learning_rate": 7.594520547945206e-06, "loss": 0.8215, "step": 26625 }, { "epoch": 8.11, "learning_rate": 7.564079147640791e-06, "loss": 0.8385, "step": 26650 }, { "epoch": 8.12, "learning_rate": 7.533637747336378e-06, "loss": 0.8552, "step": 26675 }, { "epoch": 8.13, "learning_rate": 7.503196347031964e-06, "loss": 0.8127, "step": 26700 }, { "epoch": 8.14, "learning_rate": 7.472754946727549e-06, "loss": 0.8545, "step": 26725 }, { "epoch": 8.14, "learning_rate": 7.442313546423136e-06, "loss": 0.8589, "step": 26750 }, { "epoch": 8.15, "learning_rate": 7.4118721461187215e-06, "loss": 0.8231, "step": 26775 }, { "epoch": 8.16, "learning_rate": 7.381430745814307e-06, "loss": 0.8292, "step": 26800 }, { "epoch": 8.17, "learning_rate": 7.350989345509894e-06, "loss": 0.866, "step": 26825 }, { "epoch": 8.17, "learning_rate": 7.320547945205479e-06, "loss": 0.8413, "step": 26850 }, { "epoch": 8.18, "learning_rate": 7.290106544901066e-06, "loss": 0.8609, "step": 26875 }, { "epoch": 8.19, "learning_rate": 7.259665144596652e-06, "loss": 0.8566, "step": 26900 }, { "epoch": 8.2, "learning_rate": 7.229223744292237e-06, "loss": 0.9092, "step": 26925 }, { "epoch": 8.2, "learning_rate": 7.198782343987824e-06, "loss": 0.8238, "step": 26950 }, { "epoch": 8.21, "learning_rate": 7.1683409436834095e-06, "loss": 0.8372, "step": 26975 }, { "epoch": 8.22, "learning_rate": 7.137899543378995e-06, "loss": 0.8547, "step": 27000 }, { "epoch": 8.23, "learning_rate": 7.107458143074582e-06, "loss": 0.8121, "step": 27025 }, { "epoch": 8.23, "learning_rate": 7.077016742770167e-06, "loss": 0.8415, "step": 27050 }, { "epoch": 8.24, "learning_rate": 7.046575342465753e-06, "loss": 0.8953, "step": 27075 }, { "epoch": 8.25, "learning_rate": 7.01613394216134e-06, "loss": 0.8622, "step": 27100 }, { "epoch": 8.26, "learning_rate": 6.985692541856925e-06, "loss": 0.874, "step": 27125 }, { "epoch": 8.26, "learning_rate": 6.955251141552512e-06, "loss": 0.8017, "step": 27150 }, { "epoch": 8.27, "learning_rate": 6.9248097412480975e-06, "loss": 0.82, "step": 27175 }, { "epoch": 8.28, "learning_rate": 6.894368340943683e-06, "loss": 0.827, "step": 27200 }, { "epoch": 8.29, "learning_rate": 6.86392694063927e-06, "loss": 0.8299, "step": 27225 }, { "epoch": 8.3, "learning_rate": 6.833485540334855e-06, "loss": 0.8529, "step": 27250 }, { "epoch": 8.3, "learning_rate": 6.803044140030441e-06, "loss": 0.8119, "step": 27275 }, { "epoch": 8.31, "learning_rate": 6.7726027397260276e-06, "loss": 0.8448, "step": 27300 }, { "epoch": 8.32, "learning_rate": 6.742161339421613e-06, "loss": 0.824, "step": 27325 }, { "epoch": 8.33, "learning_rate": 6.7117199391172e-06, "loss": 0.8442, "step": 27350 }, { "epoch": 8.33, "learning_rate": 6.6812785388127855e-06, "loss": 0.8194, "step": 27375 }, { "epoch": 8.34, "learning_rate": 6.650837138508371e-06, "loss": 0.8177, "step": 27400 }, { "epoch": 8.35, "learning_rate": 6.620395738203958e-06, "loss": 0.873, "step": 27425 }, { "epoch": 8.36, "learning_rate": 6.589954337899543e-06, "loss": 0.7966, "step": 27450 }, { "epoch": 8.36, "learning_rate": 6.559512937595129e-06, "loss": 0.8656, "step": 27475 }, { "epoch": 8.37, "learning_rate": 6.5290715372907155e-06, "loss": 0.8272, "step": 27500 }, { "epoch": 8.38, "learning_rate": 6.498630136986301e-06, "loss": 0.7805, "step": 27525 }, { "epoch": 8.39, "learning_rate": 6.468188736681888e-06, "loss": 0.8527, "step": 27550 }, { "epoch": 8.39, "learning_rate": 6.4377473363774734e-06, "loss": 0.8507, "step": 27575 }, { "epoch": 8.4, "learning_rate": 6.407305936073059e-06, "loss": 0.7855, "step": 27600 }, { "epoch": 8.41, "learning_rate": 6.376864535768646e-06, "loss": 0.8705, "step": 27625 }, { "epoch": 8.42, "learning_rate": 6.346423135464231e-06, "loss": 0.8078, "step": 27650 }, { "epoch": 8.42, "learning_rate": 6.315981735159817e-06, "loss": 0.7914, "step": 27675 }, { "epoch": 8.43, "learning_rate": 6.2855403348554035e-06, "loss": 0.8536, "step": 27700 }, { "epoch": 8.44, "learning_rate": 6.255098934550989e-06, "loss": 0.8765, "step": 27725 }, { "epoch": 8.45, "learning_rate": 6.224657534246576e-06, "loss": 0.8406, "step": 27750 }, { "epoch": 8.46, "learning_rate": 6.194216133942161e-06, "loss": 0.8427, "step": 27775 }, { "epoch": 8.46, "learning_rate": 6.163774733637747e-06, "loss": 0.8689, "step": 27800 }, { "epoch": 8.47, "learning_rate": 6.133333333333334e-06, "loss": 0.8204, "step": 27825 }, { "epoch": 8.48, "learning_rate": 6.102891933028919e-06, "loss": 0.8447, "step": 27850 }, { "epoch": 8.49, "learning_rate": 6.072450532724505e-06, "loss": 0.8356, "step": 27875 }, { "epoch": 8.49, "learning_rate": 6.0420091324200915e-06, "loss": 0.8776, "step": 27900 }, { "epoch": 8.5, "learning_rate": 6.011567732115677e-06, "loss": 0.8543, "step": 27925 }, { "epoch": 8.51, "learning_rate": 5.981126331811264e-06, "loss": 0.8341, "step": 27950 }, { "epoch": 8.52, "learning_rate": 5.950684931506849e-06, "loss": 0.832, "step": 27975 }, { "epoch": 8.52, "learning_rate": 5.920243531202435e-06, "loss": 0.7922, "step": 28000 }, { "epoch": 8.53, "learning_rate": 5.889802130898022e-06, "loss": 0.8491, "step": 28025 }, { "epoch": 8.54, "learning_rate": 5.859360730593607e-06, "loss": 0.8911, "step": 28050 }, { "epoch": 8.55, "learning_rate": 5.828919330289193e-06, "loss": 0.8432, "step": 28075 }, { "epoch": 8.55, "learning_rate": 5.7984779299847795e-06, "loss": 0.7997, "step": 28100 }, { "epoch": 8.56, "learning_rate": 5.768036529680365e-06, "loss": 0.836, "step": 28125 }, { "epoch": 8.57, "learning_rate": 5.737595129375952e-06, "loss": 0.8332, "step": 28150 }, { "epoch": 8.58, "learning_rate": 5.707153729071537e-06, "loss": 0.8591, "step": 28175 }, { "epoch": 8.58, "learning_rate": 5.676712328767123e-06, "loss": 0.8475, "step": 28200 }, { "epoch": 8.59, "learning_rate": 5.64627092846271e-06, "loss": 0.8842, "step": 28225 }, { "epoch": 8.6, "learning_rate": 5.615829528158295e-06, "loss": 0.7811, "step": 28250 }, { "epoch": 8.61, "learning_rate": 5.585388127853881e-06, "loss": 0.8826, "step": 28275 }, { "epoch": 8.61, "learning_rate": 5.556164383561644e-06, "loss": 0.8344, "step": 28300 }, { "epoch": 8.62, "learning_rate": 5.52572298325723e-06, "loss": 0.8106, "step": 28325 }, { "epoch": 8.63, "learning_rate": 5.495281582952816e-06, "loss": 0.7875, "step": 28350 }, { "epoch": 8.64, "learning_rate": 5.464840182648402e-06, "loss": 0.83, "step": 28375 }, { "epoch": 8.65, "learning_rate": 5.434398782343988e-06, "loss": 0.8188, "step": 28400 }, { "epoch": 8.65, "learning_rate": 5.403957382039574e-06, "loss": 0.8448, "step": 28425 }, { "epoch": 8.66, "learning_rate": 5.37351598173516e-06, "loss": 0.7736, "step": 28450 }, { "epoch": 8.67, "learning_rate": 5.3430745814307465e-06, "loss": 0.8335, "step": 28475 }, { "epoch": 8.68, "learning_rate": 5.312633181126332e-06, "loss": 0.8536, "step": 28500 }, { "epoch": 8.68, "learning_rate": 5.282191780821918e-06, "loss": 0.8378, "step": 28525 }, { "epoch": 8.69, "learning_rate": 5.251750380517504e-06, "loss": 0.8067, "step": 28550 }, { "epoch": 8.7, "learning_rate": 5.222526636225267e-06, "loss": 0.8447, "step": 28575 }, { "epoch": 8.71, "learning_rate": 5.192085235920853e-06, "loss": 0.8222, "step": 28600 }, { "epoch": 8.71, "learning_rate": 5.161643835616439e-06, "loss": 0.813, "step": 28625 }, { "epoch": 8.72, "learning_rate": 5.131202435312025e-06, "loss": 0.8439, "step": 28650 }, { "epoch": 8.73, "learning_rate": 5.100761035007611e-06, "loss": 0.8635, "step": 28675 }, { "epoch": 8.74, "learning_rate": 5.070319634703197e-06, "loss": 0.8653, "step": 28700 }, { "epoch": 8.74, "learning_rate": 5.0398782343987825e-06, "loss": 0.827, "step": 28725 }, { "epoch": 8.75, "learning_rate": 5.009436834094369e-06, "loss": 0.8366, "step": 28750 }, { "epoch": 8.76, "learning_rate": 4.978995433789955e-06, "loss": 0.8199, "step": 28775 }, { "epoch": 8.77, "learning_rate": 4.949771689497717e-06, "loss": 0.9215, "step": 28800 }, { "epoch": 8.77, "learning_rate": 4.919330289193303e-06, "loss": 0.8776, "step": 28825 }, { "epoch": 8.78, "learning_rate": 4.888888888888889e-06, "loss": 0.8023, "step": 28850 }, { "epoch": 8.79, "learning_rate": 4.858447488584475e-06, "loss": 0.8622, "step": 28875 }, { "epoch": 8.8, "learning_rate": 4.8280060882800615e-06, "loss": 0.801, "step": 28900 }, { "epoch": 8.81, "learning_rate": 4.797564687975647e-06, "loss": 0.837, "step": 28925 }, { "epoch": 8.81, "learning_rate": 4.767123287671233e-06, "loss": 0.8399, "step": 28950 }, { "epoch": 8.82, "learning_rate": 4.736681887366819e-06, "loss": 0.804, "step": 28975 }, { "epoch": 8.83, "learning_rate": 4.706240487062405e-06, "loss": 0.8425, "step": 29000 }, { "epoch": 8.84, "learning_rate": 4.675799086757991e-06, "loss": 0.8786, "step": 29025 }, { "epoch": 8.84, "learning_rate": 4.645357686453577e-06, "loss": 0.9328, "step": 29050 }, { "epoch": 8.85, "learning_rate": 4.614916286149163e-06, "loss": 0.8182, "step": 29075 }, { "epoch": 8.86, "learning_rate": 4.5844748858447495e-06, "loss": 0.8446, "step": 29100 }, { "epoch": 8.87, "learning_rate": 4.554033485540335e-06, "loss": 0.8503, "step": 29125 }, { "epoch": 8.87, "learning_rate": 4.523592085235921e-06, "loss": 0.8258, "step": 29150 }, { "epoch": 8.88, "learning_rate": 4.493150684931507e-06, "loss": 0.8692, "step": 29175 }, { "epoch": 8.89, "learning_rate": 4.462709284627093e-06, "loss": 0.8569, "step": 29200 }, { "epoch": 8.9, "learning_rate": 4.432267884322679e-06, "loss": 0.8694, "step": 29225 }, { "epoch": 8.9, "learning_rate": 4.401826484018265e-06, "loss": 0.836, "step": 29250 }, { "epoch": 8.91, "learning_rate": 4.371385083713851e-06, "loss": 0.804, "step": 29275 }, { "epoch": 8.92, "learning_rate": 4.340943683409437e-06, "loss": 0.8593, "step": 29300 }, { "epoch": 8.93, "learning_rate": 4.310502283105023e-06, "loss": 0.8547, "step": 29325 }, { "epoch": 8.93, "learning_rate": 4.280060882800609e-06, "loss": 0.8705, "step": 29350 }, { "epoch": 8.94, "learning_rate": 4.249619482496195e-06, "loss": 0.867, "step": 29375 }, { "epoch": 8.95, "learning_rate": 4.219178082191781e-06, "loss": 0.796, "step": 29400 }, { "epoch": 8.96, "learning_rate": 4.188736681887367e-06, "loss": 0.8249, "step": 29425 }, { "epoch": 8.96, "learning_rate": 4.158295281582953e-06, "loss": 0.8244, "step": 29450 }, { "epoch": 8.97, "learning_rate": 4.127853881278539e-06, "loss": 0.8224, "step": 29475 }, { "epoch": 8.98, "learning_rate": 4.097412480974125e-06, "loss": 0.8259, "step": 29500 }, { "epoch": 8.99, "learning_rate": 4.066971080669711e-06, "loss": 0.8255, "step": 29525 }, { "epoch": 9.0, "learning_rate": 4.036529680365297e-06, "loss": 0.8501, "step": 29550 }, { "epoch": 9.0, "learning_rate": 4.006088280060883e-06, "loss": 0.7706, "step": 29575 }, { "epoch": 9.01, "learning_rate": 3.975646879756469e-06, "loss": 0.7943, "step": 29600 }, { "epoch": 9.02, "learning_rate": 3.945205479452055e-06, "loss": 0.7389, "step": 29625 }, { "epoch": 9.03, "learning_rate": 3.914764079147641e-06, "loss": 0.7088, "step": 29650 }, { "epoch": 9.03, "learning_rate": 3.884322678843227e-06, "loss": 0.772, "step": 29675 }, { "epoch": 9.04, "learning_rate": 3.853881278538813e-06, "loss": 0.7772, "step": 29700 }, { "epoch": 9.05, "learning_rate": 3.823439878234399e-06, "loss": 0.7484, "step": 29725 }, { "epoch": 9.06, "learning_rate": 3.792998477929985e-06, "loss": 0.7653, "step": 29750 }, { "epoch": 9.06, "learning_rate": 3.762557077625571e-06, "loss": 0.7537, "step": 29775 }, { "epoch": 9.07, "learning_rate": 3.732115677321157e-06, "loss": 0.7248, "step": 29800 }, { "epoch": 9.08, "learning_rate": 3.701674277016743e-06, "loss": 0.8037, "step": 29825 }, { "epoch": 9.09, "learning_rate": 3.671232876712329e-06, "loss": 0.7425, "step": 29850 }, { "epoch": 9.09, "learning_rate": 3.640791476407915e-06, "loss": 0.7592, "step": 29875 }, { "epoch": 9.1, "learning_rate": 3.610350076103501e-06, "loss": 0.7541, "step": 29900 }, { "epoch": 9.11, "learning_rate": 3.579908675799087e-06, "loss": 0.739, "step": 29925 }, { "epoch": 9.12, "learning_rate": 3.549467275494673e-06, "loss": 0.7581, "step": 29950 }, { "epoch": 9.12, "learning_rate": 3.519025875190259e-06, "loss": 0.8068, "step": 29975 }, { "epoch": 9.13, "learning_rate": 3.488584474885845e-06, "loss": 0.7699, "step": 30000 }, { "epoch": 9.14, "learning_rate": 3.458143074581431e-06, "loss": 0.725, "step": 30025 }, { "epoch": 9.15, "learning_rate": 3.4277016742770168e-06, "loss": 0.7598, "step": 30050 }, { "epoch": 9.16, "learning_rate": 3.397260273972603e-06, "loss": 0.726, "step": 30075 }, { "epoch": 9.16, "learning_rate": 3.366818873668189e-06, "loss": 0.7622, "step": 30100 }, { "epoch": 9.17, "learning_rate": 3.336377473363775e-06, "loss": 0.7633, "step": 30125 }, { "epoch": 9.18, "learning_rate": 3.3059360730593608e-06, "loss": 0.7839, "step": 30150 }, { "epoch": 9.19, "learning_rate": 3.275494672754947e-06, "loss": 0.758, "step": 30175 }, { "epoch": 9.19, "learning_rate": 3.245053272450533e-06, "loss": 0.7451, "step": 30200 }, { "epoch": 9.2, "learning_rate": 3.214611872146119e-06, "loss": 0.7402, "step": 30225 }, { "epoch": 9.21, "learning_rate": 3.1841704718417048e-06, "loss": 0.733, "step": 30250 }, { "epoch": 9.22, "learning_rate": 3.153729071537291e-06, "loss": 0.7695, "step": 30275 }, { "epoch": 9.22, "learning_rate": 3.123287671232877e-06, "loss": 0.7838, "step": 30300 }, { "epoch": 9.23, "learning_rate": 3.092846270928463e-06, "loss": 0.7946, "step": 30325 }, { "epoch": 9.24, "learning_rate": 3.0624048706240488e-06, "loss": 0.7699, "step": 30350 }, { "epoch": 9.25, "learning_rate": 3.031963470319635e-06, "loss": 0.7173, "step": 30375 }, { "epoch": 9.25, "learning_rate": 3.001522070015221e-06, "loss": 0.7182, "step": 30400 }, { "epoch": 9.26, "learning_rate": 2.971080669710807e-06, "loss": 0.7544, "step": 30425 }, { "epoch": 9.27, "learning_rate": 2.9406392694063927e-06, "loss": 0.7678, "step": 30450 }, { "epoch": 9.28, "learning_rate": 2.910197869101979e-06, "loss": 0.7222, "step": 30475 }, { "epoch": 9.28, "learning_rate": 2.879756468797565e-06, "loss": 0.7949, "step": 30500 }, { "epoch": 9.29, "learning_rate": 2.849315068493151e-06, "loss": 0.7523, "step": 30525 }, { "epoch": 9.3, "learning_rate": 2.8188736681887367e-06, "loss": 0.7809, "step": 30550 }, { "epoch": 9.31, "learning_rate": 2.788432267884323e-06, "loss": 0.7836, "step": 30575 }, { "epoch": 9.32, "learning_rate": 2.757990867579909e-06, "loss": 0.7481, "step": 30600 }, { "epoch": 9.32, "learning_rate": 2.727549467275495e-06, "loss": 0.756, "step": 30625 }, { "epoch": 9.33, "learning_rate": 2.6971080669710807e-06, "loss": 0.8018, "step": 30650 }, { "epoch": 9.34, "learning_rate": 2.666666666666667e-06, "loss": 0.7264, "step": 30675 }, { "epoch": 9.35, "learning_rate": 2.636225266362253e-06, "loss": 0.7553, "step": 30700 }, { "epoch": 9.35, "learning_rate": 2.605783866057839e-06, "loss": 0.8181, "step": 30725 }, { "epoch": 9.36, "learning_rate": 2.5753424657534247e-06, "loss": 0.8361, "step": 30750 }, { "epoch": 9.37, "learning_rate": 2.544901065449011e-06, "loss": 0.7377, "step": 30775 }, { "epoch": 9.38, "learning_rate": 2.514459665144597e-06, "loss": 0.7311, "step": 30800 }, { "epoch": 9.38, "learning_rate": 2.484018264840183e-06, "loss": 0.7555, "step": 30825 }, { "epoch": 9.39, "learning_rate": 2.4535768645357687e-06, "loss": 0.7609, "step": 30850 }, { "epoch": 9.4, "learning_rate": 2.423135464231355e-06, "loss": 0.7774, "step": 30875 }, { "epoch": 9.41, "learning_rate": 2.392694063926941e-06, "loss": 0.7941, "step": 30900 }, { "epoch": 9.41, "learning_rate": 2.362252663622527e-06, "loss": 0.782, "step": 30925 }, { "epoch": 9.42, "learning_rate": 2.3318112633181127e-06, "loss": 0.7627, "step": 30950 }, { "epoch": 9.43, "learning_rate": 2.301369863013699e-06, "loss": 0.7271, "step": 30975 }, { "epoch": 9.44, "learning_rate": 2.270928462709285e-06, "loss": 0.7764, "step": 31000 }, { "epoch": 9.44, "learning_rate": 2.240487062404871e-06, "loss": 0.8404, "step": 31025 }, { "epoch": 9.45, "learning_rate": 2.2100456621004567e-06, "loss": 0.7565, "step": 31050 }, { "epoch": 9.46, "learning_rate": 2.179604261796043e-06, "loss": 0.7917, "step": 31075 }, { "epoch": 9.47, "learning_rate": 2.149162861491629e-06, "loss": 0.7568, "step": 31100 }, { "epoch": 9.47, "learning_rate": 2.1187214611872146e-06, "loss": 0.7278, "step": 31125 }, { "epoch": 9.48, "learning_rate": 2.0882800608828007e-06, "loss": 0.8142, "step": 31150 }, { "epoch": 9.49, "learning_rate": 2.0578386605783868e-06, "loss": 0.791, "step": 31175 }, { "epoch": 9.5, "learning_rate": 2.027397260273973e-06, "loss": 0.7884, "step": 31200 }, { "epoch": 9.51, "learning_rate": 1.9969558599695586e-06, "loss": 0.8056, "step": 31225 }, { "epoch": 9.51, "learning_rate": 1.9665144596651447e-06, "loss": 0.7537, "step": 31250 }, { "epoch": 9.52, "learning_rate": 1.9360730593607308e-06, "loss": 0.7794, "step": 31275 }, { "epoch": 9.53, "learning_rate": 1.9056316590563167e-06, "loss": 0.8168, "step": 31300 }, { "epoch": 9.54, "learning_rate": 1.8751902587519028e-06, "loss": 0.756, "step": 31325 }, { "epoch": 9.54, "learning_rate": 1.8447488584474887e-06, "loss": 0.7625, "step": 31350 }, { "epoch": 9.55, "learning_rate": 1.8143074581430748e-06, "loss": 0.7638, "step": 31375 }, { "epoch": 9.56, "learning_rate": 1.7838660578386607e-06, "loss": 0.7532, "step": 31400 }, { "epoch": 9.57, "learning_rate": 1.7534246575342468e-06, "loss": 0.7272, "step": 31425 }, { "epoch": 9.57, "learning_rate": 1.7229832572298326e-06, "loss": 0.7503, "step": 31450 }, { "epoch": 9.58, "learning_rate": 1.6925418569254187e-06, "loss": 0.7559, "step": 31475 }, { "epoch": 9.59, "learning_rate": 1.6621004566210046e-06, "loss": 0.7825, "step": 31500 }, { "epoch": 9.6, "learning_rate": 1.6316590563165907e-06, "loss": 0.7557, "step": 31525 }, { "epoch": 9.6, "learning_rate": 1.6012176560121766e-06, "loss": 0.7957, "step": 31550 }, { "epoch": 9.61, "learning_rate": 1.5707762557077627e-06, "loss": 0.7323, "step": 31575 }, { "epoch": 9.62, "learning_rate": 1.5403348554033486e-06, "loss": 0.7987, "step": 31600 }, { "epoch": 9.63, "learning_rate": 1.5098934550989347e-06, "loss": 0.8013, "step": 31625 }, { "epoch": 9.63, "learning_rate": 1.4794520547945206e-06, "loss": 0.7903, "step": 31650 }, { "epoch": 9.64, "learning_rate": 1.4490106544901067e-06, "loss": 0.7038, "step": 31675 }, { "epoch": 9.65, "learning_rate": 1.4185692541856926e-06, "loss": 0.7153, "step": 31700 }, { "epoch": 9.66, "learning_rate": 1.3881278538812787e-06, "loss": 0.7497, "step": 31725 }, { "epoch": 9.67, "learning_rate": 1.3576864535768646e-06, "loss": 0.7115, "step": 31750 }, { "epoch": 9.67, "learning_rate": 1.3272450532724507e-06, "loss": 0.7573, "step": 31775 }, { "epoch": 9.68, "learning_rate": 1.2968036529680366e-06, "loss": 0.756, "step": 31800 }, { "epoch": 9.69, "learning_rate": 1.2663622526636227e-06, "loss": 0.797, "step": 31825 }, { "epoch": 9.7, "learning_rate": 1.2359208523592086e-06, "loss": 0.7785, "step": 31850 }, { "epoch": 9.7, "learning_rate": 1.2054794520547947e-06, "loss": 0.75, "step": 31875 }, { "epoch": 9.71, "learning_rate": 1.1750380517503806e-06, "loss": 0.7955, "step": 31900 }, { "epoch": 9.72, "learning_rate": 1.1445966514459667e-06, "loss": 0.8163, "step": 31925 }, { "epoch": 9.73, "learning_rate": 1.1141552511415526e-06, "loss": 0.7569, "step": 31950 }, { "epoch": 9.73, "learning_rate": 1.0837138508371387e-06, "loss": 0.7812, "step": 31975 }, { "epoch": 9.74, "learning_rate": 1.0532724505327246e-06, "loss": 0.7108, "step": 32000 }, { "epoch": 9.75, "learning_rate": 1.0228310502283107e-06, "loss": 0.754, "step": 32025 }, { "epoch": 9.76, "learning_rate": 9.923896499238966e-07, "loss": 0.805, "step": 32050 }, { "epoch": 9.76, "learning_rate": 9.619482496194827e-07, "loss": 0.7494, "step": 32075 }, { "epoch": 9.77, "learning_rate": 9.315068493150686e-07, "loss": 0.7821, "step": 32100 }, { "epoch": 9.78, "learning_rate": 9.010654490106546e-07, "loss": 0.8192, "step": 32125 }, { "epoch": 9.79, "learning_rate": 8.706240487062406e-07, "loss": 0.7508, "step": 32150 }, { "epoch": 9.79, "learning_rate": 8.401826484018266e-07, "loss": 0.7274, "step": 32175 }, { "epoch": 9.8, "learning_rate": 8.097412480974126e-07, "loss": 0.786, "step": 32200 }, { "epoch": 9.81, "learning_rate": 7.792998477929986e-07, "loss": 0.7535, "step": 32225 }, { "epoch": 9.82, "learning_rate": 7.488584474885845e-07, "loss": 0.7448, "step": 32250 }, { "epoch": 9.82, "learning_rate": 7.184170471841705e-07, "loss": 0.7506, "step": 32275 }, { "epoch": 9.83, "learning_rate": 6.879756468797565e-07, "loss": 0.7662, "step": 32300 }, { "epoch": 9.84, "learning_rate": 6.575342465753425e-07, "loss": 0.7408, "step": 32325 }, { "epoch": 9.85, "learning_rate": 6.270928462709285e-07, "loss": 0.7333, "step": 32350 }, { "epoch": 9.86, "learning_rate": 5.966514459665146e-07, "loss": 0.7941, "step": 32375 }, { "epoch": 9.86, "learning_rate": 5.662100456621006e-07, "loss": 0.7735, "step": 32400 }, { "epoch": 9.87, "learning_rate": 5.357686453576865e-07, "loss": 0.7741, "step": 32425 }, { "epoch": 9.88, "learning_rate": 5.053272450532725e-07, "loss": 0.7857, "step": 32450 }, { "epoch": 9.89, "learning_rate": 4.748858447488585e-07, "loss": 0.8137, "step": 32475 }, { "epoch": 9.89, "learning_rate": 4.444444444444445e-07, "loss": 0.7557, "step": 32500 }, { "epoch": 9.9, "learning_rate": 4.140030441400305e-07, "loss": 0.7187, "step": 32525 }, { "epoch": 9.91, "learning_rate": 3.835616438356165e-07, "loss": 0.7536, "step": 32550 }, { "epoch": 9.92, "learning_rate": 3.531202435312025e-07, "loss": 0.7462, "step": 32575 }, { "epoch": 9.92, "learning_rate": 3.226788432267885e-07, "loss": 0.7967, "step": 32600 }, { "epoch": 9.93, "learning_rate": 2.922374429223744e-07, "loss": 0.7531, "step": 32625 }, { "epoch": 9.94, "learning_rate": 2.617960426179604e-07, "loss": 0.7584, "step": 32650 }, { "epoch": 9.95, "learning_rate": 2.3135464231354645e-07, "loss": 0.7664, "step": 32675 }, { "epoch": 9.95, "learning_rate": 2.0091324200913244e-07, "loss": 0.8058, "step": 32700 }, { "epoch": 9.96, "learning_rate": 1.7047184170471844e-07, "loss": 0.795, "step": 32725 }, { "epoch": 9.97, "learning_rate": 1.4003044140030444e-07, "loss": 0.7861, "step": 32750 }, { "epoch": 9.98, "learning_rate": 1.0958904109589042e-07, "loss": 0.7567, "step": 32775 }, { "epoch": 9.98, "learning_rate": 7.914764079147642e-08, "loss": 0.7481, "step": 32800 }, { "epoch": 9.99, "learning_rate": 4.870624048706241e-08, "loss": 0.7556, "step": 32825 }, { "epoch": 10.0, "learning_rate": 1.9482496194824964e-08, "loss": 0.7565, "step": 32850 }, { "epoch": 10.0, "step": 32850, "total_flos": 4.270496328921907e+17, "train_loss": 1.3997784228157961, "train_runtime": 9169.8515, "train_samples_per_second": 21.49, "train_steps_per_second": 3.582 } ], "max_steps": 32850, "num_train_epochs": 10, "total_flos": 4.270496328921907e+17, "trial_name": null, "trial_params": null }