diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7909 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "global_step": 32850, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 3.997199391171994e-05, + "loss": 2.9757, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 3.994155251141553e-05, + "loss": 2.7383, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 3.9911111111111114e-05, + "loss": 2.5865, + "step": 75 + }, + { + "epoch": 0.03, + "learning_rate": 3.98806697108067e-05, + "loss": 2.6042, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 3.9850228310502285e-05, + "loss": 2.7077, + "step": 125 + }, + { + "epoch": 0.05, + "learning_rate": 3.982100456621005e-05, + "loss": 2.8069, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 3.9790563165905637e-05, + "loss": 2.5831, + "step": 175 + }, + { + "epoch": 0.06, + "learning_rate": 3.976012176560122e-05, + "loss": 2.5526, + "step": 200 + }, + { + "epoch": 0.07, + "learning_rate": 3.972968036529681e-05, + "loss": 2.5865, + "step": 225 + }, + { + "epoch": 0.08, + "learning_rate": 3.9699238964992394e-05, + "loss": 2.4963, + "step": 250 + }, + { + "epoch": 0.08, + "learning_rate": 3.966879756468798e-05, + "loss": 2.5408, + "step": 275 + }, + { + "epoch": 0.09, + "learning_rate": 3.9638356164383565e-05, + "loss": 2.7108, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 3.960791476407915e-05, + "loss": 2.6353, + "step": 325 + }, + { + "epoch": 0.11, + "learning_rate": 3.9577473363774736e-05, + "loss": 2.538, + "step": 350 + }, + { + "epoch": 0.11, + "learning_rate": 3.954703196347032e-05, + "loss": 2.6099, + "step": 375 + }, + { + "epoch": 0.12, + "learning_rate": 3.951659056316591e-05, + "loss": 2.569, + "step": 400 + }, + { + "epoch": 0.13, + "learning_rate": 3.948614916286149e-05, + "loss": 2.4251, + "step": 425 + }, + { + "epoch": 0.14, + "learning_rate": 3.945570776255708e-05, + "loss": 2.4954, + "step": 450 + }, + { + "epoch": 0.14, + "learning_rate": 3.9425266362252665e-05, + "loss": 2.3763, + "step": 475 + }, + { + "epoch": 0.15, + "learning_rate": 3.939482496194826e-05, + "loss": 2.5156, + "step": 500 + }, + { + "epoch": 0.16, + "learning_rate": 3.936438356164384e-05, + "loss": 2.4705, + "step": 525 + }, + { + "epoch": 0.17, + "learning_rate": 3.933394216133942e-05, + "loss": 2.5698, + "step": 550 + }, + { + "epoch": 0.18, + "learning_rate": 3.930350076103501e-05, + "loss": 2.497, + "step": 575 + }, + { + "epoch": 0.18, + "learning_rate": 3.92730593607306e-05, + "loss": 2.3479, + "step": 600 + }, + { + "epoch": 0.19, + "learning_rate": 3.9242617960426186e-05, + "loss": 2.4028, + "step": 625 + }, + { + "epoch": 0.2, + "learning_rate": 3.921217656012177e-05, + "loss": 2.4164, + "step": 650 + }, + { + "epoch": 0.21, + "learning_rate": 3.918173515981735e-05, + "loss": 2.5796, + "step": 675 + }, + { + "epoch": 0.21, + "learning_rate": 3.915129375951294e-05, + "loss": 2.5134, + "step": 700 + }, + { + "epoch": 0.22, + "learning_rate": 3.912085235920853e-05, + "loss": 2.4413, + "step": 725 + }, + { + "epoch": 0.23, + "learning_rate": 3.9090410958904114e-05, + "loss": 2.4925, + "step": 750 + }, + { + "epoch": 0.24, + "learning_rate": 3.90599695585997e-05, + "loss": 2.4394, + "step": 775 + }, + { + "epoch": 0.24, + "learning_rate": 3.9029528158295285e-05, + "loss": 2.4334, + "step": 800 + }, + { + "epoch": 0.25, + "learning_rate": 3.899908675799087e-05, + "loss": 2.3809, + "step": 825 + }, + { + "epoch": 0.26, + "learning_rate": 3.896864535768646e-05, + "loss": 2.4713, + "step": 850 + }, + { + "epoch": 0.27, + "learning_rate": 3.893820395738204e-05, + "loss": 2.3322, + "step": 875 + }, + { + "epoch": 0.27, + "learning_rate": 3.890776255707763e-05, + "loss": 2.3121, + "step": 900 + }, + { + "epoch": 0.28, + "learning_rate": 3.8877321156773214e-05, + "loss": 2.3795, + "step": 925 + }, + { + "epoch": 0.29, + "learning_rate": 3.88468797564688e-05, + "loss": 2.377, + "step": 950 + }, + { + "epoch": 0.3, + "learning_rate": 3.8816438356164385e-05, + "loss": 2.4454, + "step": 975 + }, + { + "epoch": 0.3, + "learning_rate": 3.878599695585998e-05, + "loss": 2.4502, + "step": 1000 + }, + { + "epoch": 0.31, + "learning_rate": 3.8755555555555556e-05, + "loss": 2.423, + "step": 1025 + }, + { + "epoch": 0.32, + "learning_rate": 3.872511415525114e-05, + "loss": 2.3717, + "step": 1050 + }, + { + "epoch": 0.33, + "learning_rate": 3.869467275494673e-05, + "loss": 2.4881, + "step": 1075 + }, + { + "epoch": 0.33, + "learning_rate": 3.866423135464232e-05, + "loss": 2.3898, + "step": 1100 + }, + { + "epoch": 0.34, + "learning_rate": 3.8633789954337906e-05, + "loss": 2.4626, + "step": 1125 + }, + { + "epoch": 0.35, + "learning_rate": 3.860334855403349e-05, + "loss": 2.3674, + "step": 1150 + }, + { + "epoch": 0.36, + "learning_rate": 3.857290715372907e-05, + "loss": 2.3929, + "step": 1175 + }, + { + "epoch": 0.37, + "learning_rate": 3.854246575342466e-05, + "loss": 2.3222, + "step": 1200 + }, + { + "epoch": 0.37, + "learning_rate": 3.851202435312025e-05, + "loss": 2.453, + "step": 1225 + }, + { + "epoch": 0.38, + "learning_rate": 3.8481582952815834e-05, + "loss": 2.4782, + "step": 1250 + }, + { + "epoch": 0.39, + "learning_rate": 3.845114155251142e-05, + "loss": 2.5002, + "step": 1275 + }, + { + "epoch": 0.4, + "learning_rate": 3.8420700152207006e-05, + "loss": 2.3431, + "step": 1300 + }, + { + "epoch": 0.4, + "learning_rate": 3.839025875190259e-05, + "loss": 2.3825, + "step": 1325 + }, + { + "epoch": 0.41, + "learning_rate": 3.835981735159818e-05, + "loss": 2.4681, + "step": 1350 + }, + { + "epoch": 0.42, + "learning_rate": 3.832937595129376e-05, + "loss": 2.4481, + "step": 1375 + }, + { + "epoch": 0.43, + "learning_rate": 3.829893455098935e-05, + "loss": 2.3781, + "step": 1400 + }, + { + "epoch": 0.43, + "learning_rate": 3.8268493150684934e-05, + "loss": 2.309, + "step": 1425 + }, + { + "epoch": 0.44, + "learning_rate": 3.823805175038052e-05, + "loss": 2.4879, + "step": 1450 + }, + { + "epoch": 0.45, + "learning_rate": 3.8207610350076105e-05, + "loss": 2.3736, + "step": 1475 + }, + { + "epoch": 0.46, + "learning_rate": 3.81771689497717e-05, + "loss": 2.5061, + "step": 1500 + }, + { + "epoch": 0.46, + "learning_rate": 3.814672754946728e-05, + "loss": 2.3825, + "step": 1525 + }, + { + "epoch": 0.47, + "learning_rate": 3.811628614916286e-05, + "loss": 2.3851, + "step": 1550 + }, + { + "epoch": 0.48, + "learning_rate": 3.808584474885845e-05, + "loss": 2.3361, + "step": 1575 + }, + { + "epoch": 0.49, + "learning_rate": 3.805540334855404e-05, + "loss": 2.4237, + "step": 1600 + }, + { + "epoch": 0.49, + "learning_rate": 3.8024961948249626e-05, + "loss": 2.3025, + "step": 1625 + }, + { + "epoch": 0.5, + "learning_rate": 3.7994520547945205e-05, + "loss": 2.3862, + "step": 1650 + }, + { + "epoch": 0.51, + "learning_rate": 3.796407914764079e-05, + "loss": 2.3268, + "step": 1675 + }, + { + "epoch": 0.52, + "learning_rate": 3.793363774733638e-05, + "loss": 2.3134, + "step": 1700 + }, + { + "epoch": 0.53, + "learning_rate": 3.790319634703197e-05, + "loss": 2.3043, + "step": 1725 + }, + { + "epoch": 0.53, + "learning_rate": 3.7872754946727555e-05, + "loss": 2.2921, + "step": 1750 + }, + { + "epoch": 0.54, + "learning_rate": 3.784231354642314e-05, + "loss": 2.3658, + "step": 1775 + }, + { + "epoch": 0.55, + "learning_rate": 3.7811872146118726e-05, + "loss": 2.3773, + "step": 1800 + }, + { + "epoch": 0.56, + "learning_rate": 3.778143074581431e-05, + "loss": 2.3391, + "step": 1825 + }, + { + "epoch": 0.56, + "learning_rate": 3.77509893455099e-05, + "loss": 2.3664, + "step": 1850 + }, + { + "epoch": 0.57, + "learning_rate": 3.772054794520548e-05, + "loss": 2.3339, + "step": 1875 + }, + { + "epoch": 0.58, + "learning_rate": 3.769010654490107e-05, + "loss": 2.2858, + "step": 1900 + }, + { + "epoch": 0.59, + "learning_rate": 3.7659665144596654e-05, + "loss": 2.344, + "step": 1925 + }, + { + "epoch": 0.59, + "learning_rate": 3.762922374429224e-05, + "loss": 2.2802, + "step": 1950 + }, + { + "epoch": 0.6, + "learning_rate": 3.7598782343987826e-05, + "loss": 2.2779, + "step": 1975 + }, + { + "epoch": 0.61, + "learning_rate": 3.756834094368341e-05, + "loss": 2.3558, + "step": 2000 + }, + { + "epoch": 0.62, + "learning_rate": 3.7537899543379e-05, + "loss": 2.3082, + "step": 2025 + }, + { + "epoch": 0.62, + "learning_rate": 3.750745814307458e-05, + "loss": 2.3199, + "step": 2050 + }, + { + "epoch": 0.63, + "learning_rate": 3.747701674277017e-05, + "loss": 2.4683, + "step": 2075 + }, + { + "epoch": 0.64, + "learning_rate": 3.744657534246576e-05, + "loss": 2.4058, + "step": 2100 + }, + { + "epoch": 0.65, + "learning_rate": 3.741613394216135e-05, + "loss": 2.3419, + "step": 2125 + }, + { + "epoch": 0.65, + "learning_rate": 3.7385692541856926e-05, + "loss": 2.2594, + "step": 2150 + }, + { + "epoch": 0.66, + "learning_rate": 3.735525114155251e-05, + "loss": 2.224, + "step": 2175 + }, + { + "epoch": 0.67, + "learning_rate": 3.7324809741248104e-05, + "loss": 2.4244, + "step": 2200 + }, + { + "epoch": 0.68, + "learning_rate": 3.729436834094369e-05, + "loss": 2.2928, + "step": 2225 + }, + { + "epoch": 0.68, + "learning_rate": 3.7263926940639275e-05, + "loss": 2.2732, + "step": 2250 + }, + { + "epoch": 0.69, + "learning_rate": 3.7233485540334854e-05, + "loss": 2.4067, + "step": 2275 + }, + { + "epoch": 0.7, + "learning_rate": 3.7203044140030446e-05, + "loss": 2.3029, + "step": 2300 + }, + { + "epoch": 0.71, + "learning_rate": 3.717260273972603e-05, + "loss": 2.3482, + "step": 2325 + }, + { + "epoch": 0.72, + "learning_rate": 3.714216133942162e-05, + "loss": 2.2777, + "step": 2350 + }, + { + "epoch": 0.72, + "learning_rate": 3.7111719939117203e-05, + "loss": 2.2948, + "step": 2375 + }, + { + "epoch": 0.73, + "learning_rate": 3.708127853881279e-05, + "loss": 2.3709, + "step": 2400 + }, + { + "epoch": 0.74, + "learning_rate": 3.7050837138508375e-05, + "loss": 2.3924, + "step": 2425 + }, + { + "epoch": 0.75, + "learning_rate": 3.702039573820396e-05, + "loss": 2.3185, + "step": 2450 + }, + { + "epoch": 0.75, + "learning_rate": 3.6989954337899546e-05, + "loss": 2.3042, + "step": 2475 + }, + { + "epoch": 0.76, + "learning_rate": 3.695951293759513e-05, + "loss": 2.3062, + "step": 2500 + }, + { + "epoch": 0.77, + "learning_rate": 3.692907153729072e-05, + "loss": 2.2792, + "step": 2525 + }, + { + "epoch": 0.78, + "learning_rate": 3.68986301369863e-05, + "loss": 2.2511, + "step": 2550 + }, + { + "epoch": 0.78, + "learning_rate": 3.686818873668189e-05, + "loss": 2.2938, + "step": 2575 + }, + { + "epoch": 0.79, + "learning_rate": 3.683774733637748e-05, + "loss": 2.3167, + "step": 2600 + }, + { + "epoch": 0.8, + "learning_rate": 3.680730593607306e-05, + "loss": 2.3586, + "step": 2625 + }, + { + "epoch": 0.81, + "learning_rate": 3.6776864535768646e-05, + "loss": 2.2705, + "step": 2650 + }, + { + "epoch": 0.81, + "learning_rate": 3.674642313546423e-05, + "loss": 2.1603, + "step": 2675 + }, + { + "epoch": 0.82, + "learning_rate": 3.6715981735159824e-05, + "loss": 2.3594, + "step": 2700 + }, + { + "epoch": 0.83, + "learning_rate": 3.668554033485541e-05, + "loss": 2.308, + "step": 2725 + }, + { + "epoch": 0.84, + "learning_rate": 3.6655098934550995e-05, + "loss": 2.3469, + "step": 2750 + }, + { + "epoch": 0.84, + "learning_rate": 3.6624657534246574e-05, + "loss": 2.3153, + "step": 2775 + }, + { + "epoch": 0.85, + "learning_rate": 3.659421613394217e-05, + "loss": 2.3836, + "step": 2800 + }, + { + "epoch": 0.86, + "learning_rate": 3.656377473363775e-05, + "loss": 2.2617, + "step": 2825 + }, + { + "epoch": 0.87, + "learning_rate": 3.653333333333334e-05, + "loss": 2.3834, + "step": 2850 + }, + { + "epoch": 0.88, + "learning_rate": 3.6502891933028924e-05, + "loss": 2.2582, + "step": 2875 + }, + { + "epoch": 0.88, + "learning_rate": 3.647245053272451e-05, + "loss": 2.2326, + "step": 2900 + }, + { + "epoch": 0.89, + "learning_rate": 3.6442009132420095e-05, + "loss": 2.2157, + "step": 2925 + }, + { + "epoch": 0.9, + "learning_rate": 3.641156773211568e-05, + "loss": 2.2988, + "step": 2950 + }, + { + "epoch": 0.91, + "learning_rate": 3.6381126331811267e-05, + "loss": 2.2304, + "step": 2975 + }, + { + "epoch": 0.91, + "learning_rate": 3.635068493150685e-05, + "loss": 2.1554, + "step": 3000 + }, + { + "epoch": 0.92, + "learning_rate": 3.632024353120244e-05, + "loss": 2.2901, + "step": 3025 + }, + { + "epoch": 0.93, + "learning_rate": 3.6289802130898024e-05, + "loss": 2.1346, + "step": 3050 + }, + { + "epoch": 0.94, + "learning_rate": 3.625936073059361e-05, + "loss": 2.2658, + "step": 3075 + }, + { + "epoch": 0.94, + "learning_rate": 3.62289193302892e-05, + "loss": 2.2892, + "step": 3100 + }, + { + "epoch": 0.95, + "learning_rate": 3.619847792998478e-05, + "loss": 2.4276, + "step": 3125 + }, + { + "epoch": 0.96, + "learning_rate": 3.6168036529680366e-05, + "loss": 2.3805, + "step": 3150 + }, + { + "epoch": 0.97, + "learning_rate": 3.613759512937595e-05, + "loss": 2.3277, + "step": 3175 + }, + { + "epoch": 0.97, + "learning_rate": 3.6107153729071544e-05, + "loss": 2.253, + "step": 3200 + }, + { + "epoch": 0.98, + "learning_rate": 3.607671232876713e-05, + "loss": 2.2018, + "step": 3225 + }, + { + "epoch": 0.99, + "learning_rate": 3.604627092846271e-05, + "loss": 2.3574, + "step": 3250 + }, + { + "epoch": 1.0, + "learning_rate": 3.6015829528158295e-05, + "loss": 2.3147, + "step": 3275 + }, + { + "epoch": 1.0, + "learning_rate": 3.598538812785389e-05, + "loss": 2.247, + "step": 3300 + }, + { + "epoch": 1.01, + "learning_rate": 3.595494672754947e-05, + "loss": 1.9512, + "step": 3325 + }, + { + "epoch": 1.02, + "learning_rate": 3.592450532724506e-05, + "loss": 2.0597, + "step": 3350 + }, + { + "epoch": 1.03, + "learning_rate": 3.5894063926940644e-05, + "loss": 2.0195, + "step": 3375 + }, + { + "epoch": 1.04, + "learning_rate": 3.586362252663623e-05, + "loss": 1.9563, + "step": 3400 + }, + { + "epoch": 1.04, + "learning_rate": 3.5833181126331816e-05, + "loss": 1.9845, + "step": 3425 + }, + { + "epoch": 1.05, + "learning_rate": 3.58027397260274e-05, + "loss": 2.0274, + "step": 3450 + }, + { + "epoch": 1.06, + "learning_rate": 3.577229832572299e-05, + "loss": 2.051, + "step": 3475 + }, + { + "epoch": 1.07, + "learning_rate": 3.574185692541857e-05, + "loss": 1.9961, + "step": 3500 + }, + { + "epoch": 1.07, + "learning_rate": 3.571141552511416e-05, + "loss": 1.9761, + "step": 3525 + }, + { + "epoch": 1.08, + "learning_rate": 3.5680974124809744e-05, + "loss": 2.0424, + "step": 3550 + }, + { + "epoch": 1.09, + "learning_rate": 3.565053272450533e-05, + "loss": 1.9622, + "step": 3575 + }, + { + "epoch": 1.1, + "learning_rate": 3.5620091324200915e-05, + "loss": 1.992, + "step": 3600 + }, + { + "epoch": 1.1, + "learning_rate": 3.55896499238965e-05, + "loss": 2.0852, + "step": 3625 + }, + { + "epoch": 1.11, + "learning_rate": 3.555920852359209e-05, + "loss": 2.1086, + "step": 3650 + }, + { + "epoch": 1.12, + "learning_rate": 3.552876712328767e-05, + "loss": 2.0231, + "step": 3675 + }, + { + "epoch": 1.13, + "learning_rate": 3.5498325722983265e-05, + "loss": 1.9472, + "step": 3700 + }, + { + "epoch": 1.13, + "learning_rate": 3.546788432267885e-05, + "loss": 1.9655, + "step": 3725 + }, + { + "epoch": 1.14, + "learning_rate": 3.543744292237443e-05, + "loss": 2.04, + "step": 3750 + }, + { + "epoch": 1.15, + "learning_rate": 3.5407001522070015e-05, + "loss": 2.1005, + "step": 3775 + }, + { + "epoch": 1.16, + "learning_rate": 3.537656012176561e-05, + "loss": 2.1219, + "step": 3800 + }, + { + "epoch": 1.16, + "learning_rate": 3.534611872146119e-05, + "loss": 2.046, + "step": 3825 + }, + { + "epoch": 1.17, + "learning_rate": 3.531567732115678e-05, + "loss": 2.0985, + "step": 3850 + }, + { + "epoch": 1.18, + "learning_rate": 3.528523592085236e-05, + "loss": 2.0795, + "step": 3875 + }, + { + "epoch": 1.19, + "learning_rate": 3.525479452054795e-05, + "loss": 2.0962, + "step": 3900 + }, + { + "epoch": 1.19, + "learning_rate": 3.5224353120243536e-05, + "loss": 2.052, + "step": 3925 + }, + { + "epoch": 1.2, + "learning_rate": 3.519391171993912e-05, + "loss": 2.1188, + "step": 3950 + }, + { + "epoch": 1.21, + "learning_rate": 3.516347031963471e-05, + "loss": 2.0075, + "step": 3975 + }, + { + "epoch": 1.22, + "learning_rate": 3.513302891933029e-05, + "loss": 2.0194, + "step": 4000 + }, + { + "epoch": 1.23, + "learning_rate": 3.510258751902588e-05, + "loss": 2.0319, + "step": 4025 + }, + { + "epoch": 1.23, + "learning_rate": 3.5072146118721464e-05, + "loss": 1.9817, + "step": 4050 + }, + { + "epoch": 1.24, + "learning_rate": 3.504170471841705e-05, + "loss": 2.0719, + "step": 4075 + }, + { + "epoch": 1.25, + "learning_rate": 3.5011263318112636e-05, + "loss": 1.9931, + "step": 4100 + }, + { + "epoch": 1.26, + "learning_rate": 3.498082191780822e-05, + "loss": 1.9983, + "step": 4125 + }, + { + "epoch": 1.26, + "learning_rate": 3.495038051750381e-05, + "loss": 2.1237, + "step": 4150 + }, + { + "epoch": 1.27, + "learning_rate": 3.491993911719939e-05, + "loss": 2.011, + "step": 4175 + }, + { + "epoch": 1.28, + "learning_rate": 3.4889497716894985e-05, + "loss": 2.0294, + "step": 4200 + }, + { + "epoch": 1.29, + "learning_rate": 3.4859056316590564e-05, + "loss": 2.063, + "step": 4225 + }, + { + "epoch": 1.29, + "learning_rate": 3.482861491628615e-05, + "loss": 2.0753, + "step": 4250 + }, + { + "epoch": 1.3, + "learning_rate": 3.4798173515981735e-05, + "loss": 2.0859, + "step": 4275 + }, + { + "epoch": 1.31, + "learning_rate": 3.476773211567733e-05, + "loss": 1.9657, + "step": 4300 + }, + { + "epoch": 1.32, + "learning_rate": 3.4737290715372914e-05, + "loss": 2.0901, + "step": 4325 + }, + { + "epoch": 1.32, + "learning_rate": 3.47068493150685e-05, + "loss": 2.0066, + "step": 4350 + }, + { + "epoch": 1.33, + "learning_rate": 3.467640791476408e-05, + "loss": 2.0134, + "step": 4375 + }, + { + "epoch": 1.34, + "learning_rate": 3.464596651445967e-05, + "loss": 2.0645, + "step": 4400 + }, + { + "epoch": 1.35, + "learning_rate": 3.4615525114155256e-05, + "loss": 2.0559, + "step": 4425 + }, + { + "epoch": 1.35, + "learning_rate": 3.458508371385084e-05, + "loss": 2.1435, + "step": 4450 + }, + { + "epoch": 1.36, + "learning_rate": 3.455464231354643e-05, + "loss": 1.9392, + "step": 4475 + }, + { + "epoch": 1.37, + "learning_rate": 3.452420091324201e-05, + "loss": 2.0839, + "step": 4500 + }, + { + "epoch": 1.38, + "learning_rate": 3.44937595129376e-05, + "loss": 2.1076, + "step": 4525 + }, + { + "epoch": 1.39, + "learning_rate": 3.4463318112633185e-05, + "loss": 2.0119, + "step": 4550 + }, + { + "epoch": 1.39, + "learning_rate": 3.443287671232877e-05, + "loss": 2.1092, + "step": 4575 + }, + { + "epoch": 1.4, + "learning_rate": 3.4402435312024356e-05, + "loss": 2.0383, + "step": 4600 + }, + { + "epoch": 1.41, + "learning_rate": 3.437199391171994e-05, + "loss": 2.1265, + "step": 4625 + }, + { + "epoch": 1.42, + "learning_rate": 3.434155251141553e-05, + "loss": 2.07, + "step": 4650 + }, + { + "epoch": 1.42, + "learning_rate": 3.431111111111111e-05, + "loss": 2.0152, + "step": 4675 + }, + { + "epoch": 1.43, + "learning_rate": 3.42806697108067e-05, + "loss": 2.008, + "step": 4700 + }, + { + "epoch": 1.44, + "learning_rate": 3.4250228310502284e-05, + "loss": 1.9992, + "step": 4725 + }, + { + "epoch": 1.45, + "learning_rate": 3.421978691019787e-05, + "loss": 2.0191, + "step": 4750 + }, + { + "epoch": 1.45, + "learning_rate": 3.4189345509893456e-05, + "loss": 2.0505, + "step": 4775 + }, + { + "epoch": 1.46, + "learning_rate": 3.415890410958904e-05, + "loss": 2.0542, + "step": 4800 + }, + { + "epoch": 1.47, + "learning_rate": 3.4128462709284634e-05, + "loss": 2.1455, + "step": 4825 + }, + { + "epoch": 1.48, + "learning_rate": 3.409802130898021e-05, + "loss": 1.9989, + "step": 4850 + }, + { + "epoch": 1.48, + "learning_rate": 3.40675799086758e-05, + "loss": 2.0734, + "step": 4875 + }, + { + "epoch": 1.49, + "learning_rate": 3.4037138508371384e-05, + "loss": 1.9921, + "step": 4900 + }, + { + "epoch": 1.5, + "learning_rate": 3.400669710806698e-05, + "loss": 2.1307, + "step": 4925 + }, + { + "epoch": 1.51, + "learning_rate": 3.397625570776256e-05, + "loss": 2.0824, + "step": 4950 + }, + { + "epoch": 1.51, + "learning_rate": 3.394581430745815e-05, + "loss": 1.9928, + "step": 4975 + }, + { + "epoch": 1.52, + "learning_rate": 3.391537290715373e-05, + "loss": 2.0448, + "step": 5000 + }, + { + "epoch": 1.53, + "learning_rate": 3.388493150684932e-05, + "loss": 2.0779, + "step": 5025 + }, + { + "epoch": 1.54, + "learning_rate": 3.3854490106544905e-05, + "loss": 2.0343, + "step": 5050 + }, + { + "epoch": 1.54, + "learning_rate": 3.382404870624049e-05, + "loss": 1.9332, + "step": 5075 + }, + { + "epoch": 1.55, + "learning_rate": 3.3793607305936076e-05, + "loss": 2.1341, + "step": 5100 + }, + { + "epoch": 1.56, + "learning_rate": 3.376316590563166e-05, + "loss": 1.978, + "step": 5125 + }, + { + "epoch": 1.57, + "learning_rate": 3.373272450532725e-05, + "loss": 1.9498, + "step": 5150 + }, + { + "epoch": 1.58, + "learning_rate": 3.3702283105022834e-05, + "loss": 1.9063, + "step": 5175 + }, + { + "epoch": 1.58, + "learning_rate": 3.367184170471842e-05, + "loss": 2.0495, + "step": 5200 + }, + { + "epoch": 1.59, + "learning_rate": 3.3641400304414005e-05, + "loss": 2.02, + "step": 5225 + }, + { + "epoch": 1.6, + "learning_rate": 3.361095890410959e-05, + "loss": 1.973, + "step": 5250 + }, + { + "epoch": 1.61, + "learning_rate": 3.3580517503805176e-05, + "loss": 1.96, + "step": 5275 + }, + { + "epoch": 1.61, + "learning_rate": 3.355007610350076e-05, + "loss": 2.0725, + "step": 5300 + }, + { + "epoch": 1.62, + "learning_rate": 3.351963470319635e-05, + "loss": 2.1469, + "step": 5325 + }, + { + "epoch": 1.63, + "learning_rate": 3.348919330289193e-05, + "loss": 2.0721, + "step": 5350 + }, + { + "epoch": 1.64, + "learning_rate": 3.345875190258752e-05, + "loss": 2.1235, + "step": 5375 + }, + { + "epoch": 1.64, + "learning_rate": 3.3428310502283105e-05, + "loss": 1.9994, + "step": 5400 + }, + { + "epoch": 1.65, + "learning_rate": 3.33978691019787e-05, + "loss": 2.0654, + "step": 5425 + }, + { + "epoch": 1.66, + "learning_rate": 3.336742770167428e-05, + "loss": 2.0628, + "step": 5450 + }, + { + "epoch": 1.67, + "learning_rate": 3.333698630136986e-05, + "loss": 2.0541, + "step": 5475 + }, + { + "epoch": 1.67, + "learning_rate": 3.330654490106545e-05, + "loss": 2.0388, + "step": 5500 + }, + { + "epoch": 1.68, + "learning_rate": 3.327610350076104e-05, + "loss": 1.9874, + "step": 5525 + }, + { + "epoch": 1.69, + "learning_rate": 3.3245662100456625e-05, + "loss": 2.0574, + "step": 5550 + }, + { + "epoch": 1.7, + "learning_rate": 3.321522070015221e-05, + "loss": 1.9692, + "step": 5575 + }, + { + "epoch": 1.7, + "learning_rate": 3.31847792998478e-05, + "loss": 1.932, + "step": 5600 + }, + { + "epoch": 1.71, + "learning_rate": 3.315433789954338e-05, + "loss": 1.8747, + "step": 5625 + }, + { + "epoch": 1.72, + "learning_rate": 3.312389649923897e-05, + "loss": 2.0928, + "step": 5650 + }, + { + "epoch": 1.73, + "learning_rate": 3.3093455098934554e-05, + "loss": 2.1114, + "step": 5675 + }, + { + "epoch": 1.74, + "learning_rate": 3.306301369863014e-05, + "loss": 2.0353, + "step": 5700 + }, + { + "epoch": 1.74, + "learning_rate": 3.3032572298325725e-05, + "loss": 1.9485, + "step": 5725 + }, + { + "epoch": 1.75, + "learning_rate": 3.300213089802131e-05, + "loss": 2.0028, + "step": 5750 + }, + { + "epoch": 1.76, + "learning_rate": 3.2971689497716897e-05, + "loss": 2.026, + "step": 5775 + }, + { + "epoch": 1.77, + "learning_rate": 3.294124809741248e-05, + "loss": 2.1633, + "step": 5800 + }, + { + "epoch": 1.77, + "learning_rate": 3.291080669710807e-05, + "loss": 2.0141, + "step": 5825 + }, + { + "epoch": 1.78, + "learning_rate": 3.2880365296803654e-05, + "loss": 1.9247, + "step": 5850 + }, + { + "epoch": 1.79, + "learning_rate": 3.284992389649924e-05, + "loss": 2.005, + "step": 5875 + }, + { + "epoch": 1.8, + "learning_rate": 3.2819482496194825e-05, + "loss": 2.0607, + "step": 5900 + }, + { + "epoch": 1.8, + "learning_rate": 3.278904109589042e-05, + "loss": 2.0251, + "step": 5925 + }, + { + "epoch": 1.81, + "learning_rate": 3.2758599695585996e-05, + "loss": 2.1067, + "step": 5950 + }, + { + "epoch": 1.82, + "learning_rate": 3.272815829528158e-05, + "loss": 1.97, + "step": 5975 + }, + { + "epoch": 1.83, + "learning_rate": 3.269771689497717e-05, + "loss": 2.0772, + "step": 6000 + }, + { + "epoch": 1.83, + "learning_rate": 3.266727549467276e-05, + "loss": 2.0554, + "step": 6025 + }, + { + "epoch": 1.84, + "learning_rate": 3.2636834094368346e-05, + "loss": 1.9819, + "step": 6050 + }, + { + "epoch": 1.85, + "learning_rate": 3.260639269406393e-05, + "loss": 2.0674, + "step": 6075 + }, + { + "epoch": 1.86, + "learning_rate": 3.257595129375951e-05, + "loss": 2.0516, + "step": 6100 + }, + { + "epoch": 1.86, + "learning_rate": 3.25455098934551e-05, + "loss": 2.0866, + "step": 6125 + }, + { + "epoch": 1.87, + "learning_rate": 3.251506849315069e-05, + "loss": 1.9859, + "step": 6150 + }, + { + "epoch": 1.88, + "learning_rate": 3.2484627092846274e-05, + "loss": 1.9645, + "step": 6175 + }, + { + "epoch": 1.89, + "learning_rate": 3.245418569254186e-05, + "loss": 2.0103, + "step": 6200 + }, + { + "epoch": 1.89, + "learning_rate": 3.2423744292237446e-05, + "loss": 2.052, + "step": 6225 + }, + { + "epoch": 1.9, + "learning_rate": 3.239330289193303e-05, + "loss": 2.0134, + "step": 6250 + }, + { + "epoch": 1.91, + "learning_rate": 3.236286149162862e-05, + "loss": 2.025, + "step": 6275 + }, + { + "epoch": 1.92, + "learning_rate": 3.23324200913242e-05, + "loss": 2.0338, + "step": 6300 + }, + { + "epoch": 1.93, + "learning_rate": 3.230197869101979e-05, + "loss": 2.0614, + "step": 6325 + }, + { + "epoch": 1.93, + "learning_rate": 3.2271537290715374e-05, + "loss": 1.9784, + "step": 6350 + }, + { + "epoch": 1.94, + "learning_rate": 3.224109589041096e-05, + "loss": 1.9976, + "step": 6375 + }, + { + "epoch": 1.95, + "learning_rate": 3.2210654490106545e-05, + "loss": 2.0695, + "step": 6400 + }, + { + "epoch": 1.96, + "learning_rate": 3.218021308980214e-05, + "loss": 1.8607, + "step": 6425 + }, + { + "epoch": 1.96, + "learning_rate": 3.214977168949772e-05, + "loss": 2.0003, + "step": 6450 + }, + { + "epoch": 1.97, + "learning_rate": 3.21193302891933e-05, + "loss": 1.9529, + "step": 6475 + }, + { + "epoch": 1.98, + "learning_rate": 3.208888888888889e-05, + "loss": 1.9775, + "step": 6500 + }, + { + "epoch": 1.99, + "learning_rate": 3.205844748858448e-05, + "loss": 2.1202, + "step": 6525 + }, + { + "epoch": 1.99, + "learning_rate": 3.2028006088280066e-05, + "loss": 2.0416, + "step": 6550 + }, + { + "epoch": 2.0, + "learning_rate": 3.1997564687975645e-05, + "loss": 1.939, + "step": 6575 + }, + { + "epoch": 2.01, + "learning_rate": 3.196712328767123e-05, + "loss": 1.8248, + "step": 6600 + }, + { + "epoch": 2.02, + "learning_rate": 3.193668188736682e-05, + "loss": 1.768, + "step": 6625 + }, + { + "epoch": 2.02, + "learning_rate": 3.190624048706241e-05, + "loss": 1.7417, + "step": 6650 + }, + { + "epoch": 2.03, + "learning_rate": 3.1875799086757995e-05, + "loss": 1.787, + "step": 6675 + }, + { + "epoch": 2.04, + "learning_rate": 3.184657534246576e-05, + "loss": 1.8256, + "step": 6700 + }, + { + "epoch": 2.05, + "learning_rate": 3.181613394216134e-05, + "loss": 1.7764, + "step": 6725 + }, + { + "epoch": 2.05, + "learning_rate": 3.1785692541856925e-05, + "loss": 1.7467, + "step": 6750 + }, + { + "epoch": 2.06, + "learning_rate": 3.175525114155251e-05, + "loss": 1.7261, + "step": 6775 + }, + { + "epoch": 2.07, + "learning_rate": 3.17248097412481e-05, + "loss": 1.7825, + "step": 6800 + }, + { + "epoch": 2.08, + "learning_rate": 3.169436834094369e-05, + "loss": 1.7536, + "step": 6825 + }, + { + "epoch": 2.09, + "learning_rate": 3.1663926940639274e-05, + "loss": 1.7507, + "step": 6850 + }, + { + "epoch": 2.09, + "learning_rate": 3.1633485540334853e-05, + "loss": 1.7534, + "step": 6875 + }, + { + "epoch": 2.1, + "learning_rate": 3.1603044140030446e-05, + "loss": 1.6879, + "step": 6900 + }, + { + "epoch": 2.11, + "learning_rate": 3.157260273972603e-05, + "loss": 1.6767, + "step": 6925 + }, + { + "epoch": 2.12, + "learning_rate": 3.154216133942162e-05, + "loss": 1.7718, + "step": 6950 + }, + { + "epoch": 2.12, + "learning_rate": 3.15117199391172e-05, + "loss": 1.7115, + "step": 6975 + }, + { + "epoch": 2.13, + "learning_rate": 3.148127853881279e-05, + "loss": 1.7024, + "step": 7000 + }, + { + "epoch": 2.14, + "learning_rate": 3.1450837138508374e-05, + "loss": 1.7389, + "step": 7025 + }, + { + "epoch": 2.15, + "learning_rate": 3.142039573820396e-05, + "loss": 1.7123, + "step": 7050 + }, + { + "epoch": 2.15, + "learning_rate": 3.1389954337899546e-05, + "loss": 1.835, + "step": 7075 + }, + { + "epoch": 2.16, + "learning_rate": 3.135951293759513e-05, + "loss": 1.7925, + "step": 7100 + }, + { + "epoch": 2.17, + "learning_rate": 3.132907153729072e-05, + "loss": 1.7361, + "step": 7125 + }, + { + "epoch": 2.18, + "learning_rate": 3.12986301369863e-05, + "loss": 1.775, + "step": 7150 + }, + { + "epoch": 2.18, + "learning_rate": 3.126818873668189e-05, + "loss": 1.699, + "step": 7175 + }, + { + "epoch": 2.19, + "learning_rate": 3.123774733637748e-05, + "loss": 1.8377, + "step": 7200 + }, + { + "epoch": 2.2, + "learning_rate": 3.120730593607306e-05, + "loss": 1.6677, + "step": 7225 + }, + { + "epoch": 2.21, + "learning_rate": 3.1176864535768645e-05, + "loss": 1.7617, + "step": 7250 + }, + { + "epoch": 2.21, + "learning_rate": 3.114642313546423e-05, + "loss": 1.7213, + "step": 7275 + }, + { + "epoch": 2.22, + "learning_rate": 3.1115981735159824e-05, + "loss": 1.8376, + "step": 7300 + }, + { + "epoch": 2.23, + "learning_rate": 3.108554033485541e-05, + "loss": 1.792, + "step": 7325 + }, + { + "epoch": 2.24, + "learning_rate": 3.105509893455099e-05, + "loss": 1.8017, + "step": 7350 + }, + { + "epoch": 2.25, + "learning_rate": 3.1024657534246574e-05, + "loss": 1.7817, + "step": 7375 + }, + { + "epoch": 2.25, + "learning_rate": 3.0994216133942166e-05, + "loss": 1.8186, + "step": 7400 + }, + { + "epoch": 2.26, + "learning_rate": 3.096377473363775e-05, + "loss": 1.7081, + "step": 7425 + }, + { + "epoch": 2.27, + "learning_rate": 3.093333333333334e-05, + "loss": 1.7725, + "step": 7450 + }, + { + "epoch": 2.28, + "learning_rate": 3.090289193302892e-05, + "loss": 1.8943, + "step": 7475 + }, + { + "epoch": 2.28, + "learning_rate": 3.087245053272451e-05, + "loss": 1.8723, + "step": 7500 + }, + { + "epoch": 2.29, + "learning_rate": 3.0842009132420095e-05, + "loss": 1.8089, + "step": 7525 + }, + { + "epoch": 2.3, + "learning_rate": 3.081156773211568e-05, + "loss": 1.7635, + "step": 7550 + }, + { + "epoch": 2.31, + "learning_rate": 3.0781126331811266e-05, + "loss": 1.7497, + "step": 7575 + }, + { + "epoch": 2.31, + "learning_rate": 3.075068493150685e-05, + "loss": 1.738, + "step": 7600 + }, + { + "epoch": 2.32, + "learning_rate": 3.072024353120244e-05, + "loss": 1.7198, + "step": 7625 + }, + { + "epoch": 2.33, + "learning_rate": 3.068980213089802e-05, + "loss": 1.7462, + "step": 7650 + }, + { + "epoch": 2.34, + "learning_rate": 3.065936073059361e-05, + "loss": 1.8347, + "step": 7675 + }, + { + "epoch": 2.34, + "learning_rate": 3.0628919330289194e-05, + "loss": 1.8402, + "step": 7700 + }, + { + "epoch": 2.35, + "learning_rate": 3.059847792998478e-05, + "loss": 1.8079, + "step": 7725 + }, + { + "epoch": 2.36, + "learning_rate": 3.0568036529680366e-05, + "loss": 1.7424, + "step": 7750 + }, + { + "epoch": 2.37, + "learning_rate": 3.053759512937595e-05, + "loss": 1.7832, + "step": 7775 + }, + { + "epoch": 2.37, + "learning_rate": 3.050715372907154e-05, + "loss": 1.7963, + "step": 7800 + }, + { + "epoch": 2.38, + "learning_rate": 3.0477929984779303e-05, + "loss": 1.8091, + "step": 7825 + }, + { + "epoch": 2.39, + "learning_rate": 3.044748858447489e-05, + "loss": 1.6953, + "step": 7850 + }, + { + "epoch": 2.4, + "learning_rate": 3.0417047184170478e-05, + "loss": 1.8256, + "step": 7875 + }, + { + "epoch": 2.4, + "learning_rate": 3.038660578386606e-05, + "loss": 1.8461, + "step": 7900 + }, + { + "epoch": 2.41, + "learning_rate": 3.0356164383561646e-05, + "loss": 1.6912, + "step": 7925 + }, + { + "epoch": 2.42, + "learning_rate": 3.032572298325723e-05, + "loss": 1.7326, + "step": 7950 + }, + { + "epoch": 2.43, + "learning_rate": 3.029528158295282e-05, + "loss": 1.7778, + "step": 7975 + }, + { + "epoch": 2.44, + "learning_rate": 3.0264840182648406e-05, + "loss": 1.7841, + "step": 8000 + }, + { + "epoch": 2.44, + "learning_rate": 3.023439878234399e-05, + "loss": 1.8748, + "step": 8025 + }, + { + "epoch": 2.45, + "learning_rate": 3.0203957382039574e-05, + "loss": 1.7683, + "step": 8050 + }, + { + "epoch": 2.46, + "learning_rate": 3.0173515981735163e-05, + "loss": 1.8536, + "step": 8075 + }, + { + "epoch": 2.47, + "learning_rate": 3.014307458143075e-05, + "loss": 1.865, + "step": 8100 + }, + { + "epoch": 2.47, + "learning_rate": 3.0112633181126334e-05, + "loss": 1.7282, + "step": 8125 + }, + { + "epoch": 2.48, + "learning_rate": 3.008219178082192e-05, + "loss": 1.8071, + "step": 8150 + }, + { + "epoch": 2.49, + "learning_rate": 3.005175038051751e-05, + "loss": 1.7984, + "step": 8175 + }, + { + "epoch": 2.5, + "learning_rate": 3.002130898021309e-05, + "loss": 1.8904, + "step": 8200 + }, + { + "epoch": 2.5, + "learning_rate": 2.9990867579908677e-05, + "loss": 1.8645, + "step": 8225 + }, + { + "epoch": 2.51, + "learning_rate": 2.9960426179604263e-05, + "loss": 1.8624, + "step": 8250 + }, + { + "epoch": 2.52, + "learning_rate": 2.9929984779299852e-05, + "loss": 1.7069, + "step": 8275 + }, + { + "epoch": 2.53, + "learning_rate": 2.9899543378995438e-05, + "loss": 1.8513, + "step": 8300 + }, + { + "epoch": 2.53, + "learning_rate": 2.9869101978691023e-05, + "loss": 1.695, + "step": 8325 + }, + { + "epoch": 2.54, + "learning_rate": 2.9838660578386606e-05, + "loss": 1.789, + "step": 8350 + }, + { + "epoch": 2.55, + "learning_rate": 2.9808219178082195e-05, + "loss": 1.8332, + "step": 8375 + }, + { + "epoch": 2.56, + "learning_rate": 2.977777777777778e-05, + "loss": 1.781, + "step": 8400 + }, + { + "epoch": 2.56, + "learning_rate": 2.9747336377473366e-05, + "loss": 1.7331, + "step": 8425 + }, + { + "epoch": 2.57, + "learning_rate": 2.971689497716895e-05, + "loss": 1.7355, + "step": 8450 + }, + { + "epoch": 2.58, + "learning_rate": 2.968645357686454e-05, + "loss": 1.7736, + "step": 8475 + }, + { + "epoch": 2.59, + "learning_rate": 2.9656012176560126e-05, + "loss": 1.8316, + "step": 8500 + }, + { + "epoch": 2.6, + "learning_rate": 2.962557077625571e-05, + "loss": 1.7823, + "step": 8525 + }, + { + "epoch": 2.6, + "learning_rate": 2.9595129375951294e-05, + "loss": 1.8136, + "step": 8550 + }, + { + "epoch": 2.61, + "learning_rate": 2.9564687975646883e-05, + "loss": 1.8344, + "step": 8575 + }, + { + "epoch": 2.62, + "learning_rate": 2.953424657534247e-05, + "loss": 1.8676, + "step": 8600 + }, + { + "epoch": 2.63, + "learning_rate": 2.9503805175038055e-05, + "loss": 1.8041, + "step": 8625 + }, + { + "epoch": 2.63, + "learning_rate": 2.9473363774733637e-05, + "loss": 1.7538, + "step": 8650 + }, + { + "epoch": 2.64, + "learning_rate": 2.944292237442923e-05, + "loss": 1.7551, + "step": 8675 + }, + { + "epoch": 2.65, + "learning_rate": 2.9412480974124812e-05, + "loss": 1.8191, + "step": 8700 + }, + { + "epoch": 2.66, + "learning_rate": 2.9382039573820398e-05, + "loss": 1.6856, + "step": 8725 + }, + { + "epoch": 2.66, + "learning_rate": 2.9351598173515983e-05, + "loss": 1.8142, + "step": 8750 + }, + { + "epoch": 2.67, + "learning_rate": 2.9321156773211572e-05, + "loss": 1.7193, + "step": 8775 + }, + { + "epoch": 2.68, + "learning_rate": 2.9290715372907158e-05, + "loss": 1.8532, + "step": 8800 + }, + { + "epoch": 2.69, + "learning_rate": 2.926027397260274e-05, + "loss": 1.7633, + "step": 8825 + }, + { + "epoch": 2.69, + "learning_rate": 2.9229832572298326e-05, + "loss": 1.8355, + "step": 8850 + }, + { + "epoch": 2.7, + "learning_rate": 2.9199391171993915e-05, + "loss": 1.7773, + "step": 8875 + }, + { + "epoch": 2.71, + "learning_rate": 2.91689497716895e-05, + "loss": 1.8063, + "step": 8900 + }, + { + "epoch": 2.72, + "learning_rate": 2.9138508371385086e-05, + "loss": 1.8132, + "step": 8925 + }, + { + "epoch": 2.72, + "learning_rate": 2.9108066971080672e-05, + "loss": 1.8179, + "step": 8950 + }, + { + "epoch": 2.73, + "learning_rate": 2.907762557077626e-05, + "loss": 1.8025, + "step": 8975 + }, + { + "epoch": 2.74, + "learning_rate": 2.9047184170471843e-05, + "loss": 1.9039, + "step": 9000 + }, + { + "epoch": 2.75, + "learning_rate": 2.901674277016743e-05, + "loss": 1.8192, + "step": 9025 + }, + { + "epoch": 2.75, + "learning_rate": 2.8986301369863015e-05, + "loss": 1.7935, + "step": 9050 + }, + { + "epoch": 2.76, + "learning_rate": 2.8955859969558604e-05, + "loss": 1.8275, + "step": 9075 + }, + { + "epoch": 2.77, + "learning_rate": 2.892541856925419e-05, + "loss": 1.818, + "step": 9100 + }, + { + "epoch": 2.78, + "learning_rate": 2.8894977168949775e-05, + "loss": 1.8142, + "step": 9125 + }, + { + "epoch": 2.79, + "learning_rate": 2.8864535768645357e-05, + "loss": 1.8139, + "step": 9150 + }, + { + "epoch": 2.79, + "learning_rate": 2.8834094368340947e-05, + "loss": 1.8355, + "step": 9175 + }, + { + "epoch": 2.8, + "learning_rate": 2.8803652968036532e-05, + "loss": 1.7334, + "step": 9200 + }, + { + "epoch": 2.81, + "learning_rate": 2.8773211567732118e-05, + "loss": 1.7533, + "step": 9225 + }, + { + "epoch": 2.82, + "learning_rate": 2.8742770167427704e-05, + "loss": 1.7938, + "step": 9250 + }, + { + "epoch": 2.82, + "learning_rate": 2.8712328767123293e-05, + "loss": 1.7383, + "step": 9275 + }, + { + "epoch": 2.83, + "learning_rate": 2.868188736681888e-05, + "loss": 1.7569, + "step": 9300 + }, + { + "epoch": 2.84, + "learning_rate": 2.865144596651446e-05, + "loss": 1.891, + "step": 9325 + }, + { + "epoch": 2.85, + "learning_rate": 2.8621004566210046e-05, + "loss": 1.8103, + "step": 9350 + }, + { + "epoch": 2.85, + "learning_rate": 2.8590563165905635e-05, + "loss": 1.8334, + "step": 9375 + }, + { + "epoch": 2.86, + "learning_rate": 2.856012176560122e-05, + "loss": 1.7458, + "step": 9400 + }, + { + "epoch": 2.87, + "learning_rate": 2.8529680365296807e-05, + "loss": 1.8501, + "step": 9425 + }, + { + "epoch": 2.88, + "learning_rate": 2.849923896499239e-05, + "loss": 1.7494, + "step": 9450 + }, + { + "epoch": 2.88, + "learning_rate": 2.846879756468798e-05, + "loss": 1.7331, + "step": 9475 + }, + { + "epoch": 2.89, + "learning_rate": 2.8438356164383564e-05, + "loss": 1.8252, + "step": 9500 + }, + { + "epoch": 2.9, + "learning_rate": 2.840791476407915e-05, + "loss": 1.808, + "step": 9525 + }, + { + "epoch": 2.91, + "learning_rate": 2.8377473363774735e-05, + "loss": 1.7964, + "step": 9550 + }, + { + "epoch": 2.91, + "learning_rate": 2.8347031963470324e-05, + "loss": 1.8049, + "step": 9575 + }, + { + "epoch": 2.92, + "learning_rate": 2.831659056316591e-05, + "loss": 1.8175, + "step": 9600 + }, + { + "epoch": 2.93, + "learning_rate": 2.8286149162861492e-05, + "loss": 1.768, + "step": 9625 + }, + { + "epoch": 2.94, + "learning_rate": 2.8255707762557078e-05, + "loss": 1.8176, + "step": 9650 + }, + { + "epoch": 2.95, + "learning_rate": 2.8225266362252667e-05, + "loss": 1.7983, + "step": 9675 + }, + { + "epoch": 2.95, + "learning_rate": 2.8194824961948253e-05, + "loss": 1.8383, + "step": 9700 + }, + { + "epoch": 2.96, + "learning_rate": 2.8164383561643838e-05, + "loss": 1.77, + "step": 9725 + }, + { + "epoch": 2.97, + "learning_rate": 2.8133942161339424e-05, + "loss": 1.8007, + "step": 9750 + }, + { + "epoch": 2.98, + "learning_rate": 2.8103500761035013e-05, + "loss": 1.7941, + "step": 9775 + }, + { + "epoch": 2.98, + "learning_rate": 2.8073059360730595e-05, + "loss": 1.8826, + "step": 9800 + }, + { + "epoch": 2.99, + "learning_rate": 2.804261796042618e-05, + "loss": 1.8164, + "step": 9825 + }, + { + "epoch": 3.0, + "learning_rate": 2.8012176560121767e-05, + "loss": 1.8422, + "step": 9850 + }, + { + "epoch": 3.01, + "learning_rate": 2.7981735159817356e-05, + "loss": 1.607, + "step": 9875 + }, + { + "epoch": 3.01, + "learning_rate": 2.795129375951294e-05, + "loss": 1.5504, + "step": 9900 + }, + { + "epoch": 3.02, + "learning_rate": 2.7920852359208527e-05, + "loss": 1.5181, + "step": 9925 + }, + { + "epoch": 3.03, + "learning_rate": 2.789041095890411e-05, + "loss": 1.5856, + "step": 9950 + }, + { + "epoch": 3.04, + "learning_rate": 2.78599695585997e-05, + "loss": 1.4975, + "step": 9975 + }, + { + "epoch": 3.04, + "learning_rate": 2.7829528158295284e-05, + "loss": 1.5684, + "step": 10000 + }, + { + "epoch": 3.05, + "learning_rate": 2.779908675799087e-05, + "loss": 1.6031, + "step": 10025 + }, + { + "epoch": 3.06, + "learning_rate": 2.7768645357686455e-05, + "loss": 1.5398, + "step": 10050 + }, + { + "epoch": 3.07, + "learning_rate": 2.7738203957382045e-05, + "loss": 1.5564, + "step": 10075 + }, + { + "epoch": 3.07, + "learning_rate": 2.770776255707763e-05, + "loss": 1.5395, + "step": 10100 + }, + { + "epoch": 3.08, + "learning_rate": 2.7677321156773213e-05, + "loss": 1.5022, + "step": 10125 + }, + { + "epoch": 3.09, + "learning_rate": 2.7646879756468798e-05, + "loss": 1.4697, + "step": 10150 + }, + { + "epoch": 3.1, + "learning_rate": 2.7616438356164387e-05, + "loss": 1.5704, + "step": 10175 + }, + { + "epoch": 3.11, + "learning_rate": 2.7585996955859973e-05, + "loss": 1.5571, + "step": 10200 + }, + { + "epoch": 3.11, + "learning_rate": 2.755555555555556e-05, + "loss": 1.5742, + "step": 10225 + }, + { + "epoch": 3.12, + "learning_rate": 2.752511415525114e-05, + "loss": 1.5312, + "step": 10250 + }, + { + "epoch": 3.13, + "learning_rate": 2.7494672754946733e-05, + "loss": 1.4847, + "step": 10275 + }, + { + "epoch": 3.14, + "learning_rate": 2.7464231354642316e-05, + "loss": 1.5724, + "step": 10300 + }, + { + "epoch": 3.14, + "learning_rate": 2.74337899543379e-05, + "loss": 1.5509, + "step": 10325 + }, + { + "epoch": 3.15, + "learning_rate": 2.7403348554033487e-05, + "loss": 1.5007, + "step": 10350 + }, + { + "epoch": 3.16, + "learning_rate": 2.7372907153729076e-05, + "loss": 1.5213, + "step": 10375 + }, + { + "epoch": 3.17, + "learning_rate": 2.7342465753424662e-05, + "loss": 1.5834, + "step": 10400 + }, + { + "epoch": 3.17, + "learning_rate": 2.7312024353120244e-05, + "loss": 1.5101, + "step": 10425 + }, + { + "epoch": 3.18, + "learning_rate": 2.728158295281583e-05, + "loss": 1.5733, + "step": 10450 + }, + { + "epoch": 3.19, + "learning_rate": 2.725114155251142e-05, + "loss": 1.5903, + "step": 10475 + }, + { + "epoch": 3.2, + "learning_rate": 2.7220700152207005e-05, + "loss": 1.5721, + "step": 10500 + }, + { + "epoch": 3.2, + "learning_rate": 2.719025875190259e-05, + "loss": 1.5293, + "step": 10525 + }, + { + "epoch": 3.21, + "learning_rate": 2.7159817351598176e-05, + "loss": 1.6016, + "step": 10550 + }, + { + "epoch": 3.22, + "learning_rate": 2.7129375951293765e-05, + "loss": 1.4873, + "step": 10575 + }, + { + "epoch": 3.23, + "learning_rate": 2.7098934550989347e-05, + "loss": 1.6021, + "step": 10600 + }, + { + "epoch": 3.23, + "learning_rate": 2.7068493150684933e-05, + "loss": 1.6195, + "step": 10625 + }, + { + "epoch": 3.24, + "learning_rate": 2.703805175038052e-05, + "loss": 1.6156, + "step": 10650 + }, + { + "epoch": 3.25, + "learning_rate": 2.7007610350076108e-05, + "loss": 1.5061, + "step": 10675 + }, + { + "epoch": 3.26, + "learning_rate": 2.6977168949771693e-05, + "loss": 1.5679, + "step": 10700 + }, + { + "epoch": 3.26, + "learning_rate": 2.694672754946728e-05, + "loss": 1.6087, + "step": 10725 + }, + { + "epoch": 3.27, + "learning_rate": 2.691628614916286e-05, + "loss": 1.5767, + "step": 10750 + }, + { + "epoch": 3.28, + "learning_rate": 2.688584474885845e-05, + "loss": 1.5589, + "step": 10775 + }, + { + "epoch": 3.29, + "learning_rate": 2.6855403348554036e-05, + "loss": 1.5384, + "step": 10800 + }, + { + "epoch": 3.3, + "learning_rate": 2.6824961948249622e-05, + "loss": 1.5174, + "step": 10825 + }, + { + "epoch": 3.3, + "learning_rate": 2.6794520547945207e-05, + "loss": 1.585, + "step": 10850 + }, + { + "epoch": 3.31, + "learning_rate": 2.6764079147640796e-05, + "loss": 1.5766, + "step": 10875 + }, + { + "epoch": 3.32, + "learning_rate": 2.6733637747336382e-05, + "loss": 1.5724, + "step": 10900 + }, + { + "epoch": 3.33, + "learning_rate": 2.6703196347031964e-05, + "loss": 1.5789, + "step": 10925 + }, + { + "epoch": 3.33, + "learning_rate": 2.667275494672755e-05, + "loss": 1.566, + "step": 10950 + }, + { + "epoch": 3.34, + "learning_rate": 2.664231354642314e-05, + "loss": 1.5702, + "step": 10975 + }, + { + "epoch": 3.35, + "learning_rate": 2.6611872146118725e-05, + "loss": 1.5913, + "step": 11000 + }, + { + "epoch": 3.36, + "learning_rate": 2.658143074581431e-05, + "loss": 1.5544, + "step": 11025 + }, + { + "epoch": 3.36, + "learning_rate": 2.6550989345509893e-05, + "loss": 1.4748, + "step": 11050 + }, + { + "epoch": 3.37, + "learning_rate": 2.6520547945205485e-05, + "loss": 1.5538, + "step": 11075 + }, + { + "epoch": 3.38, + "learning_rate": 2.6490106544901068e-05, + "loss": 1.5493, + "step": 11100 + }, + { + "epoch": 3.39, + "learning_rate": 2.6459665144596653e-05, + "loss": 1.5631, + "step": 11125 + }, + { + "epoch": 3.39, + "learning_rate": 2.642922374429224e-05, + "loss": 1.6231, + "step": 11150 + }, + { + "epoch": 3.4, + "learning_rate": 2.6398782343987828e-05, + "loss": 1.5592, + "step": 11175 + }, + { + "epoch": 3.41, + "learning_rate": 2.6368340943683414e-05, + "loss": 1.4599, + "step": 11200 + }, + { + "epoch": 3.42, + "learning_rate": 2.6337899543378996e-05, + "loss": 1.5991, + "step": 11225 + }, + { + "epoch": 3.42, + "learning_rate": 2.630745814307458e-05, + "loss": 1.5998, + "step": 11250 + }, + { + "epoch": 3.43, + "learning_rate": 2.627701674277017e-05, + "loss": 1.5227, + "step": 11275 + }, + { + "epoch": 3.44, + "learning_rate": 2.6246575342465756e-05, + "loss": 1.5894, + "step": 11300 + }, + { + "epoch": 3.45, + "learning_rate": 2.6216133942161342e-05, + "loss": 1.5064, + "step": 11325 + }, + { + "epoch": 3.46, + "learning_rate": 2.6185692541856928e-05, + "loss": 1.5878, + "step": 11350 + }, + { + "epoch": 3.46, + "learning_rate": 2.6155251141552517e-05, + "loss": 1.608, + "step": 11375 + }, + { + "epoch": 3.47, + "learning_rate": 2.61248097412481e-05, + "loss": 1.6658, + "step": 11400 + }, + { + "epoch": 3.48, + "learning_rate": 2.6094368340943685e-05, + "loss": 1.5905, + "step": 11425 + }, + { + "epoch": 3.49, + "learning_rate": 2.606392694063927e-05, + "loss": 1.6711, + "step": 11450 + }, + { + "epoch": 3.49, + "learning_rate": 2.603348554033486e-05, + "loss": 1.5699, + "step": 11475 + }, + { + "epoch": 3.5, + "learning_rate": 2.6003044140030445e-05, + "loss": 1.6112, + "step": 11500 + }, + { + "epoch": 3.51, + "learning_rate": 2.597260273972603e-05, + "loss": 1.5901, + "step": 11525 + }, + { + "epoch": 3.52, + "learning_rate": 2.5942161339421613e-05, + "loss": 1.5447, + "step": 11550 + }, + { + "epoch": 3.52, + "learning_rate": 2.5911719939117202e-05, + "loss": 1.5636, + "step": 11575 + }, + { + "epoch": 3.53, + "learning_rate": 2.5881278538812788e-05, + "loss": 1.6046, + "step": 11600 + }, + { + "epoch": 3.54, + "learning_rate": 2.5850837138508374e-05, + "loss": 1.5229, + "step": 11625 + }, + { + "epoch": 3.55, + "learning_rate": 2.582039573820396e-05, + "loss": 1.6017, + "step": 11650 + }, + { + "epoch": 3.55, + "learning_rate": 2.578995433789955e-05, + "loss": 1.5262, + "step": 11675 + }, + { + "epoch": 3.56, + "learning_rate": 2.5759512937595134e-05, + "loss": 1.519, + "step": 11700 + }, + { + "epoch": 3.57, + "learning_rate": 2.5729071537290716e-05, + "loss": 1.5743, + "step": 11725 + }, + { + "epoch": 3.58, + "learning_rate": 2.5698630136986302e-05, + "loss": 1.566, + "step": 11750 + }, + { + "epoch": 3.58, + "learning_rate": 2.566818873668189e-05, + "loss": 1.5854, + "step": 11775 + }, + { + "epoch": 3.59, + "learning_rate": 2.5637747336377477e-05, + "loss": 1.6292, + "step": 11800 + }, + { + "epoch": 3.6, + "learning_rate": 2.5607305936073062e-05, + "loss": 1.6367, + "step": 11825 + }, + { + "epoch": 3.61, + "learning_rate": 2.5576864535768645e-05, + "loss": 1.6502, + "step": 11850 + }, + { + "epoch": 3.61, + "learning_rate": 2.5546423135464237e-05, + "loss": 1.6127, + "step": 11875 + }, + { + "epoch": 3.62, + "learning_rate": 2.551598173515982e-05, + "loss": 1.5351, + "step": 11900 + }, + { + "epoch": 3.63, + "learning_rate": 2.5485540334855405e-05, + "loss": 1.5574, + "step": 11925 + }, + { + "epoch": 3.64, + "learning_rate": 2.545509893455099e-05, + "loss": 1.6821, + "step": 11950 + }, + { + "epoch": 3.65, + "learning_rate": 2.542465753424658e-05, + "loss": 1.4367, + "step": 11975 + }, + { + "epoch": 3.65, + "learning_rate": 2.5394216133942166e-05, + "loss": 1.6209, + "step": 12000 + }, + { + "epoch": 3.66, + "learning_rate": 2.5363774733637748e-05, + "loss": 1.5987, + "step": 12025 + }, + { + "epoch": 3.67, + "learning_rate": 2.5333333333333334e-05, + "loss": 1.5591, + "step": 12050 + }, + { + "epoch": 3.68, + "learning_rate": 2.5302891933028923e-05, + "loss": 1.6609, + "step": 12075 + }, + { + "epoch": 3.68, + "learning_rate": 2.527245053272451e-05, + "loss": 1.5506, + "step": 12100 + }, + { + "epoch": 3.69, + "learning_rate": 2.5242009132420094e-05, + "loss": 1.6036, + "step": 12125 + }, + { + "epoch": 3.7, + "learning_rate": 2.521156773211568e-05, + "loss": 1.6555, + "step": 12150 + }, + { + "epoch": 3.71, + "learning_rate": 2.518112633181127e-05, + "loss": 1.6392, + "step": 12175 + }, + { + "epoch": 3.71, + "learning_rate": 2.515068493150685e-05, + "loss": 1.6013, + "step": 12200 + }, + { + "epoch": 3.72, + "learning_rate": 2.5120243531202437e-05, + "loss": 1.6, + "step": 12225 + }, + { + "epoch": 3.73, + "learning_rate": 2.5089802130898022e-05, + "loss": 1.5195, + "step": 12250 + }, + { + "epoch": 3.74, + "learning_rate": 2.505936073059361e-05, + "loss": 1.5349, + "step": 12275 + }, + { + "epoch": 3.74, + "learning_rate": 2.5028919330289197e-05, + "loss": 1.6064, + "step": 12300 + }, + { + "epoch": 3.75, + "learning_rate": 2.4998477929984783e-05, + "loss": 1.5887, + "step": 12325 + }, + { + "epoch": 3.76, + "learning_rate": 2.4969254185692542e-05, + "loss": 1.6195, + "step": 12350 + }, + { + "epoch": 3.77, + "learning_rate": 2.493881278538813e-05, + "loss": 1.614, + "step": 12375 + }, + { + "epoch": 3.77, + "learning_rate": 2.4908371385083717e-05, + "loss": 1.6406, + "step": 12400 + }, + { + "epoch": 3.78, + "learning_rate": 2.4877929984779302e-05, + "loss": 1.5978, + "step": 12425 + }, + { + "epoch": 3.79, + "learning_rate": 2.4847488584474885e-05, + "loss": 1.662, + "step": 12450 + }, + { + "epoch": 3.8, + "learning_rate": 2.4817047184170474e-05, + "loss": 1.5427, + "step": 12475 + }, + { + "epoch": 3.81, + "learning_rate": 2.478660578386606e-05, + "loss": 1.6268, + "step": 12500 + }, + { + "epoch": 3.81, + "learning_rate": 2.4756164383561645e-05, + "loss": 1.6355, + "step": 12525 + }, + { + "epoch": 3.82, + "learning_rate": 2.472572298325723e-05, + "loss": 1.5728, + "step": 12550 + }, + { + "epoch": 3.83, + "learning_rate": 2.469528158295282e-05, + "loss": 1.6309, + "step": 12575 + }, + { + "epoch": 3.84, + "learning_rate": 2.4664840182648405e-05, + "loss": 1.5316, + "step": 12600 + }, + { + "epoch": 3.84, + "learning_rate": 2.4634398782343988e-05, + "loss": 1.55, + "step": 12625 + }, + { + "epoch": 3.85, + "learning_rate": 2.4603957382039573e-05, + "loss": 1.6023, + "step": 12650 + }, + { + "epoch": 3.86, + "learning_rate": 2.4573515981735162e-05, + "loss": 1.5844, + "step": 12675 + }, + { + "epoch": 3.87, + "learning_rate": 2.4543074581430748e-05, + "loss": 1.6453, + "step": 12700 + }, + { + "epoch": 3.87, + "learning_rate": 2.4512633181126334e-05, + "loss": 1.509, + "step": 12725 + }, + { + "epoch": 3.88, + "learning_rate": 2.448219178082192e-05, + "loss": 1.5422, + "step": 12750 + }, + { + "epoch": 3.89, + "learning_rate": 2.445175038051751e-05, + "loss": 1.6115, + "step": 12775 + }, + { + "epoch": 3.9, + "learning_rate": 2.442130898021309e-05, + "loss": 1.5477, + "step": 12800 + }, + { + "epoch": 3.9, + "learning_rate": 2.4390867579908677e-05, + "loss": 1.6476, + "step": 12825 + }, + { + "epoch": 3.91, + "learning_rate": 2.4360426179604262e-05, + "loss": 1.6505, + "step": 12850 + }, + { + "epoch": 3.92, + "learning_rate": 2.432998477929985e-05, + "loss": 1.5546, + "step": 12875 + }, + { + "epoch": 3.93, + "learning_rate": 2.4299543378995437e-05, + "loss": 1.5796, + "step": 12900 + }, + { + "epoch": 3.93, + "learning_rate": 2.426910197869102e-05, + "loss": 1.5756, + "step": 12925 + }, + { + "epoch": 3.94, + "learning_rate": 2.4238660578386605e-05, + "loss": 1.5754, + "step": 12950 + }, + { + "epoch": 3.95, + "learning_rate": 2.4208219178082194e-05, + "loss": 1.5819, + "step": 12975 + }, + { + "epoch": 3.96, + "learning_rate": 2.417777777777778e-05, + "loss": 1.5826, + "step": 13000 + }, + { + "epoch": 3.96, + "learning_rate": 2.4147336377473365e-05, + "loss": 1.6289, + "step": 13025 + }, + { + "epoch": 3.97, + "learning_rate": 2.411689497716895e-05, + "loss": 1.5559, + "step": 13050 + }, + { + "epoch": 3.98, + "learning_rate": 2.408645357686454e-05, + "loss": 1.6191, + "step": 13075 + }, + { + "epoch": 3.99, + "learning_rate": 2.4056012176560122e-05, + "loss": 1.5491, + "step": 13100 + }, + { + "epoch": 4.0, + "learning_rate": 2.4025570776255708e-05, + "loss": 1.6345, + "step": 13125 + }, + { + "epoch": 4.0, + "learning_rate": 2.3995129375951294e-05, + "loss": 1.4829, + "step": 13150 + }, + { + "epoch": 4.01, + "learning_rate": 2.3964687975646883e-05, + "loss": 1.4067, + "step": 13175 + }, + { + "epoch": 4.02, + "learning_rate": 2.393424657534247e-05, + "loss": 1.3637, + "step": 13200 + }, + { + "epoch": 4.03, + "learning_rate": 2.3903805175038054e-05, + "loss": 1.2731, + "step": 13225 + }, + { + "epoch": 4.03, + "learning_rate": 2.3873363774733636e-05, + "loss": 1.3727, + "step": 13250 + }, + { + "epoch": 4.04, + "learning_rate": 2.3842922374429226e-05, + "loss": 1.2957, + "step": 13275 + }, + { + "epoch": 4.05, + "learning_rate": 2.381248097412481e-05, + "loss": 1.3658, + "step": 13300 + }, + { + "epoch": 4.06, + "learning_rate": 2.3782039573820397e-05, + "loss": 1.4263, + "step": 13325 + }, + { + "epoch": 4.06, + "learning_rate": 2.3751598173515983e-05, + "loss": 1.36, + "step": 13350 + }, + { + "epoch": 4.07, + "learning_rate": 2.372115677321157e-05, + "loss": 1.2446, + "step": 13375 + }, + { + "epoch": 4.08, + "learning_rate": 2.3690715372907157e-05, + "loss": 1.3264, + "step": 13400 + }, + { + "epoch": 4.09, + "learning_rate": 2.366027397260274e-05, + "loss": 1.3395, + "step": 13425 + }, + { + "epoch": 4.09, + "learning_rate": 2.3629832572298325e-05, + "loss": 1.3421, + "step": 13450 + }, + { + "epoch": 4.1, + "learning_rate": 2.3599391171993914e-05, + "loss": 1.4379, + "step": 13475 + }, + { + "epoch": 4.11, + "learning_rate": 2.35689497716895e-05, + "loss": 1.3355, + "step": 13500 + }, + { + "epoch": 4.12, + "learning_rate": 2.3538508371385086e-05, + "loss": 1.4226, + "step": 13525 + }, + { + "epoch": 4.12, + "learning_rate": 2.3508066971080668e-05, + "loss": 1.4034, + "step": 13550 + }, + { + "epoch": 4.13, + "learning_rate": 2.347762557077626e-05, + "loss": 1.3015, + "step": 13575 + }, + { + "epoch": 4.14, + "learning_rate": 2.3447184170471843e-05, + "loss": 1.3953, + "step": 13600 + }, + { + "epoch": 4.15, + "learning_rate": 2.341674277016743e-05, + "loss": 1.3534, + "step": 13625 + }, + { + "epoch": 4.16, + "learning_rate": 2.3386301369863014e-05, + "loss": 1.3378, + "step": 13650 + }, + { + "epoch": 4.16, + "learning_rate": 2.3355859969558603e-05, + "loss": 1.345, + "step": 13675 + }, + { + "epoch": 4.17, + "learning_rate": 2.332541856925419e-05, + "loss": 1.4098, + "step": 13700 + }, + { + "epoch": 4.18, + "learning_rate": 2.329497716894977e-05, + "loss": 1.3938, + "step": 13725 + }, + { + "epoch": 4.19, + "learning_rate": 2.3264535768645357e-05, + "loss": 1.4573, + "step": 13750 + }, + { + "epoch": 4.19, + "learning_rate": 2.3234094368340946e-05, + "loss": 1.396, + "step": 13775 + }, + { + "epoch": 4.2, + "learning_rate": 2.320365296803653e-05, + "loss": 1.3915, + "step": 13800 + }, + { + "epoch": 4.21, + "learning_rate": 2.3173211567732117e-05, + "loss": 1.4149, + "step": 13825 + }, + { + "epoch": 4.22, + "learning_rate": 2.3142770167427703e-05, + "loss": 1.4299, + "step": 13850 + }, + { + "epoch": 4.22, + "learning_rate": 2.3112328767123292e-05, + "loss": 1.388, + "step": 13875 + }, + { + "epoch": 4.23, + "learning_rate": 2.3081887366818874e-05, + "loss": 1.4334, + "step": 13900 + }, + { + "epoch": 4.24, + "learning_rate": 2.305144596651446e-05, + "loss": 1.358, + "step": 13925 + }, + { + "epoch": 4.25, + "learning_rate": 2.3021004566210046e-05, + "loss": 1.3849, + "step": 13950 + }, + { + "epoch": 4.25, + "learning_rate": 2.2990563165905635e-05, + "loss": 1.2529, + "step": 13975 + }, + { + "epoch": 4.26, + "learning_rate": 2.296012176560122e-05, + "loss": 1.406, + "step": 14000 + }, + { + "epoch": 4.27, + "learning_rate": 2.2929680365296806e-05, + "loss": 1.3589, + "step": 14025 + }, + { + "epoch": 4.28, + "learning_rate": 2.289923896499239e-05, + "loss": 1.3382, + "step": 14050 + }, + { + "epoch": 4.28, + "learning_rate": 2.2868797564687977e-05, + "loss": 1.4394, + "step": 14075 + }, + { + "epoch": 4.29, + "learning_rate": 2.2838356164383563e-05, + "loss": 1.4327, + "step": 14100 + }, + { + "epoch": 4.3, + "learning_rate": 2.280791476407915e-05, + "loss": 1.3955, + "step": 14125 + }, + { + "epoch": 4.31, + "learning_rate": 2.2777473363774735e-05, + "loss": 1.38, + "step": 14150 + }, + { + "epoch": 4.32, + "learning_rate": 2.2747031963470324e-05, + "loss": 1.3289, + "step": 14175 + }, + { + "epoch": 4.32, + "learning_rate": 2.271659056316591e-05, + "loss": 1.3282, + "step": 14200 + }, + { + "epoch": 4.33, + "learning_rate": 2.268614916286149e-05, + "loss": 1.4225, + "step": 14225 + }, + { + "epoch": 4.34, + "learning_rate": 2.2655707762557077e-05, + "loss": 1.358, + "step": 14250 + }, + { + "epoch": 4.35, + "learning_rate": 2.2625266362252666e-05, + "loss": 1.3493, + "step": 14275 + }, + { + "epoch": 4.35, + "learning_rate": 2.2594824961948252e-05, + "loss": 1.4606, + "step": 14300 + }, + { + "epoch": 4.36, + "learning_rate": 2.2564383561643838e-05, + "loss": 1.3846, + "step": 14325 + }, + { + "epoch": 4.37, + "learning_rate": 2.253394216133942e-05, + "loss": 1.3729, + "step": 14350 + }, + { + "epoch": 4.38, + "learning_rate": 2.2503500761035012e-05, + "loss": 1.3396, + "step": 14375 + }, + { + "epoch": 4.38, + "learning_rate": 2.2473059360730595e-05, + "loss": 1.3955, + "step": 14400 + }, + { + "epoch": 4.39, + "learning_rate": 2.244261796042618e-05, + "loss": 1.3225, + "step": 14425 + }, + { + "epoch": 4.4, + "learning_rate": 2.2412176560121766e-05, + "loss": 1.3583, + "step": 14450 + }, + { + "epoch": 4.41, + "learning_rate": 2.2381735159817355e-05, + "loss": 1.3146, + "step": 14475 + }, + { + "epoch": 4.41, + "learning_rate": 2.235129375951294e-05, + "loss": 1.4239, + "step": 14500 + }, + { + "epoch": 4.42, + "learning_rate": 2.2320852359208523e-05, + "loss": 1.3779, + "step": 14525 + }, + { + "epoch": 4.43, + "learning_rate": 2.229041095890411e-05, + "loss": 1.4361, + "step": 14550 + }, + { + "epoch": 4.44, + "learning_rate": 2.2259969558599698e-05, + "loss": 1.3766, + "step": 14575 + }, + { + "epoch": 4.44, + "learning_rate": 2.2229528158295284e-05, + "loss": 1.3609, + "step": 14600 + }, + { + "epoch": 4.45, + "learning_rate": 2.219908675799087e-05, + "loss": 1.4332, + "step": 14625 + }, + { + "epoch": 4.46, + "learning_rate": 2.2168645357686455e-05, + "loss": 1.4219, + "step": 14650 + }, + { + "epoch": 4.47, + "learning_rate": 2.2138203957382044e-05, + "loss": 1.4023, + "step": 14675 + }, + { + "epoch": 4.47, + "learning_rate": 2.2107762557077626e-05, + "loss": 1.4445, + "step": 14700 + }, + { + "epoch": 4.48, + "learning_rate": 2.2077321156773212e-05, + "loss": 1.3996, + "step": 14725 + }, + { + "epoch": 4.49, + "learning_rate": 2.2048097412480978e-05, + "loss": 1.3752, + "step": 14750 + }, + { + "epoch": 4.5, + "learning_rate": 2.2017656012176563e-05, + "loss": 1.3732, + "step": 14775 + }, + { + "epoch": 4.51, + "learning_rate": 2.198721461187215e-05, + "loss": 1.4234, + "step": 14800 + }, + { + "epoch": 4.51, + "learning_rate": 2.195677321156773e-05, + "loss": 1.3971, + "step": 14825 + }, + { + "epoch": 4.52, + "learning_rate": 2.192633181126332e-05, + "loss": 1.3729, + "step": 14850 + }, + { + "epoch": 4.53, + "learning_rate": 2.1895890410958906e-05, + "loss": 1.3405, + "step": 14875 + }, + { + "epoch": 4.54, + "learning_rate": 2.1865449010654492e-05, + "loss": 1.3572, + "step": 14900 + }, + { + "epoch": 4.54, + "learning_rate": 2.1835007610350077e-05, + "loss": 1.3331, + "step": 14925 + }, + { + "epoch": 4.55, + "learning_rate": 2.1804566210045667e-05, + "loss": 1.368, + "step": 14950 + }, + { + "epoch": 4.56, + "learning_rate": 2.1774124809741252e-05, + "loss": 1.3649, + "step": 14975 + }, + { + "epoch": 4.57, + "learning_rate": 2.1743683409436835e-05, + "loss": 1.389, + "step": 15000 + }, + { + "epoch": 4.57, + "learning_rate": 2.171324200913242e-05, + "loss": 1.4163, + "step": 15025 + }, + { + "epoch": 4.58, + "learning_rate": 2.168280060882801e-05, + "loss": 1.3442, + "step": 15050 + }, + { + "epoch": 4.59, + "learning_rate": 2.1652359208523595e-05, + "loss": 1.3446, + "step": 15075 + }, + { + "epoch": 4.6, + "learning_rate": 2.162191780821918e-05, + "loss": 1.3772, + "step": 15100 + }, + { + "epoch": 4.6, + "learning_rate": 2.1591476407914763e-05, + "loss": 1.4261, + "step": 15125 + }, + { + "epoch": 4.61, + "learning_rate": 2.1561035007610352e-05, + "loss": 1.4058, + "step": 15150 + }, + { + "epoch": 4.62, + "learning_rate": 2.1530593607305938e-05, + "loss": 1.3876, + "step": 15175 + }, + { + "epoch": 4.63, + "learning_rate": 2.1500152207001523e-05, + "loss": 1.3427, + "step": 15200 + }, + { + "epoch": 4.63, + "learning_rate": 2.146971080669711e-05, + "loss": 1.3778, + "step": 15225 + }, + { + "epoch": 4.64, + "learning_rate": 2.1439269406392698e-05, + "loss": 1.3358, + "step": 15250 + }, + { + "epoch": 4.65, + "learning_rate": 2.1408828006088284e-05, + "loss": 1.4508, + "step": 15275 + }, + { + "epoch": 4.66, + "learning_rate": 2.1378386605783866e-05, + "loss": 1.3823, + "step": 15300 + }, + { + "epoch": 4.67, + "learning_rate": 2.1347945205479452e-05, + "loss": 1.4207, + "step": 15325 + }, + { + "epoch": 4.67, + "learning_rate": 2.131750380517504e-05, + "loss": 1.3759, + "step": 15350 + }, + { + "epoch": 4.68, + "learning_rate": 2.1287062404870626e-05, + "loss": 1.4415, + "step": 15375 + }, + { + "epoch": 4.69, + "learning_rate": 2.1256621004566212e-05, + "loss": 1.3673, + "step": 15400 + }, + { + "epoch": 4.7, + "learning_rate": 2.1226179604261798e-05, + "loss": 1.4189, + "step": 15425 + }, + { + "epoch": 4.7, + "learning_rate": 2.1195738203957387e-05, + "loss": 1.3827, + "step": 15450 + }, + { + "epoch": 4.71, + "learning_rate": 2.116529680365297e-05, + "loss": 1.3898, + "step": 15475 + }, + { + "epoch": 4.72, + "learning_rate": 2.1134855403348555e-05, + "loss": 1.444, + "step": 15500 + }, + { + "epoch": 4.73, + "learning_rate": 2.110441400304414e-05, + "loss": 1.3941, + "step": 15525 + }, + { + "epoch": 4.73, + "learning_rate": 2.107397260273973e-05, + "loss": 1.4336, + "step": 15550 + }, + { + "epoch": 4.74, + "learning_rate": 2.1043531202435315e-05, + "loss": 1.4159, + "step": 15575 + }, + { + "epoch": 4.75, + "learning_rate": 2.10130898021309e-05, + "loss": 1.351, + "step": 15600 + }, + { + "epoch": 4.76, + "learning_rate": 2.0982648401826483e-05, + "loss": 1.336, + "step": 15625 + }, + { + "epoch": 4.76, + "learning_rate": 2.0952207001522072e-05, + "loss": 1.4279, + "step": 15650 + }, + { + "epoch": 4.77, + "learning_rate": 2.0921765601217658e-05, + "loss": 1.3384, + "step": 15675 + }, + { + "epoch": 4.78, + "learning_rate": 2.0891324200913244e-05, + "loss": 1.3338, + "step": 15700 + }, + { + "epoch": 4.79, + "learning_rate": 2.086088280060883e-05, + "loss": 1.3422, + "step": 15725 + }, + { + "epoch": 4.79, + "learning_rate": 2.083044140030442e-05, + "loss": 1.4765, + "step": 15750 + }, + { + "epoch": 4.8, + "learning_rate": 2.08e-05, + "loss": 1.4493, + "step": 15775 + }, + { + "epoch": 4.81, + "learning_rate": 2.0769558599695586e-05, + "loss": 1.4215, + "step": 15800 + }, + { + "epoch": 4.82, + "learning_rate": 2.0739117199391172e-05, + "loss": 1.4274, + "step": 15825 + }, + { + "epoch": 4.82, + "learning_rate": 2.070867579908676e-05, + "loss": 1.4245, + "step": 15850 + }, + { + "epoch": 4.83, + "learning_rate": 2.0678234398782347e-05, + "loss": 1.4564, + "step": 15875 + }, + { + "epoch": 4.84, + "learning_rate": 2.0647792998477933e-05, + "loss": 1.3672, + "step": 15900 + }, + { + "epoch": 4.85, + "learning_rate": 2.0617351598173515e-05, + "loss": 1.4063, + "step": 15925 + }, + { + "epoch": 4.86, + "learning_rate": 2.0586910197869104e-05, + "loss": 1.4454, + "step": 15950 + }, + { + "epoch": 4.86, + "learning_rate": 2.055646879756469e-05, + "loss": 1.4042, + "step": 15975 + }, + { + "epoch": 4.87, + "learning_rate": 2.0526027397260275e-05, + "loss": 1.3176, + "step": 16000 + }, + { + "epoch": 4.88, + "learning_rate": 2.049558599695586e-05, + "loss": 1.3937, + "step": 16025 + }, + { + "epoch": 4.89, + "learning_rate": 2.046514459665145e-05, + "loss": 1.4184, + "step": 16050 + }, + { + "epoch": 4.89, + "learning_rate": 2.0434703196347036e-05, + "loss": 1.3941, + "step": 16075 + }, + { + "epoch": 4.9, + "learning_rate": 2.0404261796042618e-05, + "loss": 1.3824, + "step": 16100 + }, + { + "epoch": 4.91, + "learning_rate": 2.0373820395738204e-05, + "loss": 1.3031, + "step": 16125 + }, + { + "epoch": 4.92, + "learning_rate": 2.0343378995433793e-05, + "loss": 1.3937, + "step": 16150 + }, + { + "epoch": 4.92, + "learning_rate": 2.031293759512938e-05, + "loss": 1.3045, + "step": 16175 + }, + { + "epoch": 4.93, + "learning_rate": 2.0282496194824964e-05, + "loss": 1.4681, + "step": 16200 + }, + { + "epoch": 4.94, + "learning_rate": 2.025205479452055e-05, + "loss": 1.4129, + "step": 16225 + }, + { + "epoch": 4.95, + "learning_rate": 2.022161339421614e-05, + "loss": 1.4236, + "step": 16250 + }, + { + "epoch": 4.95, + "learning_rate": 2.019117199391172e-05, + "loss": 1.4638, + "step": 16275 + }, + { + "epoch": 4.96, + "learning_rate": 2.0160730593607307e-05, + "loss": 1.4065, + "step": 16300 + }, + { + "epoch": 4.97, + "learning_rate": 2.0130289193302892e-05, + "loss": 1.395, + "step": 16325 + }, + { + "epoch": 4.98, + "learning_rate": 2.009984779299848e-05, + "loss": 1.441, + "step": 16350 + }, + { + "epoch": 4.98, + "learning_rate": 2.0069406392694067e-05, + "loss": 1.4944, + "step": 16375 + }, + { + "epoch": 4.99, + "learning_rate": 2.003896499238965e-05, + "loss": 1.4018, + "step": 16400 + }, + { + "epoch": 5.0, + "learning_rate": 2.0008523592085235e-05, + "loss": 1.4352, + "step": 16425 + }, + { + "epoch": 5.01, + "learning_rate": 1.9978082191780824e-05, + "loss": 1.1718, + "step": 16450 + }, + { + "epoch": 5.02, + "learning_rate": 1.994764079147641e-05, + "loss": 1.2079, + "step": 16475 + }, + { + "epoch": 5.02, + "learning_rate": 1.9917199391171996e-05, + "loss": 1.1671, + "step": 16500 + }, + { + "epoch": 5.03, + "learning_rate": 1.988675799086758e-05, + "loss": 1.1194, + "step": 16525 + }, + { + "epoch": 5.04, + "learning_rate": 1.9856316590563167e-05, + "loss": 1.2303, + "step": 16550 + }, + { + "epoch": 5.05, + "learning_rate": 1.9825875190258753e-05, + "loss": 1.1932, + "step": 16575 + }, + { + "epoch": 5.05, + "learning_rate": 1.979543378995434e-05, + "loss": 1.2365, + "step": 16600 + }, + { + "epoch": 5.06, + "learning_rate": 1.9764992389649927e-05, + "loss": 1.1352, + "step": 16625 + }, + { + "epoch": 5.07, + "learning_rate": 1.973455098934551e-05, + "loss": 1.205, + "step": 16650 + }, + { + "epoch": 5.08, + "learning_rate": 1.97041095890411e-05, + "loss": 1.2005, + "step": 16675 + }, + { + "epoch": 5.08, + "learning_rate": 1.9673668188736684e-05, + "loss": 1.2812, + "step": 16700 + }, + { + "epoch": 5.09, + "learning_rate": 1.964322678843227e-05, + "loss": 1.1773, + "step": 16725 + }, + { + "epoch": 5.1, + "learning_rate": 1.9612785388127856e-05, + "loss": 1.1904, + "step": 16750 + }, + { + "epoch": 5.11, + "learning_rate": 1.958234398782344e-05, + "loss": 1.2193, + "step": 16775 + }, + { + "epoch": 5.11, + "learning_rate": 1.9551902587519027e-05, + "loss": 1.1983, + "step": 16800 + }, + { + "epoch": 5.12, + "learning_rate": 1.9521461187214613e-05, + "loss": 1.1261, + "step": 16825 + }, + { + "epoch": 5.13, + "learning_rate": 1.94910197869102e-05, + "loss": 1.2459, + "step": 16850 + }, + { + "epoch": 5.14, + "learning_rate": 1.9460578386605788e-05, + "loss": 1.2696, + "step": 16875 + }, + { + "epoch": 5.14, + "learning_rate": 1.943013698630137e-05, + "loss": 1.1993, + "step": 16900 + }, + { + "epoch": 5.15, + "learning_rate": 1.939969558599696e-05, + "loss": 1.1595, + "step": 16925 + }, + { + "epoch": 5.16, + "learning_rate": 1.9369254185692545e-05, + "loss": 1.1438, + "step": 16950 + }, + { + "epoch": 5.17, + "learning_rate": 1.933881278538813e-05, + "loss": 1.2023, + "step": 16975 + }, + { + "epoch": 5.18, + "learning_rate": 1.9308371385083716e-05, + "loss": 1.2327, + "step": 17000 + }, + { + "epoch": 5.18, + "learning_rate": 1.92779299847793e-05, + "loss": 1.2655, + "step": 17025 + }, + { + "epoch": 5.19, + "learning_rate": 1.9247488584474887e-05, + "loss": 1.1495, + "step": 17050 + }, + { + "epoch": 5.2, + "learning_rate": 1.9217047184170473e-05, + "loss": 1.2121, + "step": 17075 + }, + { + "epoch": 5.21, + "learning_rate": 1.918660578386606e-05, + "loss": 1.2187, + "step": 17100 + }, + { + "epoch": 5.21, + "learning_rate": 1.9156164383561648e-05, + "loss": 1.2401, + "step": 17125 + }, + { + "epoch": 5.22, + "learning_rate": 1.912572298325723e-05, + "loss": 1.1772, + "step": 17150 + }, + { + "epoch": 5.23, + "learning_rate": 1.909528158295282e-05, + "loss": 1.2103, + "step": 17175 + }, + { + "epoch": 5.24, + "learning_rate": 1.90648401826484e-05, + "loss": 1.2021, + "step": 17200 + }, + { + "epoch": 5.24, + "learning_rate": 1.9035616438356167e-05, + "loss": 1.1839, + "step": 17225 + }, + { + "epoch": 5.25, + "learning_rate": 1.9005175038051753e-05, + "loss": 1.2107, + "step": 17250 + }, + { + "epoch": 5.26, + "learning_rate": 1.897473363774734e-05, + "loss": 1.2437, + "step": 17275 + }, + { + "epoch": 5.27, + "learning_rate": 1.8944292237442924e-05, + "loss": 1.2521, + "step": 17300 + }, + { + "epoch": 5.27, + "learning_rate": 1.8915068493150687e-05, + "loss": 1.1999, + "step": 17325 + }, + { + "epoch": 5.28, + "learning_rate": 1.8884627092846272e-05, + "loss": 1.1996, + "step": 17350 + }, + { + "epoch": 5.29, + "learning_rate": 1.8854185692541858e-05, + "loss": 1.245, + "step": 17375 + }, + { + "epoch": 5.3, + "learning_rate": 1.8823744292237444e-05, + "loss": 1.2139, + "step": 17400 + }, + { + "epoch": 5.3, + "learning_rate": 1.879330289193303e-05, + "loss": 1.196, + "step": 17425 + }, + { + "epoch": 5.31, + "learning_rate": 1.8762861491628615e-05, + "loss": 1.267, + "step": 17450 + }, + { + "epoch": 5.32, + "learning_rate": 1.87324200913242e-05, + "loss": 1.2152, + "step": 17475 + }, + { + "epoch": 5.33, + "learning_rate": 1.870197869101979e-05, + "loss": 1.2098, + "step": 17500 + }, + { + "epoch": 5.33, + "learning_rate": 1.8671537290715372e-05, + "loss": 1.2526, + "step": 17525 + }, + { + "epoch": 5.34, + "learning_rate": 1.864109589041096e-05, + "loss": 1.1557, + "step": 17550 + }, + { + "epoch": 5.35, + "learning_rate": 1.8610654490106547e-05, + "loss": 1.2998, + "step": 17575 + }, + { + "epoch": 5.36, + "learning_rate": 1.8580213089802133e-05, + "loss": 1.192, + "step": 17600 + }, + { + "epoch": 5.37, + "learning_rate": 1.8549771689497718e-05, + "loss": 1.1579, + "step": 17625 + }, + { + "epoch": 5.37, + "learning_rate": 1.8519330289193304e-05, + "loss": 1.2424, + "step": 17650 + }, + { + "epoch": 5.38, + "learning_rate": 1.848888888888889e-05, + "loss": 1.2178, + "step": 17675 + }, + { + "epoch": 5.39, + "learning_rate": 1.8458447488584475e-05, + "loss": 1.2272, + "step": 17700 + }, + { + "epoch": 5.4, + "learning_rate": 1.842800608828006e-05, + "loss": 1.2794, + "step": 17725 + }, + { + "epoch": 5.4, + "learning_rate": 1.839756468797565e-05, + "loss": 1.1844, + "step": 17750 + }, + { + "epoch": 5.41, + "learning_rate": 1.8367123287671232e-05, + "loss": 1.2341, + "step": 17775 + }, + { + "epoch": 5.42, + "learning_rate": 1.833668188736682e-05, + "loss": 1.1533, + "step": 17800 + }, + { + "epoch": 5.43, + "learning_rate": 1.8306240487062407e-05, + "loss": 1.2264, + "step": 17825 + }, + { + "epoch": 5.43, + "learning_rate": 1.8275799086757993e-05, + "loss": 1.222, + "step": 17850 + }, + { + "epoch": 5.44, + "learning_rate": 1.824535768645358e-05, + "loss": 1.1864, + "step": 17875 + }, + { + "epoch": 5.45, + "learning_rate": 1.8214916286149164e-05, + "loss": 1.1615, + "step": 17900 + }, + { + "epoch": 5.46, + "learning_rate": 1.818447488584475e-05, + "loss": 1.2101, + "step": 17925 + }, + { + "epoch": 5.46, + "learning_rate": 1.8154033485540335e-05, + "loss": 1.2291, + "step": 17950 + }, + { + "epoch": 5.47, + "learning_rate": 1.812359208523592e-05, + "loss": 1.1934, + "step": 17975 + }, + { + "epoch": 5.48, + "learning_rate": 1.809315068493151e-05, + "loss": 1.1713, + "step": 18000 + }, + { + "epoch": 5.49, + "learning_rate": 1.8062709284627092e-05, + "loss": 1.1374, + "step": 18025 + }, + { + "epoch": 5.49, + "learning_rate": 1.803226788432268e-05, + "loss": 1.1841, + "step": 18050 + }, + { + "epoch": 5.5, + "learning_rate": 1.8001826484018264e-05, + "loss": 1.2757, + "step": 18075 + }, + { + "epoch": 5.51, + "learning_rate": 1.7971385083713853e-05, + "loss": 1.225, + "step": 18100 + }, + { + "epoch": 5.52, + "learning_rate": 1.794094368340944e-05, + "loss": 1.1889, + "step": 18125 + }, + { + "epoch": 5.53, + "learning_rate": 1.7910502283105024e-05, + "loss": 1.2067, + "step": 18150 + }, + { + "epoch": 5.53, + "learning_rate": 1.788006088280061e-05, + "loss": 1.2683, + "step": 18175 + }, + { + "epoch": 5.54, + "learning_rate": 1.7849619482496196e-05, + "loss": 1.194, + "step": 18200 + }, + { + "epoch": 5.55, + "learning_rate": 1.781917808219178e-05, + "loss": 1.2623, + "step": 18225 + }, + { + "epoch": 5.56, + "learning_rate": 1.7788736681887367e-05, + "loss": 1.2462, + "step": 18250 + }, + { + "epoch": 5.56, + "learning_rate": 1.7758295281582953e-05, + "loss": 1.2488, + "step": 18275 + }, + { + "epoch": 5.57, + "learning_rate": 1.7727853881278542e-05, + "loss": 1.1575, + "step": 18300 + }, + { + "epoch": 5.58, + "learning_rate": 1.7697412480974124e-05, + "loss": 1.3261, + "step": 18325 + }, + { + "epoch": 5.59, + "learning_rate": 1.7666971080669713e-05, + "loss": 1.1545, + "step": 18350 + }, + { + "epoch": 5.59, + "learning_rate": 1.76365296803653e-05, + "loss": 1.2144, + "step": 18375 + }, + { + "epoch": 5.6, + "learning_rate": 1.7606088280060884e-05, + "loss": 1.2061, + "step": 18400 + }, + { + "epoch": 5.61, + "learning_rate": 1.757564687975647e-05, + "loss": 1.2538, + "step": 18425 + }, + { + "epoch": 5.62, + "learning_rate": 1.7545205479452056e-05, + "loss": 1.238, + "step": 18450 + }, + { + "epoch": 5.62, + "learning_rate": 1.751476407914764e-05, + "loss": 1.1711, + "step": 18475 + }, + { + "epoch": 5.63, + "learning_rate": 1.7484322678843227e-05, + "loss": 1.2815, + "step": 18500 + }, + { + "epoch": 5.64, + "learning_rate": 1.7453881278538813e-05, + "loss": 1.2561, + "step": 18525 + }, + { + "epoch": 5.65, + "learning_rate": 1.7423439878234402e-05, + "loss": 1.2703, + "step": 18550 + }, + { + "epoch": 5.65, + "learning_rate": 1.7392998477929984e-05, + "loss": 1.2548, + "step": 18575 + }, + { + "epoch": 5.66, + "learning_rate": 1.7362557077625573e-05, + "loss": 1.213, + "step": 18600 + }, + { + "epoch": 5.67, + "learning_rate": 1.733211567732116e-05, + "loss": 1.2203, + "step": 18625 + }, + { + "epoch": 5.68, + "learning_rate": 1.7301674277016745e-05, + "loss": 1.2282, + "step": 18650 + }, + { + "epoch": 5.68, + "learning_rate": 1.727123287671233e-05, + "loss": 1.1951, + "step": 18675 + }, + { + "epoch": 5.69, + "learning_rate": 1.7240791476407916e-05, + "loss": 1.2652, + "step": 18700 + }, + { + "epoch": 5.7, + "learning_rate": 1.72103500761035e-05, + "loss": 1.1692, + "step": 18725 + }, + { + "epoch": 5.71, + "learning_rate": 1.7179908675799087e-05, + "loss": 1.2708, + "step": 18750 + }, + { + "epoch": 5.72, + "learning_rate": 1.7149467275494673e-05, + "loss": 1.1725, + "step": 18775 + }, + { + "epoch": 5.72, + "learning_rate": 1.7119025875190262e-05, + "loss": 1.1644, + "step": 18800 + }, + { + "epoch": 5.73, + "learning_rate": 1.7088584474885844e-05, + "loss": 1.2263, + "step": 18825 + }, + { + "epoch": 5.74, + "learning_rate": 1.7058143074581433e-05, + "loss": 1.2848, + "step": 18850 + }, + { + "epoch": 5.75, + "learning_rate": 1.7027701674277016e-05, + "loss": 1.2118, + "step": 18875 + }, + { + "epoch": 5.75, + "learning_rate": 1.6997260273972605e-05, + "loss": 1.2332, + "step": 18900 + }, + { + "epoch": 5.76, + "learning_rate": 1.696681887366819e-05, + "loss": 1.1837, + "step": 18925 + }, + { + "epoch": 5.77, + "learning_rate": 1.6936377473363776e-05, + "loss": 1.2719, + "step": 18950 + }, + { + "epoch": 5.78, + "learning_rate": 1.6905936073059362e-05, + "loss": 1.1825, + "step": 18975 + }, + { + "epoch": 5.78, + "learning_rate": 1.6875494672754948e-05, + "loss": 1.2161, + "step": 19000 + }, + { + "epoch": 5.79, + "learning_rate": 1.6845053272450533e-05, + "loss": 1.2578, + "step": 19025 + }, + { + "epoch": 5.8, + "learning_rate": 1.681461187214612e-05, + "loss": 1.2, + "step": 19050 + }, + { + "epoch": 5.81, + "learning_rate": 1.6784170471841705e-05, + "loss": 1.2705, + "step": 19075 + }, + { + "epoch": 5.81, + "learning_rate": 1.6753729071537294e-05, + "loss": 1.2378, + "step": 19100 + }, + { + "epoch": 5.82, + "learning_rate": 1.6723287671232876e-05, + "loss": 1.2475, + "step": 19125 + }, + { + "epoch": 5.83, + "learning_rate": 1.6692846270928465e-05, + "loss": 1.2566, + "step": 19150 + }, + { + "epoch": 5.84, + "learning_rate": 1.666240487062405e-05, + "loss": 1.3268, + "step": 19175 + }, + { + "epoch": 5.84, + "learning_rate": 1.6631963470319636e-05, + "loss": 1.244, + "step": 19200 + }, + { + "epoch": 5.85, + "learning_rate": 1.6601522070015222e-05, + "loss": 1.2251, + "step": 19225 + }, + { + "epoch": 5.86, + "learning_rate": 1.6571080669710808e-05, + "loss": 1.1907, + "step": 19250 + }, + { + "epoch": 5.87, + "learning_rate": 1.6540639269406393e-05, + "loss": 1.1661, + "step": 19275 + }, + { + "epoch": 5.88, + "learning_rate": 1.651019786910198e-05, + "loss": 1.1985, + "step": 19300 + }, + { + "epoch": 5.88, + "learning_rate": 1.6479756468797565e-05, + "loss": 1.1985, + "step": 19325 + }, + { + "epoch": 5.89, + "learning_rate": 1.6449315068493154e-05, + "loss": 1.2457, + "step": 19350 + }, + { + "epoch": 5.9, + "learning_rate": 1.6418873668188736e-05, + "loss": 1.1869, + "step": 19375 + }, + { + "epoch": 5.91, + "learning_rate": 1.6388432267884325e-05, + "loss": 1.1581, + "step": 19400 + }, + { + "epoch": 5.91, + "learning_rate": 1.635799086757991e-05, + "loss": 1.2958, + "step": 19425 + }, + { + "epoch": 5.92, + "learning_rate": 1.6327549467275497e-05, + "loss": 1.1531, + "step": 19450 + }, + { + "epoch": 5.93, + "learning_rate": 1.6297108066971082e-05, + "loss": 1.2319, + "step": 19475 + }, + { + "epoch": 5.94, + "learning_rate": 1.6266666666666668e-05, + "loss": 1.2317, + "step": 19500 + }, + { + "epoch": 5.94, + "learning_rate": 1.6236225266362254e-05, + "loss": 1.1822, + "step": 19525 + }, + { + "epoch": 5.95, + "learning_rate": 1.620578386605784e-05, + "loss": 1.2323, + "step": 19550 + }, + { + "epoch": 5.96, + "learning_rate": 1.6175342465753425e-05, + "loss": 1.2421, + "step": 19575 + }, + { + "epoch": 5.97, + "learning_rate": 1.6144901065449014e-05, + "loss": 1.1909, + "step": 19600 + }, + { + "epoch": 5.97, + "learning_rate": 1.6114459665144596e-05, + "loss": 1.1956, + "step": 19625 + }, + { + "epoch": 5.98, + "learning_rate": 1.6084018264840185e-05, + "loss": 1.2423, + "step": 19650 + }, + { + "epoch": 5.99, + "learning_rate": 1.6053576864535768e-05, + "loss": 1.1934, + "step": 19675 + }, + { + "epoch": 6.0, + "learning_rate": 1.6023135464231357e-05, + "loss": 1.2855, + "step": 19700 + }, + { + "epoch": 6.0, + "learning_rate": 1.5992694063926942e-05, + "loss": 1.1549, + "step": 19725 + }, + { + "epoch": 6.01, + "learning_rate": 1.5962252663622528e-05, + "loss": 1.0158, + "step": 19750 + }, + { + "epoch": 6.02, + "learning_rate": 1.5931811263318114e-05, + "loss": 1.1329, + "step": 19775 + }, + { + "epoch": 6.03, + "learning_rate": 1.59013698630137e-05, + "loss": 1.0144, + "step": 19800 + }, + { + "epoch": 6.04, + "learning_rate": 1.5870928462709285e-05, + "loss": 1.0526, + "step": 19825 + }, + { + "epoch": 6.04, + "learning_rate": 1.584048706240487e-05, + "loss": 1.0858, + "step": 19850 + }, + { + "epoch": 6.05, + "learning_rate": 1.5810045662100456e-05, + "loss": 1.0668, + "step": 19875 + }, + { + "epoch": 6.06, + "learning_rate": 1.5779604261796046e-05, + "loss": 1.0202, + "step": 19900 + }, + { + "epoch": 6.07, + "learning_rate": 1.5749162861491628e-05, + "loss": 1.0321, + "step": 19925 + }, + { + "epoch": 6.07, + "learning_rate": 1.5718721461187217e-05, + "loss": 1.0682, + "step": 19950 + }, + { + "epoch": 6.08, + "learning_rate": 1.5688280060882803e-05, + "loss": 1.0644, + "step": 19975 + }, + { + "epoch": 6.09, + "learning_rate": 1.5657838660578388e-05, + "loss": 1.1266, + "step": 20000 + }, + { + "epoch": 6.1, + "learning_rate": 1.5627397260273974e-05, + "loss": 1.0861, + "step": 20025 + }, + { + "epoch": 6.1, + "learning_rate": 1.559695585996956e-05, + "loss": 1.0446, + "step": 20050 + }, + { + "epoch": 6.11, + "learning_rate": 1.5566514459665145e-05, + "loss": 1.0408, + "step": 20075 + }, + { + "epoch": 6.12, + "learning_rate": 1.553607305936073e-05, + "loss": 1.0181, + "step": 20100 + }, + { + "epoch": 6.13, + "learning_rate": 1.5505631659056317e-05, + "loss": 1.0721, + "step": 20125 + }, + { + "epoch": 6.13, + "learning_rate": 1.5475190258751906e-05, + "loss": 1.08, + "step": 20150 + }, + { + "epoch": 6.14, + "learning_rate": 1.5444748858447488e-05, + "loss": 1.1263, + "step": 20175 + }, + { + "epoch": 6.15, + "learning_rate": 1.5414307458143077e-05, + "loss": 1.0331, + "step": 20200 + }, + { + "epoch": 6.16, + "learning_rate": 1.5383866057838663e-05, + "loss": 1.0047, + "step": 20225 + }, + { + "epoch": 6.16, + "learning_rate": 1.535342465753425e-05, + "loss": 1.0146, + "step": 20250 + }, + { + "epoch": 6.17, + "learning_rate": 1.5322983257229834e-05, + "loss": 1.0828, + "step": 20275 + }, + { + "epoch": 6.18, + "learning_rate": 1.529254185692542e-05, + "loss": 1.0656, + "step": 20300 + }, + { + "epoch": 6.19, + "learning_rate": 1.5262100456621006e-05, + "loss": 1.0661, + "step": 20325 + }, + { + "epoch": 6.19, + "learning_rate": 1.5231659056316593e-05, + "loss": 1.0858, + "step": 20350 + }, + { + "epoch": 6.2, + "learning_rate": 1.5201217656012177e-05, + "loss": 1.0975, + "step": 20375 + }, + { + "epoch": 6.21, + "learning_rate": 1.5170776255707764e-05, + "loss": 1.0874, + "step": 20400 + }, + { + "epoch": 6.22, + "learning_rate": 1.514033485540335e-05, + "loss": 1.061, + "step": 20425 + }, + { + "epoch": 6.23, + "learning_rate": 1.5109893455098936e-05, + "loss": 1.0563, + "step": 20450 + }, + { + "epoch": 6.23, + "learning_rate": 1.5079452054794521e-05, + "loss": 1.0779, + "step": 20475 + }, + { + "epoch": 6.24, + "learning_rate": 1.5049010654490109e-05, + "loss": 1.0346, + "step": 20500 + }, + { + "epoch": 6.25, + "learning_rate": 1.5018569254185693e-05, + "loss": 1.0741, + "step": 20525 + }, + { + "epoch": 6.26, + "learning_rate": 1.498812785388128e-05, + "loss": 1.0446, + "step": 20550 + }, + { + "epoch": 6.26, + "learning_rate": 1.4957686453576866e-05, + "loss": 1.0447, + "step": 20575 + }, + { + "epoch": 6.27, + "learning_rate": 1.4927245053272451e-05, + "loss": 1.0887, + "step": 20600 + }, + { + "epoch": 6.28, + "learning_rate": 1.4896803652968037e-05, + "loss": 1.0644, + "step": 20625 + }, + { + "epoch": 6.29, + "learning_rate": 1.4866362252663624e-05, + "loss": 1.0262, + "step": 20650 + }, + { + "epoch": 6.29, + "learning_rate": 1.4835920852359208e-05, + "loss": 1.0814, + "step": 20675 + }, + { + "epoch": 6.3, + "learning_rate": 1.4805479452054796e-05, + "loss": 1.0796, + "step": 20700 + }, + { + "epoch": 6.31, + "learning_rate": 1.4775038051750381e-05, + "loss": 1.0907, + "step": 20725 + }, + { + "epoch": 6.32, + "learning_rate": 1.4744596651445969e-05, + "loss": 1.0584, + "step": 20750 + }, + { + "epoch": 6.32, + "learning_rate": 1.4714155251141553e-05, + "loss": 1.0504, + "step": 20775 + }, + { + "epoch": 6.33, + "learning_rate": 1.468371385083714e-05, + "loss": 1.0383, + "step": 20800 + }, + { + "epoch": 6.34, + "learning_rate": 1.4653272450532726e-05, + "loss": 1.0387, + "step": 20825 + }, + { + "epoch": 6.35, + "learning_rate": 1.4622831050228312e-05, + "loss": 1.0644, + "step": 20850 + }, + { + "epoch": 6.35, + "learning_rate": 1.4592389649923897e-05, + "loss": 1.008, + "step": 20875 + }, + { + "epoch": 6.36, + "learning_rate": 1.4561948249619485e-05, + "loss": 1.086, + "step": 20900 + }, + { + "epoch": 6.37, + "learning_rate": 1.4531506849315069e-05, + "loss": 1.0496, + "step": 20925 + }, + { + "epoch": 6.38, + "learning_rate": 1.4501065449010656e-05, + "loss": 0.9918, + "step": 20950 + }, + { + "epoch": 6.39, + "learning_rate": 1.4470624048706242e-05, + "loss": 1.0559, + "step": 20975 + }, + { + "epoch": 6.39, + "learning_rate": 1.4440182648401827e-05, + "loss": 1.0467, + "step": 21000 + }, + { + "epoch": 6.4, + "learning_rate": 1.4409741248097413e-05, + "loss": 1.0557, + "step": 21025 + }, + { + "epoch": 6.41, + "learning_rate": 1.4379299847793e-05, + "loss": 1.0846, + "step": 21050 + }, + { + "epoch": 6.42, + "learning_rate": 1.4348858447488584e-05, + "loss": 1.0836, + "step": 21075 + }, + { + "epoch": 6.42, + "learning_rate": 1.4318417047184172e-05, + "loss": 1.082, + "step": 21100 + }, + { + "epoch": 6.43, + "learning_rate": 1.4287975646879757e-05, + "loss": 1.0596, + "step": 21125 + }, + { + "epoch": 6.44, + "learning_rate": 1.4257534246575345e-05, + "loss": 1.0321, + "step": 21150 + }, + { + "epoch": 6.45, + "learning_rate": 1.4227092846270929e-05, + "loss": 1.0805, + "step": 21175 + }, + { + "epoch": 6.45, + "learning_rate": 1.4196651445966516e-05, + "loss": 1.1013, + "step": 21200 + }, + { + "epoch": 6.46, + "learning_rate": 1.4166210045662102e-05, + "loss": 1.0287, + "step": 21225 + }, + { + "epoch": 6.47, + "learning_rate": 1.4135768645357688e-05, + "loss": 1.0995, + "step": 21250 + }, + { + "epoch": 6.48, + "learning_rate": 1.4105327245053273e-05, + "loss": 1.0834, + "step": 21275 + }, + { + "epoch": 6.48, + "learning_rate": 1.407488584474886e-05, + "loss": 1.0702, + "step": 21300 + }, + { + "epoch": 6.49, + "learning_rate": 1.4045662100456623e-05, + "loss": 1.0709, + "step": 21325 + }, + { + "epoch": 6.5, + "learning_rate": 1.4015220700152209e-05, + "loss": 1.126, + "step": 21350 + }, + { + "epoch": 6.51, + "learning_rate": 1.3984779299847794e-05, + "loss": 1.0661, + "step": 21375 + }, + { + "epoch": 6.51, + "learning_rate": 1.395433789954338e-05, + "loss": 1.0154, + "step": 21400 + }, + { + "epoch": 6.52, + "learning_rate": 1.3923896499238967e-05, + "loss": 1.0203, + "step": 21425 + }, + { + "epoch": 6.53, + "learning_rate": 1.3893455098934551e-05, + "loss": 1.0997, + "step": 21450 + }, + { + "epoch": 6.54, + "learning_rate": 1.3863013698630139e-05, + "loss": 1.1286, + "step": 21475 + }, + { + "epoch": 6.54, + "learning_rate": 1.3832572298325724e-05, + "loss": 1.0367, + "step": 21500 + }, + { + "epoch": 6.55, + "learning_rate": 1.380213089802131e-05, + "loss": 1.0712, + "step": 21525 + }, + { + "epoch": 6.56, + "learning_rate": 1.3771689497716896e-05, + "loss": 1.0636, + "step": 21550 + }, + { + "epoch": 6.57, + "learning_rate": 1.3741248097412483e-05, + "loss": 1.056, + "step": 21575 + }, + { + "epoch": 6.58, + "learning_rate": 1.3710806697108067e-05, + "loss": 1.107, + "step": 21600 + }, + { + "epoch": 6.58, + "learning_rate": 1.3680365296803655e-05, + "loss": 1.1084, + "step": 21625 + }, + { + "epoch": 6.59, + "learning_rate": 1.364992389649924e-05, + "loss": 1.0469, + "step": 21650 + }, + { + "epoch": 6.6, + "learning_rate": 1.3619482496194828e-05, + "loss": 1.0499, + "step": 21675 + }, + { + "epoch": 6.61, + "learning_rate": 1.3589041095890412e-05, + "loss": 1.0193, + "step": 21700 + }, + { + "epoch": 6.61, + "learning_rate": 1.3558599695585999e-05, + "loss": 1.0213, + "step": 21725 + }, + { + "epoch": 6.62, + "learning_rate": 1.3528158295281583e-05, + "loss": 1.0763, + "step": 21750 + }, + { + "epoch": 6.63, + "learning_rate": 1.349771689497717e-05, + "loss": 1.0602, + "step": 21775 + }, + { + "epoch": 6.64, + "learning_rate": 1.3467275494672756e-05, + "loss": 1.0075, + "step": 21800 + }, + { + "epoch": 6.64, + "learning_rate": 1.3436834094368343e-05, + "loss": 1.0394, + "step": 21825 + }, + { + "epoch": 6.65, + "learning_rate": 1.3406392694063927e-05, + "loss": 1.1018, + "step": 21850 + }, + { + "epoch": 6.66, + "learning_rate": 1.3375951293759515e-05, + "loss": 1.1407, + "step": 21875 + }, + { + "epoch": 6.67, + "learning_rate": 1.33455098934551e-05, + "loss": 1.0651, + "step": 21900 + }, + { + "epoch": 6.67, + "learning_rate": 1.3316286149162863e-05, + "loss": 1.1539, + "step": 21925 + }, + { + "epoch": 6.68, + "learning_rate": 1.328584474885845e-05, + "loss": 1.1164, + "step": 21950 + }, + { + "epoch": 6.69, + "learning_rate": 1.3255403348554034e-05, + "loss": 1.0859, + "step": 21975 + }, + { + "epoch": 6.7, + "learning_rate": 1.3224961948249621e-05, + "loss": 1.0339, + "step": 22000 + }, + { + "epoch": 6.7, + "learning_rate": 1.3194520547945207e-05, + "loss": 1.0881, + "step": 22025 + }, + { + "epoch": 6.71, + "learning_rate": 1.3164079147640793e-05, + "loss": 1.0617, + "step": 22050 + }, + { + "epoch": 6.72, + "learning_rate": 1.3133637747336379e-05, + "loss": 1.0946, + "step": 22075 + }, + { + "epoch": 6.73, + "learning_rate": 1.3103196347031966e-05, + "loss": 1.0516, + "step": 22100 + }, + { + "epoch": 6.74, + "learning_rate": 1.307275494672755e-05, + "loss": 1.0097, + "step": 22125 + }, + { + "epoch": 6.74, + "learning_rate": 1.3042313546423137e-05, + "loss": 0.9982, + "step": 22150 + }, + { + "epoch": 6.75, + "learning_rate": 1.3011872146118723e-05, + "loss": 1.0149, + "step": 22175 + }, + { + "epoch": 6.76, + "learning_rate": 1.298143074581431e-05, + "loss": 1.0674, + "step": 22200 + }, + { + "epoch": 6.77, + "learning_rate": 1.2950989345509894e-05, + "loss": 1.0485, + "step": 22225 + }, + { + "epoch": 6.77, + "learning_rate": 1.2920547945205482e-05, + "loss": 1.0641, + "step": 22250 + }, + { + "epoch": 6.78, + "learning_rate": 1.2890106544901066e-05, + "loss": 1.0306, + "step": 22275 + }, + { + "epoch": 6.79, + "learning_rate": 1.2859665144596653e-05, + "loss": 1.0526, + "step": 22300 + }, + { + "epoch": 6.8, + "learning_rate": 1.2829223744292239e-05, + "loss": 1.1053, + "step": 22325 + }, + { + "epoch": 6.8, + "learning_rate": 1.2798782343987823e-05, + "loss": 1.069, + "step": 22350 + }, + { + "epoch": 6.81, + "learning_rate": 1.276834094368341e-05, + "loss": 1.0654, + "step": 22375 + }, + { + "epoch": 6.82, + "learning_rate": 1.2737899543378996e-05, + "loss": 1.0341, + "step": 22400 + }, + { + "epoch": 6.83, + "learning_rate": 1.2707458143074583e-05, + "loss": 1.0856, + "step": 22425 + }, + { + "epoch": 6.83, + "learning_rate": 1.2677016742770167e-05, + "loss": 1.0456, + "step": 22450 + }, + { + "epoch": 6.84, + "learning_rate": 1.2646575342465755e-05, + "loss": 1.0782, + "step": 22475 + }, + { + "epoch": 6.85, + "learning_rate": 1.261613394216134e-05, + "loss": 1.0911, + "step": 22500 + }, + { + "epoch": 6.86, + "learning_rate": 1.2585692541856926e-05, + "loss": 1.0151, + "step": 22525 + }, + { + "epoch": 6.86, + "learning_rate": 1.2555251141552512e-05, + "loss": 1.1133, + "step": 22550 + }, + { + "epoch": 6.87, + "learning_rate": 1.2524809741248099e-05, + "loss": 1.098, + "step": 22575 + }, + { + "epoch": 6.88, + "learning_rate": 1.2494368340943683e-05, + "loss": 1.0943, + "step": 22600 + }, + { + "epoch": 6.89, + "learning_rate": 1.246392694063927e-05, + "loss": 1.0622, + "step": 22625 + }, + { + "epoch": 6.89, + "learning_rate": 1.2433485540334856e-05, + "loss": 1.0801, + "step": 22650 + }, + { + "epoch": 6.9, + "learning_rate": 1.2403044140030442e-05, + "loss": 1.0496, + "step": 22675 + }, + { + "epoch": 6.91, + "learning_rate": 1.2372602739726027e-05, + "loss": 1.1017, + "step": 22700 + }, + { + "epoch": 6.92, + "learning_rate": 1.2342161339421615e-05, + "loss": 1.045, + "step": 22725 + }, + { + "epoch": 6.93, + "learning_rate": 1.2311719939117199e-05, + "loss": 1.1071, + "step": 22750 + }, + { + "epoch": 6.93, + "learning_rate": 1.2281278538812786e-05, + "loss": 1.0852, + "step": 22775 + }, + { + "epoch": 6.94, + "learning_rate": 1.2250837138508372e-05, + "loss": 1.1576, + "step": 22800 + }, + { + "epoch": 6.95, + "learning_rate": 1.2220395738203959e-05, + "loss": 1.079, + "step": 22825 + }, + { + "epoch": 6.96, + "learning_rate": 1.2189954337899543e-05, + "loss": 1.1521, + "step": 22850 + }, + { + "epoch": 6.96, + "learning_rate": 1.215951293759513e-05, + "loss": 1.0449, + "step": 22875 + }, + { + "epoch": 6.97, + "learning_rate": 1.2129071537290714e-05, + "loss": 1.1417, + "step": 22900 + }, + { + "epoch": 6.98, + "learning_rate": 1.2098630136986302e-05, + "loss": 1.0472, + "step": 22925 + }, + { + "epoch": 6.99, + "learning_rate": 1.2068188736681888e-05, + "loss": 1.0093, + "step": 22950 + }, + { + "epoch": 6.99, + "learning_rate": 1.2037747336377475e-05, + "loss": 1.0782, + "step": 22975 + }, + { + "epoch": 7.0, + "learning_rate": 1.2007305936073059e-05, + "loss": 1.0471, + "step": 23000 + }, + { + "epoch": 7.01, + "learning_rate": 1.1976864535768646e-05, + "loss": 0.9486, + "step": 23025 + }, + { + "epoch": 7.02, + "learning_rate": 1.1946423135464232e-05, + "loss": 0.974, + "step": 23050 + }, + { + "epoch": 7.02, + "learning_rate": 1.1915981735159818e-05, + "loss": 0.9259, + "step": 23075 + }, + { + "epoch": 7.03, + "learning_rate": 1.1885540334855403e-05, + "loss": 0.928, + "step": 23100 + }, + { + "epoch": 7.04, + "learning_rate": 1.185509893455099e-05, + "loss": 0.9293, + "step": 23125 + }, + { + "epoch": 7.05, + "learning_rate": 1.1824657534246575e-05, + "loss": 0.9387, + "step": 23150 + }, + { + "epoch": 7.05, + "learning_rate": 1.1794216133942162e-05, + "loss": 0.8687, + "step": 23175 + }, + { + "epoch": 7.06, + "learning_rate": 1.1763774733637748e-05, + "loss": 0.9467, + "step": 23200 + }, + { + "epoch": 7.07, + "learning_rate": 1.1733333333333335e-05, + "loss": 0.8817, + "step": 23225 + }, + { + "epoch": 7.08, + "learning_rate": 1.1702891933028919e-05, + "loss": 0.9304, + "step": 23250 + }, + { + "epoch": 7.09, + "learning_rate": 1.1672450532724506e-05, + "loss": 0.9319, + "step": 23275 + }, + { + "epoch": 7.09, + "learning_rate": 1.164200913242009e-05, + "loss": 0.9488, + "step": 23300 + }, + { + "epoch": 7.1, + "learning_rate": 1.1611567732115678e-05, + "loss": 0.9054, + "step": 23325 + }, + { + "epoch": 7.11, + "learning_rate": 1.1581126331811263e-05, + "loss": 0.9246, + "step": 23350 + }, + { + "epoch": 7.12, + "learning_rate": 1.155068493150685e-05, + "loss": 0.898, + "step": 23375 + }, + { + "epoch": 7.12, + "learning_rate": 1.1520243531202435e-05, + "loss": 0.947, + "step": 23400 + }, + { + "epoch": 7.13, + "learning_rate": 1.1489802130898022e-05, + "loss": 0.9288, + "step": 23425 + }, + { + "epoch": 7.14, + "learning_rate": 1.1459360730593608e-05, + "loss": 0.9544, + "step": 23450 + }, + { + "epoch": 7.15, + "learning_rate": 1.1428919330289194e-05, + "loss": 0.9711, + "step": 23475 + }, + { + "epoch": 7.15, + "learning_rate": 1.139847792998478e-05, + "loss": 0.9133, + "step": 23500 + }, + { + "epoch": 7.16, + "learning_rate": 1.1368036529680367e-05, + "loss": 0.9182, + "step": 23525 + }, + { + "epoch": 7.17, + "learning_rate": 1.133759512937595e-05, + "loss": 0.8882, + "step": 23550 + }, + { + "epoch": 7.18, + "learning_rate": 1.1307153729071538e-05, + "loss": 0.9431, + "step": 23575 + }, + { + "epoch": 7.18, + "learning_rate": 1.1276712328767124e-05, + "loss": 0.9347, + "step": 23600 + }, + { + "epoch": 7.19, + "learning_rate": 1.1246270928462711e-05, + "loss": 0.9397, + "step": 23625 + }, + { + "epoch": 7.2, + "learning_rate": 1.1215829528158295e-05, + "loss": 0.8936, + "step": 23650 + }, + { + "epoch": 7.21, + "learning_rate": 1.1185388127853882e-05, + "loss": 0.9422, + "step": 23675 + }, + { + "epoch": 7.21, + "learning_rate": 1.1154946727549466e-05, + "loss": 0.994, + "step": 23700 + }, + { + "epoch": 7.22, + "learning_rate": 1.1124505327245054e-05, + "loss": 0.9458, + "step": 23725 + }, + { + "epoch": 7.23, + "learning_rate": 1.109406392694064e-05, + "loss": 0.9833, + "step": 23750 + }, + { + "epoch": 7.24, + "learning_rate": 1.1063622526636227e-05, + "loss": 0.9156, + "step": 23775 + }, + { + "epoch": 7.25, + "learning_rate": 1.103318112633181e-05, + "loss": 0.952, + "step": 23800 + }, + { + "epoch": 7.25, + "learning_rate": 1.1002739726027398e-05, + "loss": 0.9329, + "step": 23825 + }, + { + "epoch": 7.26, + "learning_rate": 1.0972298325722984e-05, + "loss": 0.969, + "step": 23850 + }, + { + "epoch": 7.27, + "learning_rate": 1.094185692541857e-05, + "loss": 0.9036, + "step": 23875 + }, + { + "epoch": 7.28, + "learning_rate": 1.0911415525114155e-05, + "loss": 0.893, + "step": 23900 + }, + { + "epoch": 7.28, + "learning_rate": 1.0880974124809743e-05, + "loss": 1.0085, + "step": 23925 + }, + { + "epoch": 7.29, + "learning_rate": 1.0850532724505327e-05, + "loss": 0.92, + "step": 23950 + }, + { + "epoch": 7.3, + "learning_rate": 1.0820091324200914e-05, + "loss": 0.928, + "step": 23975 + }, + { + "epoch": 7.31, + "learning_rate": 1.07896499238965e-05, + "loss": 0.9017, + "step": 24000 + }, + { + "epoch": 7.31, + "learning_rate": 1.0759208523592087e-05, + "loss": 0.9032, + "step": 24025 + }, + { + "epoch": 7.32, + "learning_rate": 1.0728767123287671e-05, + "loss": 0.9073, + "step": 24050 + }, + { + "epoch": 7.33, + "learning_rate": 1.0698325722983258e-05, + "loss": 0.9447, + "step": 24075 + }, + { + "epoch": 7.34, + "learning_rate": 1.0667884322678842e-05, + "loss": 0.9341, + "step": 24100 + }, + { + "epoch": 7.34, + "learning_rate": 1.0638660578386606e-05, + "loss": 0.9369, + "step": 24125 + }, + { + "epoch": 7.35, + "learning_rate": 1.0608219178082194e-05, + "loss": 0.9721, + "step": 24150 + }, + { + "epoch": 7.36, + "learning_rate": 1.0577777777777778e-05, + "loss": 0.9367, + "step": 24175 + }, + { + "epoch": 7.37, + "learning_rate": 1.0547336377473365e-05, + "loss": 0.9746, + "step": 24200 + }, + { + "epoch": 7.37, + "learning_rate": 1.0516894977168949e-05, + "loss": 0.914, + "step": 24225 + }, + { + "epoch": 7.38, + "learning_rate": 1.0486453576864537e-05, + "loss": 0.8861, + "step": 24250 + }, + { + "epoch": 7.39, + "learning_rate": 1.0456012176560122e-05, + "loss": 0.9416, + "step": 24275 + }, + { + "epoch": 7.4, + "learning_rate": 1.042557077625571e-05, + "loss": 0.9378, + "step": 24300 + }, + { + "epoch": 7.4, + "learning_rate": 1.0395129375951294e-05, + "loss": 0.9149, + "step": 24325 + }, + { + "epoch": 7.41, + "learning_rate": 1.0364687975646881e-05, + "loss": 0.9661, + "step": 24350 + }, + { + "epoch": 7.42, + "learning_rate": 1.0334246575342467e-05, + "loss": 0.9946, + "step": 24375 + }, + { + "epoch": 7.43, + "learning_rate": 1.0303805175038052e-05, + "loss": 0.9153, + "step": 24400 + }, + { + "epoch": 7.44, + "learning_rate": 1.0273363774733638e-05, + "loss": 1.0122, + "step": 24425 + }, + { + "epoch": 7.44, + "learning_rate": 1.0242922374429225e-05, + "loss": 0.9563, + "step": 24450 + }, + { + "epoch": 7.45, + "learning_rate": 1.021248097412481e-05, + "loss": 0.9451, + "step": 24475 + }, + { + "epoch": 7.46, + "learning_rate": 1.0182039573820397e-05, + "loss": 0.9543, + "step": 24500 + }, + { + "epoch": 7.47, + "learning_rate": 1.0151598173515982e-05, + "loss": 0.9336, + "step": 24525 + }, + { + "epoch": 7.47, + "learning_rate": 1.012115677321157e-05, + "loss": 0.9474, + "step": 24550 + }, + { + "epoch": 7.48, + "learning_rate": 1.0090715372907154e-05, + "loss": 0.9503, + "step": 24575 + }, + { + "epoch": 7.49, + "learning_rate": 1.0060273972602741e-05, + "loss": 0.9395, + "step": 24600 + }, + { + "epoch": 7.5, + "learning_rate": 1.0029832572298325e-05, + "loss": 0.9076, + "step": 24625 + }, + { + "epoch": 7.5, + "learning_rate": 9.999391171993912e-06, + "loss": 0.9026, + "step": 24650 + }, + { + "epoch": 7.51, + "learning_rate": 9.968949771689498e-06, + "loss": 0.9039, + "step": 24675 + }, + { + "epoch": 7.52, + "learning_rate": 9.938508371385086e-06, + "loss": 0.9744, + "step": 24700 + }, + { + "epoch": 7.53, + "learning_rate": 9.908066971080671e-06, + "loss": 0.9818, + "step": 24725 + }, + { + "epoch": 7.53, + "learning_rate": 9.877625570776257e-06, + "loss": 0.8859, + "step": 24750 + }, + { + "epoch": 7.54, + "learning_rate": 9.847184170471843e-06, + "loss": 0.9602, + "step": 24775 + }, + { + "epoch": 7.55, + "learning_rate": 9.816742770167428e-06, + "loss": 0.9405, + "step": 24800 + }, + { + "epoch": 7.56, + "learning_rate": 9.786301369863016e-06, + "loss": 0.9669, + "step": 24825 + }, + { + "epoch": 7.56, + "learning_rate": 9.755859969558601e-06, + "loss": 0.9106, + "step": 24850 + }, + { + "epoch": 7.57, + "learning_rate": 9.725418569254187e-06, + "loss": 0.9118, + "step": 24875 + }, + { + "epoch": 7.58, + "learning_rate": 9.694977168949773e-06, + "loss": 0.981, + "step": 24900 + }, + { + "epoch": 7.59, + "learning_rate": 9.664535768645358e-06, + "loss": 0.974, + "step": 24925 + }, + { + "epoch": 7.6, + "learning_rate": 9.634094368340946e-06, + "loss": 0.8843, + "step": 24950 + }, + { + "epoch": 7.6, + "learning_rate": 9.603652968036531e-06, + "loss": 0.9714, + "step": 24975 + }, + { + "epoch": 7.61, + "learning_rate": 9.573211567732117e-06, + "loss": 0.9101, + "step": 25000 + }, + { + "epoch": 7.62, + "learning_rate": 9.542770167427703e-06, + "loss": 0.9264, + "step": 25025 + }, + { + "epoch": 7.63, + "learning_rate": 9.512328767123288e-06, + "loss": 0.9459, + "step": 25050 + }, + { + "epoch": 7.63, + "learning_rate": 9.481887366818874e-06, + "loss": 0.9391, + "step": 25075 + }, + { + "epoch": 7.64, + "learning_rate": 9.451445966514461e-06, + "loss": 0.9473, + "step": 25100 + }, + { + "epoch": 7.65, + "learning_rate": 9.421004566210047e-06, + "loss": 0.897, + "step": 25125 + }, + { + "epoch": 7.66, + "learning_rate": 9.390563165905633e-06, + "loss": 0.9351, + "step": 25150 + }, + { + "epoch": 7.66, + "learning_rate": 9.360121765601219e-06, + "loss": 0.9249, + "step": 25175 + }, + { + "epoch": 7.67, + "learning_rate": 9.329680365296804e-06, + "loss": 0.9407, + "step": 25200 + }, + { + "epoch": 7.68, + "learning_rate": 9.299238964992392e-06, + "loss": 0.9331, + "step": 25225 + }, + { + "epoch": 7.69, + "learning_rate": 9.268797564687977e-06, + "loss": 0.9749, + "step": 25250 + }, + { + "epoch": 7.69, + "learning_rate": 9.238356164383563e-06, + "loss": 0.981, + "step": 25275 + }, + { + "epoch": 7.7, + "learning_rate": 9.207914764079149e-06, + "loss": 0.9334, + "step": 25300 + }, + { + "epoch": 7.71, + "learning_rate": 9.177473363774734e-06, + "loss": 0.928, + "step": 25325 + }, + { + "epoch": 7.72, + "learning_rate": 9.147031963470322e-06, + "loss": 0.8981, + "step": 25350 + }, + { + "epoch": 7.72, + "learning_rate": 9.116590563165907e-06, + "loss": 0.9735, + "step": 25375 + }, + { + "epoch": 7.73, + "learning_rate": 9.086149162861493e-06, + "loss": 0.9042, + "step": 25400 + }, + { + "epoch": 7.74, + "learning_rate": 9.055707762557079e-06, + "loss": 0.9443, + "step": 25425 + }, + { + "epoch": 7.75, + "learning_rate": 9.025266362252664e-06, + "loss": 0.9571, + "step": 25450 + }, + { + "epoch": 7.75, + "learning_rate": 8.99482496194825e-06, + "loss": 0.9154, + "step": 25475 + }, + { + "epoch": 7.76, + "learning_rate": 8.964383561643837e-06, + "loss": 0.9131, + "step": 25500 + }, + { + "epoch": 7.77, + "learning_rate": 8.933942161339423e-06, + "loss": 0.9096, + "step": 25525 + }, + { + "epoch": 7.78, + "learning_rate": 8.903500761035009e-06, + "loss": 1.0131, + "step": 25550 + }, + { + "epoch": 7.79, + "learning_rate": 8.873059360730594e-06, + "loss": 0.9257, + "step": 25575 + }, + { + "epoch": 7.79, + "learning_rate": 8.84261796042618e-06, + "loss": 0.9627, + "step": 25600 + }, + { + "epoch": 7.8, + "learning_rate": 8.812176560121768e-06, + "loss": 0.9599, + "step": 25625 + }, + { + "epoch": 7.81, + "learning_rate": 8.781735159817353e-06, + "loss": 1.0309, + "step": 25650 + }, + { + "epoch": 7.82, + "learning_rate": 8.751293759512939e-06, + "loss": 0.9223, + "step": 25675 + }, + { + "epoch": 7.82, + "learning_rate": 8.720852359208525e-06, + "loss": 0.9609, + "step": 25700 + }, + { + "epoch": 7.83, + "learning_rate": 8.69041095890411e-06, + "loss": 0.9848, + "step": 25725 + }, + { + "epoch": 7.84, + "learning_rate": 8.659969558599698e-06, + "loss": 0.9744, + "step": 25750 + }, + { + "epoch": 7.85, + "learning_rate": 8.629528158295283e-06, + "loss": 0.9079, + "step": 25775 + }, + { + "epoch": 7.85, + "learning_rate": 8.599086757990869e-06, + "loss": 0.9524, + "step": 25800 + }, + { + "epoch": 7.86, + "learning_rate": 8.568645357686455e-06, + "loss": 0.8825, + "step": 25825 + }, + { + "epoch": 7.87, + "learning_rate": 8.53820395738204e-06, + "loss": 0.9648, + "step": 25850 + }, + { + "epoch": 7.88, + "learning_rate": 8.507762557077626e-06, + "loss": 0.9268, + "step": 25875 + }, + { + "epoch": 7.88, + "learning_rate": 8.477321156773213e-06, + "loss": 1.0055, + "step": 25900 + }, + { + "epoch": 7.89, + "learning_rate": 8.446879756468799e-06, + "loss": 0.9859, + "step": 25925 + }, + { + "epoch": 7.9, + "learning_rate": 8.416438356164385e-06, + "loss": 0.876, + "step": 25950 + }, + { + "epoch": 7.91, + "learning_rate": 8.38599695585997e-06, + "loss": 0.9929, + "step": 25975 + }, + { + "epoch": 7.91, + "learning_rate": 8.355555555555556e-06, + "loss": 0.937, + "step": 26000 + }, + { + "epoch": 7.92, + "learning_rate": 8.325114155251143e-06, + "loss": 0.9504, + "step": 26025 + }, + { + "epoch": 7.93, + "learning_rate": 8.29467275494673e-06, + "loss": 0.9475, + "step": 26050 + }, + { + "epoch": 7.94, + "learning_rate": 8.264231354642315e-06, + "loss": 0.8728, + "step": 26075 + }, + { + "epoch": 7.95, + "learning_rate": 8.2337899543379e-06, + "loss": 0.9089, + "step": 26100 + }, + { + "epoch": 7.95, + "learning_rate": 8.203348554033486e-06, + "loss": 0.963, + "step": 26125 + }, + { + "epoch": 7.96, + "learning_rate": 8.172907153729072e-06, + "loss": 0.9398, + "step": 26150 + }, + { + "epoch": 7.97, + "learning_rate": 8.14246575342466e-06, + "loss": 0.9664, + "step": 26175 + }, + { + "epoch": 7.98, + "learning_rate": 8.112024353120245e-06, + "loss": 0.9756, + "step": 26200 + }, + { + "epoch": 7.98, + "learning_rate": 8.08158295281583e-06, + "loss": 0.9686, + "step": 26225 + }, + { + "epoch": 7.99, + "learning_rate": 8.051141552511416e-06, + "loss": 0.9906, + "step": 26250 + }, + { + "epoch": 8.0, + "learning_rate": 8.020700152207002e-06, + "loss": 0.9644, + "step": 26275 + }, + { + "epoch": 8.01, + "learning_rate": 7.99025875190259e-06, + "loss": 0.8352, + "step": 26300 + }, + { + "epoch": 8.01, + "learning_rate": 7.959817351598175e-06, + "loss": 0.7675, + "step": 26325 + }, + { + "epoch": 8.02, + "learning_rate": 7.92937595129376e-06, + "loss": 0.8487, + "step": 26350 + }, + { + "epoch": 8.03, + "learning_rate": 7.898934550989346e-06, + "loss": 0.8241, + "step": 26375 + }, + { + "epoch": 8.04, + "learning_rate": 7.868493150684932e-06, + "loss": 0.8417, + "step": 26400 + }, + { + "epoch": 8.04, + "learning_rate": 7.83805175038052e-06, + "loss": 0.8277, + "step": 26425 + }, + { + "epoch": 8.05, + "learning_rate": 7.807610350076105e-06, + "loss": 0.8032, + "step": 26450 + }, + { + "epoch": 8.06, + "learning_rate": 7.77716894977169e-06, + "loss": 0.8111, + "step": 26475 + }, + { + "epoch": 8.07, + "learning_rate": 7.746727549467276e-06, + "loss": 0.8319, + "step": 26500 + }, + { + "epoch": 8.07, + "learning_rate": 7.716286149162862e-06, + "loss": 0.8437, + "step": 26525 + }, + { + "epoch": 8.08, + "learning_rate": 7.685844748858448e-06, + "loss": 0.8167, + "step": 26550 + }, + { + "epoch": 8.09, + "learning_rate": 7.655403348554035e-06, + "loss": 0.798, + "step": 26575 + }, + { + "epoch": 8.1, + "learning_rate": 7.624961948249621e-06, + "loss": 0.8465, + "step": 26600 + }, + { + "epoch": 8.11, + "learning_rate": 7.594520547945206e-06, + "loss": 0.8215, + "step": 26625 + }, + { + "epoch": 8.11, + "learning_rate": 7.564079147640791e-06, + "loss": 0.8385, + "step": 26650 + }, + { + "epoch": 8.12, + "learning_rate": 7.533637747336378e-06, + "loss": 0.8552, + "step": 26675 + }, + { + "epoch": 8.13, + "learning_rate": 7.503196347031964e-06, + "loss": 0.8127, + "step": 26700 + }, + { + "epoch": 8.14, + "learning_rate": 7.472754946727549e-06, + "loss": 0.8545, + "step": 26725 + }, + { + "epoch": 8.14, + "learning_rate": 7.442313546423136e-06, + "loss": 0.8589, + "step": 26750 + }, + { + "epoch": 8.15, + "learning_rate": 7.4118721461187215e-06, + "loss": 0.8231, + "step": 26775 + }, + { + "epoch": 8.16, + "learning_rate": 7.381430745814307e-06, + "loss": 0.8292, + "step": 26800 + }, + { + "epoch": 8.17, + "learning_rate": 7.350989345509894e-06, + "loss": 0.866, + "step": 26825 + }, + { + "epoch": 8.17, + "learning_rate": 7.320547945205479e-06, + "loss": 0.8413, + "step": 26850 + }, + { + "epoch": 8.18, + "learning_rate": 7.290106544901066e-06, + "loss": 0.8609, + "step": 26875 + }, + { + "epoch": 8.19, + "learning_rate": 7.259665144596652e-06, + "loss": 0.8566, + "step": 26900 + }, + { + "epoch": 8.2, + "learning_rate": 7.229223744292237e-06, + "loss": 0.9092, + "step": 26925 + }, + { + "epoch": 8.2, + "learning_rate": 7.198782343987824e-06, + "loss": 0.8238, + "step": 26950 + }, + { + "epoch": 8.21, + "learning_rate": 7.1683409436834095e-06, + "loss": 0.8372, + "step": 26975 + }, + { + "epoch": 8.22, + "learning_rate": 7.137899543378995e-06, + "loss": 0.8547, + "step": 27000 + }, + { + "epoch": 8.23, + "learning_rate": 7.107458143074582e-06, + "loss": 0.8121, + "step": 27025 + }, + { + "epoch": 8.23, + "learning_rate": 7.077016742770167e-06, + "loss": 0.8415, + "step": 27050 + }, + { + "epoch": 8.24, + "learning_rate": 7.046575342465753e-06, + "loss": 0.8953, + "step": 27075 + }, + { + "epoch": 8.25, + "learning_rate": 7.01613394216134e-06, + "loss": 0.8622, + "step": 27100 + }, + { + "epoch": 8.26, + "learning_rate": 6.985692541856925e-06, + "loss": 0.874, + "step": 27125 + }, + { + "epoch": 8.26, + "learning_rate": 6.955251141552512e-06, + "loss": 0.8017, + "step": 27150 + }, + { + "epoch": 8.27, + "learning_rate": 6.9248097412480975e-06, + "loss": 0.82, + "step": 27175 + }, + { + "epoch": 8.28, + "learning_rate": 6.894368340943683e-06, + "loss": 0.827, + "step": 27200 + }, + { + "epoch": 8.29, + "learning_rate": 6.86392694063927e-06, + "loss": 0.8299, + "step": 27225 + }, + { + "epoch": 8.3, + "learning_rate": 6.833485540334855e-06, + "loss": 0.8529, + "step": 27250 + }, + { + "epoch": 8.3, + "learning_rate": 6.803044140030441e-06, + "loss": 0.8119, + "step": 27275 + }, + { + "epoch": 8.31, + "learning_rate": 6.7726027397260276e-06, + "loss": 0.8448, + "step": 27300 + }, + { + "epoch": 8.32, + "learning_rate": 6.742161339421613e-06, + "loss": 0.824, + "step": 27325 + }, + { + "epoch": 8.33, + "learning_rate": 6.7117199391172e-06, + "loss": 0.8442, + "step": 27350 + }, + { + "epoch": 8.33, + "learning_rate": 6.6812785388127855e-06, + "loss": 0.8194, + "step": 27375 + }, + { + "epoch": 8.34, + "learning_rate": 6.650837138508371e-06, + "loss": 0.8177, + "step": 27400 + }, + { + "epoch": 8.35, + "learning_rate": 6.620395738203958e-06, + "loss": 0.873, + "step": 27425 + }, + { + "epoch": 8.36, + "learning_rate": 6.589954337899543e-06, + "loss": 0.7966, + "step": 27450 + }, + { + "epoch": 8.36, + "learning_rate": 6.559512937595129e-06, + "loss": 0.8656, + "step": 27475 + }, + { + "epoch": 8.37, + "learning_rate": 6.5290715372907155e-06, + "loss": 0.8272, + "step": 27500 + }, + { + "epoch": 8.38, + "learning_rate": 6.498630136986301e-06, + "loss": 0.7805, + "step": 27525 + }, + { + "epoch": 8.39, + "learning_rate": 6.468188736681888e-06, + "loss": 0.8527, + "step": 27550 + }, + { + "epoch": 8.39, + "learning_rate": 6.4377473363774734e-06, + "loss": 0.8507, + "step": 27575 + }, + { + "epoch": 8.4, + "learning_rate": 6.407305936073059e-06, + "loss": 0.7855, + "step": 27600 + }, + { + "epoch": 8.41, + "learning_rate": 6.376864535768646e-06, + "loss": 0.8705, + "step": 27625 + }, + { + "epoch": 8.42, + "learning_rate": 6.346423135464231e-06, + "loss": 0.8078, + "step": 27650 + }, + { + "epoch": 8.42, + "learning_rate": 6.315981735159817e-06, + "loss": 0.7914, + "step": 27675 + }, + { + "epoch": 8.43, + "learning_rate": 6.2855403348554035e-06, + "loss": 0.8536, + "step": 27700 + }, + { + "epoch": 8.44, + "learning_rate": 6.255098934550989e-06, + "loss": 0.8765, + "step": 27725 + }, + { + "epoch": 8.45, + "learning_rate": 6.224657534246576e-06, + "loss": 0.8406, + "step": 27750 + }, + { + "epoch": 8.46, + "learning_rate": 6.194216133942161e-06, + "loss": 0.8427, + "step": 27775 + }, + { + "epoch": 8.46, + "learning_rate": 6.163774733637747e-06, + "loss": 0.8689, + "step": 27800 + }, + { + "epoch": 8.47, + "learning_rate": 6.133333333333334e-06, + "loss": 0.8204, + "step": 27825 + }, + { + "epoch": 8.48, + "learning_rate": 6.102891933028919e-06, + "loss": 0.8447, + "step": 27850 + }, + { + "epoch": 8.49, + "learning_rate": 6.072450532724505e-06, + "loss": 0.8356, + "step": 27875 + }, + { + "epoch": 8.49, + "learning_rate": 6.0420091324200915e-06, + "loss": 0.8776, + "step": 27900 + }, + { + "epoch": 8.5, + "learning_rate": 6.011567732115677e-06, + "loss": 0.8543, + "step": 27925 + }, + { + "epoch": 8.51, + "learning_rate": 5.981126331811264e-06, + "loss": 0.8341, + "step": 27950 + }, + { + "epoch": 8.52, + "learning_rate": 5.950684931506849e-06, + "loss": 0.832, + "step": 27975 + }, + { + "epoch": 8.52, + "learning_rate": 5.920243531202435e-06, + "loss": 0.7922, + "step": 28000 + }, + { + "epoch": 8.53, + "learning_rate": 5.889802130898022e-06, + "loss": 0.8491, + "step": 28025 + }, + { + "epoch": 8.54, + "learning_rate": 5.859360730593607e-06, + "loss": 0.8911, + "step": 28050 + }, + { + "epoch": 8.55, + "learning_rate": 5.828919330289193e-06, + "loss": 0.8432, + "step": 28075 + }, + { + "epoch": 8.55, + "learning_rate": 5.7984779299847795e-06, + "loss": 0.7997, + "step": 28100 + }, + { + "epoch": 8.56, + "learning_rate": 5.768036529680365e-06, + "loss": 0.836, + "step": 28125 + }, + { + "epoch": 8.57, + "learning_rate": 5.737595129375952e-06, + "loss": 0.8332, + "step": 28150 + }, + { + "epoch": 8.58, + "learning_rate": 5.707153729071537e-06, + "loss": 0.8591, + "step": 28175 + }, + { + "epoch": 8.58, + "learning_rate": 5.676712328767123e-06, + "loss": 0.8475, + "step": 28200 + }, + { + "epoch": 8.59, + "learning_rate": 5.64627092846271e-06, + "loss": 0.8842, + "step": 28225 + }, + { + "epoch": 8.6, + "learning_rate": 5.615829528158295e-06, + "loss": 0.7811, + "step": 28250 + }, + { + "epoch": 8.61, + "learning_rate": 5.585388127853881e-06, + "loss": 0.8826, + "step": 28275 + }, + { + "epoch": 8.61, + "learning_rate": 5.556164383561644e-06, + "loss": 0.8344, + "step": 28300 + }, + { + "epoch": 8.62, + "learning_rate": 5.52572298325723e-06, + "loss": 0.8106, + "step": 28325 + }, + { + "epoch": 8.63, + "learning_rate": 5.495281582952816e-06, + "loss": 0.7875, + "step": 28350 + }, + { + "epoch": 8.64, + "learning_rate": 5.464840182648402e-06, + "loss": 0.83, + "step": 28375 + }, + { + "epoch": 8.65, + "learning_rate": 5.434398782343988e-06, + "loss": 0.8188, + "step": 28400 + }, + { + "epoch": 8.65, + "learning_rate": 5.403957382039574e-06, + "loss": 0.8448, + "step": 28425 + }, + { + "epoch": 8.66, + "learning_rate": 5.37351598173516e-06, + "loss": 0.7736, + "step": 28450 + }, + { + "epoch": 8.67, + "learning_rate": 5.3430745814307465e-06, + "loss": 0.8335, + "step": 28475 + }, + { + "epoch": 8.68, + "learning_rate": 5.312633181126332e-06, + "loss": 0.8536, + "step": 28500 + }, + { + "epoch": 8.68, + "learning_rate": 5.282191780821918e-06, + "loss": 0.8378, + "step": 28525 + }, + { + "epoch": 8.69, + "learning_rate": 5.251750380517504e-06, + "loss": 0.8067, + "step": 28550 + }, + { + "epoch": 8.7, + "learning_rate": 5.222526636225267e-06, + "loss": 0.8447, + "step": 28575 + }, + { + "epoch": 8.71, + "learning_rate": 5.192085235920853e-06, + "loss": 0.8222, + "step": 28600 + }, + { + "epoch": 8.71, + "learning_rate": 5.161643835616439e-06, + "loss": 0.813, + "step": 28625 + }, + { + "epoch": 8.72, + "learning_rate": 5.131202435312025e-06, + "loss": 0.8439, + "step": 28650 + }, + { + "epoch": 8.73, + "learning_rate": 5.100761035007611e-06, + "loss": 0.8635, + "step": 28675 + }, + { + "epoch": 8.74, + "learning_rate": 5.070319634703197e-06, + "loss": 0.8653, + "step": 28700 + }, + { + "epoch": 8.74, + "learning_rate": 5.0398782343987825e-06, + "loss": 0.827, + "step": 28725 + }, + { + "epoch": 8.75, + "learning_rate": 5.009436834094369e-06, + "loss": 0.8366, + "step": 28750 + }, + { + "epoch": 8.76, + "learning_rate": 4.978995433789955e-06, + "loss": 0.8199, + "step": 28775 + }, + { + "epoch": 8.77, + "learning_rate": 4.949771689497717e-06, + "loss": 0.9215, + "step": 28800 + }, + { + "epoch": 8.77, + "learning_rate": 4.919330289193303e-06, + "loss": 0.8776, + "step": 28825 + }, + { + "epoch": 8.78, + "learning_rate": 4.888888888888889e-06, + "loss": 0.8023, + "step": 28850 + }, + { + "epoch": 8.79, + "learning_rate": 4.858447488584475e-06, + "loss": 0.8622, + "step": 28875 + }, + { + "epoch": 8.8, + "learning_rate": 4.8280060882800615e-06, + "loss": 0.801, + "step": 28900 + }, + { + "epoch": 8.81, + "learning_rate": 4.797564687975647e-06, + "loss": 0.837, + "step": 28925 + }, + { + "epoch": 8.81, + "learning_rate": 4.767123287671233e-06, + "loss": 0.8399, + "step": 28950 + }, + { + "epoch": 8.82, + "learning_rate": 4.736681887366819e-06, + "loss": 0.804, + "step": 28975 + }, + { + "epoch": 8.83, + "learning_rate": 4.706240487062405e-06, + "loss": 0.8425, + "step": 29000 + }, + { + "epoch": 8.84, + "learning_rate": 4.675799086757991e-06, + "loss": 0.8786, + "step": 29025 + }, + { + "epoch": 8.84, + "learning_rate": 4.645357686453577e-06, + "loss": 0.9328, + "step": 29050 + }, + { + "epoch": 8.85, + "learning_rate": 4.614916286149163e-06, + "loss": 0.8182, + "step": 29075 + }, + { + "epoch": 8.86, + "learning_rate": 4.5844748858447495e-06, + "loss": 0.8446, + "step": 29100 + }, + { + "epoch": 8.87, + "learning_rate": 4.554033485540335e-06, + "loss": 0.8503, + "step": 29125 + }, + { + "epoch": 8.87, + "learning_rate": 4.523592085235921e-06, + "loss": 0.8258, + "step": 29150 + }, + { + "epoch": 8.88, + "learning_rate": 4.493150684931507e-06, + "loss": 0.8692, + "step": 29175 + }, + { + "epoch": 8.89, + "learning_rate": 4.462709284627093e-06, + "loss": 0.8569, + "step": 29200 + }, + { + "epoch": 8.9, + "learning_rate": 4.432267884322679e-06, + "loss": 0.8694, + "step": 29225 + }, + { + "epoch": 8.9, + "learning_rate": 4.401826484018265e-06, + "loss": 0.836, + "step": 29250 + }, + { + "epoch": 8.91, + "learning_rate": 4.371385083713851e-06, + "loss": 0.804, + "step": 29275 + }, + { + "epoch": 8.92, + "learning_rate": 4.340943683409437e-06, + "loss": 0.8593, + "step": 29300 + }, + { + "epoch": 8.93, + "learning_rate": 4.310502283105023e-06, + "loss": 0.8547, + "step": 29325 + }, + { + "epoch": 8.93, + "learning_rate": 4.280060882800609e-06, + "loss": 0.8705, + "step": 29350 + }, + { + "epoch": 8.94, + "learning_rate": 4.249619482496195e-06, + "loss": 0.867, + "step": 29375 + }, + { + "epoch": 8.95, + "learning_rate": 4.219178082191781e-06, + "loss": 0.796, + "step": 29400 + }, + { + "epoch": 8.96, + "learning_rate": 4.188736681887367e-06, + "loss": 0.8249, + "step": 29425 + }, + { + "epoch": 8.96, + "learning_rate": 4.158295281582953e-06, + "loss": 0.8244, + "step": 29450 + }, + { + "epoch": 8.97, + "learning_rate": 4.127853881278539e-06, + "loss": 0.8224, + "step": 29475 + }, + { + "epoch": 8.98, + "learning_rate": 4.097412480974125e-06, + "loss": 0.8259, + "step": 29500 + }, + { + "epoch": 8.99, + "learning_rate": 4.066971080669711e-06, + "loss": 0.8255, + "step": 29525 + }, + { + "epoch": 9.0, + "learning_rate": 4.036529680365297e-06, + "loss": 0.8501, + "step": 29550 + }, + { + "epoch": 9.0, + "learning_rate": 4.006088280060883e-06, + "loss": 0.7706, + "step": 29575 + }, + { + "epoch": 9.01, + "learning_rate": 3.975646879756469e-06, + "loss": 0.7943, + "step": 29600 + }, + { + "epoch": 9.02, + "learning_rate": 3.945205479452055e-06, + "loss": 0.7389, + "step": 29625 + }, + { + "epoch": 9.03, + "learning_rate": 3.914764079147641e-06, + "loss": 0.7088, + "step": 29650 + }, + { + "epoch": 9.03, + "learning_rate": 3.884322678843227e-06, + "loss": 0.772, + "step": 29675 + }, + { + "epoch": 9.04, + "learning_rate": 3.853881278538813e-06, + "loss": 0.7772, + "step": 29700 + }, + { + "epoch": 9.05, + "learning_rate": 3.823439878234399e-06, + "loss": 0.7484, + "step": 29725 + }, + { + "epoch": 9.06, + "learning_rate": 3.792998477929985e-06, + "loss": 0.7653, + "step": 29750 + }, + { + "epoch": 9.06, + "learning_rate": 3.762557077625571e-06, + "loss": 0.7537, + "step": 29775 + }, + { + "epoch": 9.07, + "learning_rate": 3.732115677321157e-06, + "loss": 0.7248, + "step": 29800 + }, + { + "epoch": 9.08, + "learning_rate": 3.701674277016743e-06, + "loss": 0.8037, + "step": 29825 + }, + { + "epoch": 9.09, + "learning_rate": 3.671232876712329e-06, + "loss": 0.7425, + "step": 29850 + }, + { + "epoch": 9.09, + "learning_rate": 3.640791476407915e-06, + "loss": 0.7592, + "step": 29875 + }, + { + "epoch": 9.1, + "learning_rate": 3.610350076103501e-06, + "loss": 0.7541, + "step": 29900 + }, + { + "epoch": 9.11, + "learning_rate": 3.579908675799087e-06, + "loss": 0.739, + "step": 29925 + }, + { + "epoch": 9.12, + "learning_rate": 3.549467275494673e-06, + "loss": 0.7581, + "step": 29950 + }, + { + "epoch": 9.12, + "learning_rate": 3.519025875190259e-06, + "loss": 0.8068, + "step": 29975 + }, + { + "epoch": 9.13, + "learning_rate": 3.488584474885845e-06, + "loss": 0.7699, + "step": 30000 + }, + { + "epoch": 9.14, + "learning_rate": 3.458143074581431e-06, + "loss": 0.725, + "step": 30025 + }, + { + "epoch": 9.15, + "learning_rate": 3.4277016742770168e-06, + "loss": 0.7598, + "step": 30050 + }, + { + "epoch": 9.16, + "learning_rate": 3.397260273972603e-06, + "loss": 0.726, + "step": 30075 + }, + { + "epoch": 9.16, + "learning_rate": 3.366818873668189e-06, + "loss": 0.7622, + "step": 30100 + }, + { + "epoch": 9.17, + "learning_rate": 3.336377473363775e-06, + "loss": 0.7633, + "step": 30125 + }, + { + "epoch": 9.18, + "learning_rate": 3.3059360730593608e-06, + "loss": 0.7839, + "step": 30150 + }, + { + "epoch": 9.19, + "learning_rate": 3.275494672754947e-06, + "loss": 0.758, + "step": 30175 + }, + { + "epoch": 9.19, + "learning_rate": 3.245053272450533e-06, + "loss": 0.7451, + "step": 30200 + }, + { + "epoch": 9.2, + "learning_rate": 3.214611872146119e-06, + "loss": 0.7402, + "step": 30225 + }, + { + "epoch": 9.21, + "learning_rate": 3.1841704718417048e-06, + "loss": 0.733, + "step": 30250 + }, + { + "epoch": 9.22, + "learning_rate": 3.153729071537291e-06, + "loss": 0.7695, + "step": 30275 + }, + { + "epoch": 9.22, + "learning_rate": 3.123287671232877e-06, + "loss": 0.7838, + "step": 30300 + }, + { + "epoch": 9.23, + "learning_rate": 3.092846270928463e-06, + "loss": 0.7946, + "step": 30325 + }, + { + "epoch": 9.24, + "learning_rate": 3.0624048706240488e-06, + "loss": 0.7699, + "step": 30350 + }, + { + "epoch": 9.25, + "learning_rate": 3.031963470319635e-06, + "loss": 0.7173, + "step": 30375 + }, + { + "epoch": 9.25, + "learning_rate": 3.001522070015221e-06, + "loss": 0.7182, + "step": 30400 + }, + { + "epoch": 9.26, + "learning_rate": 2.971080669710807e-06, + "loss": 0.7544, + "step": 30425 + }, + { + "epoch": 9.27, + "learning_rate": 2.9406392694063927e-06, + "loss": 0.7678, + "step": 30450 + }, + { + "epoch": 9.28, + "learning_rate": 2.910197869101979e-06, + "loss": 0.7222, + "step": 30475 + }, + { + "epoch": 9.28, + "learning_rate": 2.879756468797565e-06, + "loss": 0.7949, + "step": 30500 + }, + { + "epoch": 9.29, + "learning_rate": 2.849315068493151e-06, + "loss": 0.7523, + "step": 30525 + }, + { + "epoch": 9.3, + "learning_rate": 2.8188736681887367e-06, + "loss": 0.7809, + "step": 30550 + }, + { + "epoch": 9.31, + "learning_rate": 2.788432267884323e-06, + "loss": 0.7836, + "step": 30575 + }, + { + "epoch": 9.32, + "learning_rate": 2.757990867579909e-06, + "loss": 0.7481, + "step": 30600 + }, + { + "epoch": 9.32, + "learning_rate": 2.727549467275495e-06, + "loss": 0.756, + "step": 30625 + }, + { + "epoch": 9.33, + "learning_rate": 2.6971080669710807e-06, + "loss": 0.8018, + "step": 30650 + }, + { + "epoch": 9.34, + "learning_rate": 2.666666666666667e-06, + "loss": 0.7264, + "step": 30675 + }, + { + "epoch": 9.35, + "learning_rate": 2.636225266362253e-06, + "loss": 0.7553, + "step": 30700 + }, + { + "epoch": 9.35, + "learning_rate": 2.605783866057839e-06, + "loss": 0.8181, + "step": 30725 + }, + { + "epoch": 9.36, + "learning_rate": 2.5753424657534247e-06, + "loss": 0.8361, + "step": 30750 + }, + { + "epoch": 9.37, + "learning_rate": 2.544901065449011e-06, + "loss": 0.7377, + "step": 30775 + }, + { + "epoch": 9.38, + "learning_rate": 2.514459665144597e-06, + "loss": 0.7311, + "step": 30800 + }, + { + "epoch": 9.38, + "learning_rate": 2.484018264840183e-06, + "loss": 0.7555, + "step": 30825 + }, + { + "epoch": 9.39, + "learning_rate": 2.4535768645357687e-06, + "loss": 0.7609, + "step": 30850 + }, + { + "epoch": 9.4, + "learning_rate": 2.423135464231355e-06, + "loss": 0.7774, + "step": 30875 + }, + { + "epoch": 9.41, + "learning_rate": 2.392694063926941e-06, + "loss": 0.7941, + "step": 30900 + }, + { + "epoch": 9.41, + "learning_rate": 2.362252663622527e-06, + "loss": 0.782, + "step": 30925 + }, + { + "epoch": 9.42, + "learning_rate": 2.3318112633181127e-06, + "loss": 0.7627, + "step": 30950 + }, + { + "epoch": 9.43, + "learning_rate": 2.301369863013699e-06, + "loss": 0.7271, + "step": 30975 + }, + { + "epoch": 9.44, + "learning_rate": 2.270928462709285e-06, + "loss": 0.7764, + "step": 31000 + }, + { + "epoch": 9.44, + "learning_rate": 2.240487062404871e-06, + "loss": 0.8404, + "step": 31025 + }, + { + "epoch": 9.45, + "learning_rate": 2.2100456621004567e-06, + "loss": 0.7565, + "step": 31050 + }, + { + "epoch": 9.46, + "learning_rate": 2.179604261796043e-06, + "loss": 0.7917, + "step": 31075 + }, + { + "epoch": 9.47, + "learning_rate": 2.149162861491629e-06, + "loss": 0.7568, + "step": 31100 + }, + { + "epoch": 9.47, + "learning_rate": 2.1187214611872146e-06, + "loss": 0.7278, + "step": 31125 + }, + { + "epoch": 9.48, + "learning_rate": 2.0882800608828007e-06, + "loss": 0.8142, + "step": 31150 + }, + { + "epoch": 9.49, + "learning_rate": 2.0578386605783868e-06, + "loss": 0.791, + "step": 31175 + }, + { + "epoch": 9.5, + "learning_rate": 2.027397260273973e-06, + "loss": 0.7884, + "step": 31200 + }, + { + "epoch": 9.51, + "learning_rate": 1.9969558599695586e-06, + "loss": 0.8056, + "step": 31225 + }, + { + "epoch": 9.51, + "learning_rate": 1.9665144596651447e-06, + "loss": 0.7537, + "step": 31250 + }, + { + "epoch": 9.52, + "learning_rate": 1.9360730593607308e-06, + "loss": 0.7794, + "step": 31275 + }, + { + "epoch": 9.53, + "learning_rate": 1.9056316590563167e-06, + "loss": 0.8168, + "step": 31300 + }, + { + "epoch": 9.54, + "learning_rate": 1.8751902587519028e-06, + "loss": 0.756, + "step": 31325 + }, + { + "epoch": 9.54, + "learning_rate": 1.8447488584474887e-06, + "loss": 0.7625, + "step": 31350 + }, + { + "epoch": 9.55, + "learning_rate": 1.8143074581430748e-06, + "loss": 0.7638, + "step": 31375 + }, + { + "epoch": 9.56, + "learning_rate": 1.7838660578386607e-06, + "loss": 0.7532, + "step": 31400 + }, + { + "epoch": 9.57, + "learning_rate": 1.7534246575342468e-06, + "loss": 0.7272, + "step": 31425 + }, + { + "epoch": 9.57, + "learning_rate": 1.7229832572298326e-06, + "loss": 0.7503, + "step": 31450 + }, + { + "epoch": 9.58, + "learning_rate": 1.6925418569254187e-06, + "loss": 0.7559, + "step": 31475 + }, + { + "epoch": 9.59, + "learning_rate": 1.6621004566210046e-06, + "loss": 0.7825, + "step": 31500 + }, + { + "epoch": 9.6, + "learning_rate": 1.6316590563165907e-06, + "loss": 0.7557, + "step": 31525 + }, + { + "epoch": 9.6, + "learning_rate": 1.6012176560121766e-06, + "loss": 0.7957, + "step": 31550 + }, + { + "epoch": 9.61, + "learning_rate": 1.5707762557077627e-06, + "loss": 0.7323, + "step": 31575 + }, + { + "epoch": 9.62, + "learning_rate": 1.5403348554033486e-06, + "loss": 0.7987, + "step": 31600 + }, + { + "epoch": 9.63, + "learning_rate": 1.5098934550989347e-06, + "loss": 0.8013, + "step": 31625 + }, + { + "epoch": 9.63, + "learning_rate": 1.4794520547945206e-06, + "loss": 0.7903, + "step": 31650 + }, + { + "epoch": 9.64, + "learning_rate": 1.4490106544901067e-06, + "loss": 0.7038, + "step": 31675 + }, + { + "epoch": 9.65, + "learning_rate": 1.4185692541856926e-06, + "loss": 0.7153, + "step": 31700 + }, + { + "epoch": 9.66, + "learning_rate": 1.3881278538812787e-06, + "loss": 0.7497, + "step": 31725 + }, + { + "epoch": 9.67, + "learning_rate": 1.3576864535768646e-06, + "loss": 0.7115, + "step": 31750 + }, + { + "epoch": 9.67, + "learning_rate": 1.3272450532724507e-06, + "loss": 0.7573, + "step": 31775 + }, + { + "epoch": 9.68, + "learning_rate": 1.2968036529680366e-06, + "loss": 0.756, + "step": 31800 + }, + { + "epoch": 9.69, + "learning_rate": 1.2663622526636227e-06, + "loss": 0.797, + "step": 31825 + }, + { + "epoch": 9.7, + "learning_rate": 1.2359208523592086e-06, + "loss": 0.7785, + "step": 31850 + }, + { + "epoch": 9.7, + "learning_rate": 1.2054794520547947e-06, + "loss": 0.75, + "step": 31875 + }, + { + "epoch": 9.71, + "learning_rate": 1.1750380517503806e-06, + "loss": 0.7955, + "step": 31900 + }, + { + "epoch": 9.72, + "learning_rate": 1.1445966514459667e-06, + "loss": 0.8163, + "step": 31925 + }, + { + "epoch": 9.73, + "learning_rate": 1.1141552511415526e-06, + "loss": 0.7569, + "step": 31950 + }, + { + "epoch": 9.73, + "learning_rate": 1.0837138508371387e-06, + "loss": 0.7812, + "step": 31975 + }, + { + "epoch": 9.74, + "learning_rate": 1.0532724505327246e-06, + "loss": 0.7108, + "step": 32000 + }, + { + "epoch": 9.75, + "learning_rate": 1.0228310502283107e-06, + "loss": 0.754, + "step": 32025 + }, + { + "epoch": 9.76, + "learning_rate": 9.923896499238966e-07, + "loss": 0.805, + "step": 32050 + }, + { + "epoch": 9.76, + "learning_rate": 9.619482496194827e-07, + "loss": 0.7494, + "step": 32075 + }, + { + "epoch": 9.77, + "learning_rate": 9.315068493150686e-07, + "loss": 0.7821, + "step": 32100 + }, + { + "epoch": 9.78, + "learning_rate": 9.010654490106546e-07, + "loss": 0.8192, + "step": 32125 + }, + { + "epoch": 9.79, + "learning_rate": 8.706240487062406e-07, + "loss": 0.7508, + "step": 32150 + }, + { + "epoch": 9.79, + "learning_rate": 8.401826484018266e-07, + "loss": 0.7274, + "step": 32175 + }, + { + "epoch": 9.8, + "learning_rate": 8.097412480974126e-07, + "loss": 0.786, + "step": 32200 + }, + { + "epoch": 9.81, + "learning_rate": 7.792998477929986e-07, + "loss": 0.7535, + "step": 32225 + }, + { + "epoch": 9.82, + "learning_rate": 7.488584474885845e-07, + "loss": 0.7448, + "step": 32250 + }, + { + "epoch": 9.82, + "learning_rate": 7.184170471841705e-07, + "loss": 0.7506, + "step": 32275 + }, + { + "epoch": 9.83, + "learning_rate": 6.879756468797565e-07, + "loss": 0.7662, + "step": 32300 + }, + { + "epoch": 9.84, + "learning_rate": 6.575342465753425e-07, + "loss": 0.7408, + "step": 32325 + }, + { + "epoch": 9.85, + "learning_rate": 6.270928462709285e-07, + "loss": 0.7333, + "step": 32350 + }, + { + "epoch": 9.86, + "learning_rate": 5.966514459665146e-07, + "loss": 0.7941, + "step": 32375 + }, + { + "epoch": 9.86, + "learning_rate": 5.662100456621006e-07, + "loss": 0.7735, + "step": 32400 + }, + { + "epoch": 9.87, + "learning_rate": 5.357686453576865e-07, + "loss": 0.7741, + "step": 32425 + }, + { + "epoch": 9.88, + "learning_rate": 5.053272450532725e-07, + "loss": 0.7857, + "step": 32450 + }, + { + "epoch": 9.89, + "learning_rate": 4.748858447488585e-07, + "loss": 0.8137, + "step": 32475 + }, + { + "epoch": 9.89, + "learning_rate": 4.444444444444445e-07, + "loss": 0.7557, + "step": 32500 + }, + { + "epoch": 9.9, + "learning_rate": 4.140030441400305e-07, + "loss": 0.7187, + "step": 32525 + }, + { + "epoch": 9.91, + "learning_rate": 3.835616438356165e-07, + "loss": 0.7536, + "step": 32550 + }, + { + "epoch": 9.92, + "learning_rate": 3.531202435312025e-07, + "loss": 0.7462, + "step": 32575 + }, + { + "epoch": 9.92, + "learning_rate": 3.226788432267885e-07, + "loss": 0.7967, + "step": 32600 + }, + { + "epoch": 9.93, + "learning_rate": 2.922374429223744e-07, + "loss": 0.7531, + "step": 32625 + }, + { + "epoch": 9.94, + "learning_rate": 2.617960426179604e-07, + "loss": 0.7584, + "step": 32650 + }, + { + "epoch": 9.95, + "learning_rate": 2.3135464231354645e-07, + "loss": 0.7664, + "step": 32675 + }, + { + "epoch": 9.95, + "learning_rate": 2.0091324200913244e-07, + "loss": 0.8058, + "step": 32700 + }, + { + "epoch": 9.96, + "learning_rate": 1.7047184170471844e-07, + "loss": 0.795, + "step": 32725 + }, + { + "epoch": 9.97, + "learning_rate": 1.4003044140030444e-07, + "loss": 0.7861, + "step": 32750 + }, + { + "epoch": 9.98, + "learning_rate": 1.0958904109589042e-07, + "loss": 0.7567, + "step": 32775 + }, + { + "epoch": 9.98, + "learning_rate": 7.914764079147642e-08, + "loss": 0.7481, + "step": 32800 + }, + { + "epoch": 9.99, + "learning_rate": 4.870624048706241e-08, + "loss": 0.7556, + "step": 32825 + }, + { + "epoch": 10.0, + "learning_rate": 1.9482496194824964e-08, + "loss": 0.7565, + "step": 32850 + }, + { + "epoch": 10.0, + "step": 32850, + "total_flos": 4.270496328921907e+17, + "train_loss": 1.3997784228157961, + "train_runtime": 9169.8515, + "train_samples_per_second": 21.49, + "train_steps_per_second": 3.582 + } + ], + "max_steps": 32850, + "num_train_epochs": 10, + "total_flos": 4.270496328921907e+17, + "trial_name": null, + "trial_params": null +}