diff --git "a/checkpoint-49300/trainer_state.json" "b/checkpoint-49300/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-49300/trainer_state.json" @@ -0,0 +1,29601 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8531994981179423, + "eval_steps": 500, + "global_step": 49300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.844290657439446e-07, + "loss": 11.3843, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.1072664359861592e-06, + "loss": 11.4922, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.7993079584775088e-06, + "loss": 11.4643, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 2.4913494809688584e-06, + "loss": 11.2703, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 3.1833910034602078e-06, + "loss": 11.0879, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 3.875432525951557e-06, + "loss": 11.0263, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.5674740484429065e-06, + "loss": 10.7875, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 5.190311418685121e-06, + "loss": 10.4875, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 5.882352941176471e-06, + "loss": 10.1665, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 6.5743944636678194e-06, + "loss": 9.6567, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 7.2664359861591705e-06, + "loss": 9.3623, + "step": 110 + }, + { + "epoch": 0.0, + "learning_rate": 7.958477508650519e-06, + "loss": 9.1007, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 8.650519031141868e-06, + "loss": 8.7675, + "step": 130 + }, + { + "epoch": 0.0, + "learning_rate": 9.34256055363322e-06, + "loss": 8.4775, + "step": 140 + }, + { + "epoch": 0.0, + "learning_rate": 1.0034602076124568e-05, + "loss": 8.3187, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 1.0726643598615918e-05, + "loss": 8.0933, + "step": 160 + }, + { + "epoch": 0.0, + "learning_rate": 1.1418685121107267e-05, + "loss": 7.9094, + "step": 170 + }, + { + "epoch": 0.0, + "learning_rate": 1.2110726643598615e-05, + "loss": 7.9038, + "step": 180 + }, + { + "epoch": 0.0, + "learning_rate": 1.2802768166089965e-05, + "loss": 7.8251, + "step": 190 + }, + { + "epoch": 0.0, + "learning_rate": 1.3494809688581317e-05, + "loss": 7.69, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 1.4186851211072666e-05, + "loss": 7.5891, + "step": 210 + }, + { + "epoch": 0.0, + "learning_rate": 1.4878892733564014e-05, + "loss": 7.5995, + "step": 220 + }, + { + "epoch": 0.0, + "learning_rate": 1.5570934256055363e-05, + "loss": 7.5861, + "step": 230 + }, + { + "epoch": 0.0, + "learning_rate": 1.6262975778546713e-05, + "loss": 7.457, + "step": 240 + }, + { + "epoch": 0.0, + "learning_rate": 1.6955017301038063e-05, + "loss": 7.4987, + "step": 250 + }, + { + "epoch": 0.0, + "learning_rate": 1.7647058823529414e-05, + "loss": 7.3925, + "step": 260 + }, + { + "epoch": 0.0, + "learning_rate": 1.8339100346020764e-05, + "loss": 7.3742, + "step": 270 + }, + { + "epoch": 0.0, + "learning_rate": 1.903114186851211e-05, + "loss": 7.2152, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 1.972318339100346e-05, + "loss": 7.1937, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 2.041522491349481e-05, + "loss": 7.2579, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 2.1107266435986158e-05, + "loss": 7.1413, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 2.179930795847751e-05, + "loss": 7.1005, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 2.249134948096886e-05, + "loss": 7.0903, + "step": 330 + }, + { + "epoch": 0.01, + "learning_rate": 2.318339100346021e-05, + "loss": 7.0204, + "step": 340 + }, + { + "epoch": 0.01, + "learning_rate": 2.387543252595156e-05, + "loss": 6.9821, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 2.4567474048442906e-05, + "loss": 6.9959, + "step": 360 + }, + { + "epoch": 0.01, + "learning_rate": 2.5259515570934256e-05, + "loss": 6.9195, + "step": 370 + }, + { + "epoch": 0.01, + "learning_rate": 2.5882352941176475e-05, + "loss": 6.8595, + "step": 380 + }, + { + "epoch": 0.01, + "learning_rate": 2.6505190311418688e-05, + "loss": 6.8417, + "step": 390 + }, + { + "epoch": 0.01, + "learning_rate": 2.7197231833910038e-05, + "loss": 6.8092, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 2.7889273356401385e-05, + "loss": 6.6811, + "step": 410 + }, + { + "epoch": 0.01, + "learning_rate": 2.8581314878892735e-05, + "loss": 6.6395, + "step": 420 + }, + { + "epoch": 0.01, + "learning_rate": 2.927335640138408e-05, + "loss": 6.664, + "step": 430 + }, + { + "epoch": 0.01, + "learning_rate": 2.9965397923875432e-05, + "loss": 6.6459, + "step": 440 + }, + { + "epoch": 0.01, + "learning_rate": 3.065743944636678e-05, + "loss": 6.5786, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 3.134948096885813e-05, + "loss": 6.523, + "step": 460 + }, + { + "epoch": 0.01, + "learning_rate": 3.204152249134948e-05, + "loss": 6.4288, + "step": 470 + }, + { + "epoch": 0.01, + "learning_rate": 3.273356401384083e-05, + "loss": 6.4009, + "step": 480 + }, + { + "epoch": 0.01, + "learning_rate": 3.342560553633218e-05, + "loss": 6.2901, + "step": 490 + }, + { + "epoch": 0.01, + "learning_rate": 3.411764705882353e-05, + "loss": 6.2791, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 3.4809688581314883e-05, + "loss": 6.3766, + "step": 510 + }, + { + "epoch": 0.01, + "learning_rate": 3.550173010380623e-05, + "loss": 6.2361, + "step": 520 + }, + { + "epoch": 0.01, + "learning_rate": 3.619377162629758e-05, + "loss": 6.3265, + "step": 530 + }, + { + "epoch": 0.01, + "learning_rate": 3.688581314878893e-05, + "loss": 6.1918, + "step": 540 + }, + { + "epoch": 0.01, + "learning_rate": 3.757785467128028e-05, + "loss": 6.2941, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 3.826989619377163e-05, + "loss": 6.1485, + "step": 560 + }, + { + "epoch": 0.01, + "learning_rate": 3.896193771626298e-05, + "loss": 6.1348, + "step": 570 + }, + { + "epoch": 0.01, + "learning_rate": 3.965397923875433e-05, + "loss": 6.0892, + "step": 580 + }, + { + "epoch": 0.01, + "learning_rate": 4.034602076124568e-05, + "loss": 6.0854, + "step": 590 + }, + { + "epoch": 0.01, + "learning_rate": 4.103806228373703e-05, + "loss": 5.9963, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 4.173010380622838e-05, + "loss": 5.9803, + "step": 610 + }, + { + "epoch": 0.01, + "learning_rate": 4.242214532871973e-05, + "loss": 5.9827, + "step": 620 + }, + { + "epoch": 0.01, + "learning_rate": 4.311418685121107e-05, + "loss": 5.9034, + "step": 630 + }, + { + "epoch": 0.01, + "learning_rate": 4.380622837370242e-05, + "loss": 5.8993, + "step": 640 + }, + { + "epoch": 0.01, + "learning_rate": 4.449826989619377e-05, + "loss": 5.8226, + "step": 650 + }, + { + "epoch": 0.01, + "learning_rate": 4.519031141868512e-05, + "loss": 5.9912, + "step": 660 + }, + { + "epoch": 0.01, + "learning_rate": 4.588235294117647e-05, + "loss": 5.821, + "step": 670 + }, + { + "epoch": 0.01, + "learning_rate": 4.657439446366782e-05, + "loss": 5.8343, + "step": 680 + }, + { + "epoch": 0.01, + "learning_rate": 4.7266435986159174e-05, + "loss": 5.7343, + "step": 690 + }, + { + "epoch": 0.01, + "learning_rate": 4.7958477508650524e-05, + "loss": 5.7435, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 4.8650519031141874e-05, + "loss": 5.7636, + "step": 710 + }, + { + "epoch": 0.01, + "learning_rate": 4.9342560553633224e-05, + "loss": 5.7366, + "step": 720 + }, + { + "epoch": 0.01, + "learning_rate": 5.003460207612457e-05, + "loss": 5.6594, + "step": 730 + }, + { + "epoch": 0.01, + "learning_rate": 5.072664359861592e-05, + "loss": 5.5874, + "step": 740 + }, + { + "epoch": 0.01, + "learning_rate": 5.141868512110727e-05, + "loss": 5.7374, + "step": 750 + }, + { + "epoch": 0.01, + "learning_rate": 5.211072664359862e-05, + "loss": 5.6076, + "step": 760 + }, + { + "epoch": 0.01, + "learning_rate": 5.280276816608997e-05, + "loss": 5.6246, + "step": 770 + }, + { + "epoch": 0.01, + "learning_rate": 5.349480968858131e-05, + "loss": 5.6905, + "step": 780 + }, + { + "epoch": 0.01, + "learning_rate": 5.418685121107266e-05, + "loss": 5.61, + "step": 790 + }, + { + "epoch": 0.01, + "learning_rate": 5.487889273356401e-05, + "loss": 5.5684, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 5.557093425605536e-05, + "loss": 5.5248, + "step": 810 + }, + { + "epoch": 0.01, + "learning_rate": 5.626297577854671e-05, + "loss": 5.5351, + "step": 820 + }, + { + "epoch": 0.01, + "learning_rate": 5.695501730103806e-05, + "loss": 5.5094, + "step": 830 + }, + { + "epoch": 0.01, + "learning_rate": 5.764705882352941e-05, + "loss": 5.3651, + "step": 840 + }, + { + "epoch": 0.01, + "learning_rate": 5.833910034602076e-05, + "loss": 5.4953, + "step": 850 + }, + { + "epoch": 0.01, + "learning_rate": 5.903114186851212e-05, + "loss": 5.4298, + "step": 860 + }, + { + "epoch": 0.02, + "learning_rate": 5.972318339100347e-05, + "loss": 5.4281, + "step": 870 + }, + { + "epoch": 0.02, + "learning_rate": 6.0415224913494814e-05, + "loss": 5.4204, + "step": 880 + }, + { + "epoch": 0.02, + "learning_rate": 6.110726643598617e-05, + "loss": 5.4936, + "step": 890 + }, + { + "epoch": 0.02, + "learning_rate": 6.179930795847751e-05, + "loss": 5.3641, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 6.249134948096886e-05, + "loss": 5.3753, + "step": 910 + }, + { + "epoch": 0.02, + "learning_rate": 6.318339100346021e-05, + "loss": 5.3648, + "step": 920 + }, + { + "epoch": 0.02, + "learning_rate": 6.387543252595156e-05, + "loss": 5.3226, + "step": 930 + }, + { + "epoch": 0.02, + "learning_rate": 6.456747404844291e-05, + "loss": 5.3352, + "step": 940 + }, + { + "epoch": 0.02, + "learning_rate": 6.525951557093426e-05, + "loss": 5.3024, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 6.595155709342561e-05, + "loss": 5.3395, + "step": 960 + }, + { + "epoch": 0.02, + "learning_rate": 6.664359861591696e-05, + "loss": 5.3071, + "step": 970 + }, + { + "epoch": 0.02, + "learning_rate": 6.733564013840831e-05, + "loss": 5.2185, + "step": 980 + }, + { + "epoch": 0.02, + "learning_rate": 6.802768166089966e-05, + "loss": 5.22, + "step": 990 + }, + { + "epoch": 0.02, + "learning_rate": 6.871972318339101e-05, + "loss": 5.1606, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 6.941176470588236e-05, + "loss": 5.223, + "step": 1010 + }, + { + "epoch": 0.02, + "learning_rate": 7.01038062283737e-05, + "loss": 5.1348, + "step": 1020 + }, + { + "epoch": 0.02, + "learning_rate": 7.079584775086505e-05, + "loss": 5.2111, + "step": 1030 + }, + { + "epoch": 0.02, + "learning_rate": 7.14878892733564e-05, + "loss": 5.1559, + "step": 1040 + }, + { + "epoch": 0.02, + "learning_rate": 7.217993079584775e-05, + "loss": 5.1723, + "step": 1050 + }, + { + "epoch": 0.02, + "learning_rate": 7.28719723183391e-05, + "loss": 5.1858, + "step": 1060 + }, + { + "epoch": 0.02, + "learning_rate": 7.356401384083045e-05, + "loss": 5.1041, + "step": 1070 + }, + { + "epoch": 0.02, + "learning_rate": 7.425605536332181e-05, + "loss": 5.1212, + "step": 1080 + }, + { + "epoch": 0.02, + "learning_rate": 7.494809688581316e-05, + "loss": 5.1205, + "step": 1090 + }, + { + "epoch": 0.02, + "learning_rate": 7.564013840830451e-05, + "loss": 5.1145, + "step": 1100 + }, + { + "epoch": 0.02, + "learning_rate": 7.633217993079585e-05, + "loss": 5.0884, + "step": 1110 + }, + { + "epoch": 0.02, + "learning_rate": 7.70242214532872e-05, + "loss": 5.1211, + "step": 1120 + }, + { + "epoch": 0.02, + "learning_rate": 7.771626297577855e-05, + "loss": 5.0219, + "step": 1130 + }, + { + "epoch": 0.02, + "learning_rate": 7.84083044982699e-05, + "loss": 5.0685, + "step": 1140 + }, + { + "epoch": 0.02, + "learning_rate": 7.910034602076125e-05, + "loss": 4.9674, + "step": 1150 + }, + { + "epoch": 0.02, + "learning_rate": 7.97923875432526e-05, + "loss": 5.0026, + "step": 1160 + }, + { + "epoch": 0.02, + "learning_rate": 8.048442906574395e-05, + "loss": 5.0192, + "step": 1170 + }, + { + "epoch": 0.02, + "learning_rate": 8.11764705882353e-05, + "loss": 5.0607, + "step": 1180 + }, + { + "epoch": 0.02, + "learning_rate": 8.186851211072665e-05, + "loss": 5.078, + "step": 1190 + }, + { + "epoch": 0.02, + "learning_rate": 8.2560553633218e-05, + "loss": 4.9619, + "step": 1200 + }, + { + "epoch": 0.02, + "learning_rate": 8.325259515570935e-05, + "loss": 4.9139, + "step": 1210 + }, + { + "epoch": 0.02, + "learning_rate": 8.394463667820069e-05, + "loss": 4.9738, + "step": 1220 + }, + { + "epoch": 0.02, + "learning_rate": 8.463667820069204e-05, + "loss": 4.9732, + "step": 1230 + }, + { + "epoch": 0.02, + "learning_rate": 8.532871972318339e-05, + "loss": 4.9963, + "step": 1240 + }, + { + "epoch": 0.02, + "learning_rate": 8.602076124567474e-05, + "loss": 4.9417, + "step": 1250 + }, + { + "epoch": 0.02, + "learning_rate": 8.671280276816609e-05, + "loss": 4.9426, + "step": 1260 + }, + { + "epoch": 0.02, + "learning_rate": 8.740484429065744e-05, + "loss": 4.9065, + "step": 1270 + }, + { + "epoch": 0.02, + "learning_rate": 8.809688581314879e-05, + "loss": 4.8885, + "step": 1280 + }, + { + "epoch": 0.02, + "learning_rate": 8.878892733564014e-05, + "loss": 4.918, + "step": 1290 + }, + { + "epoch": 0.02, + "learning_rate": 8.94809688581315e-05, + "loss": 4.8905, + "step": 1300 + }, + { + "epoch": 0.02, + "learning_rate": 9.017301038062284e-05, + "loss": 4.8405, + "step": 1310 + }, + { + "epoch": 0.02, + "learning_rate": 9.086505190311419e-05, + "loss": 4.9333, + "step": 1320 + }, + { + "epoch": 0.02, + "learning_rate": 9.155709342560554e-05, + "loss": 4.8638, + "step": 1330 + }, + { + "epoch": 0.02, + "learning_rate": 9.224913494809689e-05, + "loss": 4.8589, + "step": 1340 + }, + { + "epoch": 0.02, + "learning_rate": 9.294117647058824e-05, + "loss": 4.8543, + "step": 1350 + }, + { + "epoch": 0.02, + "learning_rate": 9.363321799307959e-05, + "loss": 4.8445, + "step": 1360 + }, + { + "epoch": 0.02, + "learning_rate": 9.432525951557094e-05, + "loss": 4.8007, + "step": 1370 + }, + { + "epoch": 0.02, + "learning_rate": 9.501730103806229e-05, + "loss": 4.8765, + "step": 1380 + }, + { + "epoch": 0.02, + "learning_rate": 9.570934256055364e-05, + "loss": 4.7588, + "step": 1390 + }, + { + "epoch": 0.02, + "learning_rate": 9.640138408304499e-05, + "loss": 4.8286, + "step": 1400 + }, + { + "epoch": 0.02, + "learning_rate": 9.709342560553634e-05, + "loss": 4.8321, + "step": 1410 + }, + { + "epoch": 0.02, + "learning_rate": 9.778546712802769e-05, + "loss": 4.7992, + "step": 1420 + }, + { + "epoch": 0.02, + "learning_rate": 9.847750865051903e-05, + "loss": 4.8228, + "step": 1430 + }, + { + "epoch": 0.02, + "learning_rate": 9.916955017301038e-05, + "loss": 4.7561, + "step": 1440 + }, + { + "epoch": 0.03, + "learning_rate": 9.986159169550173e-05, + "loss": 4.8082, + "step": 1450 + }, + { + "epoch": 0.03, + "learning_rate": 0.00010055363321799309, + "loss": 4.7646, + "step": 1460 + }, + { + "epoch": 0.03, + "learning_rate": 0.00010124567474048443, + "loss": 4.7917, + "step": 1470 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001019377162629758, + "loss": 4.7832, + "step": 1480 + }, + { + "epoch": 0.03, + "learning_rate": 0.00010262975778546713, + "loss": 4.8308, + "step": 1490 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001033217993079585, + "loss": 4.7319, + "step": 1500 + }, + { + "epoch": 0.03, + "learning_rate": 0.00010401384083044983, + "loss": 4.7214, + "step": 1510 + }, + { + "epoch": 0.03, + "learning_rate": 0.00010470588235294118, + "loss": 4.7412, + "step": 1520 + }, + { + "epoch": 0.03, + "learning_rate": 0.00010539792387543253, + "loss": 4.723, + "step": 1530 + }, + { + "epoch": 0.03, + "learning_rate": 0.00010608996539792388, + "loss": 4.7602, + "step": 1540 + }, + { + "epoch": 0.03, + "learning_rate": 0.00010678200692041522, + "loss": 4.7148, + "step": 1550 + }, + { + "epoch": 0.03, + "learning_rate": 0.00010747404844290658, + "loss": 4.6936, + "step": 1560 + }, + { + "epoch": 0.03, + "learning_rate": 0.00010816608996539792, + "loss": 4.7064, + "step": 1570 + }, + { + "epoch": 0.03, + "learning_rate": 0.00010885813148788928, + "loss": 4.651, + "step": 1580 + }, + { + "epoch": 0.03, + "learning_rate": 0.00010955017301038062, + "loss": 4.6812, + "step": 1590 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011024221453287198, + "loss": 4.5911, + "step": 1600 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011093425605536332, + "loss": 4.6238, + "step": 1610 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011162629757785468, + "loss": 4.6017, + "step": 1620 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011231833910034603, + "loss": 4.6857, + "step": 1630 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011301038062283737, + "loss": 4.5994, + "step": 1640 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011370242214532873, + "loss": 4.6037, + "step": 1650 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011439446366782007, + "loss": 4.5733, + "step": 1660 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011508650519031143, + "loss": 4.5224, + "step": 1670 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011577854671280277, + "loss": 4.6287, + "step": 1680 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011647058823529413, + "loss": 4.6344, + "step": 1690 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011716262975778547, + "loss": 4.5715, + "step": 1700 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011764705882352942, + "loss": 9.2041, + "step": 1710 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011833910034602077, + "loss": 7.614, + "step": 1720 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011896193771626298, + "loss": 6.6816, + "step": 1730 + }, + { + "epoch": 0.03, + "learning_rate": 0.00011965397923875432, + "loss": 6.06, + "step": 1740 + }, + { + "epoch": 0.03, + "learning_rate": 0.00012034602076124568, + "loss": 5.6681, + "step": 1750 + }, + { + "epoch": 0.03, + "learning_rate": 0.00012103806228373702, + "loss": 5.4592, + "step": 1760 + }, + { + "epoch": 0.03, + "learning_rate": 0.00012173010380622839, + "loss": 5.3273, + "step": 1770 + }, + { + "epoch": 0.03, + "learning_rate": 0.00012242214532871975, + "loss": 5.2278, + "step": 1780 + }, + { + "epoch": 0.03, + "learning_rate": 0.00012311418685121109, + "loss": 5.1185, + "step": 1790 + }, + { + "epoch": 0.03, + "learning_rate": 0.00012380622837370245, + "loss": 5.0205, + "step": 1800 + }, + { + "epoch": 0.03, + "learning_rate": 0.00012449826989619379, + "loss": 5.0303, + "step": 1810 + }, + { + "epoch": 0.03, + "learning_rate": 0.00012519031141868512, + "loss": 4.9824, + "step": 1820 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001258823529411765, + "loss": 4.9583, + "step": 1830 + }, + { + "epoch": 0.03, + "learning_rate": 0.00012657439446366782, + "loss": 4.8147, + "step": 1840 + }, + { + "epoch": 0.03, + "learning_rate": 0.00012726643598615916, + "loss": 4.8703, + "step": 1850 + }, + { + "epoch": 0.03, + "learning_rate": 0.00012782006920415226, + "loss": 7.5927, + "step": 1860 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001285121107266436, + "loss": 6.4347, + "step": 1870 + }, + { + "epoch": 0.03, + "learning_rate": 0.00012920415224913496, + "loss": 5.6382, + "step": 1880 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001298961937716263, + "loss": 5.3613, + "step": 1890 + }, + { + "epoch": 0.03, + "learning_rate": 0.00013058823529411766, + "loss": 5.1758, + "step": 1900 + }, + { + "epoch": 0.03, + "learning_rate": 0.000131280276816609, + "loss": 5.0184, + "step": 1910 + }, + { + "epoch": 0.03, + "learning_rate": 0.00013197231833910036, + "loss": 5.0102, + "step": 1920 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001326643598615917, + "loss": 4.966, + "step": 1930 + }, + { + "epoch": 0.03, + "learning_rate": 0.00013335640138408306, + "loss": 4.9783, + "step": 1940 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001340484429065744, + "loss": 4.823, + "step": 1950 + }, + { + "epoch": 0.03, + "learning_rate": 0.00013474048442906574, + "loss": 4.8571, + "step": 1960 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001354325259515571, + "loss": 4.8616, + "step": 1970 + }, + { + "epoch": 0.03, + "learning_rate": 0.00013612456747404844, + "loss": 4.8164, + "step": 1980 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001368166089965398, + "loss": 4.6817, + "step": 1990 + }, + { + "epoch": 0.03, + "learning_rate": 0.00013750865051903114, + "loss": 4.741, + "step": 2000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001382006920415225, + "loss": 4.6959, + "step": 2010 + }, + { + "epoch": 0.03, + "learning_rate": 0.00013889273356401384, + "loss": 4.715, + "step": 2020 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001395847750865052, + "loss": 4.6899, + "step": 2030 + }, + { + "epoch": 0.04, + "learning_rate": 0.00014027681660899654, + "loss": 4.6187, + "step": 2040 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001409688581314879, + "loss": 4.6066, + "step": 2050 + }, + { + "epoch": 0.04, + "learning_rate": 0.00014166089965397924, + "loss": 4.6223, + "step": 2060 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001423529411764706, + "loss": 4.5845, + "step": 2070 + }, + { + "epoch": 0.04, + "learning_rate": 0.00014304498269896194, + "loss": 4.6296, + "step": 2080 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001437370242214533, + "loss": 4.5785, + "step": 2090 + }, + { + "epoch": 0.04, + "learning_rate": 0.00014442906574394464, + "loss": 4.5645, + "step": 2100 + }, + { + "epoch": 0.04, + "learning_rate": 0.000145121107266436, + "loss": 4.5528, + "step": 2110 + }, + { + "epoch": 0.04, + "learning_rate": 0.00014581314878892734, + "loss": 4.5808, + "step": 2120 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001465051903114187, + "loss": 4.5717, + "step": 2130 + }, + { + "epoch": 0.04, + "learning_rate": 0.00014719723183391004, + "loss": 4.4441, + "step": 2140 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001478892733564014, + "loss": 4.5927, + "step": 2150 + }, + { + "epoch": 0.04, + "learning_rate": 0.00014858131487889274, + "loss": 4.5481, + "step": 2160 + }, + { + "epoch": 0.04, + "learning_rate": 0.00014927335640138408, + "loss": 4.4704, + "step": 2170 + }, + { + "epoch": 0.04, + "learning_rate": 0.00014996539792387544, + "loss": 4.5283, + "step": 2180 + }, + { + "epoch": 0.04, + "learning_rate": 0.00015065743944636678, + "loss": 4.5318, + "step": 2190 + }, + { + "epoch": 0.04, + "learning_rate": 0.00015134948096885814, + "loss": 4.4498, + "step": 2200 + }, + { + "epoch": 0.04, + "learning_rate": 0.00015204152249134948, + "loss": 4.5004, + "step": 2210 + }, + { + "epoch": 0.04, + "learning_rate": 0.00015273356401384084, + "loss": 4.4088, + "step": 2220 + }, + { + "epoch": 0.04, + "learning_rate": 0.00015342560553633218, + "loss": 4.4286, + "step": 2230 + }, + { + "epoch": 0.04, + "learning_rate": 0.00015411764705882354, + "loss": 4.3485, + "step": 2240 + }, + { + "epoch": 0.04, + "learning_rate": 0.00015480968858131488, + "loss": 4.4573, + "step": 2250 + }, + { + "epoch": 0.04, + "learning_rate": 0.00015536332179930798, + "loss": 6.7076, + "step": 2260 + }, + { + "epoch": 0.04, + "learning_rate": 0.00015605536332179932, + "loss": 5.5923, + "step": 2270 + }, + { + "epoch": 0.04, + "learning_rate": 0.00015674740484429065, + "loss": 5.0411, + "step": 2280 + }, + { + "epoch": 0.04, + "learning_rate": 0.00015743944636678202, + "loss": 4.84, + "step": 2290 + }, + { + "epoch": 0.04, + "learning_rate": 0.00015813148788927336, + "loss": 4.7474, + "step": 2300 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001588235294117647, + "loss": 4.7279, + "step": 2310 + }, + { + "epoch": 0.04, + "learning_rate": 0.00015951557093425606, + "loss": 4.6071, + "step": 2320 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001602076124567474, + "loss": 4.625, + "step": 2330 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016089965397923876, + "loss": 4.5299, + "step": 2340 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016159169550173012, + "loss": 4.5897, + "step": 2350 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016228373702422146, + "loss": 4.5095, + "step": 2360 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016297577854671282, + "loss": 4.4638, + "step": 2370 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016366782006920416, + "loss": 4.4811, + "step": 2380 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016435986159169552, + "loss": 4.5259, + "step": 2390 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016505190311418686, + "loss": 4.4388, + "step": 2400 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016574394463667822, + "loss": 4.454, + "step": 2410 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016643598615916956, + "loss": 4.3943, + "step": 2420 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016712802768166092, + "loss": 4.5017, + "step": 2430 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016782006920415226, + "loss": 4.4687, + "step": 2440 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016851211072664362, + "loss": 4.4336, + "step": 2450 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016920415224913496, + "loss": 4.4298, + "step": 2460 + }, + { + "epoch": 0.04, + "learning_rate": 0.00016989619377162632, + "loss": 4.2917, + "step": 2470 + }, + { + "epoch": 0.04, + "learning_rate": 0.00017058823529411766, + "loss": 4.4265, + "step": 2480 + }, + { + "epoch": 0.04, + "learning_rate": 0.000171280276816609, + "loss": 4.3893, + "step": 2490 + }, + { + "epoch": 0.04, + "learning_rate": 0.00017197231833910036, + "loss": 4.3082, + "step": 2500 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001726643598615917, + "loss": 4.2956, + "step": 2510 + }, + { + "epoch": 0.04, + "learning_rate": 0.00017335640138408303, + "loss": 4.2683, + "step": 2520 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001740484429065744, + "loss": 4.3174, + "step": 2530 + }, + { + "epoch": 0.04, + "learning_rate": 0.00017474048442906573, + "loss": 4.2942, + "step": 2540 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001754325259515571, + "loss": 4.2764, + "step": 2550 + }, + { + "epoch": 0.04, + "learning_rate": 0.00017612456747404843, + "loss": 4.2629, + "step": 2560 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001768166089965398, + "loss": 4.3498, + "step": 2570 + }, + { + "epoch": 0.04, + "learning_rate": 0.00017750865051903116, + "loss": 4.2502, + "step": 2580 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001782006920415225, + "loss": 4.2069, + "step": 2590 + }, + { + "epoch": 0.04, + "learning_rate": 0.00017882352941176472, + "loss": 4.2616, + "step": 2600 + }, + { + "epoch": 0.05, + "learning_rate": 0.00017951557093425605, + "loss": 4.3095, + "step": 2610 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018020761245674742, + "loss": 4.211, + "step": 2620 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018089965397923875, + "loss": 4.1715, + "step": 2630 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018159169550173012, + "loss": 4.1979, + "step": 2640 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018228373702422145, + "loss": 4.2505, + "step": 2650 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018297577854671282, + "loss": 4.2162, + "step": 2660 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018366782006920416, + "loss": 4.2223, + "step": 2670 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018435986159169552, + "loss": 4.2917, + "step": 2680 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018505190311418686, + "loss": 4.1327, + "step": 2690 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018574394463667822, + "loss": 4.2182, + "step": 2700 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018643598615916956, + "loss": 4.1291, + "step": 2710 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018712802768166092, + "loss": 4.1336, + "step": 2720 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018782006920415226, + "loss": 4.1628, + "step": 2730 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018851211072664362, + "loss": 4.242, + "step": 2740 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018920415224913496, + "loss": 4.0264, + "step": 2750 + }, + { + "epoch": 0.05, + "learning_rate": 0.00018989619377162632, + "loss": 4.1376, + "step": 2760 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019058823529411766, + "loss": 4.1273, + "step": 2770 + }, + { + "epoch": 0.05, + "learning_rate": 0.000191280276816609, + "loss": 4.1551, + "step": 2780 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019197231833910036, + "loss": 4.1817, + "step": 2790 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001926643598615917, + "loss": 4.0894, + "step": 2800 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019335640138408306, + "loss": 4.0932, + "step": 2810 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001940484429065744, + "loss": 4.1004, + "step": 2820 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019474048442906576, + "loss": 4.1235, + "step": 2830 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001954325259515571, + "loss": 4.1434, + "step": 2840 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019612456747404846, + "loss": 4.0648, + "step": 2850 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001968166089965398, + "loss": 4.0025, + "step": 2860 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019750865051903116, + "loss": 4.019, + "step": 2870 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001982006920415225, + "loss": 4.0693, + "step": 2880 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019889273356401386, + "loss": 4.1096, + "step": 2890 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001995847750865052, + "loss": 4.103, + "step": 2900 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999999737957568, + "loss": 3.9842, + "step": 2910 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999996789980367, + "loss": 4.0666, + "step": 2920 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999990566473897, + "loss": 3.9075, + "step": 2930 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999981067440192, + "loss": 4.0124, + "step": 2940 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999968292882367, + "loss": 3.9673, + "step": 2950 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999952242804605, + "loss": 4.004, + "step": 2960 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999932917212163, + "loss": 3.9915, + "step": 2970 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999991031611137, + "loss": 3.9317, + "step": 2980 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999884439509633, + "loss": 3.973, + "step": 2990 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999855287415423, + "loss": 3.9258, + "step": 3000 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999822859838293, + "loss": 3.9318, + "step": 3010 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999787156788865, + "loss": 3.9232, + "step": 3020 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999748178278827, + "loss": 3.8838, + "step": 3030 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999705924320958, + "loss": 3.8711, + "step": 3040 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999660394929086, + "loss": 3.9406, + "step": 3050 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999961159011813, + "loss": 3.9712, + "step": 3060 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999559509904078, + "loss": 3.9403, + "step": 3070 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999504154303988, + "loss": 3.9109, + "step": 3080 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999944552333599, + "loss": 3.8515, + "step": 3090 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999383617019288, + "loss": 3.9066, + "step": 3100 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999318435374167, + "loss": 3.8686, + "step": 3110 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999249978421964, + "loss": 3.842, + "step": 3120 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999178246185117, + "loss": 3.918, + "step": 3130 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999910323868711, + "loss": 3.8543, + "step": 3140 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999024955952521, + "loss": 3.8933, + "step": 3150 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019998943398006987, + "loss": 3.8672, + "step": 3160 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019998858564877222, + "loss": 3.869, + "step": 3170 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998770456591017, + "loss": 3.896, + "step": 3180 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999867907317723, + "loss": 3.9441, + "step": 3190 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998584414665792, + "loss": 3.8285, + "step": 3200 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998486481087713, + "loss": 3.8605, + "step": 3210 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998385272475066, + "loss": 3.8374, + "step": 3220 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998280788861007, + "loss": 3.7837, + "step": 3230 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998173030279757, + "loss": 3.771, + "step": 3240 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998061996766615, + "loss": 3.7959, + "step": 3250 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997947688357948, + "loss": 3.7425, + "step": 3260 + }, + { + "epoch": 0.06, + "learning_rate": 0.000199978301050912, + "loss": 3.9077, + "step": 3270 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997709247004884, + "loss": 3.7723, + "step": 3280 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999758511413859, + "loss": 3.7547, + "step": 3290 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997457706532977, + "loss": 3.756, + "step": 3300 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997327024229774, + "loss": 3.7913, + "step": 3310 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997193067271794, + "loss": 3.7354, + "step": 3320 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997055835702907, + "loss": 3.7399, + "step": 3330 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996915329568068, + "loss": 3.6828, + "step": 3340 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996771548913303, + "loss": 3.6727, + "step": 3350 + }, + { + "epoch": 0.06, + "learning_rate": 0.000199966244937857, + "loss": 3.6893, + "step": 3360 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999647416423343, + "loss": 3.8087, + "step": 3370 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996320560305742, + "loss": 3.7271, + "step": 3380 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996163682052938, + "loss": 3.7227, + "step": 3390 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999600352952641, + "loss": 3.712, + "step": 3400 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019995840102778615, + "loss": 3.7924, + "step": 3410 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019995673401863084, + "loss": 3.8106, + "step": 3420 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019995503426834424, + "loss": 3.7323, + "step": 3430 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019995330177748305, + "loss": 3.6854, + "step": 3440 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019995153654661478, + "loss": 3.6432, + "step": 3450 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999497385763176, + "loss": 3.7028, + "step": 3460 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999479078671805, + "loss": 3.6964, + "step": 3470 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019994604441980315, + "loss": 3.809, + "step": 3480 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019994414823479583, + "loss": 3.7011, + "step": 3490 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019994221931277973, + "loss": 3.6711, + "step": 3500 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019994025765438664, + "loss": 3.6232, + "step": 3510 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019993826326025911, + "loss": 3.6765, + "step": 3520 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999362361310504, + "loss": 3.6333, + "step": 3530 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999341762674245, + "loss": 3.6249, + "step": 3540 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019993208367005615, + "loss": 3.6162, + "step": 3550 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019992995833963077, + "loss": 3.5603, + "step": 3560 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019992780027684452, + "loss": 3.6266, + "step": 3570 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999256094824043, + "loss": 3.6418, + "step": 3580 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019992338595702767, + "loss": 3.6158, + "step": 3590 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019992112970144298, + "loss": 3.6985, + "step": 3600 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019991884071638927, + "loss": 3.6393, + "step": 3610 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999165190026163, + "loss": 3.5601, + "step": 3620 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999141645608846, + "loss": 3.5953, + "step": 3630 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019991177739196527, + "loss": 3.6503, + "step": 3640 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019990935749664032, + "loss": 3.5925, + "step": 3650 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019990690487570236, + "loss": 3.6135, + "step": 3660 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999044195299548, + "loss": 3.5557, + "step": 3670 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019990190146021165, + "loss": 3.5932, + "step": 3680 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019989935066729774, + "loss": 3.6551, + "step": 3690 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019989676715204862, + "loss": 3.5287, + "step": 3700 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001998941509153105, + "loss": 3.5807, + "step": 3710 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019989150195794033, + "loss": 3.6761, + "step": 3720 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001998888202808058, + "loss": 3.4438, + "step": 3730 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019988610588478532, + "loss": 3.5777, + "step": 3740 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019988335877076793, + "loss": 3.6672, + "step": 3750 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019988057893965356, + "loss": 3.6149, + "step": 3760 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019987776639235265, + "loss": 3.5857, + "step": 3770 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019987492112978652, + "loss": 3.5003, + "step": 3780 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998720431528871, + "loss": 3.6546, + "step": 3790 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019986913246259716, + "loss": 3.5458, + "step": 3800 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019986618905987001, + "loss": 3.6004, + "step": 3810 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998632129456698, + "loss": 3.6526, + "step": 3820 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019986020412097143, + "loss": 3.5539, + "step": 3830 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019985716258676036, + "loss": 3.5109, + "step": 3840 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998540883440329, + "loss": 3.5406, + "step": 3850 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019985098139379601, + "loss": 3.5728, + "step": 3860 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998478417370674, + "loss": 3.5154, + "step": 3870 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019984466937487545, + "loss": 3.6113, + "step": 3880 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998414643082593, + "loss": 3.5626, + "step": 3890 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019983822653826878, + "loss": 3.5237, + "step": 3900 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998349560659644, + "loss": 3.4899, + "step": 3910 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019983165289241746, + "loss": 3.49, + "step": 3920 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019982831701870988, + "loss": 3.5472, + "step": 3930 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019982494844593434, + "loss": 3.5512, + "step": 3940 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019982154717519427, + "loss": 3.531, + "step": 3950 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001998181132076037, + "loss": 3.4923, + "step": 3960 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019981464654428752, + "loss": 3.5375, + "step": 3970 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019981114718638118, + "loss": 3.4776, + "step": 3980 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019980761513503092, + "loss": 3.5185, + "step": 3990 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019980405039139368, + "loss": 3.5635, + "step": 4000 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019980045295663713, + "loss": 3.5751, + "step": 4010 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019979682283193954, + "loss": 3.5863, + "step": 4020 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019979316001849005, + "loss": 3.4812, + "step": 4030 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019978946451748838, + "loss": 3.457, + "step": 4040 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019978573633014506, + "loss": 3.3768, + "step": 4050 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019978197545768119, + "loss": 3.4967, + "step": 4060 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001997781819013287, + "loss": 3.4793, + "step": 4070 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019977435566233016, + "loss": 3.3565, + "step": 4080 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019977049674193892, + "loss": 3.4976, + "step": 4090 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019976660514141892, + "loss": 3.4365, + "step": 4100 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001997626808620449, + "loss": 3.5347, + "step": 4110 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001997587239051022, + "loss": 3.5448, + "step": 4120 + }, + { + "epoch": 0.07, + "learning_rate": 0.000199754734271887, + "loss": 3.4276, + "step": 4130 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019975071196370617, + "loss": 3.4771, + "step": 4140 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019974665698187713, + "loss": 3.5009, + "step": 4150 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019974256932772815, + "loss": 3.3931, + "step": 4160 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019973844900259813, + "loss": 3.3643, + "step": 4170 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019973429600783667, + "loss": 3.4551, + "step": 4180 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001997301103448042, + "loss": 3.3744, + "step": 4190 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019972589201487165, + "loss": 3.4875, + "step": 4200 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019972164101942074, + "loss": 3.4148, + "step": 4210 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019971735735984397, + "loss": 3.4092, + "step": 4220 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019971304103754442, + "loss": 3.4911, + "step": 4230 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001997086920539359, + "loss": 3.5266, + "step": 4240 + }, + { + "epoch": 0.07, + "learning_rate": 0.000199704310410443, + "loss": 3.4856, + "step": 4250 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019969989610850087, + "loss": 3.3837, + "step": 4260 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019969544914955542, + "loss": 3.4171, + "step": 4270 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019969096953506335, + "loss": 3.3912, + "step": 4280 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001996864572664919, + "loss": 3.4548, + "step": 4290 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019968191234531909, + "loss": 3.4519, + "step": 4300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019967733477303364, + "loss": 3.5497, + "step": 4310 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001996727245511349, + "loss": 3.4923, + "step": 4320 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019966808168113304, + "loss": 3.4534, + "step": 4330 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001996634061645488, + "loss": 3.4985, + "step": 4340 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019965869800291362, + "loss": 3.4667, + "step": 4350 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019965395719776977, + "loss": 3.4741, + "step": 4360 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019964918375067005, + "loss": 3.3865, + "step": 4370 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019964437766317804, + "loss": 3.4611, + "step": 4380 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019963953893686794, + "loss": 3.4155, + "step": 4390 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019963466757332476, + "loss": 3.4273, + "step": 4400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001996297635741441, + "loss": 3.4075, + "step": 4410 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019962482694093226, + "loss": 3.3705, + "step": 4420 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019961985767530627, + "loss": 3.4634, + "step": 4430 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019961485577889382, + "loss": 3.4207, + "step": 4440 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019960982125333333, + "loss": 3.3596, + "step": 4450 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019960475410027382, + "loss": 3.3294, + "step": 4460 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019959965432137506, + "loss": 3.4237, + "step": 4470 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019959452191830752, + "loss": 3.381, + "step": 4480 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019958935689275235, + "loss": 3.4286, + "step": 4490 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019958415924640135, + "loss": 3.3818, + "step": 4500 + }, + { + "epoch": 0.08, + "learning_rate": 0.000199578928980957, + "loss": 3.3596, + "step": 4510 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019957366609813252, + "loss": 3.3753, + "step": 4520 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019956837059965178, + "loss": 3.4069, + "step": 4530 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001995630424872493, + "loss": 3.3797, + "step": 4540 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001995576817626704, + "loss": 3.4151, + "step": 4550 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019955228842767096, + "loss": 3.3194, + "step": 4560 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019954686248401752, + "loss": 3.4055, + "step": 4570 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019954140393348745, + "loss": 3.4778, + "step": 4580 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001995359127778687, + "loss": 3.4079, + "step": 4590 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019953038901895988, + "loss": 3.3722, + "step": 4600 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019952483265857034, + "loss": 3.2981, + "step": 4610 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019951924369852008, + "loss": 3.4218, + "step": 4620 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019951362214063977, + "loss": 3.3752, + "step": 4630 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001995079679867708, + "loss": 3.3465, + "step": 4640 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019950228123876516, + "loss": 3.338, + "step": 4650 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019949656189848555, + "loss": 3.4215, + "step": 4660 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019949080996780545, + "loss": 3.3762, + "step": 4670 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019948502544860883, + "loss": 3.3842, + "step": 4680 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019947920834279047, + "loss": 3.2935, + "step": 4690 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019947335865225574, + "loss": 3.3565, + "step": 4700 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001994674763789208, + "loss": 3.3427, + "step": 4710 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019946156152471231, + "loss": 3.3501, + "step": 4720 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001994556140915678, + "loss": 3.3886, + "step": 4730 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019944963408143526, + "loss": 3.4035, + "step": 4740 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019944362149627355, + "loss": 3.3369, + "step": 4750 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019943757633805208, + "loss": 3.3126, + "step": 4760 + }, + { + "epoch": 0.08, + "learning_rate": 0.000199431498608751, + "loss": 3.3814, + "step": 4770 + }, + { + "epoch": 0.08, + "learning_rate": 0.000199425388310361, + "loss": 3.3848, + "step": 4780 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019941924544488357, + "loss": 3.3691, + "step": 4790 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019941307001433085, + "loss": 3.3897, + "step": 4800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001994068620207256, + "loss": 3.3267, + "step": 4810 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019940062146610123, + "loss": 3.3391, + "step": 4820 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019939434835250194, + "loss": 3.2958, + "step": 4830 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019938804268198243, + "loss": 3.3767, + "step": 4840 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019938170445660817, + "loss": 3.2673, + "step": 4850 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019937533367845526, + "loss": 3.2909, + "step": 4860 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019936893034961045, + "loss": 3.377, + "step": 4870 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019936249447217126, + "loss": 3.3764, + "step": 4880 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019935602604824565, + "loss": 3.3943, + "step": 4890 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019934952507995245, + "loss": 3.4182, + "step": 4900 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019934299156942106, + "loss": 3.2562, + "step": 4910 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019933642551879155, + "loss": 3.2421, + "step": 4920 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001993298269302146, + "loss": 3.2552, + "step": 4930 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019932319580585173, + "loss": 3.3118, + "step": 4940 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019931653214787484, + "loss": 3.3203, + "step": 4950 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001993098359584667, + "loss": 3.2543, + "step": 4960 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001993031072398207, + "loss": 3.3056, + "step": 4970 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019929634599414074, + "loss": 3.3065, + "step": 4980 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019928955222364158, + "loss": 3.3088, + "step": 4990 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019928272593054854, + "loss": 3.2899, + "step": 5000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019927586711709756, + "loss": 3.2867, + "step": 5010 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019926897578553528, + "loss": 3.3519, + "step": 5020 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019926205193811894, + "loss": 3.3988, + "step": 5030 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019925509557711652, + "loss": 3.3081, + "step": 5040 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019924810670480657, + "loss": 3.3955, + "step": 5050 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019924108532347833, + "loss": 3.2414, + "step": 5060 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019923403143543166, + "loss": 3.3343, + "step": 5070 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001992269450429771, + "loss": 3.2954, + "step": 5080 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019921982614843579, + "loss": 3.3105, + "step": 5090 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001992126747541396, + "loss": 3.224, + "step": 5100 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019920549086243093, + "loss": 3.3824, + "step": 5110 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019919827447566291, + "loss": 3.2393, + "step": 5120 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019919102559619927, + "loss": 3.3379, + "step": 5130 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019918374422641443, + "loss": 3.275, + "step": 5140 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019917643036869345, + "loss": 3.254, + "step": 5150 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019916908402543192, + "loss": 3.3366, + "step": 5160 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019916170519903626, + "loss": 3.2938, + "step": 5170 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019915429389192335, + "loss": 3.2917, + "step": 5180 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019914685010652083, + "loss": 3.2354, + "step": 5190 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001991393738452669, + "loss": 3.2558, + "step": 5200 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019913186511061047, + "loss": 3.3324, + "step": 5210 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019912432390501103, + "loss": 3.3408, + "step": 5220 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001991167502309387, + "loss": 3.229, + "step": 5230 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001991091440908743, + "loss": 3.2444, + "step": 5240 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001991015054873092, + "loss": 3.2503, + "step": 5250 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001990938344227455, + "loss": 3.2946, + "step": 5260 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019908613089969585, + "loss": 3.2723, + "step": 5270 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019907839492068358, + "loss": 3.3381, + "step": 5280 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001990706264882426, + "loss": 3.3007, + "step": 5290 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019906282560491753, + "loss": 3.2017, + "step": 5300 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019905499227326355, + "loss": 3.2486, + "step": 5310 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019904712649584645, + "loss": 3.2933, + "step": 5320 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019903922827524276, + "loss": 3.1922, + "step": 5330 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019903129761403955, + "loss": 3.2654, + "step": 5340 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019902333451483447, + "loss": 3.256, + "step": 5350 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019901533898023595, + "loss": 3.3318, + "step": 5360 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019900731101286288, + "loss": 3.2453, + "step": 5370 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019899925061534485, + "loss": 3.2682, + "step": 5380 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019899115779032213, + "loss": 3.2109, + "step": 5390 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001989830325404455, + "loss": 3.2141, + "step": 5400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001989748748683764, + "loss": 3.2428, + "step": 5410 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019896668477678694, + "loss": 3.1986, + "step": 5420 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001989584622683598, + "loss": 3.3107, + "step": 5430 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019895020734578826, + "loss": 3.1527, + "step": 5440 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019894192001177624, + "loss": 3.1769, + "step": 5450 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001989336002690383, + "loss": 3.2245, + "step": 5460 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001989252481202996, + "loss": 3.2321, + "step": 5470 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019891686356829594, + "loss": 3.1875, + "step": 5480 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019890844661577366, + "loss": 3.2488, + "step": 5490 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019889999726548975, + "loss": 3.1772, + "step": 5500 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001988915155202119, + "loss": 3.2611, + "step": 5510 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019888300138271823, + "loss": 3.2415, + "step": 5520 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019887445485579762, + "loss": 3.3145, + "step": 5530 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019886587594224953, + "loss": 3.2168, + "step": 5540 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019885726464488398, + "loss": 3.2943, + "step": 5550 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019884862096652162, + "loss": 3.2841, + "step": 5560 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019883994490999373, + "loss": 3.2148, + "step": 5570 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019883123647814217, + "loss": 3.1774, + "step": 5580 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019882249567381943, + "loss": 3.0887, + "step": 5590 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019881372249988858, + "loss": 3.2588, + "step": 5600 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001988049169592233, + "loss": 3.2019, + "step": 5610 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019879607905470785, + "loss": 3.1482, + "step": 5620 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019878720878923717, + "loss": 3.1703, + "step": 5630 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019877830616571665, + "loss": 3.2301, + "step": 5640 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019876937118706246, + "loss": 3.2534, + "step": 5650 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001987604038562012, + "loss": 3.2172, + "step": 5660 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001987514041760702, + "loss": 3.2074, + "step": 5670 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019874237214961735, + "loss": 3.3045, + "step": 5680 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019873330777980109, + "loss": 3.1463, + "step": 5690 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019872421106959044, + "loss": 3.1664, + "step": 5700 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019871508202196512, + "loss": 3.2653, + "step": 5710 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019870592063991535, + "loss": 3.2599, + "step": 5720 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019869672692644197, + "loss": 3.304, + "step": 5730 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019868750088455642, + "loss": 3.2631, + "step": 5740 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001986782425172807, + "loss": 3.252, + "step": 5750 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019866895182764745, + "loss": 3.223, + "step": 5760 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019865962881869978, + "loss": 3.2239, + "step": 5770 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019865027349349158, + "loss": 3.1763, + "step": 5780 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019864088585508716, + "loss": 3.2352, + "step": 5790 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019863146590656146, + "loss": 3.1935, + "step": 5800 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019862296033028464, + "loss": 3.1874, + "step": 5810 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019861347900103788, + "loss": 3.1787, + "step": 5820 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001986039653706471, + "loss": 3.2122, + "step": 5830 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019859441944222844, + "loss": 3.2559, + "step": 5840 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019858484121890876, + "loss": 3.23, + "step": 5850 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001985752307038254, + "loss": 3.1969, + "step": 5860 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019856558790012628, + "loss": 3.2084, + "step": 5870 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019855591281097, + "loss": 3.2568, + "step": 5880 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019854620543952565, + "loss": 3.1049, + "step": 5890 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019853646578897285, + "loss": 3.2152, + "step": 5900 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019852669386250188, + "loss": 3.1283, + "step": 5910 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019851688966331362, + "loss": 3.2262, + "step": 5920 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001985070531946194, + "loss": 3.1105, + "step": 5930 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001984971844596412, + "loss": 3.1419, + "step": 5940 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019848728346161156, + "loss": 3.2079, + "step": 5950 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001984773502037736, + "loss": 3.1263, + "step": 5960 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019846738468938099, + "loss": 3.1069, + "step": 5970 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001984573869216979, + "loss": 3.1432, + "step": 5980 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019844735690399917, + "loss": 3.1582, + "step": 5990 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019843729463957019, + "loss": 3.1746, + "step": 6000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019842720013170685, + "loss": 3.2542, + "step": 6010 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019841707338371566, + "loss": 3.2188, + "step": 6020 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019840691439891367, + "loss": 3.2271, + "step": 6030 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019839672318062846, + "loss": 3.1342, + "step": 6040 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019838649973219822, + "loss": 3.2106, + "step": 6050 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019837624405697163, + "loss": 3.1548, + "step": 6060 + }, + { + "epoch": 0.11, + "learning_rate": 0.000198365956158308, + "loss": 3.1651, + "step": 6070 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019835563603957717, + "loss": 3.2291, + "step": 6080 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001983452837041595, + "loss": 3.1544, + "step": 6090 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019833489915544597, + "loss": 3.1993, + "step": 6100 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019832448239683802, + "loss": 3.1555, + "step": 6110 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001983140334317477, + "loss": 3.1418, + "step": 6120 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019830355226359766, + "loss": 3.1266, + "step": 6130 + }, + { + "epoch": 0.11, + "learning_rate": 0.000198293038895821, + "loss": 3.2538, + "step": 6140 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019828249333186135, + "loss": 3.1589, + "step": 6150 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019827191557517302, + "loss": 3.1727, + "step": 6160 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019826130562922072, + "loss": 3.2665, + "step": 6170 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019825066349747985, + "loss": 3.1745, + "step": 6180 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001982399891834362, + "loss": 3.1506, + "step": 6190 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019822928269058623, + "loss": 3.0515, + "step": 6200 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019821854402243686, + "loss": 3.2564, + "step": 6210 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001982077731825055, + "loss": 3.1443, + "step": 6220 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001981969701743203, + "loss": 3.183, + "step": 6230 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019818613500141978, + "loss": 3.1295, + "step": 6240 + }, + { + "epoch": 0.11, + "learning_rate": 0.000198175267667353, + "loss": 3.1562, + "step": 6250 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019816436817567955, + "loss": 3.1277, + "step": 6260 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001981534365299697, + "loss": 3.2041, + "step": 6270 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019814247273380406, + "loss": 3.1994, + "step": 6280 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001981314767907739, + "loss": 3.1519, + "step": 6290 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019812044870448094, + "loss": 3.1597, + "step": 6300 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001981093884785375, + "loss": 3.2095, + "step": 6310 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019809829611656637, + "loss": 3.1634, + "step": 6320 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019808717162220086, + "loss": 3.1504, + "step": 6330 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001980760149990849, + "loss": 3.1078, + "step": 6340 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019806482625087278, + "loss": 3.1072, + "step": 6350 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001980536053812295, + "loss": 3.0916, + "step": 6360 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019804235239383045, + "loss": 3.1548, + "step": 6370 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019803106729236153, + "loss": 3.0599, + "step": 6380 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019801975008051933, + "loss": 3.1896, + "step": 6390 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019800840076201074, + "loss": 3.161, + "step": 6400 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019799701934055327, + "loss": 3.1419, + "step": 6410 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019798560581987503, + "loss": 3.0797, + "step": 6420 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019797416020371446, + "loss": 3.183, + "step": 6430 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001979626824958206, + "loss": 3.179, + "step": 6440 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019795117269995306, + "loss": 3.1881, + "step": 6450 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019793963081988185, + "loss": 3.144, + "step": 6460 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019792805685938763, + "loss": 3.0872, + "step": 6470 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019791645082226143, + "loss": 3.0941, + "step": 6480 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019790481271230484, + "loss": 3.1313, + "step": 6490 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019789314253333, + "loss": 3.11, + "step": 6500 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019788144028915947, + "loss": 3.1541, + "step": 6510 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019786970598362636, + "loss": 3.1038, + "step": 6520 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001978579396205743, + "loss": 3.1532, + "step": 6530 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001978461412038574, + "loss": 3.0607, + "step": 6540 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019783431073734021, + "loss": 3.1238, + "step": 6550 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001978224482248979, + "loss": 3.2191, + "step": 6560 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019781055367041608, + "loss": 3.1481, + "step": 6570 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019779862707779077, + "loss": 3.1215, + "step": 6580 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019778666845092865, + "loss": 3.1182, + "step": 6590 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019777467779374675, + "loss": 3.1303, + "step": 6600 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019776265511017263, + "loss": 3.1505, + "step": 6610 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001977506004041444, + "loss": 3.127, + "step": 6620 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019773851367961066, + "loss": 3.1541, + "step": 6630 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019772639494053032, + "loss": 3.1242, + "step": 6640 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019771424419087305, + "loss": 3.2074, + "step": 6650 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019770206143461874, + "loss": 3.0326, + "step": 6660 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019768984667575796, + "loss": 3.0881, + "step": 6670 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001976775999182917, + "loss": 3.1403, + "step": 6680 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019766532116623139, + "loss": 3.1631, + "step": 6690 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019765301042359897, + "loss": 3.1143, + "step": 6700 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001976406676944269, + "loss": 3.0861, + "step": 6710 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019762829298275802, + "loss": 3.0551, + "step": 6720 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019761588629264577, + "loss": 3.1081, + "step": 6730 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019760344762815397, + "loss": 3.1582, + "step": 6740 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001975909769933569, + "loss": 3.1374, + "step": 6750 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019757847439233944, + "loss": 3.1343, + "step": 6760 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001975659398291968, + "loss": 3.1131, + "step": 6770 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001975533733080347, + "loss": 3.0765, + "step": 6780 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001975407748329694, + "loss": 3.0863, + "step": 6790 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001975281444081275, + "loss": 3.0742, + "step": 6800 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019751548203764618, + "loss": 3.0474, + "step": 6810 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019750278772567303, + "loss": 3.1571, + "step": 6820 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001974900614763661, + "loss": 3.1361, + "step": 6830 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001974773032938939, + "loss": 3.225, + "step": 6840 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001974645131824355, + "loss": 3.1431, + "step": 6850 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019745169114618018, + "loss": 3.1303, + "step": 6860 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019743883718932796, + "loss": 3.1156, + "step": 6870 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019742595131608915, + "loss": 3.0963, + "step": 6880 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001974130335306846, + "loss": 3.0889, + "step": 6890 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001974000838373455, + "loss": 3.1806, + "step": 6900 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001973871022403136, + "loss": 3.1749, + "step": 6910 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019737408874384108, + "loss": 3.1413, + "step": 6920 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001973610433521905, + "loss": 3.1434, + "step": 6930 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019734796606963494, + "loss": 3.1044, + "step": 6940 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019733485690045792, + "loss": 3.1476, + "step": 6950 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019732171584895341, + "loss": 3.0641, + "step": 6960 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019730854291942574, + "loss": 3.1433, + "step": 6970 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019729533811618975, + "loss": 3.1025, + "step": 6980 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019728210144357076, + "loss": 3.0796, + "step": 6990 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019726883290590444, + "loss": 3.1143, + "step": 7000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019725553250753697, + "loss": 3.1092, + "step": 7010 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001972422002528249, + "loss": 3.152, + "step": 7020 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001972288361461353, + "loss": 3.0712, + "step": 7030 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019721544019184558, + "loss": 3.1106, + "step": 7040 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019720201239434366, + "loss": 3.0513, + "step": 7050 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019718855275802782, + "loss": 3.14, + "step": 7060 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019717506128730686, + "loss": 3.1225, + "step": 7070 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019716153798659985, + "loss": 3.0405, + "step": 7080 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019714798286033652, + "loss": 3.0569, + "step": 7090 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019713439591295678, + "loss": 3.1378, + "step": 7100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001971207771489111, + "loss": 3.1517, + "step": 7110 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019710712657266042, + "loss": 3.1643, + "step": 7120 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019709344418867596, + "loss": 3.0738, + "step": 7130 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019707973000143944, + "loss": 3.0676, + "step": 7140 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019706598401544298, + "loss": 3.1118, + "step": 7150 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019705220623518913, + "loss": 3.233, + "step": 7160 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019703839666519084, + "loss": 3.0361, + "step": 7170 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019702455530997147, + "loss": 3.1511, + "step": 7180 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001970106821740648, + "loss": 3.0971, + "step": 7190 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019699677726201507, + "loss": 3.0902, + "step": 7200 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019698284057837677, + "loss": 3.0847, + "step": 7210 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019696887212771496, + "loss": 3.0655, + "step": 7220 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019695487191460508, + "loss": 3.1484, + "step": 7230 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001969408399436329, + "loss": 3.1138, + "step": 7240 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019692677621939466, + "loss": 3.1026, + "step": 7250 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019691268074649695, + "loss": 3.0968, + "step": 7260 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019689855352955678, + "loss": 3.0486, + "step": 7270 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001968843945732016, + "loss": 3.1301, + "step": 7280 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019687020388206922, + "loss": 3.0379, + "step": 7290 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019685598146080774, + "loss": 3.1262, + "step": 7300 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019684172731407592, + "loss": 2.9428, + "step": 7310 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019682744144654263, + "loss": 3.1398, + "step": 7320 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001968131238628873, + "loss": 3.0266, + "step": 7330 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019679877456779967, + "loss": 3.1056, + "step": 7340 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019678439356597994, + "loss": 3.1434, + "step": 7350 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001967699808621386, + "loss": 3.0334, + "step": 7360 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001967555364609966, + "loss": 3.1146, + "step": 7370 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019674106036728523, + "loss": 3.0151, + "step": 7380 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019672655258574622, + "loss": 3.0994, + "step": 7390 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019671201312113158, + "loss": 3.0733, + "step": 7400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001966974419782038, + "loss": 2.9758, + "step": 7410 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019668283916173568, + "loss": 2.9764, + "step": 7420 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019666820467651044, + "loss": 3.1575, + "step": 7430 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019665353852732162, + "loss": 3.0078, + "step": 7440 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001966388407189732, + "loss": 3.1733, + "step": 7450 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019662411125627945, + "loss": 3.0998, + "step": 7460 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019660935014406504, + "loss": 3.0089, + "step": 7470 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001965945573871651, + "loss": 3.0505, + "step": 7480 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019657973299042495, + "loss": 3.0794, + "step": 7490 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001965648769587004, + "loss": 3.0451, + "step": 7500 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001965499892968576, + "loss": 3.0551, + "step": 7510 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019653507000977306, + "loss": 3.0634, + "step": 7520 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001965201191023336, + "loss": 3.0605, + "step": 7530 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019650513657943646, + "loss": 3.0078, + "step": 7540 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019649012244598918, + "loss": 3.0178, + "step": 7550 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019647507670690972, + "loss": 3.0934, + "step": 7560 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001964599993671264, + "loss": 3.0159, + "step": 7570 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001964448904315777, + "loss": 3.1005, + "step": 7580 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019642974990521274, + "loss": 3.0167, + "step": 7590 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001964145777929908, + "loss": 3.0454, + "step": 7600 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019639937409988157, + "loss": 3.0773, + "step": 7610 + }, + { + "epoch": 0.13, + "learning_rate": 0.000196384138830865, + "loss": 3.1258, + "step": 7620 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019636887199093152, + "loss": 3.0474, + "step": 7630 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001963535735850818, + "loss": 3.1178, + "step": 7640 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019633824361832688, + "loss": 3.0835, + "step": 7650 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019632288209568815, + "loss": 3.0319, + "step": 7660 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019630748902219728, + "loss": 3.0051, + "step": 7670 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001962920644028964, + "loss": 3.0526, + "step": 7680 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001962766082428378, + "loss": 3.1007, + "step": 7690 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019626112054708426, + "loss": 2.9811, + "step": 7700 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019624560132070878, + "loss": 3.0545, + "step": 7710 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019623005056879477, + "loss": 3.0022, + "step": 7720 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019621446829643584, + "loss": 3.032, + "step": 7730 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019619885450873612, + "loss": 2.9852, + "step": 7740 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001961832092108099, + "loss": 3.0601, + "step": 7750 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019616753240778186, + "loss": 3.0654, + "step": 7760 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019615182410478695, + "loss": 3.0509, + "step": 7770 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019613608430697047, + "loss": 3.059, + "step": 7780 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001961203130194881, + "loss": 3.143, + "step": 7790 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019610451024750576, + "loss": 3.0043, + "step": 7800 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019608867599619963, + "loss": 3.0687, + "step": 7810 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019607281027075635, + "loss": 3.0853, + "step": 7820 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019605691307637274, + "loss": 3.0864, + "step": 7830 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019604098441825595, + "loss": 3.0374, + "step": 7840 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001960250243016235, + "loss": 3.1481, + "step": 7850 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001960090327317032, + "loss": 3.0954, + "step": 7860 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019599300971373306, + "loss": 3.1255, + "step": 7870 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019597695525296155, + "loss": 3.044, + "step": 7880 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001959608693546473, + "loss": 3.0297, + "step": 7890 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019594475202405932, + "loss": 3.122, + "step": 7900 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001959286032664769, + "loss": 3.0691, + "step": 7910 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019591242308718957, + "loss": 3.1496, + "step": 7920 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019589621149149723, + "loss": 2.9686, + "step": 7930 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019587996848471003, + "loss": 3.0222, + "step": 7940 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019586369407214842, + "loss": 3.0509, + "step": 7950 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019584738825914315, + "loss": 3.0222, + "step": 7960 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019583105105103521, + "loss": 3.0021, + "step": 7970 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001958146824531759, + "loss": 3.0103, + "step": 7980 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019579828247092684, + "loss": 3.046, + "step": 7990 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019578185110965984, + "loss": 3.0718, + "step": 8000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001957653883747571, + "loss": 3.0021, + "step": 8010 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019574889427161104, + "loss": 3.095, + "step": 8020 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019573236880562425, + "loss": 3.0722, + "step": 8030 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019571581198220983, + "loss": 3.0323, + "step": 8040 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019569922380679094, + "loss": 2.9698, + "step": 8050 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001956826042848011, + "loss": 2.9884, + "step": 8060 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019566595342168408, + "loss": 2.9934, + "step": 8070 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019564927122289397, + "loss": 3.0421, + "step": 8080 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019563255769389503, + "loss": 2.9711, + "step": 8090 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001956158128401618, + "loss": 3.1048, + "step": 8100 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019559903666717916, + "loss": 3.0151, + "step": 8110 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019558222918044215, + "loss": 3.0414, + "step": 8120 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019556539038545617, + "loss": 3.0675, + "step": 8130 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001955485202877368, + "loss": 2.9681, + "step": 8140 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001955316188928099, + "loss": 3.075, + "step": 8150 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001955146862062115, + "loss": 3.0497, + "step": 8160 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019549772223348804, + "loss": 3.0303, + "step": 8170 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019548072698019606, + "loss": 2.9958, + "step": 8180 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019546370045190245, + "loss": 3.0126, + "step": 8190 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001954466426541843, + "loss": 3.0382, + "step": 8200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001954295535926289, + "loss": 3.061, + "step": 8210 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001954124332728339, + "loss": 3.0589, + "step": 8220 + }, + { + "epoch": 0.14, + "learning_rate": 0.000195395281700407, + "loss": 3.0847, + "step": 8230 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019537809888096636, + "loss": 2.9358, + "step": 8240 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001953608848201402, + "loss": 3.1177, + "step": 8250 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019534363952356708, + "loss": 3.0948, + "step": 8260 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001953263629968957, + "loss": 2.9618, + "step": 8270 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001953090552457851, + "loss": 3.0322, + "step": 8280 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019529171627590442, + "loss": 2.9583, + "step": 8290 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019527434609293312, + "loss": 2.9391, + "step": 8300 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019525694470256085, + "loss": 3.0146, + "step": 8310 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019523951211048755, + "loss": 3.0339, + "step": 8320 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019522204832242323, + "loss": 2.9596, + "step": 8330 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019520455334408825, + "loss": 3.057, + "step": 8340 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019518702718121316, + "loss": 2.9783, + "step": 8350 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019516946983953864, + "loss": 3.0599, + "step": 8360 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001951518813248157, + "loss": 2.9684, + "step": 8370 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019513426164280553, + "loss": 2.9378, + "step": 8380 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001951166107992795, + "loss": 2.9944, + "step": 8390 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019509892880001915, + "loss": 3.0276, + "step": 8400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001950812156508163, + "loss": 3.0099, + "step": 8410 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019506347135747298, + "loss": 3.0482, + "step": 8420 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019504569592580136, + "loss": 3.0347, + "step": 8430 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001950278893616238, + "loss": 3.0826, + "step": 8440 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019501005167077296, + "loss": 3.003, + "step": 8450 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001949921828590916, + "loss": 3.0255, + "step": 8460 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019497428293243268, + "loss": 3.0936, + "step": 8470 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019495635189665942, + "loss": 3.0192, + "step": 8480 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019493838975764515, + "loss": 3.0006, + "step": 8490 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019492039652127345, + "loss": 3.0142, + "step": 8500 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019490237219343804, + "loss": 2.9863, + "step": 8510 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019488431678004283, + "loss": 3.1561, + "step": 8520 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019486623028700195, + "loss": 3.0416, + "step": 8530 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001948481127202397, + "loss": 2.9737, + "step": 8540 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001948299640856905, + "loss": 3.0237, + "step": 8550 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019481178438929904, + "loss": 2.9978, + "step": 8560 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001947935736370201, + "loss": 3.0166, + "step": 8570 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019477533183481866, + "loss": 3.0747, + "step": 8580 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001947570589886699, + "loss": 3.0458, + "step": 8590 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019473875510455914, + "loss": 3.0557, + "step": 8600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001947204201884819, + "loss": 2.9801, + "step": 8610 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019470205424644376, + "loss": 2.9823, + "step": 8620 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019468365728446063, + "loss": 3.0997, + "step": 8630 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019466522930855842, + "loss": 3.0054, + "step": 8640 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019464677032477332, + "loss": 3.0284, + "step": 8650 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001946282803391516, + "loss": 3.02, + "step": 8660 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001946097593577497, + "loss": 2.989, + "step": 8670 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019459120738663427, + "loss": 2.9651, + "step": 8680 + }, + { + "epoch": 0.15, + "learning_rate": 0.000194572624431882, + "loss": 3.0457, + "step": 8690 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019455401049957985, + "loss": 2.9985, + "step": 8700 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019453536559582485, + "loss": 3.0568, + "step": 8710 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019451668972672419, + "loss": 3.0005, + "step": 8720 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019449798289839523, + "loss": 2.9818, + "step": 8730 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001944792451169654, + "loss": 2.987, + "step": 8740 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019446047638857234, + "loss": 3.0396, + "step": 8750 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019444167671936382, + "loss": 3.0405, + "step": 8760 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019442284611549774, + "loss": 3.0238, + "step": 8770 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019440398458314207, + "loss": 3.0507, + "step": 8780 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019438509212847498, + "loss": 2.8835, + "step": 8790 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001943661687576848, + "loss": 2.947, + "step": 8800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001943472144769699, + "loss": 2.9981, + "step": 8810 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019432822929253875, + "loss": 2.9992, + "step": 8820 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019430921321061012, + "loss": 2.9892, + "step": 8830 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001942901662374127, + "loss": 2.9797, + "step": 8840 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019427108837918546, + "loss": 2.9886, + "step": 8850 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019425197964217735, + "loss": 3.0563, + "step": 8860 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001942328400326475, + "loss": 2.9298, + "step": 8870 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019421366955686518, + "loss": 2.9856, + "step": 8880 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001941944682211097, + "loss": 3.034, + "step": 8890 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019417523603167052, + "loss": 3.0259, + "step": 8900 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019415597299484724, + "loss": 3.0395, + "step": 8910 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019413667911694947, + "loss": 3.0012, + "step": 8920 + }, + { + "epoch": 0.15, + "learning_rate": 0.000194117354404297, + "loss": 2.9629, + "step": 8930 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019409799886321977, + "loss": 3.0074, + "step": 8940 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001940786125000576, + "loss": 3.0157, + "step": 8950 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019405919532116067, + "loss": 2.9597, + "step": 8960 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001940397473328891, + "loss": 3.0023, + "step": 8970 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019402026854161314, + "loss": 2.9688, + "step": 8980 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019400075895371313, + "loss": 2.9743, + "step": 8990 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019398121857557948, + "loss": 2.9644, + "step": 9000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019396164741361272, + "loss": 3.0216, + "step": 9010 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019394204547422343, + "loss": 3.0599, + "step": 9020 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019392241276383227, + "loss": 2.9999, + "step": 9030 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019390274928887002, + "loss": 2.9275, + "step": 9040 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001938830550557775, + "loss": 2.9654, + "step": 9050 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019386333007100564, + "loss": 2.9792, + "step": 9060 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019384357434101536, + "loss": 2.9648, + "step": 9070 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019382378787227779, + "loss": 2.9937, + "step": 9080 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019380397067127395, + "loss": 2.979, + "step": 9090 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001937841227444951, + "loss": 2.9957, + "step": 9100 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001937642440984425, + "loss": 2.9197, + "step": 9110 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019374433473962738, + "loss": 2.9366, + "step": 9120 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019372439467457117, + "loss": 2.9739, + "step": 9130 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019370442390980532, + "loss": 2.9835, + "step": 9140 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019368442245187124, + "loss": 2.9313, + "step": 9150 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019366439030732053, + "loss": 2.9542, + "step": 9160 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019364432748271475, + "loss": 2.9474, + "step": 9170 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019362423398462558, + "loss": 2.9174, + "step": 9180 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001936041098196346, + "loss": 2.9661, + "step": 9190 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001935839549943337, + "loss": 2.9808, + "step": 9200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001935637695153245, + "loss": 3.0645, + "step": 9210 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019354355338921893, + "loss": 2.9555, + "step": 9220 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001935233066226388, + "loss": 2.9731, + "step": 9230 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019350302922221596, + "loss": 3.0524, + "step": 9240 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001934827211945924, + "loss": 2.9097, + "step": 9250 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019346238254642004, + "loss": 3.0397, + "step": 9260 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019344201328436088, + "loss": 3.028, + "step": 9270 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019342161341508694, + "loss": 2.8918, + "step": 9280 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019340118294528024, + "loss": 2.9814, + "step": 9290 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001933807218816328, + "loss": 2.9865, + "step": 9300 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019336023023084682, + "loss": 3.0644, + "step": 9310 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001933397079996343, + "loss": 3.0276, + "step": 9320 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001933191551947174, + "loss": 2.9845, + "step": 9330 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019329857182282824, + "loss": 3.0101, + "step": 9340 + }, + { + "epoch": 0.16, + "learning_rate": 0.000193277957890709, + "loss": 3.0122, + "step": 9350 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019325731340511177, + "loss": 2.921, + "step": 9360 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019323663837279877, + "loss": 2.9458, + "step": 9370 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019321593280054216, + "loss": 2.9696, + "step": 9380 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019319519669512412, + "loss": 2.9398, + "step": 9390 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019317443006333678, + "loss": 3.0334, + "step": 9400 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019315363291198234, + "loss": 2.9426, + "step": 9410 + }, + { + "epoch": 0.16, + "learning_rate": 0.000193132805247873, + "loss": 3.0177, + "step": 9420 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019311194707783086, + "loss": 2.9086, + "step": 9430 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019309105840868815, + "loss": 2.9704, + "step": 9440 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019307013924728698, + "loss": 3.0218, + "step": 9450 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019304918960047946, + "loss": 2.9821, + "step": 9460 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019302820947512776, + "loss": 2.8981, + "step": 9470 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019300719887810396, + "loss": 2.9528, + "step": 9480 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001929861578162901, + "loss": 2.9179, + "step": 9490 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019296508629657836, + "loss": 2.9857, + "step": 9500 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019294398432587065, + "loss": 2.9373, + "step": 9510 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019292285191107905, + "loss": 2.9522, + "step": 9520 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019290168905912558, + "loss": 2.9873, + "step": 9530 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001928804957769421, + "loss": 2.9096, + "step": 9540 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019285927207147064, + "loss": 2.8875, + "step": 9550 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019283801794966302, + "loss": 3.0336, + "step": 9560 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001928167334184811, + "loss": 3.034, + "step": 9570 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019279541848489672, + "loss": 3.0551, + "step": 9580 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019277407315589163, + "loss": 2.9556, + "step": 9590 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019275269743845757, + "loss": 3.007, + "step": 9600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001927312913395962, + "loss": 3.0204, + "step": 9610 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019270985486631915, + "loss": 2.9308, + "step": 9620 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019268838802564804, + "loss": 2.9708, + "step": 9630 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019266689082461435, + "loss": 3.0383, + "step": 9640 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019264536327025963, + "loss": 2.9349, + "step": 9650 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019262380536963521, + "loss": 2.9566, + "step": 9660 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001926022171298025, + "loss": 2.9425, + "step": 9670 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019258059855783272, + "loss": 2.8805, + "step": 9680 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001925589496608072, + "loss": 2.9582, + "step": 9690 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019253727044581703, + "loss": 3.004, + "step": 9700 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019251556091996337, + "loss": 2.96, + "step": 9710 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019249382109035715, + "loss": 3.0309, + "step": 9720 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001924720509641194, + "loss": 2.8948, + "step": 9730 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019245025054838094, + "loss": 2.9517, + "step": 9740 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001924284198502826, + "loss": 2.9324, + "step": 9750 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019240655887697505, + "loss": 3.0396, + "step": 9760 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019238466763561894, + "loss": 2.9349, + "step": 9770 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019236274613338483, + "loss": 2.9799, + "step": 9780 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019234079437745313, + "loss": 2.9472, + "step": 9790 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019231881237501423, + "loss": 2.9399, + "step": 9800 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019229680013326843, + "loss": 2.9866, + "step": 9810 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019227475765942585, + "loss": 2.9636, + "step": 9820 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001922526849607066, + "loss": 2.9818, + "step": 9830 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019223058204434067, + "loss": 2.9283, + "step": 9840 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019220844891756791, + "loss": 2.9251, + "step": 9850 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019218628558763813, + "loss": 2.9721, + "step": 9860 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019216409206181093, + "loss": 2.9624, + "step": 9870 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019214186834735594, + "loss": 3.0221, + "step": 9880 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019211961445155255, + "loss": 2.8671, + "step": 9890 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019209733038169015, + "loss": 2.8799, + "step": 9900 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019207501614506791, + "loss": 2.9431, + "step": 9910 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001920526717489949, + "loss": 2.9562, + "step": 9920 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019203029720079017, + "loss": 3.0197, + "step": 9930 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019200789250778253, + "loss": 2.9525, + "step": 9940 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001919854576773107, + "loss": 2.9427, + "step": 9950 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019196299271672325, + "loss": 3.0564, + "step": 9960 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001919404976333787, + "loss": 2.9436, + "step": 9970 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019191797243464537, + "loss": 3.0733, + "step": 9980 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019189541712790145, + "loss": 2.9817, + "step": 9990 + }, + { + "epoch": 0.17, + "learning_rate": 0.000191872831720535, + "loss": 2.9418, + "step": 10000 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019185021621994394, + "loss": 2.9505, + "step": 10010 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019182757063353602, + "loss": 2.9282, + "step": 10020 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001918048949687289, + "loss": 2.9131, + "step": 10030 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019178218923295007, + "loss": 2.8732, + "step": 10040 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019175945343363683, + "loss": 3.0232, + "step": 10050 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001917366875782364, + "loss": 2.9362, + "step": 10060 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019171389167420578, + "loss": 2.9533, + "step": 10070 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019169106572901185, + "loss": 2.9948, + "step": 10080 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019166820975013125, + "loss": 2.9238, + "step": 10090 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019164532374505068, + "loss": 2.9195, + "step": 10100 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019162240772126635, + "loss": 2.8758, + "step": 10110 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001915994616862846, + "loss": 2.983, + "step": 10120 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019157648564762134, + "loss": 2.8939, + "step": 10130 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019155347961280258, + "loss": 2.9098, + "step": 10140 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019153044358936395, + "loss": 2.9207, + "step": 10150 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019150737758485097, + "loss": 2.9154, + "step": 10160 + }, + { + "epoch": 0.18, + "learning_rate": 0.000191484281606819, + "loss": 2.9161, + "step": 10170 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019146115566283318, + "loss": 2.987, + "step": 10180 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001914379997604685, + "loss": 2.9092, + "step": 10190 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001914148139073097, + "loss": 2.9463, + "step": 10200 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019139159811095143, + "loss": 2.9294, + "step": 10210 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019136835237899805, + "loss": 2.876, + "step": 10220 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019134507671906382, + "loss": 2.9768, + "step": 10230 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019132177113877273, + "loss": 2.9916, + "step": 10240 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001912984356457586, + "loss": 2.9477, + "step": 10250 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019127507024766503, + "loss": 2.926, + "step": 10260 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019125167495214542, + "loss": 2.8116, + "step": 10270 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019122824976686297, + "loss": 2.9138, + "step": 10280 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001912047946994907, + "loss": 3.0209, + "step": 10290 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019118130975771135, + "loss": 3.0251, + "step": 10300 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019115779494921752, + "loss": 2.9224, + "step": 10310 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019113425028171154, + "loss": 2.9678, + "step": 10320 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019111067576290554, + "loss": 3.0192, + "step": 10330 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019108707140052137, + "loss": 2.9724, + "step": 10340 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019106343720229083, + "loss": 2.8939, + "step": 10350 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019103977317595528, + "loss": 3.0247, + "step": 10360 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019101607932926596, + "loss": 2.8776, + "step": 10370 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019099235566998388, + "loss": 2.9, + "step": 10380 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019096860220587977, + "loss": 3.0157, + "step": 10390 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001909448189447342, + "loss": 2.9109, + "step": 10400 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019092100589433738, + "loss": 2.9372, + "step": 10410 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019089716306248939, + "loss": 2.9234, + "step": 10420 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019087329045700005, + "loss": 2.8724, + "step": 10430 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019084938808568884, + "loss": 2.8899, + "step": 10440 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019082545595638508, + "loss": 2.9045, + "step": 10450 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019080149407692783, + "loss": 3.0254, + "step": 10460 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019077750245516585, + "loss": 3.0135, + "step": 10470 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019075348109895765, + "loss": 2.9534, + "step": 10480 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019072943001617157, + "loss": 2.9523, + "step": 10490 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019070534921468557, + "loss": 2.8766, + "step": 10500 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019068123870238736, + "loss": 2.9016, + "step": 10510 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019065709848717444, + "loss": 3.0137, + "step": 10520 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019063292857695406, + "loss": 2.9743, + "step": 10530 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019060872897964308, + "loss": 2.9845, + "step": 10540 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019058449970316816, + "loss": 2.963, + "step": 10550 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001905602407554657, + "loss": 2.9458, + "step": 10560 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019053595214448178, + "loss": 2.9201, + "step": 10570 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019051163387817218, + "loss": 2.9116, + "step": 10580 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019048728596450247, + "loss": 2.9384, + "step": 10590 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019046290841144786, + "loss": 2.9408, + "step": 10600 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019043850122699328, + "loss": 2.9321, + "step": 10610 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001904140644191334, + "loss": 2.8799, + "step": 10620 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019038959799587258, + "loss": 2.9364, + "step": 10630 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019036510196522482, + "loss": 2.9343, + "step": 10640 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019034057633521393, + "loss": 2.9246, + "step": 10650 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001903160211138733, + "loss": 2.9373, + "step": 10660 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019029143630924607, + "loss": 2.9617, + "step": 10670 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019026682192938512, + "loss": 2.8688, + "step": 10680 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019024217798235292, + "loss": 2.9576, + "step": 10690 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019021750447622169, + "loss": 2.985, + "step": 10700 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001901928014190733, + "loss": 2.9384, + "step": 10710 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019016806881899933, + "loss": 2.9793, + "step": 10720 + }, + { + "epoch": 0.19, + "learning_rate": 0.000190143306684101, + "loss": 2.8323, + "step": 10730 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019011851502248923, + "loss": 2.9148, + "step": 10740 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019009369384228458, + "loss": 2.9439, + "step": 10750 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019006884315161734, + "loss": 2.9247, + "step": 10760 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019004396295862743, + "loss": 2.937, + "step": 10770 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001900190532714644, + "loss": 2.8896, + "step": 10780 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018999411409828748, + "loss": 2.9185, + "step": 10790 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018996914544726562, + "loss": 2.8663, + "step": 10800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018994414732657737, + "loss": 2.9704, + "step": 10810 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018991911974441093, + "loss": 2.9734, + "step": 10820 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018989406270896416, + "loss": 2.9308, + "step": 10830 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018986897622844454, + "loss": 3.0134, + "step": 10840 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018984386031106927, + "loss": 2.9338, + "step": 10850 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018981871496506512, + "loss": 2.9158, + "step": 10860 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018979354019866852, + "loss": 2.8965, + "step": 10870 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018976833602012559, + "loss": 2.9444, + "step": 10880 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018974310243769196, + "loss": 2.883, + "step": 10890 + }, + { + "epoch": 0.19, + "learning_rate": 0.000189717839459633, + "loss": 2.9124, + "step": 10900 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018969254709422367, + "loss": 2.8878, + "step": 10910 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001896672253497486, + "loss": 2.8779, + "step": 10920 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018964187423450195, + "loss": 2.8612, + "step": 10930 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018961649375678755, + "loss": 2.8846, + "step": 10940 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018959108392491893, + "loss": 2.9795, + "step": 10950 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018956564474721907, + "loss": 2.897, + "step": 10960 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001895401762320207, + "loss": 2.8896, + "step": 10970 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001895146783876661, + "loss": 2.9596, + "step": 10980 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018948915122250716, + "loss": 2.9487, + "step": 10990 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018946359474490535, + "loss": 2.7977, + "step": 11000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018943800896323185, + "loss": 2.9845, + "step": 11010 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001894123938858673, + "loss": 2.9513, + "step": 11020 + }, + { + "epoch": 0.19, + "learning_rate": 0.000189386749521202, + "loss": 2.9211, + "step": 11030 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018936107587763586, + "loss": 3.0171, + "step": 11040 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018933537296357834, + "loss": 2.9208, + "step": 11050 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018930964078744852, + "loss": 2.8953, + "step": 11060 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018928387935767505, + "loss": 2.9189, + "step": 11070 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018925808868269618, + "loss": 2.9682, + "step": 11080 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018923226877095968, + "loss": 2.8637, + "step": 11090 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018920641963092298, + "loss": 2.971, + "step": 11100 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018918054127105304, + "loss": 2.8539, + "step": 11110 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018915463369982636, + "loss": 2.8878, + "step": 11120 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001891286969257291, + "loss": 2.8327, + "step": 11130 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018910273095725687, + "loss": 2.9209, + "step": 11140 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018907673580291498, + "loss": 2.9701, + "step": 11150 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018905071147121814, + "loss": 2.8829, + "step": 11160 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018902465797069072, + "loss": 2.8963, + "step": 11170 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018899857530986665, + "loss": 2.8913, + "step": 11180 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018897246349728936, + "loss": 2.9454, + "step": 11190 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018894893794828968, + "loss": 2.9662, + "step": 11200 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018892277077095274, + "loss": 2.779, + "step": 11210 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018889657446669256, + "loss": 2.9785, + "step": 11220 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018887034904408985, + "loss": 2.8737, + "step": 11230 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018884409451173486, + "loss": 2.9645, + "step": 11240 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001888178108782273, + "loss": 3.018, + "step": 11250 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018879149815217647, + "loss": 2.9226, + "step": 11260 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018876515634220117, + "loss": 2.9008, + "step": 11270 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018873878545692976, + "loss": 2.876, + "step": 11280 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001887123855050001, + "loss": 2.9043, + "step": 11290 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018868595649505955, + "loss": 3.0338, + "step": 11300 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018865949843576506, + "loss": 2.9622, + "step": 11310 + }, + { + "epoch": 0.2, + "learning_rate": 0.000188633011335783, + "loss": 2.9477, + "step": 11320 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018860649520378931, + "loss": 2.8959, + "step": 11330 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018857995004846944, + "loss": 2.8437, + "step": 11340 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018855337587851833, + "loss": 2.9983, + "step": 11350 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018852677270264048, + "loss": 3.0235, + "step": 11360 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018850014052954972, + "loss": 2.8903, + "step": 11370 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018847347936796962, + "loss": 2.958, + "step": 11380 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018844678922663307, + "loss": 2.9808, + "step": 11390 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001884200701142825, + "loss": 3.0096, + "step": 11400 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018839332203966986, + "loss": 2.8775, + "step": 11410 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018836654501155656, + "loss": 2.9041, + "step": 11420 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001883397390387135, + "loss": 2.9459, + "step": 11430 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018831290412992102, + "loss": 2.9058, + "step": 11440 + }, + { + "epoch": 0.2, + "learning_rate": 0.000188286040293969, + "loss": 2.948, + "step": 11450 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018825914753965682, + "loss": 2.8676, + "step": 11460 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018823222587579318, + "loss": 2.9027, + "step": 11470 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018820527531119647, + "loss": 2.9032, + "step": 11480 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018817829585469434, + "loss": 2.9231, + "step": 11490 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018815128751512402, + "loss": 2.9603, + "step": 11500 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001881242503013322, + "loss": 2.9196, + "step": 11510 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018809718422217492, + "loss": 2.8995, + "step": 11520 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018807008928651786, + "loss": 2.8717, + "step": 11530 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018804296550323598, + "loss": 2.8334, + "step": 11540 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018801581288121382, + "loss": 2.8974, + "step": 11550 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018798863142934521, + "loss": 2.8754, + "step": 11560 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001879614211565336, + "loss": 2.8607, + "step": 11570 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018793418207169177, + "loss": 2.9684, + "step": 11580 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018790691418374194, + "loss": 2.8542, + "step": 11590 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018787961750161585, + "loss": 2.8301, + "step": 11600 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018785229203425454, + "loss": 2.8551, + "step": 11610 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018782493779060858, + "loss": 2.8584, + "step": 11620 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018779755477963796, + "loss": 2.8726, + "step": 11630 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018777014301031203, + "loss": 2.9886, + "step": 11640 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018774270249160963, + "loss": 2.989, + "step": 11650 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018771523323251894, + "loss": 2.9735, + "step": 11660 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018768773524203766, + "loss": 2.8517, + "step": 11670 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001876602085291728, + "loss": 2.8776, + "step": 11680 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018763265310294084, + "loss": 2.8806, + "step": 11690 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001876050689723676, + "loss": 2.913, + "step": 11700 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001875774561464884, + "loss": 2.9279, + "step": 11710 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018754981463434787, + "loss": 2.8531, + "step": 11720 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018752214444500007, + "loss": 2.8862, + "step": 11730 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018749444558750845, + "loss": 2.9839, + "step": 11740 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018746671807094592, + "loss": 2.851, + "step": 11750 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018743896190439463, + "loss": 2.9503, + "step": 11760 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018741117709694628, + "loss": 2.8775, + "step": 11770 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018738336365770175, + "loss": 2.9188, + "step": 11780 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001873555215957715, + "loss": 2.9352, + "step": 11790 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001873276509202753, + "loss": 2.8976, + "step": 11800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001872997516403422, + "loss": 2.8732, + "step": 11810 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018727182376511076, + "loss": 2.9441, + "step": 11820 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018724386730372883, + "loss": 2.9046, + "step": 11830 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001872158822653536, + "loss": 2.8626, + "step": 11840 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018718786865915164, + "loss": 2.9396, + "step": 11850 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018715982649429895, + "loss": 2.9242, + "step": 11860 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001871317557799808, + "loss": 2.8892, + "step": 11870 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018710365652539185, + "loss": 2.8815, + "step": 11880 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018707552873973606, + "loss": 2.9481, + "step": 11890 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018704737243222682, + "loss": 2.9255, + "step": 11900 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018701918761208677, + "loss": 2.9414, + "step": 11910 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018699097428854798, + "loss": 2.9348, + "step": 11920 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018696273247085173, + "loss": 2.954, + "step": 11930 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001869344621682488, + "loss": 2.896, + "step": 11940 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001869061633899992, + "loss": 2.9492, + "step": 11950 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001868778361453722, + "loss": 2.8333, + "step": 11960 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001868494804436466, + "loss": 2.843, + "step": 11970 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001868210962941103, + "loss": 2.9017, + "step": 11980 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001867926837060606, + "loss": 2.9657, + "step": 11990 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018676424268880424, + "loss": 2.9743, + "step": 12000 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018673577325165706, + "loss": 2.8867, + "step": 12010 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018670727540394432, + "loss": 2.9656, + "step": 12020 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018667874915500067, + "loss": 2.966, + "step": 12030 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018665019451416986, + "loss": 2.8191, + "step": 12040 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018662161149080508, + "loss": 2.9215, + "step": 12050 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018659300009426884, + "loss": 2.7995, + "step": 12060 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001865643603339328, + "loss": 2.9132, + "step": 12070 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018653569221917807, + "loss": 2.896, + "step": 12080 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018650699575939494, + "loss": 2.8174, + "step": 12090 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018647827096398306, + "loss": 2.8002, + "step": 12100 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018644951784235126, + "loss": 2.9853, + "step": 12110 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018642073640391776, + "loss": 2.8966, + "step": 12120 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018639192665811005, + "loss": 2.9778, + "step": 12130 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018636308861436475, + "loss": 2.8704, + "step": 12140 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018633422228212792, + "loss": 2.9054, + "step": 12150 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018630532767085478, + "loss": 2.7541, + "step": 12160 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018627640479000986, + "loss": 2.9026, + "step": 12170 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018624745364906694, + "loss": 2.8469, + "step": 12180 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001862184742575091, + "loss": 2.8999, + "step": 12190 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001861894666248285, + "loss": 2.8714, + "step": 12200 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001861604307605268, + "loss": 2.9258, + "step": 12210 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018613136667411473, + "loss": 2.9277, + "step": 12220 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018610227437511237, + "loss": 2.7877, + "step": 12230 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001860731538730489, + "loss": 2.9708, + "step": 12240 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018604400517746293, + "loss": 2.9528, + "step": 12250 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018601482829790208, + "loss": 2.8791, + "step": 12260 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018598562324392349, + "loss": 2.8895, + "step": 12270 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018595639002509323, + "loss": 2.8381, + "step": 12280 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001859271286509868, + "loss": 2.9248, + "step": 12290 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001858978391311888, + "loss": 2.8708, + "step": 12300 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001858685214752931, + "loss": 2.9205, + "step": 12310 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018583917569290286, + "loss": 2.9129, + "step": 12320 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018580980179363032, + "loss": 2.8233, + "step": 12330 + }, + { + "epoch": 0.21, + "learning_rate": 0.000185780399787097, + "loss": 2.8203, + "step": 12340 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018575096968293363, + "loss": 2.9456, + "step": 12350 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018572151149078012, + "loss": 2.9582, + "step": 12360 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001856920252202856, + "loss": 2.8487, + "step": 12370 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018566251088110834, + "loss": 2.84, + "step": 12380 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018563296848291594, + "loss": 2.8533, + "step": 12390 + }, + { + "epoch": 0.21, + "learning_rate": 0.000185603398035385, + "loss": 2.835, + "step": 12400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001855737995482015, + "loss": 2.8809, + "step": 12410 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018554417303106045, + "loss": 2.9712, + "step": 12420 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018551451849366615, + "loss": 2.9968, + "step": 12430 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018548483594573202, + "loss": 2.9386, + "step": 12440 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018545512539698065, + "loss": 2.9477, + "step": 12450 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018542538685714383, + "loss": 2.8778, + "step": 12460 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001853956203359625, + "loss": 2.8282, + "step": 12470 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018536582584318683, + "loss": 2.8913, + "step": 12480 + }, + { + "epoch": 0.22, + "learning_rate": 0.000185336003388576, + "loss": 2.8826, + "step": 12490 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018530615298189852, + "loss": 2.8559, + "step": 12500 + }, + { + "epoch": 0.22, + "learning_rate": 0.000185276274632932, + "loss": 2.8964, + "step": 12510 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001852463683514631, + "loss": 2.8693, + "step": 12520 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018521643414728774, + "loss": 2.8168, + "step": 12530 + }, + { + "epoch": 0.22, + "learning_rate": 0.000185186472030211, + "loss": 2.8633, + "step": 12540 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018515648201004704, + "loss": 2.8486, + "step": 12550 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018512646409661918, + "loss": 2.9089, + "step": 12560 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018509641829975987, + "loss": 2.8959, + "step": 12570 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018506634462931072, + "loss": 2.9367, + "step": 12580 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018503624309512245, + "loss": 2.9012, + "step": 12590 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001850061137070549, + "loss": 2.9078, + "step": 12600 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018497595647497703, + "loss": 2.9025, + "step": 12610 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018494577140876691, + "loss": 2.8727, + "step": 12620 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018491555851831185, + "loss": 2.8372, + "step": 12630 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001848853178135081, + "loss": 2.934, + "step": 12640 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018485504930426108, + "loss": 2.8812, + "step": 12650 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018482475300048536, + "loss": 2.8345, + "step": 12660 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018479442891210462, + "loss": 2.8295, + "step": 12670 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018476407704905151, + "loss": 2.896, + "step": 12680 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018473369742126798, + "loss": 2.8275, + "step": 12690 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018470329003870492, + "loss": 2.834, + "step": 12700 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018467285491132235, + "loss": 2.8596, + "step": 12710 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018464239204908945, + "loss": 2.8922, + "step": 12720 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001846119014619843, + "loss": 2.8825, + "step": 12730 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001845813831599943, + "loss": 2.8464, + "step": 12740 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018455083715311578, + "loss": 2.8591, + "step": 12750 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018452026345135416, + "loss": 2.9373, + "step": 12760 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018448966206472396, + "loss": 2.7819, + "step": 12770 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018445903300324875, + "loss": 2.8864, + "step": 12780 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018442837627696119, + "loss": 2.9488, + "step": 12790 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018439769189590295, + "loss": 2.8584, + "step": 12800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001843669798701248, + "loss": 2.8946, + "step": 12810 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018433624020968662, + "loss": 2.7667, + "step": 12820 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001843054729246572, + "loss": 2.8346, + "step": 12830 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018427467802511452, + "loss": 2.8882, + "step": 12840 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018424385552114544, + "loss": 2.8984, + "step": 12850 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018421300542284608, + "loss": 2.9422, + "step": 12860 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018418212774032143, + "loss": 2.8852, + "step": 12870 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018415122248368557, + "loss": 2.9235, + "step": 12880 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018412028966306162, + "loss": 2.8973, + "step": 12890 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018408932928858175, + "loss": 2.8216, + "step": 12900 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018405834137038706, + "loss": 2.7905, + "step": 12910 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018402732591862774, + "loss": 2.9434, + "step": 12920 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018399628294346308, + "loss": 2.8369, + "step": 12930 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018396521245506118, + "loss": 2.8772, + "step": 12940 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018393411446359937, + "loss": 2.8226, + "step": 12950 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018390298897926383, + "loss": 2.9417, + "step": 12960 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018387183601224985, + "loss": 2.8922, + "step": 12970 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001838406555727617, + "loss": 2.889, + "step": 12980 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018380944767101257, + "loss": 2.834, + "step": 12990 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001837782123172247, + "loss": 2.9279, + "step": 13000 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018374694952162933, + "loss": 2.8931, + "step": 13010 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018371565929446674, + "loss": 2.8168, + "step": 13020 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001836843416459861, + "loss": 2.8405, + "step": 13030 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018365299658644558, + "loss": 2.9036, + "step": 13040 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018362162412611236, + "loss": 2.853, + "step": 13050 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018359022427526264, + "loss": 2.8501, + "step": 13060 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018355879704418145, + "loss": 2.8945, + "step": 13070 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018352734244316294, + "loss": 2.8679, + "step": 13080 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001834958604825101, + "loss": 2.8951, + "step": 13090 + }, + { + "epoch": 0.23, + "learning_rate": 0.000183464351172535, + "loss": 2.8413, + "step": 13100 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001834328145235586, + "loss": 2.89, + "step": 13110 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001834012505459108, + "loss": 2.898, + "step": 13120 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018336965924993048, + "loss": 2.9128, + "step": 13130 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001833380406459655, + "loss": 2.8674, + "step": 13140 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018330639474437258, + "loss": 2.9914, + "step": 13150 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018327472155551746, + "loss": 2.8589, + "step": 13160 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001832430210897748, + "loss": 2.8687, + "step": 13170 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018321129335752812, + "loss": 2.8548, + "step": 13180 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018317953836917, + "loss": 2.8668, + "step": 13190 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001831477561351019, + "loss": 2.8973, + "step": 13200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001831159466657341, + "loss": 2.9491, + "step": 13210 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018308410997148596, + "loss": 2.877, + "step": 13220 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001830522460627857, + "loss": 2.9205, + "step": 13230 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018302035495007036, + "loss": 2.8641, + "step": 13240 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018298843664378605, + "loss": 2.8727, + "step": 13250 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018295649115438766, + "loss": 2.8695, + "step": 13260 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018292451849233901, + "loss": 2.928, + "step": 13270 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018289251866811292, + "loss": 2.8496, + "step": 13280 + }, + { + "epoch": 0.23, + "learning_rate": 0.000182860491692191, + "loss": 2.7952, + "step": 13290 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018282843757506373, + "loss": 2.8437, + "step": 13300 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001827963563272306, + "loss": 2.8567, + "step": 13310 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018276424795919989, + "loss": 2.8917, + "step": 13320 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001827321124814888, + "loss": 2.903, + "step": 13330 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001826999499046234, + "loss": 3.0026, + "step": 13340 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018266776023913864, + "loss": 2.8257, + "step": 13350 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018263554349557832, + "loss": 2.7549, + "step": 13360 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001826032996844952, + "loss": 2.8128, + "step": 13370 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018257102881645074, + "loss": 2.816, + "step": 13380 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018253873090201544, + "loss": 2.8572, + "step": 13390 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018250640595176857, + "loss": 2.8478, + "step": 13400 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018247405397629822, + "loss": 2.8681, + "step": 13410 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001824416749862014, + "loss": 2.8826, + "step": 13420 + }, + { + "epoch": 0.23, + "learning_rate": 0.000182409268992084, + "loss": 2.8709, + "step": 13430 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018237683600456063, + "loss": 2.8938, + "step": 13440 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018234437603425481, + "loss": 2.842, + "step": 13450 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018231188909179896, + "loss": 2.9086, + "step": 13460 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018227937518783426, + "loss": 2.9323, + "step": 13470 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001822468343330107, + "loss": 2.867, + "step": 13480 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018221426653798717, + "loss": 2.9124, + "step": 13490 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018218167181343137, + "loss": 2.8527, + "step": 13500 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001821490501700197, + "loss": 2.8697, + "step": 13510 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018211640161843758, + "loss": 2.8534, + "step": 13520 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018208372616937912, + "loss": 2.9427, + "step": 13530 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018205102383354725, + "loss": 2.8513, + "step": 13540 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018201829462165372, + "loss": 2.9051, + "step": 13550 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001819855385444191, + "loss": 2.8396, + "step": 13560 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001819527556125727, + "loss": 2.8508, + "step": 13570 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001819199458368527, + "loss": 2.8423, + "step": 13580 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018188710922800608, + "loss": 2.8918, + "step": 13590 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018185424579678848, + "loss": 2.8684, + "step": 13600 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018182135555396448, + "loss": 2.8604, + "step": 13610 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018178843851030739, + "loss": 2.8701, + "step": 13620 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018175549467659921, + "loss": 2.7862, + "step": 13630 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001817225240636309, + "loss": 2.922, + "step": 13640 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018168952668220196, + "loss": 2.8412, + "step": 13650 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001816565025431209, + "loss": 2.7602, + "step": 13660 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001816234516572048, + "loss": 2.8685, + "step": 13670 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018159037403527967, + "loss": 2.9078, + "step": 13680 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018155726968818004, + "loss": 2.9449, + "step": 13690 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018152413862674948, + "loss": 2.8123, + "step": 13700 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018149098086184007, + "loss": 2.8536, + "step": 13710 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018145779640431277, + "loss": 2.7804, + "step": 13720 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018142458526503727, + "loss": 2.9019, + "step": 13730 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018139134745489196, + "loss": 2.7999, + "step": 13740 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018135808298476403, + "loss": 2.8085, + "step": 13750 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018132479186554927, + "loss": 2.9006, + "step": 13760 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018129147410815238, + "loss": 2.9661, + "step": 13770 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018125812972348662, + "loss": 2.8471, + "step": 13780 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018122475872247407, + "loss": 2.8751, + "step": 13790 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018119136111604552, + "loss": 2.8315, + "step": 13800 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018115793691514043, + "loss": 2.8093, + "step": 13810 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018112448613070701, + "loss": 2.872, + "step": 13820 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018109100877370217, + "loss": 2.924, + "step": 13830 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018105750485509153, + "loss": 2.9473, + "step": 13840 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018102397438584937, + "loss": 2.8886, + "step": 13850 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018099041737695872, + "loss": 2.8165, + "step": 13860 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018095683383941128, + "loss": 2.7789, + "step": 13870 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001809232237842074, + "loss": 2.763, + "step": 13880 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001808895872223562, + "loss": 2.8323, + "step": 13890 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018085592416487546, + "loss": 2.9105, + "step": 13900 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018082223462279154, + "loss": 2.8475, + "step": 13910 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018078851860713963, + "loss": 2.8808, + "step": 13920 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018075477612896346, + "loss": 2.8329, + "step": 13930 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001807210071993155, + "loss": 2.9317, + "step": 13940 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001806872118292569, + "loss": 2.858, + "step": 13950 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018065339002985736, + "loss": 2.9045, + "step": 13960 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018061954181219534, + "loss": 2.8583, + "step": 13970 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018058566718735797, + "loss": 2.8354, + "step": 13980 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018055176616644093, + "loss": 2.8926, + "step": 13990 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018051783876054867, + "loss": 2.8634, + "step": 14000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018048388498079413, + "loss": 2.8381, + "step": 14010 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018044990483829905, + "loss": 2.8273, + "step": 14020 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018041589834419364, + "loss": 2.807, + "step": 14030 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018038186550961693, + "loss": 2.8978, + "step": 14040 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001803478063457164, + "loss": 2.8621, + "step": 14050 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018031372086364832, + "loss": 2.8211, + "step": 14060 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018027960907457738, + "loss": 2.7936, + "step": 14070 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001802454709896771, + "loss": 2.8054, + "step": 14080 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018021130662012946, + "loss": 2.8877, + "step": 14090 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001801771159771251, + "loss": 2.8549, + "step": 14100 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018014289907186332, + "loss": 2.7887, + "step": 14110 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001801086559155519, + "loss": 2.8175, + "step": 14120 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001800743865194073, + "loss": 2.9059, + "step": 14130 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018004009089465464, + "loss": 2.7316, + "step": 14140 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018000576905252748, + "loss": 2.7509, + "step": 14150 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017997142100426808, + "loss": 2.8573, + "step": 14160 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017993704676112722, + "loss": 2.845, + "step": 14170 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001799026463343643, + "loss": 2.8722, + "step": 14180 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001798682197352473, + "loss": 2.7838, + "step": 14190 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017983376697505276, + "loss": 2.8039, + "step": 14200 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017979928806506577, + "loss": 2.8431, + "step": 14210 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017976478301657994, + "loss": 2.8357, + "step": 14220 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017973025184089763, + "loss": 2.7971, + "step": 14230 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017969569454932954, + "loss": 2.8599, + "step": 14240 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017966111115319503, + "loss": 2.8887, + "step": 14250 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017962650166382203, + "loss": 2.8399, + "step": 14260 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017959186609254693, + "loss": 2.7691, + "step": 14270 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017955720445071477, + "loss": 2.9113, + "step": 14280 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017952251674967906, + "loss": 2.7674, + "step": 14290 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017948780300080185, + "loss": 2.8812, + "step": 14300 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017945306321545378, + "loss": 2.8575, + "step": 14310 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017941829740501385, + "loss": 2.7275, + "step": 14320 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001793835055808698, + "loss": 2.9002, + "step": 14330 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017934868775441783, + "loss": 2.8513, + "step": 14340 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017931384393706258, + "loss": 2.7923, + "step": 14350 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017927897414021722, + "loss": 2.8384, + "step": 14360 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017924407837530351, + "loss": 2.8243, + "step": 14370 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017920915665375163, + "loss": 2.8286, + "step": 14380 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017917420898700027, + "loss": 2.8883, + "step": 14390 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001791392353864967, + "loss": 2.9265, + "step": 14400 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017910423586369663, + "loss": 2.7002, + "step": 14410 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001790692104300642, + "loss": 2.8627, + "step": 14420 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017903415909707214, + "loss": 2.914, + "step": 14430 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017899908187620164, + "loss": 2.8426, + "step": 14440 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017896397877894234, + "loss": 2.7995, + "step": 14450 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017892884981679229, + "loss": 2.932, + "step": 14460 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017889369500125821, + "loss": 2.8078, + "step": 14470 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017885851434385508, + "loss": 2.8462, + "step": 14480 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017882330785610643, + "loss": 2.7802, + "step": 14490 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001787880755495443, + "loss": 2.8518, + "step": 14500 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017875281743570914, + "loss": 2.8423, + "step": 14510 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001787175335261498, + "loss": 2.808, + "step": 14520 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017868222383242367, + "loss": 2.8658, + "step": 14530 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017864688836609656, + "loss": 2.8834, + "step": 14540 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017861152713874266, + "loss": 2.8919, + "step": 14550 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001785761401619447, + "loss": 2.8937, + "step": 14560 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017854072744729374, + "loss": 2.7923, + "step": 14570 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017850528900638938, + "loss": 2.8302, + "step": 14580 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001784698248508395, + "loss": 2.8647, + "step": 14590 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001784343349922606, + "loss": 2.8692, + "step": 14600 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017839881944227737, + "loss": 2.7796, + "step": 14610 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017836327821252318, + "loss": 2.7829, + "step": 14620 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017832771131463958, + "loss": 2.796, + "step": 14630 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001782921187602766, + "loss": 2.8144, + "step": 14640 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017825650056109272, + "loss": 2.79, + "step": 14650 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017822085672875484, + "loss": 2.7903, + "step": 14660 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001781851872749381, + "loss": 2.7257, + "step": 14670 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001781494922113262, + "loss": 2.8658, + "step": 14680 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017811377154961115, + "loss": 2.7379, + "step": 14690 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017807802530149337, + "loss": 2.8914, + "step": 14700 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017804225347868168, + "loss": 2.8099, + "step": 14710 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017800645609289317, + "loss": 2.8294, + "step": 14720 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017797063315585346, + "loss": 2.9011, + "step": 14730 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017793478467929643, + "loss": 2.8232, + "step": 14740 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017789891067496435, + "loss": 2.8373, + "step": 14750 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017786301115460786, + "loss": 2.8774, + "step": 14760 + }, + { + "epoch": 0.26, + "learning_rate": 0.000177827086129986, + "loss": 2.8459, + "step": 14770 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017779113561286605, + "loss": 2.8621, + "step": 14780 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017775515961502376, + "loss": 2.8535, + "step": 14790 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017771915814824313, + "loss": 2.868, + "step": 14800 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017768313122431664, + "loss": 2.8009, + "step": 14810 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017764707885504493, + "loss": 2.7355, + "step": 14820 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001776110010522371, + "loss": 2.8461, + "step": 14830 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017757489782771054, + "loss": 2.8092, + "step": 14840 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017753876919329096, + "loss": 2.8095, + "step": 14850 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001775026151608124, + "loss": 2.861, + "step": 14860 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017746643574211729, + "loss": 2.8374, + "step": 14870 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017743023094905617, + "loss": 2.7997, + "step": 14880 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017739400079348818, + "loss": 2.9162, + "step": 14890 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017735774528728054, + "loss": 2.888, + "step": 14900 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017732146444230886, + "loss": 2.7305, + "step": 14910 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017728515827045702, + "loss": 2.8512, + "step": 14920 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017724882678361726, + "loss": 2.8966, + "step": 14930 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017721246999369004, + "loss": 2.8744, + "step": 14940 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017717608791258417, + "loss": 2.8306, + "step": 14950 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017713968055221664, + "loss": 2.7961, + "step": 14960 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017710324792451286, + "loss": 2.8248, + "step": 14970 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017706679004140643, + "loss": 2.8738, + "step": 14980 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017703030691483922, + "loss": 2.8515, + "step": 14990 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001769937985567614, + "loss": 2.8115, + "step": 15000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001769572649791314, + "loss": 2.845, + "step": 15010 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017692070619391593, + "loss": 2.8685, + "step": 15020 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017688412221308986, + "loss": 2.7558, + "step": 15030 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017684751304863645, + "loss": 2.8388, + "step": 15040 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017681087871254713, + "loss": 2.897, + "step": 15050 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017677421921682157, + "loss": 2.8361, + "step": 15060 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017673753457346768, + "loss": 2.8457, + "step": 15070 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017670082479450167, + "loss": 2.8193, + "step": 15080 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001766640898919479, + "loss": 2.8846, + "step": 15090 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017662732987783905, + "loss": 2.8781, + "step": 15100 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017659054476421596, + "loss": 2.869, + "step": 15110 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017655373456312767, + "loss": 2.7992, + "step": 15120 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017651689928663148, + "loss": 2.8153, + "step": 15130 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001764800389467929, + "loss": 2.8294, + "step": 15140 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017644315355568567, + "loss": 2.8415, + "step": 15150 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017640624312539172, + "loss": 2.8016, + "step": 15160 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001763693076680011, + "loss": 2.8653, + "step": 15170 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001763323471956122, + "loss": 2.8849, + "step": 15180 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001762953617203315, + "loss": 2.8165, + "step": 15190 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001762583512542738, + "loss": 2.8689, + "step": 15200 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017622131580956183, + "loss": 2.8253, + "step": 15210 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017618425539832677, + "loss": 2.8532, + "step": 15220 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017614717003270784, + "loss": 2.7452, + "step": 15230 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017611005972485247, + "loss": 2.8525, + "step": 15240 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017607292448691624, + "loss": 2.7575, + "step": 15250 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017603576433106294, + "loss": 2.8183, + "step": 15260 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017599857926946445, + "loss": 2.9309, + "step": 15270 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001759613693143009, + "loss": 2.6982, + "step": 15280 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017592413447776046, + "loss": 2.9148, + "step": 15290 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017588687477203957, + "loss": 2.8193, + "step": 15300 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017584959020934275, + "loss": 2.809, + "step": 15310 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017581228080188263, + "loss": 2.7372, + "step": 15320 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017577494656188008, + "loss": 2.863, + "step": 15330 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017573758750156403, + "loss": 2.8275, + "step": 15340 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001757002036331715, + "loss": 2.8025, + "step": 15350 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017566279496894773, + "loss": 2.7685, + "step": 15360 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017562536152114603, + "loss": 2.8752, + "step": 15370 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017558790330202787, + "loss": 2.8779, + "step": 15380 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017555042032386275, + "loss": 2.8491, + "step": 15390 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001755129125989284, + "loss": 2.8259, + "step": 15400 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017547538013951055, + "loss": 2.9001, + "step": 15410 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017543782295790304, + "loss": 2.8691, + "step": 15420 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017540024106640792, + "loss": 2.8202, + "step": 15430 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017536263447733518, + "loss": 2.8396, + "step": 15440 + }, + { + "epoch": 0.27, + "learning_rate": 0.000175325003203003, + "loss": 2.7954, + "step": 15450 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001752873472557376, + "loss": 2.8284, + "step": 15460 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001752496666478733, + "loss": 2.7724, + "step": 15470 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017521196139175257, + "loss": 2.8546, + "step": 15480 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017517423149972577, + "loss": 2.7889, + "step": 15490 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017513647698415153, + "loss": 2.8595, + "step": 15500 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017509869785739638, + "loss": 2.8139, + "step": 15510 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017506089413183505, + "loss": 2.7785, + "step": 15520 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017502306581985026, + "loss": 2.867, + "step": 15530 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017498521293383276, + "loss": 2.8092, + "step": 15540 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017494733548618137, + "loss": 2.7434, + "step": 15550 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017490943348930302, + "loss": 2.7996, + "step": 15560 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017487150695561255, + "loss": 2.8005, + "step": 15570 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017483355589753297, + "loss": 2.8484, + "step": 15580 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001747955803274952, + "loss": 2.79, + "step": 15590 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017475758025793834, + "loss": 2.7952, + "step": 15600 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017471955570130937, + "loss": 2.8099, + "step": 15610 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017468150667006334, + "loss": 2.7668, + "step": 15620 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017464343317666336, + "loss": 2.7918, + "step": 15630 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001746053352335805, + "loss": 2.8613, + "step": 15640 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017456721285329384, + "loss": 2.7719, + "step": 15650 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017452906604829054, + "loss": 2.8371, + "step": 15660 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017449089483106565, + "loss": 2.7978, + "step": 15670 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001744526992141223, + "loss": 2.7318, + "step": 15680 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017441447920997152, + "loss": 2.8229, + "step": 15690 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017437623483113244, + "loss": 2.7449, + "step": 15700 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017433796609013212, + "loss": 2.8586, + "step": 15710 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017429967299950562, + "loss": 2.8102, + "step": 15720 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001742613555717959, + "loss": 2.7687, + "step": 15730 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017422301381955402, + "loss": 2.6833, + "step": 15740 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017418464775533887, + "loss": 2.8388, + "step": 15750 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017414625739171736, + "loss": 2.7716, + "step": 15760 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017410784274126447, + "loss": 2.8253, + "step": 15770 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017406940381656296, + "loss": 2.6807, + "step": 15780 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017403094063020365, + "loss": 2.7787, + "step": 15790 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017399245319478523, + "loss": 2.7793, + "step": 15800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001739539415229144, + "loss": 2.7521, + "step": 15810 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017391540562720578, + "loss": 2.8091, + "step": 15820 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017387684552028188, + "loss": 2.7906, + "step": 15830 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017383826121477328, + "loss": 2.7664, + "step": 15840 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017379965272331827, + "loss": 2.8499, + "step": 15850 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017376102005856327, + "loss": 2.8617, + "step": 15860 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017372236323316248, + "loss": 2.7716, + "step": 15870 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017368368225977805, + "loss": 2.801, + "step": 15880 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017364497715108008, + "loss": 2.7974, + "step": 15890 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017360624791974653, + "loss": 2.7365, + "step": 15900 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001735674945784633, + "loss": 2.8028, + "step": 15910 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001735287171399241, + "loss": 2.8272, + "step": 15920 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017348991561683065, + "loss": 2.7809, + "step": 15930 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001734510900218925, + "loss": 2.8632, + "step": 15940 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001734122403678271, + "loss": 2.8332, + "step": 15950 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017337336666735977, + "loss": 2.805, + "step": 15960 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001733344689332237, + "loss": 2.8409, + "step": 15970 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017329554717815997, + "loss": 2.8026, + "step": 15980 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017325660141491748, + "loss": 2.7206, + "step": 15990 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017321763165625308, + "loss": 2.8222, + "step": 16000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001731786379149314, + "loss": 2.836, + "step": 16010 + }, + { + "epoch": 0.28, + "learning_rate": 0.000173139620203725, + "loss": 2.7957, + "step": 16020 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001731005785354142, + "loss": 2.737, + "step": 16030 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017306151292278724, + "loss": 2.7849, + "step": 16040 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001730224233786402, + "loss": 2.8386, + "step": 16050 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017298330991577694, + "loss": 2.9137, + "step": 16060 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017294417254700922, + "loss": 2.8477, + "step": 16070 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017290501128515662, + "loss": 2.7599, + "step": 16080 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017286582614304646, + "loss": 2.7999, + "step": 16090 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017282661713351403, + "loss": 2.8544, + "step": 16100 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017278738426940233, + "loss": 2.7988, + "step": 16110 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001727481275635622, + "loss": 2.8953, + "step": 16120 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017270884702885226, + "loss": 2.7554, + "step": 16130 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017266954267813902, + "loss": 2.7036, + "step": 16140 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017263021452429674, + "loss": 2.7541, + "step": 16150 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017259086258020746, + "loss": 2.831, + "step": 16160 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001725554255005192, + "loss": 2.8176, + "step": 16170 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017251602839047858, + "loss": 2.7917, + "step": 16180 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017247660752759295, + "loss": 2.8281, + "step": 16190 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017243716292477478, + "loss": 2.7732, + "step": 16200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017239769459494425, + "loss": 2.8012, + "step": 16210 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001723582025510293, + "loss": 2.7527, + "step": 16220 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017231868680596572, + "loss": 2.7483, + "step": 16230 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017227914737269698, + "loss": 2.7863, + "step": 16240 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017223958426417432, + "loss": 2.8457, + "step": 16250 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017219999749335682, + "loss": 2.7804, + "step": 16260 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017216038707321122, + "loss": 2.9018, + "step": 16270 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017212075301671197, + "loss": 2.7788, + "step": 16280 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017208109533684143, + "loss": 2.805, + "step": 16290 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017204141404658952, + "loss": 2.7992, + "step": 16300 + }, + { + "epoch": 0.28, + "learning_rate": 0.000172001709158954, + "loss": 2.7677, + "step": 16310 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001719619806869403, + "loss": 2.82, + "step": 16320 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017192222864356166, + "loss": 2.8026, + "step": 16330 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001718824530418389, + "loss": 2.7741, + "step": 16340 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001718426538948007, + "loss": 2.7807, + "step": 16350 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017180283121548336, + "loss": 2.754, + "step": 16360 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017176298501693092, + "loss": 2.7757, + "step": 16370 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017172311531219516, + "loss": 2.8049, + "step": 16380 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017168322211433547, + "loss": 2.8367, + "step": 16390 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017164330543641905, + "loss": 2.754, + "step": 16400 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017160336529152063, + "loss": 2.8085, + "step": 16410 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017156340169272282, + "loss": 2.805, + "step": 16420 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001715234146531158, + "loss": 2.8988, + "step": 16430 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017148340418579743, + "loss": 2.8403, + "step": 16440 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017144337030387325, + "loss": 2.8547, + "step": 16450 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017140331302045652, + "loss": 2.8366, + "step": 16460 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017136323234866803, + "loss": 2.7689, + "step": 16470 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017132312830163646, + "loss": 2.8008, + "step": 16480 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001712830008924979, + "loss": 2.8867, + "step": 16490 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017124285013439628, + "loss": 2.7388, + "step": 16500 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017120267604048305, + "loss": 2.7848, + "step": 16510 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001711624786239174, + "loss": 2.6922, + "step": 16520 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001711222578978661, + "loss": 2.7744, + "step": 16530 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001710820138755035, + "loss": 2.8596, + "step": 16540 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017104174657001176, + "loss": 2.8308, + "step": 16550 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017100145599458053, + "loss": 2.7423, + "step": 16560 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017096114216240706, + "loss": 2.9013, + "step": 16570 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017092080508669633, + "loss": 2.8298, + "step": 16580 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001708804447806608, + "loss": 2.8333, + "step": 16590 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001708400612575207, + "loss": 2.8003, + "step": 16600 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017079965453050373, + "loss": 2.8075, + "step": 16610 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001707592246128452, + "loss": 2.7465, + "step": 16620 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001707187715177881, + "loss": 2.7346, + "step": 16630 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017067829525858294, + "loss": 2.8067, + "step": 16640 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001706377958484879, + "loss": 2.7989, + "step": 16650 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017059727330076858, + "loss": 2.8021, + "step": 16660 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017055672762869835, + "loss": 2.7519, + "step": 16670 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017051615884555802, + "loss": 2.826, + "step": 16680 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017047556696463606, + "loss": 2.8159, + "step": 16690 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001704349519992284, + "loss": 2.7786, + "step": 16700 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017039431396263867, + "loss": 2.8358, + "step": 16710 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017035365286817794, + "loss": 2.8248, + "step": 16720 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001703129687291649, + "loss": 2.8596, + "step": 16730 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017027226155892573, + "loss": 2.8337, + "step": 16740 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001702315313707942, + "loss": 2.813, + "step": 16750 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001701907781781116, + "loss": 2.817, + "step": 16760 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017015000199422677, + "loss": 2.8936, + "step": 16770 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017010920283249607, + "loss": 2.8002, + "step": 16780 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017006838070628342, + "loss": 2.778, + "step": 16790 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001700275356289602, + "loss": 2.8323, + "step": 16800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001699866676139053, + "loss": 2.8026, + "step": 16810 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016994577667450524, + "loss": 2.8119, + "step": 16820 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016990486282415394, + "loss": 2.771, + "step": 16830 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016986392607625284, + "loss": 2.7806, + "step": 16840 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001698229664442109, + "loss": 2.7357, + "step": 16850 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001697819839414446, + "loss": 2.7574, + "step": 16860 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016974097858137786, + "loss": 2.8391, + "step": 16870 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001696999503774421, + "loss": 2.7713, + "step": 16880 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016965889934307624, + "loss": 2.7782, + "step": 16890 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016961782549172668, + "loss": 2.7961, + "step": 16900 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016957672883684727, + "loss": 2.7787, + "step": 16910 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001695356093918994, + "loss": 2.7742, + "step": 16920 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001694944671703518, + "loss": 2.7579, + "step": 16930 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016945330218568069, + "loss": 2.7735, + "step": 16940 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001694121144513699, + "loss": 2.7466, + "step": 16950 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016937090398091057, + "loss": 2.8751, + "step": 16960 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016932967078780125, + "loss": 2.837, + "step": 16970 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016928841488554809, + "loss": 2.756, + "step": 16980 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016924713628766448, + "loss": 2.7481, + "step": 16990 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001692058350076714, + "loss": 2.8744, + "step": 17000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016916451105909722, + "loss": 2.7689, + "step": 17010 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016912316445547775, + "loss": 2.8006, + "step": 17020 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016908179521035613, + "loss": 2.7571, + "step": 17030 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016904040333728303, + "loss": 2.7424, + "step": 17040 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016899898884981647, + "loss": 2.8445, + "step": 17050 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016895755176152188, + "loss": 2.8398, + "step": 17060 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016891609208597211, + "loss": 2.8671, + "step": 17070 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001688746098367474, + "loss": 2.8487, + "step": 17080 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001688331050274354, + "loss": 2.8736, + "step": 17090 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016879157767163113, + "loss": 2.7988, + "step": 17100 + }, + { + "epoch": 0.3, + "learning_rate": 0.000168750027782937, + "loss": 2.8284, + "step": 17110 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001687084553749628, + "loss": 2.8394, + "step": 17120 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001686668604613257, + "loss": 2.7947, + "step": 17130 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016862524305565025, + "loss": 2.8229, + "step": 17140 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001685836031715683, + "loss": 2.8209, + "step": 17150 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016854194082271924, + "loss": 2.7725, + "step": 17160 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016850025602274958, + "loss": 2.7916, + "step": 17170 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016845854878531338, + "loss": 2.8027, + "step": 17180 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001684168191240719, + "loss": 2.783, + "step": 17190 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016837506705269387, + "loss": 2.7411, + "step": 17200 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016833329258485527, + "loss": 2.8206, + "step": 17210 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016829149573423954, + "loss": 2.7904, + "step": 17220 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016824967651453727, + "loss": 2.777, + "step": 17230 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016820783493944652, + "loss": 2.747, + "step": 17240 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016816597102267257, + "loss": 2.7883, + "step": 17250 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016812408477792816, + "loss": 2.7897, + "step": 17260 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001680821762189332, + "loss": 2.7149, + "step": 17270 + }, + { + "epoch": 0.3, + "learning_rate": 0.000168040245359415, + "loss": 2.7443, + "step": 17280 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016799829221310808, + "loss": 2.7742, + "step": 17290 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016795631679375434, + "loss": 2.7295, + "step": 17300 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016791431911510301, + "loss": 2.804, + "step": 17310 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016787229919091047, + "loss": 2.7104, + "step": 17320 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016783025703494058, + "loss": 2.8249, + "step": 17330 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016778819266096432, + "loss": 2.7639, + "step": 17340 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016774610608275996, + "loss": 2.8273, + "step": 17350 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016770399731411316, + "loss": 2.787, + "step": 17360 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016766186636881671, + "loss": 2.8083, + "step": 17370 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001676197132606708, + "loss": 2.8092, + "step": 17380 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016757753800348275, + "loss": 2.7616, + "step": 17390 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016753534061106722, + "loss": 2.7979, + "step": 17400 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016749312109724606, + "loss": 2.8149, + "step": 17410 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001674508794758485, + "loss": 2.7651, + "step": 17420 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016740861576071075, + "loss": 2.8121, + "step": 17430 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016736632996567656, + "loss": 2.8179, + "step": 17440 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001673240221045967, + "loss": 2.7966, + "step": 17450 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016728169219132927, + "loss": 2.7597, + "step": 17460 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001672393402397395, + "loss": 2.7775, + "step": 17470 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001671969662637, + "loss": 2.7973, + "step": 17480 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016715457027709042, + "loss": 2.8243, + "step": 17490 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001671121522937977, + "loss": 2.7233, + "step": 17500 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016706971232771601, + "loss": 2.7947, + "step": 17510 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001670272503927467, + "loss": 2.7361, + "step": 17520 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016698476650279823, + "loss": 2.7608, + "step": 17530 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016694226067178643, + "loss": 2.8324, + "step": 17540 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001668997329136341, + "loss": 2.746, + "step": 17550 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016685718324227145, + "loss": 2.8628, + "step": 17560 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016681461167163567, + "loss": 2.7929, + "step": 17570 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016677201821567125, + "loss": 2.8012, + "step": 17580 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016672940288832978, + "loss": 2.75, + "step": 17590 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016668676570357006, + "loss": 2.746, + "step": 17600 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016664410667535803, + "loss": 2.7613, + "step": 17610 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016660142581766674, + "loss": 2.7674, + "step": 17620 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001665587231444765, + "loss": 2.8047, + "step": 17630 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016651599866977467, + "loss": 2.7876, + "step": 17640 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001664732524075558, + "loss": 2.7934, + "step": 17650 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001664304843718215, + "loss": 2.7955, + "step": 17660 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016638769457658062, + "loss": 2.7478, + "step": 17670 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001663448830358491, + "loss": 2.8141, + "step": 17680 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016630204976364994, + "loss": 2.8292, + "step": 17690 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016625919477401336, + "loss": 2.7946, + "step": 17700 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001662163180809766, + "loss": 2.7482, + "step": 17710 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016617341969858406, + "loss": 2.7981, + "step": 17720 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016613049964088724, + "loss": 2.8179, + "step": 17730 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016608755792194474, + "loss": 2.7227, + "step": 17740 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016604459455582226, + "loss": 2.7816, + "step": 17750 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016600160955659255, + "loss": 2.672, + "step": 17760 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001659586029383355, + "loss": 2.7563, + "step": 17770 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016591557471513807, + "loss": 2.6879, + "step": 17780 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016587252490109423, + "loss": 2.8775, + "step": 17790 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016582945351030517, + "loss": 2.7716, + "step": 17800 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016578636055687896, + "loss": 2.8443, + "step": 17810 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001657432460549309, + "loss": 2.7336, + "step": 17820 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001657001100185832, + "loss": 2.7399, + "step": 17830 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016565695246196528, + "loss": 2.7403, + "step": 17840 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001656137733992135, + "loss": 2.7536, + "step": 17850 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016557057284447124, + "loss": 2.7578, + "step": 17860 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016552735081188904, + "loss": 2.7914, + "step": 17870 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016548410731562438, + "loss": 2.7923, + "step": 17880 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001654408423698418, + "loss": 2.7951, + "step": 17890 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016539755598871288, + "loss": 2.7983, + "step": 17900 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016535424818641618, + "loss": 2.8124, + "step": 17910 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016531091897713736, + "loss": 2.7834, + "step": 17920 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016526756837506894, + "loss": 2.7905, + "step": 17930 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001652241963944106, + "loss": 2.7818, + "step": 17940 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016518080304936893, + "loss": 2.693, + "step": 17950 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001651373883541576, + "loss": 2.7837, + "step": 17960 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001650939523229972, + "loss": 2.7306, + "step": 17970 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016505049497011535, + "loss": 2.7726, + "step": 17980 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016500701630974663, + "loss": 2.7473, + "step": 17990 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016496351635613256, + "loss": 2.7496, + "step": 18000 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016491999512352175, + "loss": 2.8323, + "step": 18010 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016487645262616967, + "loss": 2.7375, + "step": 18020 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001648328888783388, + "loss": 2.8327, + "step": 18030 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001647893038942986, + "loss": 2.7846, + "step": 18040 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016474569768832543, + "loss": 2.8807, + "step": 18050 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001647020702747027, + "loss": 2.7446, + "step": 18060 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001646584216677206, + "loss": 2.8671, + "step": 18070 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001646147518816765, + "loss": 2.7568, + "step": 18080 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016457106093087443, + "loss": 2.8826, + "step": 18090 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016452734882962554, + "loss": 2.6848, + "step": 18100 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016448361559224793, + "loss": 2.7694, + "step": 18110 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016443986123306647, + "loss": 2.7647, + "step": 18120 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016439608576641304, + "loss": 2.8478, + "step": 18130 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001643522892066265, + "loss": 2.6932, + "step": 18140 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016430847156805246, + "loss": 2.8351, + "step": 18150 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016426463286504359, + "loss": 2.7583, + "step": 18160 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016422077311195935, + "loss": 2.742, + "step": 18170 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016417689232316613, + "loss": 2.7747, + "step": 18180 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016413299051303723, + "loss": 2.8436, + "step": 18190 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016408906769595281, + "loss": 2.7222, + "step": 18200 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001640451238863, + "loss": 2.6685, + "step": 18210 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016400115909847256, + "loss": 2.7496, + "step": 18220 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016395717334687145, + "loss": 2.8089, + "step": 18230 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016391316664590423, + "loss": 2.8253, + "step": 18240 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016386913900998552, + "loss": 2.7025, + "step": 18250 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016382509045353665, + "loss": 2.7637, + "step": 18260 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016378102099098586, + "loss": 2.8046, + "step": 18270 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016373693063676822, + "loss": 2.7284, + "step": 18280 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016369281940532571, + "loss": 2.8653, + "step": 18290 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016364868731110705, + "loss": 2.7966, + "step": 18300 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016360453436856787, + "loss": 2.752, + "step": 18310 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016356036059217058, + "loss": 2.7609, + "step": 18320 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016351616599638447, + "loss": 2.7556, + "step": 18330 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016347195059568558, + "loss": 2.7536, + "step": 18340 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016342771440455679, + "loss": 2.7187, + "step": 18350 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016338345743748786, + "loss": 2.7796, + "step": 18360 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016333917970897522, + "loss": 2.8558, + "step": 18370 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016329488123352222, + "loss": 2.8219, + "step": 18380 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016325056202563893, + "loss": 2.7469, + "step": 18390 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016320622209984226, + "loss": 2.7916, + "step": 18400 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016316186147065587, + "loss": 2.8748, + "step": 18410 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016311748015261025, + "loss": 2.717, + "step": 18420 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016307307816024262, + "loss": 2.6727, + "step": 18430 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016302865550809695, + "loss": 2.7932, + "step": 18440 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001629842122107241, + "loss": 2.7418, + "step": 18450 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016293974828268152, + "loss": 2.8404, + "step": 18460 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016289526373853357, + "loss": 2.7298, + "step": 18470 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016285075859285124, + "loss": 2.7675, + "step": 18480 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016280623286021238, + "loss": 2.7687, + "step": 18490 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016276168655520145, + "loss": 2.718, + "step": 18500 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001627171196924098, + "loss": 2.7862, + "step": 18510 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016267253228643543, + "loss": 2.8187, + "step": 18520 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016262792435188308, + "loss": 2.7027, + "step": 18530 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016258329590336417, + "loss": 2.796, + "step": 18540 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016253864695549695, + "loss": 2.8032, + "step": 18550 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001624939775229063, + "loss": 2.7271, + "step": 18560 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001624492876202238, + "loss": 2.749, + "step": 18570 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016240457726208774, + "loss": 2.7137, + "step": 18580 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001623598464631432, + "loss": 2.7466, + "step": 18590 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016231509523804185, + "loss": 2.7104, + "step": 18600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001622703236014421, + "loss": 2.7345, + "step": 18610 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016222553156800906, + "loss": 2.7403, + "step": 18620 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016218071915241443, + "loss": 2.8228, + "step": 18630 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016213588636933672, + "loss": 2.7333, + "step": 18640 + }, + { + "epoch": 0.32, + "learning_rate": 0.000162091033233461, + "loss": 2.7432, + "step": 18650 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016204615975947908, + "loss": 2.7037, + "step": 18660 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001620012659620894, + "loss": 2.7618, + "step": 18670 + }, + { + "epoch": 0.32, + "learning_rate": 0.000161956351855997, + "loss": 2.7488, + "step": 18680 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016191141745591374, + "loss": 2.7264, + "step": 18690 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016186646277655794, + "loss": 2.7886, + "step": 18700 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016182148783265465, + "loss": 2.7069, + "step": 18710 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016177649263893556, + "loss": 2.7515, + "step": 18720 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016173147721013903, + "loss": 2.8095, + "step": 18730 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001616864415610099, + "loss": 2.7623, + "step": 18740 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016164138570629982, + "loss": 2.7688, + "step": 18750 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001615963096607669, + "loss": 2.7323, + "step": 18760 + }, + { + "epoch": 0.32, + "learning_rate": 0.000161551213439176, + "loss": 2.8057, + "step": 18770 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001615060970562985, + "loss": 2.8108, + "step": 18780 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001614609605269124, + "loss": 2.7498, + "step": 18790 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001614158038658023, + "loss": 2.73, + "step": 18800 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016137062708775944, + "loss": 2.8167, + "step": 18810 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016132543020758159, + "loss": 2.74, + "step": 18820 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001612802132400731, + "loss": 2.7977, + "step": 18830 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016123497620004496, + "loss": 2.771, + "step": 18840 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016118971910231467, + "loss": 2.7887, + "step": 18850 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001611444419617063, + "loss": 2.7498, + "step": 18860 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016109914479305065, + "loss": 2.686, + "step": 18870 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001610538276111848, + "loss": 2.6906, + "step": 18880 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016100849043095258, + "loss": 2.8141, + "step": 18890 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016096313326720437, + "loss": 2.695, + "step": 18900 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016091775613479695, + "loss": 2.7656, + "step": 18910 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016087235904859384, + "loss": 2.7393, + "step": 18920 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016082694202346488, + "loss": 2.7764, + "step": 18930 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016078150507428668, + "loss": 2.7309, + "step": 18940 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016073604821594212, + "loss": 2.6834, + "step": 18950 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016069057146332085, + "loss": 2.7424, + "step": 18960 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016064507483131885, + "loss": 2.7592, + "step": 18970 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016059955833483872, + "loss": 2.7035, + "step": 18980 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016055402198878946, + "loss": 2.6945, + "step": 18990 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016050846580808673, + "loss": 2.8646, + "step": 19000 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016046288980765252, + "loss": 2.8323, + "step": 19010 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001604172940024154, + "loss": 2.6545, + "step": 19020 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016037167840731047, + "loss": 2.6773, + "step": 19030 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001603260430372792, + "loss": 2.7939, + "step": 19040 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016028038790726957, + "loss": 2.7909, + "step": 19050 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001602347130322361, + "loss": 2.7868, + "step": 19060 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016018901842713976, + "loss": 2.8165, + "step": 19070 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016014330410694792, + "loss": 2.7664, + "step": 19080 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016009757008663444, + "loss": 2.7246, + "step": 19090 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016005181638117965, + "loss": 2.7647, + "step": 19100 + }, + { + "epoch": 0.33, + "learning_rate": 0.00016000604300557032, + "loss": 2.7532, + "step": 19110 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015996024997479963, + "loss": 2.7743, + "step": 19120 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015991443730386726, + "loss": 2.6578, + "step": 19130 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015986860500777925, + "loss": 2.7618, + "step": 19140 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015982275310154814, + "loss": 2.7006, + "step": 19150 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001597768816001929, + "loss": 2.8215, + "step": 19160 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001597309905187388, + "loss": 2.7073, + "step": 19170 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001596850798722176, + "loss": 2.8101, + "step": 19180 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015963914967566755, + "loss": 2.787, + "step": 19190 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015959319994413316, + "loss": 2.7274, + "step": 19200 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015954723069266545, + "loss": 2.8307, + "step": 19210 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015950124193632174, + "loss": 2.6689, + "step": 19220 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015945523369016582, + "loss": 2.6937, + "step": 19230 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015940920596926782, + "loss": 2.773, + "step": 19240 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015936315878870426, + "loss": 2.7769, + "step": 19250 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015931709216355803, + "loss": 2.7217, + "step": 19260 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001592710061089184, + "loss": 2.7107, + "step": 19270 + }, + { + "epoch": 0.33, + "learning_rate": 0.000159224900639881, + "loss": 2.7202, + "step": 19280 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001591787757715478, + "loss": 2.7545, + "step": 19290 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015913263151902718, + "loss": 2.6773, + "step": 19300 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015908646789743376, + "loss": 2.7541, + "step": 19310 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001590402849218886, + "loss": 2.7656, + "step": 19320 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015899408260751914, + "loss": 2.8368, + "step": 19330 + }, + { + "epoch": 0.33, + "learning_rate": 0.000158947860969459, + "loss": 2.7538, + "step": 19340 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015890162002284825, + "loss": 2.7658, + "step": 19350 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015885535978283328, + "loss": 2.7537, + "step": 19360 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015880908026456677, + "loss": 2.7521, + "step": 19370 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015876278148320764, + "loss": 2.856, + "step": 19380 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015871646345392128, + "loss": 2.7736, + "step": 19390 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015867012619187926, + "loss": 2.6862, + "step": 19400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001586237697122595, + "loss": 2.7895, + "step": 19410 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001585773940302462, + "loss": 2.7902, + "step": 19420 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001585309991610299, + "loss": 2.7456, + "step": 19430 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001584845851198073, + "loss": 2.8468, + "step": 19440 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015843815192178153, + "loss": 2.6724, + "step": 19450 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001583916995821619, + "loss": 2.6813, + "step": 19460 + }, + { + "epoch": 0.34, + "learning_rate": 0.000158345228116164, + "loss": 2.7364, + "step": 19470 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015829873753900973, + "loss": 2.7707, + "step": 19480 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015825222786592717, + "loss": 2.8153, + "step": 19490 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015820569911215076, + "loss": 2.782, + "step": 19500 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001581591512929211, + "loss": 2.7578, + "step": 19510 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015811258442348508, + "loss": 2.7634, + "step": 19520 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015806599851909582, + "loss": 2.7278, + "step": 19530 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015801939359501267, + "loss": 2.766, + "step": 19540 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015797276966650125, + "loss": 2.7659, + "step": 19550 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015792612674883332, + "loss": 2.7484, + "step": 19560 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001578794648572869, + "loss": 2.7803, + "step": 19570 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001578327840071463, + "loss": 2.7983, + "step": 19580 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015778608421370189, + "loss": 2.7684, + "step": 19590 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015773936549225037, + "loss": 2.7221, + "step": 19600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001576926278580946, + "loss": 2.7229, + "step": 19610 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015764587132654373, + "loss": 2.7506, + "step": 19620 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015759909591291283, + "loss": 2.8404, + "step": 19630 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001575523016325234, + "loss": 2.7427, + "step": 19640 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015750548850070308, + "loss": 2.7382, + "step": 19650 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015745865653278562, + "loss": 2.7698, + "step": 19660 + }, + { + "epoch": 0.34, + "learning_rate": 0.000157411805744111, + "loss": 2.6701, + "step": 19670 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001573649361500253, + "loss": 2.6558, + "step": 19680 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015731804776588078, + "loss": 2.7024, + "step": 19690 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015727114060703598, + "loss": 2.7297, + "step": 19700 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001572242146888554, + "loss": 2.6867, + "step": 19710 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015717727002670975, + "loss": 2.7126, + "step": 19720 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015713030663597595, + "loss": 2.7263, + "step": 19730 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015708332453203699, + "loss": 2.7076, + "step": 19740 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015703632373028197, + "loss": 2.7176, + "step": 19750 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015698930424610613, + "loss": 2.6239, + "step": 19760 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015694226609491092, + "loss": 2.7948, + "step": 19770 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015689520929210377, + "loss": 2.6523, + "step": 19780 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001568481338530983, + "loss": 2.7988, + "step": 19790 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001568010397933142, + "loss": 2.7948, + "step": 19800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001567539271281773, + "loss": 2.7339, + "step": 19810 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015670679587311947, + "loss": 2.7298, + "step": 19820 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015665964604357868, + "loss": 2.8211, + "step": 19830 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015661247765499903, + "loss": 2.7849, + "step": 19840 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015656529072283069, + "loss": 2.7536, + "step": 19850 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015651808526252983, + "loss": 2.7694, + "step": 19860 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015647086128955871, + "loss": 2.7349, + "step": 19870 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001564236188193858, + "loss": 2.7772, + "step": 19880 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015637635786748546, + "loss": 2.6842, + "step": 19890 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015632907844933812, + "loss": 2.665, + "step": 19900 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015628178058043037, + "loss": 2.662, + "step": 19910 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001562344642762547, + "loss": 2.8948, + "step": 19920 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015618712955230976, + "loss": 2.7319, + "step": 19930 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015613977642410013, + "loss": 2.7757, + "step": 19940 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015609240490713653, + "loss": 2.8242, + "step": 19950 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015604501501693559, + "loss": 2.6964, + "step": 19960 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015599760676902004, + "loss": 2.8491, + "step": 19970 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001559501801789186, + "loss": 2.6541, + "step": 19980 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015590273526216598, + "loss": 2.8059, + "step": 19990 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015585527203430295, + "loss": 2.7107, + "step": 20000 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015580779051087615, + "loss": 2.7319, + "step": 20010 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001557602907074384, + "loss": 2.7049, + "step": 20020 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015571277263954828, + "loss": 2.69, + "step": 20030 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015566523632277058, + "loss": 2.7461, + "step": 20040 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001556176817726759, + "loss": 2.704, + "step": 20050 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015557010900484096, + "loss": 2.7858, + "step": 20060 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001555225180348483, + "loss": 2.7577, + "step": 20070 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001554749088782865, + "loss": 2.7188, + "step": 20080 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015542728155075003, + "loss": 2.7508, + "step": 20090 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015537963606783946, + "loss": 2.7614, + "step": 20100 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001553319724451612, + "loss": 2.7492, + "step": 20110 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015528429069832752, + "loss": 2.7263, + "step": 20120 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001552365908429568, + "loss": 2.8144, + "step": 20130 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015518887289467327, + "loss": 2.687, + "step": 20140 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015514113686910708, + "loss": 2.7554, + "step": 20150 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001550933827818943, + "loss": 2.7284, + "step": 20160 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015504561064867692, + "loss": 2.6662, + "step": 20170 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001549978204851029, + "loss": 2.7311, + "step": 20180 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015495001230682597, + "loss": 2.7394, + "step": 20190 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001549021861295059, + "loss": 2.743, + "step": 20200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001548543419688083, + "loss": 2.7293, + "step": 20210 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015480647984040467, + "loss": 2.7846, + "step": 20220 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001547585997599723, + "loss": 2.6246, + "step": 20230 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001547107017431946, + "loss": 2.6712, + "step": 20240 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015466278580576065, + "loss": 2.7177, + "step": 20250 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015461485196336544, + "loss": 2.6898, + "step": 20260 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015456690023170987, + "loss": 2.7407, + "step": 20270 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015451893062650064, + "loss": 2.7841, + "step": 20280 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001544709431634504, + "loss": 2.8458, + "step": 20290 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015442293785827756, + "loss": 2.7177, + "step": 20300 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015437491472670636, + "loss": 2.7203, + "step": 20310 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015432687378446697, + "loss": 2.6715, + "step": 20320 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015427881504729535, + "loss": 2.7098, + "step": 20330 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001542307385309333, + "loss": 2.7528, + "step": 20340 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015418264425112835, + "loss": 2.721, + "step": 20350 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015413453222363403, + "loss": 2.7958, + "step": 20360 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001540864024642095, + "loss": 2.7251, + "step": 20370 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015403825498861985, + "loss": 2.6831, + "step": 20380 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015399008981263593, + "loss": 2.7772, + "step": 20390 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015394190695203436, + "loss": 2.784, + "step": 20400 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001538937064225976, + "loss": 2.6635, + "step": 20410 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015384548824011392, + "loss": 2.6868, + "step": 20420 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015379725242037725, + "loss": 2.7667, + "step": 20430 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015374899897918742, + "loss": 2.7311, + "step": 20440 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015370072793235, + "loss": 2.6391, + "step": 20450 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015365243929567633, + "loss": 2.7, + "step": 20460 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015360413308498347, + "loss": 2.6935, + "step": 20470 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015355580931609425, + "loss": 2.7163, + "step": 20480 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015350746800483735, + "loss": 2.8145, + "step": 20490 + }, + { + "epoch": 0.35, + "learning_rate": 0.000153459109167047, + "loss": 2.7342, + "step": 20500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015341073281856336, + "loss": 2.6504, + "step": 20510 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015336233897523226, + "loss": 2.8228, + "step": 20520 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001533187695712408, + "loss": 2.6675, + "step": 20530 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001532703425313752, + "loss": 2.7795, + "step": 20540 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015322189804264734, + "loss": 2.8105, + "step": 20550 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015317343612092542, + "loss": 2.718, + "step": 20560 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015312980549928539, + "loss": 2.7152, + "step": 20570 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015308131049861188, + "loss": 2.7477, + "step": 20580 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015303279811099414, + "loss": 2.7541, + "step": 20590 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015298426835232257, + "loss": 2.7632, + "step": 20600 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015293572123849326, + "loss": 2.7326, + "step": 20610 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015288715678540794, + "loss": 2.7914, + "step": 20620 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015283857500897406, + "loss": 2.7749, + "step": 20630 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015278997592510466, + "loss": 2.7378, + "step": 20640 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015274135954971863, + "loss": 2.7256, + "step": 20650 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015269272589874032, + "loss": 2.7346, + "step": 20660 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015264407498809986, + "loss": 2.7561, + "step": 20670 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015259540683373304, + "loss": 2.7157, + "step": 20680 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015254672145158121, + "loss": 2.7591, + "step": 20690 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015249801885759142, + "loss": 2.7036, + "step": 20700 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001524492990677164, + "loss": 2.6785, + "step": 20710 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001524005620979144, + "loss": 2.6594, + "step": 20720 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001523518079641494, + "loss": 2.7488, + "step": 20730 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015230303668239097, + "loss": 2.7353, + "step": 20740 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015225424826861424, + "loss": 2.6718, + "step": 20750 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001522054427388001, + "loss": 2.6583, + "step": 20760 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015215662010893482, + "loss": 2.7337, + "step": 20770 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015210778039501053, + "loss": 2.7845, + "step": 20780 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001520589236130247, + "loss": 2.6942, + "step": 20790 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015201004977898066, + "loss": 2.8251, + "step": 20800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001519611589088871, + "loss": 2.7954, + "step": 20810 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015191225101875833, + "loss": 2.6657, + "step": 20820 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015186332612461434, + "loss": 2.6709, + "step": 20830 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015181438424248061, + "loss": 2.7979, + "step": 20840 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015176542538838825, + "loss": 2.7096, + "step": 20850 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015171644957837378, + "loss": 2.6988, + "step": 20860 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015166745682847945, + "loss": 2.695, + "step": 20870 + }, + { + "epoch": 0.36, + "learning_rate": 0.000151618447154753, + "loss": 2.6938, + "step": 20880 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015156942057324764, + "loss": 2.7298, + "step": 20890 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015152037710002217, + "loss": 2.744, + "step": 20900 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015147131675114102, + "loss": 2.7394, + "step": 20910 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015142223954267396, + "loss": 2.7949, + "step": 20920 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001513731454906964, + "loss": 2.7543, + "step": 20930 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015132403461128927, + "loss": 2.6816, + "step": 20940 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015127490692053893, + "loss": 2.7787, + "step": 20950 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001512257624345374, + "loss": 2.7848, + "step": 20960 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015117660116938206, + "loss": 2.8004, + "step": 20970 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001511274231411758, + "loss": 2.7383, + "step": 20980 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015107822836602706, + "loss": 2.7191, + "step": 20990 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001510290168600498, + "loss": 2.813, + "step": 21000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015097978863936327, + "loss": 2.6981, + "step": 21010 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015093054372009238, + "loss": 2.8092, + "step": 21020 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015088128211836747, + "loss": 2.7569, + "step": 21030 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015083200385032432, + "loss": 2.7536, + "step": 21040 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015078270893210415, + "loss": 2.8101, + "step": 21050 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015073339737985372, + "loss": 2.6394, + "step": 21060 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015068406920972516, + "loss": 2.7755, + "step": 21070 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015063472443787597, + "loss": 2.7336, + "step": 21080 + }, + { + "epoch": 0.36, + "learning_rate": 0.00015058536308046932, + "loss": 2.74, + "step": 21090 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015053598515367357, + "loss": 2.716, + "step": 21100 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015048659067366265, + "loss": 2.7168, + "step": 21110 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015043717965661587, + "loss": 2.7088, + "step": 21120 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015038775211871793, + "loss": 2.7301, + "step": 21130 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015033830807615903, + "loss": 2.6422, + "step": 21140 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015028884754513463, + "loss": 2.7346, + "step": 21150 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001502393705418458, + "loss": 2.732, + "step": 21160 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015018987708249876, + "loss": 2.7609, + "step": 21170 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001501403671833053, + "loss": 2.714, + "step": 21180 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015009084086048256, + "loss": 2.6448, + "step": 21190 + }, + { + "epoch": 0.37, + "learning_rate": 0.00015004129813025297, + "loss": 2.8385, + "step": 21200 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014999173900884447, + "loss": 2.7753, + "step": 21210 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014994216351249025, + "loss": 2.6628, + "step": 21220 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001498925716574289, + "loss": 2.7229, + "step": 21230 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014984296345990446, + "loss": 2.7417, + "step": 21240 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001497933389361662, + "loss": 2.6896, + "step": 21250 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014974369810246877, + "loss": 2.7438, + "step": 21260 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001496940409750722, + "loss": 2.7229, + "step": 21270 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014964436757024184, + "loss": 2.684, + "step": 21280 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001495946779042483, + "loss": 2.7428, + "step": 21290 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014954497199336765, + "loss": 2.747, + "step": 21300 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014949524985388121, + "loss": 2.6723, + "step": 21310 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014944551150207556, + "loss": 2.7017, + "step": 21320 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014939575695424272, + "loss": 2.6889, + "step": 21330 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001493459862266799, + "loss": 2.746, + "step": 21340 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014929619933568965, + "loss": 2.6791, + "step": 21350 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014924639629757984, + "loss": 2.7519, + "step": 21360 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014919657712866358, + "loss": 2.6973, + "step": 21370 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001491467418452593, + "loss": 2.7623, + "step": 21380 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014909689046369075, + "loss": 2.7513, + "step": 21390 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001490470230002868, + "loss": 2.6854, + "step": 21400 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014899713947138173, + "loss": 2.8011, + "step": 21410 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014894723989331511, + "loss": 2.7108, + "step": 21420 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014889732428243163, + "loss": 2.7174, + "step": 21430 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001488473926550813, + "loss": 2.7685, + "step": 21440 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014879744502761938, + "loss": 2.7387, + "step": 21450 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001487474814164064, + "loss": 2.6919, + "step": 21460 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014869750183780803, + "loss": 2.6923, + "step": 21470 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014864750630819528, + "loss": 2.7323, + "step": 21480 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014859749484394435, + "loss": 2.6721, + "step": 21490 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001485474674614366, + "loss": 2.6804, + "step": 21500 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014849742417705867, + "loss": 2.8032, + "step": 21510 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001484473650072024, + "loss": 2.6757, + "step": 21520 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014839728996826482, + "loss": 2.7323, + "step": 21530 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014834719907664819, + "loss": 2.6579, + "step": 21540 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014829709234875986, + "loss": 2.6809, + "step": 21550 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014824696980101246, + "loss": 2.7687, + "step": 21560 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014819683144982386, + "loss": 2.6952, + "step": 21570 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014814667731161696, + "loss": 2.8125, + "step": 21580 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001480965074028199, + "loss": 2.7814, + "step": 21590 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014804632173986596, + "loss": 2.7141, + "step": 21600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001479961203391937, + "loss": 2.6923, + "step": 21610 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014794590321724665, + "loss": 2.6761, + "step": 21620 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014789567039047365, + "loss": 2.7466, + "step": 21630 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014784542187532855, + "loss": 2.6914, + "step": 21640 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014779515768827047, + "loss": 2.7441, + "step": 21650 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014774487784576353, + "loss": 2.6901, + "step": 21660 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014769458236427708, + "loss": 2.6345, + "step": 21670 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014764427126028558, + "loss": 2.7547, + "step": 21680 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014759394455026855, + "loss": 2.6468, + "step": 21690 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014754360225071063, + "loss": 2.8358, + "step": 21700 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014749324437810166, + "loss": 2.784, + "step": 21710 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014744287094893647, + "loss": 2.7218, + "step": 21720 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014739248197971505, + "loss": 2.6992, + "step": 21730 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014734207748694244, + "loss": 2.6977, + "step": 21740 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001472916574871288, + "loss": 2.6616, + "step": 21750 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014724122199678935, + "loss": 2.755, + "step": 21760 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001471907710324444, + "loss": 2.8135, + "step": 21770 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014714030461061928, + "loss": 2.8107, + "step": 21780 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014708982274784448, + "loss": 2.7428, + "step": 21790 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014703932546065543, + "loss": 2.6953, + "step": 21800 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014698881276559264, + "loss": 2.6936, + "step": 21810 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001469382846792018, + "loss": 2.7564, + "step": 21820 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001468877412180335, + "loss": 2.7895, + "step": 21830 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014683718239864334, + "loss": 2.7478, + "step": 21840 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014678660823759206, + "loss": 2.7409, + "step": 21850 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014673601875144545, + "loss": 2.7318, + "step": 21860 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001466854139567741, + "loss": 2.76, + "step": 21870 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014663479387015392, + "loss": 2.7723, + "step": 21880 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014658415850816553, + "loss": 2.7567, + "step": 21890 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001465335078873948, + "loss": 2.643, + "step": 21900 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014648284202443246, + "loss": 2.7356, + "step": 21910 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014643216093587427, + "loss": 2.635, + "step": 21920 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014638146463832093, + "loss": 2.7221, + "step": 21930 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014633075314837826, + "loss": 2.6714, + "step": 21940 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001462800264826569, + "loss": 2.6765, + "step": 21950 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001462292846577725, + "loss": 2.6695, + "step": 21960 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014617852769034575, + "loss": 2.6916, + "step": 21970 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014612775559700225, + "loss": 2.7364, + "step": 21980 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001460769683943725, + "loss": 2.7165, + "step": 21990 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014602616609909203, + "loss": 2.6742, + "step": 22000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001459753487278013, + "loss": 2.7318, + "step": 22010 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014592451629714574, + "loss": 2.7778, + "step": 22020 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014587366882377553, + "loss": 2.7038, + "step": 22030 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014582280632434604, + "loss": 2.7361, + "step": 22040 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001457719288155174, + "loss": 2.6822, + "step": 22050 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014572103631395463, + "loss": 2.7903, + "step": 22060 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014567012883632782, + "loss": 2.7905, + "step": 22070 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001456192063993118, + "loss": 2.6545, + "step": 22080 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014556826901958645, + "loss": 2.8058, + "step": 22090 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014551731671383635, + "loss": 2.6541, + "step": 22100 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014546634949875114, + "loss": 2.7656, + "step": 22110 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014541536739102532, + "loss": 2.7469, + "step": 22120 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001453643704073582, + "loss": 2.7594, + "step": 22130 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014531335856445398, + "loss": 2.7056, + "step": 22140 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014526233187902175, + "loss": 2.7079, + "step": 22150 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014521129036777547, + "loss": 2.7001, + "step": 22160 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014516023404743394, + "loss": 2.7898, + "step": 22170 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014510916293472082, + "loss": 2.6482, + "step": 22180 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001450580770463646, + "loss": 2.6631, + "step": 22190 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001450069763990986, + "loss": 2.7013, + "step": 22200 + }, + { + "epoch": 0.38, + "learning_rate": 0.000144955861009661, + "loss": 2.7463, + "step": 22210 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001449047308947948, + "loss": 2.7401, + "step": 22220 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001448535860712479, + "loss": 2.7186, + "step": 22230 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001448024265557728, + "loss": 2.719, + "step": 22240 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014475125236512704, + "loss": 2.7679, + "step": 22250 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014470006351607287, + "loss": 2.7483, + "step": 22260 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001446488600253774, + "loss": 2.6986, + "step": 22270 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001445976419098124, + "loss": 2.7859, + "step": 22280 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014454640918615456, + "loss": 2.6637, + "step": 22290 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014449516187118534, + "loss": 2.7067, + "step": 22300 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001444438999816909, + "loss": 2.7429, + "step": 22310 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014439262353446229, + "loss": 2.6646, + "step": 22320 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001443413325462952, + "loss": 2.7351, + "step": 22330 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014429002703399016, + "loss": 2.6265, + "step": 22340 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001442387070143525, + "loss": 2.6105, + "step": 22350 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001441873725041922, + "loss": 2.6801, + "step": 22360 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014413602352032407, + "loss": 2.7197, + "step": 22370 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001440846600795676, + "loss": 2.7517, + "step": 22380 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014403328219874704, + "loss": 2.6817, + "step": 22390 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014398188989469136, + "loss": 2.7716, + "step": 22400 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014393048318423428, + "loss": 2.7196, + "step": 22410 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014387906208421423, + "loss": 2.731, + "step": 22420 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014382762661147434, + "loss": 2.6808, + "step": 22430 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014377617678286244, + "loss": 2.6679, + "step": 22440 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014372471261523111, + "loss": 2.6433, + "step": 22450 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001436732341254376, + "loss": 2.7429, + "step": 22460 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014362174133034382, + "loss": 2.6631, + "step": 22470 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014357023424681637, + "loss": 2.8399, + "step": 22480 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014351871289172658, + "loss": 2.7728, + "step": 22490 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014346717728195045, + "loss": 2.7668, + "step": 22500 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014341562743436855, + "loss": 2.6185, + "step": 22510 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014336406336586626, + "loss": 2.7796, + "step": 22520 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014331248509333352, + "loss": 2.7239, + "step": 22530 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014326089263366496, + "loss": 2.7723, + "step": 22540 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014320928600375984, + "loss": 2.6278, + "step": 22550 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014315766522052204, + "loss": 2.6796, + "step": 22560 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014310603030086018, + "loss": 2.7273, + "step": 22570 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014305438126168733, + "loss": 2.7127, + "step": 22580 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001430027181199214, + "loss": 2.6422, + "step": 22590 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001429510408924847, + "loss": 2.7338, + "step": 22600 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014289934959630437, + "loss": 2.7889, + "step": 22610 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014284764424831193, + "loss": 2.6941, + "step": 22620 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014279592486544376, + "loss": 2.7124, + "step": 22630 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014274419146464062, + "loss": 2.7101, + "step": 22640 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014269244406284792, + "loss": 2.7017, + "step": 22650 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014264068267701572, + "loss": 2.6391, + "step": 22660 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014258890732409864, + "loss": 2.7083, + "step": 22670 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001425371180210558, + "loss": 2.7225, + "step": 22680 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014248531478485097, + "loss": 2.7597, + "step": 22690 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014243349763245244, + "loss": 2.742, + "step": 22700 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001423816665808331, + "loss": 2.6301, + "step": 22710 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001423298216469704, + "loss": 2.6993, + "step": 22720 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014227796284784622, + "loss": 2.6375, + "step": 22730 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014222609020044712, + "loss": 2.6783, + "step": 22740 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014217420372176416, + "loss": 2.723, + "step": 22750 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014212230342879284, + "loss": 2.6615, + "step": 22760 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014207038933853336, + "loss": 2.6848, + "step": 22770 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014201846146799025, + "loss": 2.781, + "step": 22780 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014196651983417269, + "loss": 2.7071, + "step": 22790 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001419145644540943, + "loss": 2.7238, + "step": 22800 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014186259534477319, + "loss": 2.6999, + "step": 22810 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014181061252323209, + "loss": 2.6859, + "step": 22820 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014175861600649806, + "loss": 2.7344, + "step": 22830 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014170660581160272, + "loss": 2.6097, + "step": 22840 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014165458195558218, + "loss": 2.6826, + "step": 22850 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014160254445547702, + "loss": 2.6388, + "step": 22860 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014155049332833228, + "loss": 2.6891, + "step": 22870 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014149842859119744, + "loss": 2.8171, + "step": 22880 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001414463502611265, + "loss": 2.7277, + "step": 22890 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014139425835517786, + "loss": 2.7167, + "step": 22900 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014134215289041433, + "loss": 2.7274, + "step": 22910 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014129003388390333, + "loss": 2.8325, + "step": 22920 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001412379013527165, + "loss": 2.7535, + "step": 22930 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014118575531393002, + "loss": 2.6545, + "step": 22940 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014113359578462454, + "loss": 2.681, + "step": 22950 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014108142278188498, + "loss": 2.6005, + "step": 22960 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014102923632280085, + "loss": 2.6567, + "step": 22970 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014097703642446596, + "loss": 2.74, + "step": 22980 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001409248231039785, + "loss": 2.6758, + "step": 22990 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014087259637844118, + "loss": 2.6205, + "step": 23000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014082035626496094, + "loss": 2.699, + "step": 23010 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014076810278064924, + "loss": 2.6557, + "step": 23020 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014071583594262184, + "loss": 2.6958, + "step": 23030 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014066355576799897, + "loss": 2.7001, + "step": 23040 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014061126227390508, + "loss": 2.6947, + "step": 23050 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014055895547746905, + "loss": 2.5979, + "step": 23060 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001405066353958242, + "loss": 2.7026, + "step": 23070 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001404543020461081, + "loss": 2.6958, + "step": 23080 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001404019554454627, + "loss": 2.653, + "step": 23090 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014034959561103424, + "loss": 2.6806, + "step": 23100 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014029722255997344, + "loss": 2.6239, + "step": 23110 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014024483630943518, + "loss": 2.7089, + "step": 23120 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014019243687657875, + "loss": 2.6802, + "step": 23130 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014014002427856776, + "loss": 2.6758, + "step": 23140 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014008759853257008, + "loss": 2.6941, + "step": 23150 + }, + { + "epoch": 0.4, + "learning_rate": 0.00014003515965575798, + "loss": 2.7214, + "step": 23160 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001399827076653079, + "loss": 2.7635, + "step": 23170 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001399302425784007, + "loss": 2.6466, + "step": 23180 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013987776441222146, + "loss": 2.7407, + "step": 23190 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013982527318395958, + "loss": 2.7196, + "step": 23200 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001397727689108087, + "loss": 2.7343, + "step": 23210 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013972025160996675, + "loss": 2.6659, + "step": 23220 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013966772129863598, + "loss": 2.7176, + "step": 23230 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013961517799402278, + "loss": 2.6925, + "step": 23240 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001395626217133379, + "loss": 2.7605, + "step": 23250 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013951005247379633, + "loss": 2.721, + "step": 23260 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013945747029261727, + "loss": 2.6122, + "step": 23270 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013940487518702415, + "loss": 2.641, + "step": 23280 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001393522671742447, + "loss": 2.6135, + "step": 23290 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013929964627151082, + "loss": 2.6247, + "step": 23300 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013924701249605858, + "loss": 2.7593, + "step": 23310 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013919436586512843, + "loss": 2.7614, + "step": 23320 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001391417063959649, + "loss": 2.6264, + "step": 23330 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013908903410581673, + "loss": 2.7066, + "step": 23340 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001390363490119369, + "loss": 2.7264, + "step": 23350 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013898365113158262, + "loss": 2.6689, + "step": 23360 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001389309404820152, + "loss": 2.7244, + "step": 23370 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013887821708050016, + "loss": 2.8185, + "step": 23380 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013882548094430722, + "loss": 2.7146, + "step": 23390 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001387727320907103, + "loss": 2.74, + "step": 23400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001387199705369874, + "loss": 2.7476, + "step": 23410 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013866719630042072, + "loss": 2.721, + "step": 23420 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013861440939829663, + "loss": 2.6641, + "step": 23430 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001385616098479057, + "loss": 2.7156, + "step": 23440 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001385087976665425, + "loss": 2.7076, + "step": 23450 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013845597287150588, + "loss": 2.745, + "step": 23460 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001384031354800987, + "loss": 2.6312, + "step": 23470 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001383502855096281, + "loss": 2.7598, + "step": 23480 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013829742297740514, + "loss": 2.7158, + "step": 23490 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013824454790074519, + "loss": 2.6798, + "step": 23500 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001381916602969676, + "loss": 2.7081, + "step": 23510 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013813876018339583, + "loss": 2.6004, + "step": 23520 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013808584757735759, + "loss": 2.6863, + "step": 23530 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013803292249618443, + "loss": 2.7632, + "step": 23540 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013797998495721221, + "loss": 2.645, + "step": 23550 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013792703497778077, + "loss": 2.6463, + "step": 23560 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013787407257523398, + "loss": 2.6695, + "step": 23570 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013782109776691988, + "loss": 2.6054, + "step": 23580 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013776811057019054, + "loss": 2.7125, + "step": 23590 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013771511100240205, + "loss": 2.6672, + "step": 23600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013766209908091455, + "loss": 2.7067, + "step": 23610 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013760907482309232, + "loss": 2.6156, + "step": 23620 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013755603824630363, + "loss": 2.843, + "step": 23630 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013750829480883555, + "loss": 2.7367, + "step": 23640 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013745523487387445, + "loss": 2.6752, + "step": 23650 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013740216267033763, + "loss": 2.7265, + "step": 23660 + }, + { + "epoch": 0.41, + "learning_rate": 0.000137349078215609, + "loss": 2.657, + "step": 23670 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013729598152707655, + "loss": 2.7196, + "step": 23680 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013724287262213228, + "loss": 2.6186, + "step": 23690 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013718975151817215, + "loss": 2.5905, + "step": 23700 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013713661823259608, + "loss": 2.8019, + "step": 23710 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013708347278280818, + "loss": 2.682, + "step": 23720 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013703031518621627, + "loss": 2.7472, + "step": 23730 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013697714546023236, + "loss": 2.7288, + "step": 23740 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001369292823506108, + "loss": 2.625, + "step": 23750 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013687608962676615, + "loss": 2.7256, + "step": 23760 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001368228848240465, + "loss": 2.5728, + "step": 23770 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013676966795987927, + "loss": 2.6909, + "step": 23780 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001367164390516958, + "loss": 2.6917, + "step": 23790 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001366631981169314, + "loss": 2.7467, + "step": 23800 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001366099451730253, + "loss": 2.6735, + "step": 23810 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013655668023742062, + "loss": 2.6752, + "step": 23820 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013650340332756447, + "loss": 2.5948, + "step": 23830 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013645011446090788, + "loss": 2.6597, + "step": 23840 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001363968136549058, + "loss": 2.6952, + "step": 23850 + }, + { + "epoch": 0.41, + "learning_rate": 0.000136343500927017, + "loss": 2.6971, + "step": 23860 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001362901762947043, + "loss": 2.6622, + "step": 23870 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013623683977543428, + "loss": 2.581, + "step": 23880 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013618349138667756, + "loss": 2.6647, + "step": 23890 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013613013114590846, + "loss": 2.6405, + "step": 23900 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001360767590706054, + "loss": 2.686, + "step": 23910 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013602337517825048, + "loss": 2.7012, + "step": 23920 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001359699794863298, + "loss": 2.7884, + "step": 23930 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013591657201233328, + "loss": 2.7038, + "step": 23940 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013586315277375468, + "loss": 2.6983, + "step": 23950 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013580972178809162, + "loss": 2.5843, + "step": 23960 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001357562790728456, + "loss": 2.7072, + "step": 23970 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013570282464552198, + "loss": 2.6556, + "step": 23980 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013564935852362987, + "loss": 2.6763, + "step": 23990 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001355958807246823, + "loss": 2.6535, + "step": 24000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013554239126619604, + "loss": 2.6564, + "step": 24010 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013548889016569174, + "loss": 2.7205, + "step": 24020 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013543537744069387, + "loss": 2.7567, + "step": 24030 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013538185310873067, + "loss": 2.7189, + "step": 24040 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013532831718733417, + "loss": 2.7561, + "step": 24050 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013527476969404028, + "loss": 2.6202, + "step": 24060 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001352212106463886, + "loss": 2.7012, + "step": 24070 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013516764006192257, + "loss": 2.6545, + "step": 24080 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013511405795818944, + "loss": 2.6225, + "step": 24090 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013506046435274012, + "loss": 2.6752, + "step": 24100 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001350068592631294, + "loss": 2.6823, + "step": 24110 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013495324270691573, + "loss": 2.7115, + "step": 24120 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001348996147016615, + "loss": 2.7192, + "step": 24130 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013484597526493256, + "loss": 2.7164, + "step": 24140 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013479232441429884, + "loss": 2.7175, + "step": 24150 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001347386621673337, + "loss": 2.6902, + "step": 24160 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013468498854161447, + "loss": 2.6477, + "step": 24170 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013463130355472207, + "loss": 2.6805, + "step": 24180 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013457760722424114, + "loss": 2.611, + "step": 24190 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013452389956776015, + "loss": 2.7243, + "step": 24200 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013447018060287118, + "loss": 2.7553, + "step": 24210 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013441645034717, + "loss": 2.7228, + "step": 24220 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013436270881825616, + "loss": 2.6699, + "step": 24230 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013430895603373287, + "loss": 2.617, + "step": 24240 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013425519201120696, + "loss": 2.6809, + "step": 24250 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013420141676828904, + "loss": 2.6714, + "step": 24260 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013414763032259337, + "loss": 2.6926, + "step": 24270 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013409383269173786, + "loss": 2.5747, + "step": 24280 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013404002389334405, + "loss": 2.6385, + "step": 24290 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013398620394503716, + "loss": 2.6434, + "step": 24300 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013393237286444615, + "loss": 2.6385, + "step": 24310 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013387853066920352, + "loss": 2.6632, + "step": 24320 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001338246773769454, + "loss": 2.7371, + "step": 24330 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001337708130053117, + "loss": 2.7659, + "step": 24340 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013371693757194576, + "loss": 2.6295, + "step": 24350 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001336630510944947, + "loss": 2.7329, + "step": 24360 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013360915359060914, + "loss": 2.7251, + "step": 24370 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013355524507794347, + "loss": 2.6456, + "step": 24380 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013350132557415548, + "loss": 2.6582, + "step": 24390 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013344739509690674, + "loss": 2.6461, + "step": 24400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013339345366386227, + "loss": 2.7416, + "step": 24410 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013333950129269082, + "loss": 2.7072, + "step": 24420 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013328553800106465, + "loss": 2.7799, + "step": 24430 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013323156380665955, + "loss": 2.7196, + "step": 24440 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013317757872715496, + "loss": 2.7262, + "step": 24450 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013312358278023387, + "loss": 2.6379, + "step": 24460 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001330695759835828, + "loss": 2.6867, + "step": 24470 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001330155583548918, + "loss": 2.741, + "step": 24480 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001329615299118546, + "loss": 2.599, + "step": 24490 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013290749067216832, + "loss": 2.7022, + "step": 24500 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013285344065353368, + "loss": 2.7317, + "step": 24510 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013279937987365493, + "loss": 2.685, + "step": 24520 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013274530835023982, + "loss": 2.7313, + "step": 24530 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001326912261009997, + "loss": 2.6035, + "step": 24540 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013263713314364933, + "loss": 2.6815, + "step": 24550 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013258302949590705, + "loss": 2.6506, + "step": 24560 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013252891517549466, + "loss": 2.6217, + "step": 24570 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013247479020013744, + "loss": 2.722, + "step": 24580 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013242065458756423, + "loss": 2.7081, + "step": 24590 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001323665083555073, + "loss": 2.7286, + "step": 24600 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013231235152170242, + "loss": 2.6095, + "step": 24610 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013225818410388882, + "loss": 2.5863, + "step": 24620 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001322040061198092, + "loss": 2.7587, + "step": 24630 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001321498175872097, + "loss": 2.7305, + "step": 24640 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013209561852384002, + "loss": 2.7458, + "step": 24650 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013204140894745314, + "loss": 2.7168, + "step": 24660 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001319871888758056, + "loss": 2.65, + "step": 24670 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013193295832665737, + "loss": 2.6896, + "step": 24680 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013187871731777174, + "loss": 2.6825, + "step": 24690 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013182446586691566, + "loss": 2.7736, + "step": 24700 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013177020399185923, + "loss": 2.6889, + "step": 24710 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001317159317103762, + "loss": 2.7002, + "step": 24720 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013166164904024355, + "loss": 2.6801, + "step": 24730 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013160735599924173, + "loss": 2.6965, + "step": 24740 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013155305260515463, + "loss": 2.6586, + "step": 24750 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013149873887576945, + "loss": 2.6893, + "step": 24760 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013144441482887685, + "loss": 2.6534, + "step": 24770 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013139008048227086, + "loss": 2.7165, + "step": 24780 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013133573585374876, + "loss": 2.6307, + "step": 24790 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013128138096111142, + "loss": 2.6672, + "step": 24800 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013122701582216287, + "loss": 2.6787, + "step": 24810 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013117264045471057, + "loss": 2.592, + "step": 24820 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013111825487656544, + "loss": 2.7031, + "step": 24830 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001310638591055415, + "loss": 2.6763, + "step": 24840 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013100945315945635, + "loss": 2.594, + "step": 24850 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013095503705613077, + "loss": 2.6684, + "step": 24860 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013090061081338895, + "loss": 2.6329, + "step": 24870 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013084617444905834, + "loss": 2.6892, + "step": 24880 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013079172798096975, + "loss": 2.7063, + "step": 24890 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013073727142695732, + "loss": 2.6878, + "step": 24900 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013068280480485844, + "loss": 2.6435, + "step": 24910 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013062832813251377, + "loss": 2.6777, + "step": 24920 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013057384142776737, + "loss": 2.6163, + "step": 24930 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001305193447084665, + "loss": 2.6328, + "step": 24940 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013046483799246175, + "loss": 2.7748, + "step": 24950 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001304103212976069, + "loss": 2.6409, + "step": 24960 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013035579464175907, + "loss": 2.5843, + "step": 24970 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001303012580427787, + "loss": 2.6758, + "step": 24980 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013024671151852938, + "loss": 2.7069, + "step": 24990 + }, + { + "epoch": 0.43, + "learning_rate": 0.000130192155086878, + "loss": 2.6971, + "step": 25000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013013758876569465, + "loss": 2.6273, + "step": 25010 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013008301257285272, + "loss": 2.6655, + "step": 25020 + }, + { + "epoch": 0.43, + "learning_rate": 0.00013002842652622877, + "loss": 2.7208, + "step": 25030 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001299738306437027, + "loss": 2.6248, + "step": 25040 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001299192249431575, + "loss": 2.7011, + "step": 25050 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012986460944247944, + "loss": 2.6428, + "step": 25060 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012980998415955798, + "loss": 2.7719, + "step": 25070 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012975534911228585, + "loss": 2.5946, + "step": 25080 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001297007043185589, + "loss": 2.7026, + "step": 25090 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012964604979627618, + "loss": 2.7008, + "step": 25100 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001295913855633399, + "loss": 2.747, + "step": 25110 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012953671163765562, + "loss": 2.7272, + "step": 25120 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012948202803713188, + "loss": 2.703, + "step": 25130 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001294273347796804, + "loss": 2.627, + "step": 25140 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001293726318832162, + "loss": 2.7101, + "step": 25150 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001293179193656574, + "loss": 2.5712, + "step": 25160 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012926319724492519, + "loss": 2.6969, + "step": 25170 + }, + { + "epoch": 0.44, + "learning_rate": 0.000129208465538944, + "loss": 2.7268, + "step": 25180 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012915372426564135, + "loss": 2.6974, + "step": 25190 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001290989734429479, + "loss": 2.688, + "step": 25200 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012904421308879745, + "loss": 2.5982, + "step": 25210 + }, + { + "epoch": 0.44, + "learning_rate": 0.000128989443221127, + "loss": 2.6046, + "step": 25220 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012893466385787645, + "loss": 2.6203, + "step": 25230 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012887987501698903, + "loss": 2.6056, + "step": 25240 + }, + { + "epoch": 0.44, + "learning_rate": 0.000128825076716411, + "loss": 2.696, + "step": 25250 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012877026897409165, + "loss": 2.6304, + "step": 25260 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012871545180798347, + "loss": 2.6106, + "step": 25270 + }, + { + "epoch": 0.44, + "learning_rate": 0.000128660625236042, + "loss": 2.6627, + "step": 25280 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001286057892762258, + "loss": 2.753, + "step": 25290 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001285509439464966, + "loss": 2.7295, + "step": 25300 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012849608926481914, + "loss": 2.6748, + "step": 25310 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012844122524916123, + "loss": 2.654, + "step": 25320 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012838635191749374, + "loss": 2.5974, + "step": 25330 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001283314692877906, + "loss": 2.6414, + "step": 25340 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001282765773780288, + "loss": 2.6447, + "step": 25350 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012822167620618832, + "loss": 2.7306, + "step": 25360 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012816676579025223, + "loss": 2.7392, + "step": 25370 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001281118461482066, + "loss": 2.556, + "step": 25380 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012805691729804054, + "loss": 2.6917, + "step": 25390 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012800197925774614, + "loss": 2.7279, + "step": 25400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001279470320453185, + "loss": 2.5659, + "step": 25410 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001278920756787558, + "loss": 2.6185, + "step": 25420 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012783711017605912, + "loss": 2.7416, + "step": 25430 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012778213555523254, + "loss": 2.694, + "step": 25440 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001277271518342833, + "loss": 2.6366, + "step": 25450 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012767215903122136, + "loss": 2.8283, + "step": 25460 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012761715716405982, + "loss": 2.6175, + "step": 25470 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012756214625081472, + "loss": 2.6884, + "step": 25480 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012750712630950502, + "loss": 2.6469, + "step": 25490 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012745209735815272, + "loss": 2.6514, + "step": 25500 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012739705941478265, + "loss": 2.7389, + "step": 25510 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001273420124974227, + "loss": 2.5644, + "step": 25520 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001272869566241037, + "loss": 2.7025, + "step": 25530 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012723189181285927, + "loss": 2.6905, + "step": 25540 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001271768180817261, + "loss": 2.6645, + "step": 25550 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012712173544874377, + "loss": 2.691, + "step": 25560 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012706664393195475, + "loss": 2.7878, + "step": 25570 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012701154354940444, + "loss": 2.6942, + "step": 25580 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012695643431914113, + "loss": 2.7493, + "step": 25590 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012690131625921604, + "loss": 2.6525, + "step": 25600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001268461893876832, + "loss": 2.7048, + "step": 25610 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012679105372259962, + "loss": 2.5927, + "step": 25620 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001267359092820252, + "loss": 2.7784, + "step": 25630 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001266807560840226, + "loss": 2.6885, + "step": 25640 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012662559414665744, + "loss": 2.7298, + "step": 25650 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012657042348799817, + "loss": 2.6121, + "step": 25660 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012651524412611616, + "loss": 2.5925, + "step": 25670 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001264600560790855, + "loss": 2.6464, + "step": 25680 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012640485936498322, + "loss": 2.6422, + "step": 25690 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012634965400188923, + "loss": 2.6276, + "step": 25700 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012629444000788616, + "loss": 2.6272, + "step": 25710 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012623921740105953, + "loss": 2.7075, + "step": 25720 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012618398619949764, + "loss": 2.6633, + "step": 25730 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012612874642129173, + "loss": 2.6484, + "step": 25740 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012607349808453568, + "loss": 2.6245, + "step": 25750 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012601824120732626, + "loss": 2.6938, + "step": 25760 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012596297580776307, + "loss": 2.7927, + "step": 25770 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001259077019039484, + "loss": 2.6484, + "step": 25780 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001258524195139874, + "loss": 2.6999, + "step": 25790 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012579712865598802, + "loss": 2.71, + "step": 25800 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012574182934806092, + "loss": 2.676, + "step": 25810 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012568652160831957, + "loss": 2.7379, + "step": 25820 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012563120545488015, + "loss": 2.6973, + "step": 25830 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012557588090586169, + "loss": 2.7221, + "step": 25840 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012552054797938588, + "loss": 2.6188, + "step": 25850 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001254652066935772, + "loss": 2.723, + "step": 25860 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012540985706656284, + "loss": 2.6901, + "step": 25870 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012535449911647273, + "loss": 2.7593, + "step": 25880 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012529913286143956, + "loss": 2.7126, + "step": 25890 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012524375831959874, + "loss": 2.6313, + "step": 25900 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001251883755090883, + "loss": 2.7028, + "step": 25910 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012513298444804912, + "loss": 2.7125, + "step": 25920 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012507758515462465, + "loss": 2.6642, + "step": 25930 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012502217764696113, + "loss": 2.718, + "step": 25940 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012496676194320746, + "loss": 2.6765, + "step": 25950 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012491133806151518, + "loss": 2.7217, + "step": 25960 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001248559060200386, + "loss": 2.5944, + "step": 25970 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012480046583693462, + "loss": 2.6511, + "step": 25980 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012474501753036282, + "loss": 2.6934, + "step": 25990 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012468956111848556, + "loss": 2.6911, + "step": 26000 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012463409661946768, + "loss": 2.6613, + "step": 26010 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001245786240514767, + "loss": 2.6827, + "step": 26020 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012452314343268294, + "loss": 2.7109, + "step": 26030 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001244676547812592, + "loss": 2.6181, + "step": 26040 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012441215811538088, + "loss": 2.6611, + "step": 26050 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001243566534532262, + "loss": 2.7017, + "step": 26060 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012430114081297576, + "loss": 2.6021, + "step": 26070 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012424562021281303, + "loss": 2.6923, + "step": 26080 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012419009167092385, + "loss": 2.5941, + "step": 26090 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012413455520549679, + "loss": 2.6065, + "step": 26100 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012407901083472301, + "loss": 2.7595, + "step": 26110 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001240234585767962, + "loss": 2.6459, + "step": 26120 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012396789844991267, + "loss": 2.6779, + "step": 26130 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012391233047227138, + "loss": 2.7104, + "step": 26140 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001238567546620737, + "loss": 2.6531, + "step": 26150 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012380117103752368, + "loss": 2.6872, + "step": 26160 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012374557961682792, + "loss": 2.7101, + "step": 26170 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012368998041819556, + "loss": 2.7079, + "step": 26180 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001236343734598383, + "loss": 2.7023, + "step": 26190 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001235787587599703, + "loss": 2.6828, + "step": 26200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001235231363368084, + "loss": 2.5824, + "step": 26210 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012346750620857187, + "loss": 2.7219, + "step": 26220 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012341186839348253, + "loss": 2.6649, + "step": 26230 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012335622290976468, + "loss": 2.5396, + "step": 26240 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001233005697756452, + "loss": 2.7617, + "step": 26250 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012324490900935344, + "loss": 2.6061, + "step": 26260 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012318924062912126, + "loss": 2.6095, + "step": 26270 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012313356465318298, + "loss": 2.6376, + "step": 26280 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012307788109977544, + "loss": 2.6361, + "step": 26290 + }, + { + "epoch": 0.46, + "learning_rate": 0.000123022189987138, + "loss": 2.6824, + "step": 26300 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012296649133351238, + "loss": 2.647, + "step": 26310 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012291078515714294, + "loss": 2.6166, + "step": 26320 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012285507147627632, + "loss": 2.7013, + "step": 26330 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001227993503091617, + "loss": 2.7151, + "step": 26340 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001227436216740508, + "loss": 2.687, + "step": 26350 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012268788558919764, + "loss": 2.6977, + "step": 26360 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012263214207285875, + "loss": 2.6501, + "step": 26370 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012257639114329313, + "loss": 2.7009, + "step": 26380 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012252063281876209, + "loss": 2.7077, + "step": 26390 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012246486711752952, + "loss": 2.6515, + "step": 26400 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001224090940578616, + "loss": 2.6857, + "step": 26410 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012235331365802695, + "loss": 2.7082, + "step": 26420 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012229752593629663, + "loss": 2.6801, + "step": 26430 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012224173091094408, + "loss": 2.6895, + "step": 26440 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012218592860024517, + "loss": 2.6896, + "step": 26450 + }, + { + "epoch": 0.46, + "learning_rate": 0.000122130119022478, + "loss": 2.6268, + "step": 26460 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001220743021959233, + "loss": 2.5484, + "step": 26470 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012201847813886396, + "loss": 2.5964, + "step": 26480 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012196264686958534, + "loss": 2.6641, + "step": 26490 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012190680840637512, + "loss": 2.6655, + "step": 26500 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012185096276752341, + "loss": 2.7396, + "step": 26510 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001217951099713226, + "loss": 2.7122, + "step": 26520 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012173925003606739, + "loss": 2.6615, + "step": 26530 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001216833829800549, + "loss": 2.6762, + "step": 26540 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001216275088215846, + "loss": 2.6669, + "step": 26550 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012157162757895821, + "loss": 2.717, + "step": 26560 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012151573927047977, + "loss": 2.7109, + "step": 26570 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012145984391445571, + "loss": 2.6563, + "step": 26580 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001214039415291947, + "loss": 2.646, + "step": 26590 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012134803213300774, + "loss": 2.6838, + "step": 26600 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012129211574420811, + "loss": 2.7103, + "step": 26610 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012123619238111141, + "loss": 2.5674, + "step": 26620 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001211802620620355, + "loss": 2.6598, + "step": 26630 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012112432480530054, + "loss": 2.6328, + "step": 26640 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012106838062922891, + "loss": 2.672, + "step": 26650 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012101242955214536, + "loss": 2.6746, + "step": 26660 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012095647159237677, + "loss": 2.668, + "step": 26670 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012090050676825236, + "loss": 2.5858, + "step": 26680 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001208445350981036, + "loss": 2.7594, + "step": 26690 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012078855660026415, + "loss": 2.568, + "step": 26700 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012073257129306993, + "loss": 2.6777, + "step": 26710 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012067657919485913, + "loss": 2.6867, + "step": 26720 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012062058032397214, + "loss": 2.6183, + "step": 26730 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012056457469875153, + "loss": 2.6249, + "step": 26740 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012050856233754212, + "loss": 2.6733, + "step": 26750 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012045254325869091, + "loss": 2.6023, + "step": 26760 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012039651748054717, + "loss": 2.6171, + "step": 26770 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012034048502146225, + "loss": 2.6092, + "step": 26780 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012028444589978978, + "loss": 2.6521, + "step": 26790 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012022840013388554, + "loss": 2.7374, + "step": 26800 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012017234774210754, + "loss": 2.6877, + "step": 26810 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012011628874281586, + "loss": 2.6256, + "step": 26820 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012006022315437277, + "loss": 2.5747, + "step": 26830 + }, + { + "epoch": 0.46, + "learning_rate": 0.00012000415099514279, + "loss": 2.6197, + "step": 26840 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001199480722834925, + "loss": 2.6537, + "step": 26850 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001198919870377906, + "loss": 2.6736, + "step": 26860 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011983589527640808, + "loss": 2.6435, + "step": 26870 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001197797970177179, + "loss": 2.7251, + "step": 26880 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001197236922800952, + "loss": 2.7093, + "step": 26890 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011966758108191733, + "loss": 2.6704, + "step": 26900 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001196114634415636, + "loss": 2.6002, + "step": 26910 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011955533937741559, + "loss": 2.6509, + "step": 26920 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011949920890785684, + "loss": 2.6208, + "step": 26930 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001194430720512731, + "loss": 2.7251, + "step": 26940 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011938692882605213, + "loss": 2.6256, + "step": 26950 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011933077925058387, + "loss": 2.6589, + "step": 26960 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011927462334326022, + "loss": 2.7361, + "step": 26970 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011921846112247526, + "loss": 2.6666, + "step": 26980 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011916229260662508, + "loss": 2.6351, + "step": 26990 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011910611781410787, + "loss": 2.6939, + "step": 27000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011904993676332382, + "loss": 2.6589, + "step": 27010 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011899374947267524, + "loss": 2.6417, + "step": 27020 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011893755596056639, + "loss": 2.5428, + "step": 27030 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011888135624540369, + "loss": 2.6297, + "step": 27040 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001188251503455955, + "loss": 2.6431, + "step": 27050 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001187689382795522, + "loss": 2.643, + "step": 27060 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001187127200656863, + "loss": 2.654, + "step": 27070 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011865649572241216, + "loss": 2.6291, + "step": 27080 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011860026526814627, + "loss": 2.6327, + "step": 27090 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011854402872130712, + "loss": 2.6706, + "step": 27100 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011848778610031512, + "loss": 2.7475, + "step": 27110 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001184315374235927, + "loss": 2.6357, + "step": 27120 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011837528270956435, + "loss": 2.6363, + "step": 27130 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011831902197665643, + "loss": 2.6192, + "step": 27140 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011826275524329729, + "loss": 2.6591, + "step": 27150 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011820648252791728, + "loss": 2.6873, + "step": 27160 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011815020384894872, + "loss": 2.6547, + "step": 27170 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011809391922482586, + "loss": 2.7182, + "step": 27180 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011803762867398491, + "loss": 2.5819, + "step": 27190 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011798133221486396, + "loss": 2.6678, + "step": 27200 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011792502986590315, + "loss": 2.7201, + "step": 27210 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011786872164554443, + "loss": 2.6216, + "step": 27220 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011781240757223176, + "loss": 2.5959, + "step": 27230 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011775608766441097, + "loss": 2.6722, + "step": 27240 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011769976194052985, + "loss": 2.677, + "step": 27250 + }, + { + "epoch": 0.47, + "learning_rate": 0.000117643430419038, + "loss": 2.5944, + "step": 27260 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011758709311838702, + "loss": 2.6943, + "step": 27270 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011753075005703038, + "loss": 2.6195, + "step": 27280 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011747440125342341, + "loss": 2.6251, + "step": 27290 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011741804672602333, + "loss": 2.8016, + "step": 27300 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011736168649328918, + "loss": 2.67, + "step": 27310 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011730532057368202, + "loss": 2.6414, + "step": 27320 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011724894898566461, + "loss": 2.6078, + "step": 27330 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011719257174770165, + "loss": 2.6762, + "step": 27340 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011713618887825968, + "loss": 2.689, + "step": 27350 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011707980039580711, + "loss": 2.6469, + "step": 27360 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001170234063188141, + "loss": 2.7306, + "step": 27370 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011696700666575273, + "loss": 2.7399, + "step": 27380 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011691060145509691, + "loss": 2.654, + "step": 27390 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011685419070532229, + "loss": 2.6275, + "step": 27400 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011679777443490638, + "loss": 2.675, + "step": 27410 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011674135266232855, + "loss": 2.6736, + "step": 27420 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011668492540606987, + "loss": 2.6315, + "step": 27430 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011662849268461326, + "loss": 2.6603, + "step": 27440 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011657205451644349, + "loss": 2.6751, + "step": 27450 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011651561092004699, + "loss": 2.6896, + "step": 27460 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011645916191391206, + "loss": 2.6608, + "step": 27470 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011640270751652875, + "loss": 2.6124, + "step": 27480 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011634624774638882, + "loss": 2.6527, + "step": 27490 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001162897826219859, + "loss": 2.6628, + "step": 27500 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001162333121618153, + "loss": 2.5349, + "step": 27510 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011617683638437404, + "loss": 2.6097, + "step": 27520 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011612035530816099, + "loss": 2.6792, + "step": 27530 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011606386895167669, + "loss": 2.6609, + "step": 27540 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001160073773334234, + "loss": 2.6572, + "step": 27550 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001159508804719051, + "loss": 2.6729, + "step": 27560 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011589437838562756, + "loss": 2.6484, + "step": 27570 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011583787109309819, + "loss": 2.6943, + "step": 27580 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001157813586128261, + "loss": 2.6476, + "step": 27590 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011572484096332215, + "loss": 2.632, + "step": 27600 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011566831816309887, + "loss": 2.615, + "step": 27610 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011561179023067041, + "loss": 2.7265, + "step": 27620 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011555525718455278, + "loss": 2.6326, + "step": 27630 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011549871904326346, + "loss": 2.5967, + "step": 27640 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011544217582532175, + "loss": 2.6755, + "step": 27650 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011538562754924854, + "loss": 2.5492, + "step": 27660 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011532907423356631, + "loss": 2.6474, + "step": 27670 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011527251589679938, + "loss": 2.6639, + "step": 27680 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011521595255747355, + "loss": 2.616, + "step": 27690 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011515938423411631, + "loss": 2.6541, + "step": 27700 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011510281094525681, + "loss": 2.6276, + "step": 27710 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011504623270942575, + "loss": 2.6274, + "step": 27720 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011498964954515558, + "loss": 2.6978, + "step": 27730 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011493306147098026, + "loss": 2.5674, + "step": 27740 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011487646850543534, + "loss": 2.6579, + "step": 27750 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011481987066705808, + "loss": 2.676, + "step": 27760 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011476326797438724, + "loss": 2.7089, + "step": 27770 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011470666044596319, + "loss": 2.6734, + "step": 27780 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011465004810032793, + "loss": 2.6829, + "step": 27790 + }, + { + "epoch": 0.48, + "learning_rate": 0.000114593430956025, + "loss": 2.6666, + "step": 27800 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011453680903159947, + "loss": 2.5892, + "step": 27810 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001144801823455981, + "loss": 2.6461, + "step": 27820 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011442355091656909, + "loss": 2.6368, + "step": 27830 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011436691476306222, + "loss": 2.6904, + "step": 27840 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001143102739036289, + "loss": 2.6637, + "step": 27850 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011425362835682192, + "loss": 2.6049, + "step": 27860 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011419697814119578, + "loss": 2.7022, + "step": 27870 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011414032327530641, + "loss": 2.686, + "step": 27880 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011408366377771125, + "loss": 2.5847, + "step": 27890 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001140269996669693, + "loss": 2.7132, + "step": 27900 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011397033096164112, + "loss": 2.7284, + "step": 27910 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011391365768028864, + "loss": 2.6108, + "step": 27920 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011385697984147539, + "loss": 2.6125, + "step": 27930 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011380029746376639, + "loss": 2.6682, + "step": 27940 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011374361056572813, + "loss": 2.681, + "step": 27950 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011368691916592853, + "loss": 2.5618, + "step": 27960 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011363022328293707, + "loss": 2.607, + "step": 27970 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011357352293532463, + "loss": 2.7104, + "step": 27980 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011351681814166358, + "loss": 2.6076, + "step": 27990 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011346010892052779, + "loss": 2.6167, + "step": 28000 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011340339529049249, + "loss": 2.5345, + "step": 28010 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011334667727013442, + "loss": 2.6842, + "step": 28020 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011328995487803175, + "loss": 2.656, + "step": 28030 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011323322813276404, + "loss": 2.5403, + "step": 28040 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011317649705291238, + "loss": 2.6755, + "step": 28050 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001131197616570591, + "loss": 2.5629, + "step": 28060 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011306302196378813, + "loss": 2.7436, + "step": 28070 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011300627799168468, + "loss": 2.6227, + "step": 28080 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011294952975933549, + "loss": 2.5972, + "step": 28090 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011289277728532852, + "loss": 2.6842, + "step": 28100 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011283602058825324, + "loss": 2.6627, + "step": 28110 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011277925968670053, + "loss": 2.5849, + "step": 28120 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011272249459926256, + "loss": 2.5778, + "step": 28130 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001126657253445329, + "loss": 2.6457, + "step": 28140 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011260895194110647, + "loss": 2.6548, + "step": 28150 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011255217440757964, + "loss": 2.6051, + "step": 28160 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011249539276255, + "loss": 2.6459, + "step": 28170 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011243860702461658, + "loss": 2.7059, + "step": 28180 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011238181721237968, + "loss": 2.6301, + "step": 28190 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011232502334444104, + "loss": 2.6867, + "step": 28200 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011226822543940363, + "loss": 2.651, + "step": 28210 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011221142351587175, + "loss": 2.6581, + "step": 28220 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001121546175924511, + "loss": 2.5471, + "step": 28230 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001120978076877486, + "loss": 2.6299, + "step": 28240 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011204099382037249, + "loss": 2.703, + "step": 28250 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011198417600893234, + "loss": 2.6221, + "step": 28260 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011192735427203899, + "loss": 2.7261, + "step": 28270 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011187052862830462, + "loss": 2.6168, + "step": 28280 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011181369909634258, + "loss": 2.6417, + "step": 28290 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011175686569476757, + "loss": 2.5682, + "step": 28300 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011170002844219558, + "loss": 2.7134, + "step": 28310 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011164318735724379, + "loss": 2.546, + "step": 28320 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011158634245853066, + "loss": 2.6278, + "step": 28330 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011152949376467594, + "loss": 2.5775, + "step": 28340 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011147264129430057, + "loss": 2.6704, + "step": 28350 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011141578506602672, + "loss": 2.6398, + "step": 28360 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001113589250984779, + "loss": 2.6278, + "step": 28370 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001113020614102787, + "loss": 2.5676, + "step": 28380 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011124519402005504, + "loss": 2.6798, + "step": 28390 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011118832294643398, + "loss": 2.6244, + "step": 28400 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011113144820804378, + "loss": 2.626, + "step": 28410 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011107456982351399, + "loss": 2.6561, + "step": 28420 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011101768781147528, + "loss": 2.6964, + "step": 28430 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011096080219055948, + "loss": 2.6686, + "step": 28440 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001109039129793997, + "loss": 2.6406, + "step": 28450 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011084702019663018, + "loss": 2.6336, + "step": 28460 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001107901238608863, + "loss": 2.6834, + "step": 28470 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011073322399080461, + "loss": 2.6357, + "step": 28480 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011067632060502289, + "loss": 2.5674, + "step": 28490 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011061941372217996, + "loss": 2.6129, + "step": 28500 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011056250336091585, + "loss": 2.5922, + "step": 28510 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011050558953987178, + "loss": 2.6232, + "step": 28520 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011044867227768998, + "loss": 2.6263, + "step": 28530 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011039175159301387, + "loss": 2.6401, + "step": 28540 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011033482750448802, + "loss": 2.6823, + "step": 28550 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011027790003075811, + "loss": 2.7083, + "step": 28560 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011022096919047086, + "loss": 2.5843, + "step": 28570 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011016403500227418, + "loss": 2.6861, + "step": 28580 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011010709748481698, + "loss": 2.664, + "step": 28590 + }, + { + "epoch": 0.49, + "learning_rate": 0.00011005015665674939, + "loss": 2.7157, + "step": 28600 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010999321253672248, + "loss": 2.6473, + "step": 28610 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001099362651433885, + "loss": 2.6262, + "step": 28620 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010987931449540075, + "loss": 2.6751, + "step": 28630 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010982236061141358, + "loss": 2.6807, + "step": 28640 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010976540351008245, + "loss": 2.7054, + "step": 28650 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010970844321006373, + "loss": 2.6956, + "step": 28660 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010965147973001501, + "loss": 2.6284, + "step": 28670 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010959451308859488, + "loss": 2.6781, + "step": 28680 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010953754330446287, + "loss": 2.614, + "step": 28690 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001094805703962796, + "loss": 2.619, + "step": 28700 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010942359438270678, + "loss": 2.6963, + "step": 28710 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010936661528240704, + "loss": 2.6562, + "step": 28720 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010930963311404405, + "loss": 2.5649, + "step": 28730 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010925264789628251, + "loss": 2.6772, + "step": 28740 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010919565964778811, + "loss": 2.6073, + "step": 28750 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010913866838722751, + "loss": 2.6576, + "step": 28760 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010908167413326833, + "loss": 2.6806, + "step": 28770 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001090246769045793, + "loss": 2.7294, + "step": 28780 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010896767671982997, + "loss": 2.6458, + "step": 28790 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010891067359769094, + "loss": 2.7148, + "step": 28800 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010885366755683373, + "loss": 2.6724, + "step": 28810 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010879665861593084, + "loss": 2.6418, + "step": 28820 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001087396467936558, + "loss": 2.6238, + "step": 28830 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010868263210868294, + "loss": 2.7061, + "step": 28840 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010862561457968761, + "loss": 2.6605, + "step": 28850 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010856859422534606, + "loss": 2.5803, + "step": 28860 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010851157106433549, + "loss": 2.6371, + "step": 28870 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010845454511533399, + "loss": 2.6414, + "step": 28880 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010839751639702064, + "loss": 2.616, + "step": 28890 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010834048492807531, + "loss": 2.5851, + "step": 28900 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010828345072717885, + "loss": 2.5698, + "step": 28910 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010822641381301297, + "loss": 2.6533, + "step": 28920 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010816937420426033, + "loss": 2.5694, + "step": 28930 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010811233191960439, + "loss": 2.6394, + "step": 28940 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010805528697772952, + "loss": 2.669, + "step": 28950 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010799823939732098, + "loss": 2.5596, + "step": 28960 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010794118919706489, + "loss": 2.6297, + "step": 28970 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010788413639564818, + "loss": 2.6304, + "step": 28980 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010782708101175869, + "loss": 2.5583, + "step": 28990 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010777002306408506, + "loss": 2.5967, + "step": 29000 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010771296257131684, + "loss": 2.6573, + "step": 29010 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010765589955214435, + "loss": 2.7197, + "step": 29020 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010759883402525872, + "loss": 2.6176, + "step": 29030 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010754176600935197, + "loss": 2.5794, + "step": 29040 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010748469552311693, + "loss": 2.5773, + "step": 29050 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010742762258524712, + "loss": 2.7293, + "step": 29060 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010737054721443705, + "loss": 2.5993, + "step": 29070 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001073134694293819, + "loss": 2.6393, + "step": 29080 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010725638924877761, + "loss": 2.5803, + "step": 29090 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010719930669132105, + "loss": 2.6177, + "step": 29100 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010714222177570976, + "loss": 2.6393, + "step": 29110 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010708513452064206, + "loss": 2.584, + "step": 29120 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010702804494481708, + "loss": 2.6243, + "step": 29130 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010697095306693463, + "loss": 2.5951, + "step": 29140 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010691385890569541, + "loss": 2.6497, + "step": 29150 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010685676247980071, + "loss": 2.6002, + "step": 29160 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010679966380795268, + "loss": 2.6689, + "step": 29170 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010674256290885413, + "loss": 2.6378, + "step": 29180 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010668545980120868, + "loss": 2.7347, + "step": 29190 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010662835450372059, + "loss": 2.6138, + "step": 29200 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010657124703509489, + "loss": 2.6638, + "step": 29210 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010651413741403728, + "loss": 2.6557, + "step": 29220 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010645702565925424, + "loss": 2.6504, + "step": 29230 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010639991178945285, + "loss": 2.6452, + "step": 29240 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010634279582334095, + "loss": 2.6004, + "step": 29250 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010628567777962704, + "loss": 2.6881, + "step": 29260 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010622855767702029, + "loss": 2.6168, + "step": 29270 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010617143553423061, + "loss": 2.6217, + "step": 29280 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001061143113699685, + "loss": 2.6879, + "step": 29290 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010605718520294519, + "loss": 2.5503, + "step": 29300 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010600005705187248, + "loss": 2.5762, + "step": 29310 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010594292693546286, + "loss": 2.6411, + "step": 29320 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010588579487242953, + "loss": 2.5723, + "step": 29330 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010582866088148626, + "loss": 2.6867, + "step": 29340 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010577152498134743, + "loss": 2.6794, + "step": 29350 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010571438719072806, + "loss": 2.6249, + "step": 29360 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010565724752834385, + "loss": 2.6762, + "step": 29370 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010560010601291109, + "loss": 2.5723, + "step": 29380 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001055429626631466, + "loss": 2.6064, + "step": 29390 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001054858174977679, + "loss": 2.6359, + "step": 29400 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010542867053549301, + "loss": 2.6553, + "step": 29410 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010537152179504067, + "loss": 2.6693, + "step": 29420 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010531437129513005, + "loss": 2.6557, + "step": 29430 + }, + { + "epoch": 0.51, + "learning_rate": 0.000105257219054481, + "loss": 2.628, + "step": 29440 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001052000650918139, + "loss": 2.576, + "step": 29450 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010514290942584972, + "loss": 2.6566, + "step": 29460 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010508575207530995, + "loss": 2.6053, + "step": 29470 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010502859305891668, + "loss": 2.5859, + "step": 29480 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010497143239539253, + "loss": 2.5459, + "step": 29490 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001049142701034606, + "loss": 2.6295, + "step": 29500 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010485710620184459, + "loss": 2.588, + "step": 29510 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010479994070926873, + "loss": 2.5839, + "step": 29520 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010474277364445772, + "loss": 2.6405, + "step": 29530 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010468560502613682, + "loss": 2.6253, + "step": 29540 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010462843487303178, + "loss": 2.6633, + "step": 29550 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001045712632038689, + "loss": 2.6782, + "step": 29560 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010451409003737486, + "loss": 2.6544, + "step": 29570 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010445691539227696, + "loss": 2.6215, + "step": 29580 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010439973928730289, + "loss": 2.6364, + "step": 29590 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010434256174118091, + "loss": 2.5883, + "step": 29600 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010428538277263962, + "loss": 2.5938, + "step": 29610 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010422820240040825, + "loss": 2.5529, + "step": 29620 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010417102064321633, + "loss": 2.6239, + "step": 29630 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010411383751979395, + "loss": 2.7011, + "step": 29640 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010405665304887165, + "loss": 2.6807, + "step": 29650 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010399946724918031, + "loss": 2.6333, + "step": 29660 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010394228013945135, + "loss": 2.6995, + "step": 29670 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010388509173841661, + "loss": 2.546, + "step": 29680 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010382790206480823, + "loss": 2.607, + "step": 29690 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010377071113735896, + "loss": 2.6008, + "step": 29700 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010371351897480184, + "loss": 2.5819, + "step": 29710 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010365632559587029, + "loss": 2.6544, + "step": 29720 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010359913101929819, + "loss": 2.5333, + "step": 29730 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010354193526381986, + "loss": 2.6862, + "step": 29740 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010348473834816985, + "loss": 2.6102, + "step": 29750 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010342754029108323, + "loss": 2.5955, + "step": 29760 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010337034111129543, + "loss": 2.635, + "step": 29770 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010331314082754218, + "loss": 2.5341, + "step": 29780 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010325593945855958, + "loss": 2.6097, + "step": 29790 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010319873702308413, + "loss": 2.5733, + "step": 29800 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010314153353985267, + "loss": 2.559, + "step": 29810 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010308432902760238, + "loss": 2.657, + "step": 29820 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010302712350507078, + "loss": 2.6394, + "step": 29830 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010296991699099568, + "loss": 2.6689, + "step": 29840 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010291270950411527, + "loss": 2.6419, + "step": 29850 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010285550106316804, + "loss": 2.6378, + "step": 29860 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010279829168689274, + "loss": 2.5356, + "step": 29870 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010274108139402856, + "loss": 2.5765, + "step": 29880 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010268387020331485, + "loss": 2.5174, + "step": 29890 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010262665813349131, + "loss": 2.6472, + "step": 29900 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010256944520329789, + "loss": 2.6521, + "step": 29910 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010251223143147495, + "loss": 2.6345, + "step": 29920 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010245501683676298, + "loss": 2.6797, + "step": 29930 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001023978014379028, + "loss": 2.5976, + "step": 29940 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010234058525363548, + "loss": 2.6225, + "step": 29950 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010228336830270238, + "loss": 2.687, + "step": 29960 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010222615060384506, + "loss": 2.7049, + "step": 29970 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010216893217580534, + "loss": 2.576, + "step": 29980 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010211171303732534, + "loss": 2.6424, + "step": 29990 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010205449320714733, + "loss": 2.629, + "step": 30000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001019972727040138, + "loss": 2.5718, + "step": 30010 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010194005154666755, + "loss": 2.6925, + "step": 30020 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010188282975385158, + "loss": 2.6104, + "step": 30030 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010182560734430897, + "loss": 2.6072, + "step": 30040 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010176838433678318, + "loss": 2.536, + "step": 30050 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010171116075001767, + "loss": 2.6107, + "step": 30060 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010165393660275633, + "loss": 2.5934, + "step": 30070 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010160243440648898, + "loss": 2.515, + "step": 30080 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010154520924592512, + "loss": 2.6037, + "step": 30090 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010148798357922332, + "loss": 2.6594, + "step": 30100 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010143075742512797, + "loss": 2.6848, + "step": 30110 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010137353080238368, + "loss": 2.5906, + "step": 30120 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010131630372973521, + "loss": 2.6901, + "step": 30130 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010125907622592745, + "loss": 2.7022, + "step": 30140 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010120184830970548, + "loss": 2.6331, + "step": 30150 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010114461999981442, + "loss": 2.5222, + "step": 30160 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010108739131499965, + "loss": 2.6212, + "step": 30170 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010103016227400654, + "loss": 2.5984, + "step": 30180 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010097293289558066, + "loss": 2.72, + "step": 30190 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010091570319846764, + "loss": 2.5951, + "step": 30200 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010085847320141328, + "loss": 2.5925, + "step": 30210 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010080124292316342, + "loss": 2.6123, + "step": 30220 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010074401238246399, + "loss": 2.6468, + "step": 30230 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010068678159806104, + "loss": 2.724, + "step": 30240 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010062955058870072, + "loss": 2.6269, + "step": 30250 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001005723193731292, + "loss": 2.6155, + "step": 30260 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010051508797009273, + "loss": 2.6378, + "step": 30270 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010045785639833763, + "loss": 2.7504, + "step": 30280 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010040062467661029, + "loss": 2.5738, + "step": 30290 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001003433928236571, + "loss": 2.5874, + "step": 30300 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010028616085822455, + "loss": 2.6241, + "step": 30310 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010022892879905914, + "loss": 2.6537, + "step": 30320 + }, + { + "epoch": 0.52, + "learning_rate": 0.00010017169666490742, + "loss": 2.6395, + "step": 30330 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010011446447451596, + "loss": 2.6443, + "step": 30340 + }, + { + "epoch": 0.53, + "learning_rate": 0.00010005723224663126, + "loss": 2.6831, + "step": 30350 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001, + "loss": 2.7064, + "step": 30360 + }, + { + "epoch": 0.53, + "learning_rate": 9.994276775336875e-05, + "loss": 2.6256, + "step": 30370 + }, + { + "epoch": 0.53, + "learning_rate": 9.988553552548408e-05, + "loss": 2.6226, + "step": 30380 + }, + { + "epoch": 0.53, + "learning_rate": 9.982830333509259e-05, + "loss": 2.5506, + "step": 30390 + }, + { + "epoch": 0.53, + "learning_rate": 9.977107120094089e-05, + "loss": 2.5901, + "step": 30400 + }, + { + "epoch": 0.53, + "learning_rate": 9.971383914177549e-05, + "loss": 2.614, + "step": 30410 + }, + { + "epoch": 0.53, + "learning_rate": 9.965660717634295e-05, + "loss": 2.6432, + "step": 30420 + }, + { + "epoch": 0.53, + "learning_rate": 9.959937532338976e-05, + "loss": 2.6487, + "step": 30430 + }, + { + "epoch": 0.53, + "learning_rate": 9.954214360166242e-05, + "loss": 2.6833, + "step": 30440 + }, + { + "epoch": 0.53, + "learning_rate": 9.948491202990731e-05, + "loss": 2.6591, + "step": 30450 + }, + { + "epoch": 0.53, + "learning_rate": 9.942768062687079e-05, + "loss": 2.5972, + "step": 30460 + }, + { + "epoch": 0.53, + "learning_rate": 9.937044941129928e-05, + "loss": 2.5943, + "step": 30470 + }, + { + "epoch": 0.53, + "learning_rate": 9.931321840193895e-05, + "loss": 2.6317, + "step": 30480 + }, + { + "epoch": 0.53, + "learning_rate": 9.925598761753604e-05, + "loss": 2.6377, + "step": 30490 + }, + { + "epoch": 0.53, + "learning_rate": 9.91987570768366e-05, + "loss": 2.6294, + "step": 30500 + }, + { + "epoch": 0.53, + "learning_rate": 9.914152679858675e-05, + "loss": 2.5864, + "step": 30510 + }, + { + "epoch": 0.53, + "learning_rate": 9.908429680153238e-05, + "loss": 2.6561, + "step": 30520 + }, + { + "epoch": 0.53, + "learning_rate": 9.902706710441935e-05, + "loss": 2.6002, + "step": 30530 + }, + { + "epoch": 0.53, + "learning_rate": 9.896983772599348e-05, + "loss": 2.7288, + "step": 30540 + }, + { + "epoch": 0.53, + "learning_rate": 9.891260868500037e-05, + "loss": 2.6492, + "step": 30550 + }, + { + "epoch": 0.53, + "learning_rate": 9.885538000018559e-05, + "loss": 2.6029, + "step": 30560 + }, + { + "epoch": 0.53, + "learning_rate": 9.879815169029453e-05, + "loss": 2.6141, + "step": 30570 + }, + { + "epoch": 0.53, + "learning_rate": 9.874092377407257e-05, + "loss": 2.6267, + "step": 30580 + }, + { + "epoch": 0.53, + "learning_rate": 9.868369627026484e-05, + "loss": 2.6424, + "step": 30590 + }, + { + "epoch": 0.53, + "learning_rate": 9.862646919761636e-05, + "loss": 2.5806, + "step": 30600 + }, + { + "epoch": 0.53, + "learning_rate": 9.856924257487208e-05, + "loss": 2.6168, + "step": 30610 + }, + { + "epoch": 0.53, + "learning_rate": 9.851201642077673e-05, + "loss": 2.5249, + "step": 30620 + }, + { + "epoch": 0.53, + "learning_rate": 9.84547907540749e-05, + "loss": 2.6657, + "step": 30630 + }, + { + "epoch": 0.53, + "learning_rate": 9.839756559351103e-05, + "loss": 2.605, + "step": 30640 + }, + { + "epoch": 0.53, + "learning_rate": 9.834034095782942e-05, + "loss": 2.5851, + "step": 30650 + }, + { + "epoch": 0.53, + "learning_rate": 9.828311686577419e-05, + "loss": 2.6036, + "step": 30660 + }, + { + "epoch": 0.53, + "learning_rate": 9.822589333608923e-05, + "loss": 2.6241, + "step": 30670 + }, + { + "epoch": 0.53, + "learning_rate": 9.816867038751826e-05, + "loss": 2.6053, + "step": 30680 + }, + { + "epoch": 0.53, + "learning_rate": 9.811144803880489e-05, + "loss": 2.6454, + "step": 30690 + }, + { + "epoch": 0.53, + "learning_rate": 9.805422630869243e-05, + "loss": 2.5805, + "step": 30700 + }, + { + "epoch": 0.53, + "learning_rate": 9.799700521592404e-05, + "loss": 2.7059, + "step": 30710 + }, + { + "epoch": 0.53, + "learning_rate": 9.793978477924266e-05, + "loss": 2.5708, + "step": 30720 + }, + { + "epoch": 0.53, + "learning_rate": 9.788256501739104e-05, + "loss": 2.5482, + "step": 30730 + }, + { + "epoch": 0.53, + "learning_rate": 9.782534594911166e-05, + "loss": 2.6311, + "step": 30740 + }, + { + "epoch": 0.53, + "learning_rate": 9.77681275931468e-05, + "loss": 2.6146, + "step": 30750 + }, + { + "epoch": 0.53, + "learning_rate": 9.771090996823852e-05, + "loss": 2.6053, + "step": 30760 + }, + { + "epoch": 0.53, + "learning_rate": 9.765369309312863e-05, + "loss": 2.6362, + "step": 30770 + }, + { + "epoch": 0.53, + "learning_rate": 9.759647698655865e-05, + "loss": 2.6108, + "step": 30780 + }, + { + "epoch": 0.53, + "learning_rate": 9.753926166726997e-05, + "loss": 2.5786, + "step": 30790 + }, + { + "epoch": 0.53, + "learning_rate": 9.748204715400356e-05, + "loss": 2.6747, + "step": 30800 + }, + { + "epoch": 0.53, + "learning_rate": 9.742483346550026e-05, + "loss": 2.6162, + "step": 30810 + }, + { + "epoch": 0.53, + "learning_rate": 9.736762062050053e-05, + "loss": 2.7522, + "step": 30820 + }, + { + "epoch": 0.53, + "learning_rate": 9.731040863774464e-05, + "loss": 2.5762, + "step": 30830 + }, + { + "epoch": 0.53, + "learning_rate": 9.72531975359726e-05, + "loss": 2.634, + "step": 30840 + }, + { + "epoch": 0.53, + "learning_rate": 9.719598733392403e-05, + "loss": 2.6933, + "step": 30850 + }, + { + "epoch": 0.53, + "learning_rate": 9.71387780503383e-05, + "loss": 2.5619, + "step": 30860 + }, + { + "epoch": 0.53, + "learning_rate": 9.70815697039545e-05, + "loss": 2.631, + "step": 30870 + }, + { + "epoch": 0.53, + "learning_rate": 9.702436231351141e-05, + "loss": 2.5997, + "step": 30880 + }, + { + "epoch": 0.53, + "learning_rate": 9.696715589774745e-05, + "loss": 2.6879, + "step": 30890 + }, + { + "epoch": 0.53, + "learning_rate": 9.690995047540077e-05, + "loss": 2.5759, + "step": 30900 + }, + { + "epoch": 0.53, + "learning_rate": 9.685274606520919e-05, + "loss": 2.6576, + "step": 30910 + }, + { + "epoch": 0.54, + "learning_rate": 9.679554268591018e-05, + "loss": 2.6339, + "step": 30920 + }, + { + "epoch": 0.54, + "learning_rate": 9.673834035624085e-05, + "loss": 2.6064, + "step": 30930 + }, + { + "epoch": 0.54, + "learning_rate": 9.668113909493807e-05, + "loss": 2.6188, + "step": 30940 + }, + { + "epoch": 0.54, + "learning_rate": 9.662393892073824e-05, + "loss": 2.6869, + "step": 30950 + }, + { + "epoch": 0.54, + "learning_rate": 9.656673985237743e-05, + "loss": 2.6626, + "step": 30960 + }, + { + "epoch": 0.54, + "learning_rate": 9.65095419085914e-05, + "loss": 2.5401, + "step": 30970 + }, + { + "epoch": 0.54, + "learning_rate": 9.64523451081155e-05, + "loss": 2.6519, + "step": 30980 + }, + { + "epoch": 0.54, + "learning_rate": 9.639514946968473e-05, + "loss": 2.5184, + "step": 30990 + }, + { + "epoch": 0.54, + "learning_rate": 9.633795501203366e-05, + "loss": 2.6743, + "step": 31000 + }, + { + "epoch": 0.54, + "learning_rate": 9.62807617538965e-05, + "loss": 2.5904, + "step": 31010 + }, + { + "epoch": 0.54, + "learning_rate": 9.622356971400716e-05, + "loss": 2.6042, + "step": 31020 + }, + { + "epoch": 0.54, + "learning_rate": 9.6166378911099e-05, + "loss": 2.6525, + "step": 31030 + }, + { + "epoch": 0.54, + "learning_rate": 9.610918936390505e-05, + "loss": 2.643, + "step": 31040 + }, + { + "epoch": 0.54, + "learning_rate": 9.605200109115792e-05, + "loss": 2.6554, + "step": 31050 + }, + { + "epoch": 0.54, + "learning_rate": 9.59948141115898e-05, + "loss": 2.6069, + "step": 31060 + }, + { + "epoch": 0.54, + "learning_rate": 9.593762844393249e-05, + "loss": 2.6292, + "step": 31070 + }, + { + "epoch": 0.54, + "learning_rate": 9.588044410691725e-05, + "loss": 2.6482, + "step": 31080 + }, + { + "epoch": 0.54, + "learning_rate": 9.582326111927508e-05, + "loss": 2.6683, + "step": 31090 + }, + { + "epoch": 0.54, + "learning_rate": 9.576607949973639e-05, + "loss": 2.5755, + "step": 31100 + }, + { + "epoch": 0.54, + "learning_rate": 9.570889926703119e-05, + "loss": 2.62, + "step": 31110 + }, + { + "epoch": 0.54, + "learning_rate": 9.565172043988903e-05, + "loss": 2.6012, + "step": 31120 + }, + { + "epoch": 0.54, + "learning_rate": 9.559454303703905e-05, + "loss": 2.6773, + "step": 31130 + }, + { + "epoch": 0.54, + "learning_rate": 9.553736707720985e-05, + "loss": 2.5745, + "step": 31140 + }, + { + "epoch": 0.54, + "learning_rate": 9.548019257912958e-05, + "loss": 2.6531, + "step": 31150 + }, + { + "epoch": 0.54, + "learning_rate": 9.542301956152597e-05, + "loss": 2.6368, + "step": 31160 + }, + { + "epoch": 0.54, + "learning_rate": 9.536584804312615e-05, + "loss": 2.6892, + "step": 31170 + }, + { + "epoch": 0.54, + "learning_rate": 9.530867804265685e-05, + "loss": 2.5924, + "step": 31180 + }, + { + "epoch": 0.54, + "learning_rate": 9.525150957884429e-05, + "loss": 2.5587, + "step": 31190 + }, + { + "epoch": 0.54, + "learning_rate": 9.519434267041416e-05, + "loss": 2.6102, + "step": 31200 + }, + { + "epoch": 0.54, + "learning_rate": 9.513717733609167e-05, + "loss": 2.623, + "step": 31210 + }, + { + "epoch": 0.54, + "learning_rate": 9.508001359460151e-05, + "loss": 2.5667, + "step": 31220 + }, + { + "epoch": 0.54, + "learning_rate": 9.502285146466779e-05, + "loss": 2.5959, + "step": 31230 + }, + { + "epoch": 0.54, + "learning_rate": 9.496569096501418e-05, + "loss": 2.6451, + "step": 31240 + }, + { + "epoch": 0.54, + "learning_rate": 9.490853211436375e-05, + "loss": 2.5376, + "step": 31250 + }, + { + "epoch": 0.54, + "learning_rate": 9.485137493143906e-05, + "loss": 2.6282, + "step": 31260 + }, + { + "epoch": 0.54, + "learning_rate": 9.479421943496215e-05, + "loss": 2.552, + "step": 31270 + }, + { + "epoch": 0.54, + "learning_rate": 9.473706564365442e-05, + "loss": 2.5152, + "step": 31280 + }, + { + "epoch": 0.54, + "learning_rate": 9.467991357623683e-05, + "loss": 2.5714, + "step": 31290 + }, + { + "epoch": 0.54, + "learning_rate": 9.462276325142963e-05, + "loss": 2.7117, + "step": 31300 + }, + { + "epoch": 0.54, + "learning_rate": 9.456561468795268e-05, + "loss": 2.6234, + "step": 31310 + }, + { + "epoch": 0.54, + "learning_rate": 9.45084679045251e-05, + "loss": 2.6562, + "step": 31320 + }, + { + "epoch": 0.54, + "learning_rate": 9.44513229198655e-05, + "loss": 2.5218, + "step": 31330 + }, + { + "epoch": 0.54, + "learning_rate": 9.43941797526919e-05, + "loss": 2.5663, + "step": 31340 + }, + { + "epoch": 0.54, + "learning_rate": 9.433703842172175e-05, + "loss": 2.5953, + "step": 31350 + }, + { + "epoch": 0.54, + "learning_rate": 9.427989894567182e-05, + "loss": 2.6284, + "step": 31360 + }, + { + "epoch": 0.54, + "learning_rate": 9.422276134325829e-05, + "loss": 2.6646, + "step": 31370 + }, + { + "epoch": 0.54, + "learning_rate": 9.416562563319682e-05, + "loss": 2.5863, + "step": 31380 + }, + { + "epoch": 0.54, + "learning_rate": 9.410849183420241e-05, + "loss": 2.687, + "step": 31390 + }, + { + "epoch": 0.54, + "learning_rate": 9.405135996498935e-05, + "loss": 2.6162, + "step": 31400 + }, + { + "epoch": 0.54, + "learning_rate": 9.399423004427135e-05, + "loss": 2.6288, + "step": 31410 + }, + { + "epoch": 0.54, + "learning_rate": 9.393710209076152e-05, + "loss": 2.6449, + "step": 31420 + }, + { + "epoch": 0.54, + "learning_rate": 9.38799761231723e-05, + "loss": 2.5917, + "step": 31430 + }, + { + "epoch": 0.54, + "learning_rate": 9.382285216021546e-05, + "loss": 2.5605, + "step": 31440 + }, + { + "epoch": 0.54, + "learning_rate": 9.376573022060209e-05, + "loss": 2.6231, + "step": 31450 + }, + { + "epoch": 0.54, + "learning_rate": 9.370861032304272e-05, + "loss": 2.5735, + "step": 31460 + }, + { + "epoch": 0.54, + "learning_rate": 9.365149248624711e-05, + "loss": 2.6417, + "step": 31470 + }, + { + "epoch": 0.54, + "learning_rate": 9.359437672892437e-05, + "loss": 2.7132, + "step": 31480 + }, + { + "epoch": 0.54, + "learning_rate": 9.3537263069783e-05, + "loss": 2.6707, + "step": 31490 + }, + { + "epoch": 0.55, + "learning_rate": 9.348015152753067e-05, + "loss": 2.6174, + "step": 31500 + }, + { + "epoch": 0.55, + "learning_rate": 9.34230421208745e-05, + "loss": 2.5828, + "step": 31510 + }, + { + "epoch": 0.55, + "learning_rate": 9.336593486852078e-05, + "loss": 2.6233, + "step": 31520 + }, + { + "epoch": 0.55, + "learning_rate": 9.330882978917526e-05, + "loss": 2.6064, + "step": 31530 + }, + { + "epoch": 0.55, + "learning_rate": 9.325172690154283e-05, + "loss": 2.6624, + "step": 31540 + }, + { + "epoch": 0.55, + "learning_rate": 9.319462622432767e-05, + "loss": 2.6192, + "step": 31550 + }, + { + "epoch": 0.55, + "learning_rate": 9.313752777623332e-05, + "loss": 2.6573, + "step": 31560 + }, + { + "epoch": 0.55, + "learning_rate": 9.30804315759626e-05, + "loss": 2.6556, + "step": 31570 + }, + { + "epoch": 0.55, + "learning_rate": 9.30233376422175e-05, + "loss": 2.5889, + "step": 31580 + }, + { + "epoch": 0.55, + "learning_rate": 9.296624599369932e-05, + "loss": 2.5177, + "step": 31590 + }, + { + "epoch": 0.55, + "learning_rate": 9.290915664910855e-05, + "loss": 2.6447, + "step": 31600 + }, + { + "epoch": 0.55, + "learning_rate": 9.285206962714506e-05, + "loss": 2.5352, + "step": 31610 + }, + { + "epoch": 0.55, + "learning_rate": 9.279498494650782e-05, + "loss": 2.6811, + "step": 31620 + }, + { + "epoch": 0.55, + "learning_rate": 9.27379026258951e-05, + "loss": 2.6113, + "step": 31630 + }, + { + "epoch": 0.55, + "learning_rate": 9.268082268400442e-05, + "loss": 2.6134, + "step": 31640 + }, + { + "epoch": 0.55, + "learning_rate": 9.262374513953246e-05, + "loss": 2.6574, + "step": 31650 + }, + { + "epoch": 0.55, + "learning_rate": 9.256667001117514e-05, + "loss": 2.6808, + "step": 31660 + }, + { + "epoch": 0.55, + "learning_rate": 9.250959731762759e-05, + "loss": 2.6889, + "step": 31670 + }, + { + "epoch": 0.55, + "learning_rate": 9.245252707758416e-05, + "loss": 2.6949, + "step": 31680 + }, + { + "epoch": 0.55, + "learning_rate": 9.239545930973838e-05, + "loss": 2.5865, + "step": 31690 + }, + { + "epoch": 0.55, + "learning_rate": 9.233839403278293e-05, + "loss": 2.6622, + "step": 31700 + }, + { + "epoch": 0.55, + "learning_rate": 9.228133126540978e-05, + "loss": 2.6317, + "step": 31710 + }, + { + "epoch": 0.55, + "learning_rate": 9.222427102630997e-05, + "loss": 2.6546, + "step": 31720 + }, + { + "epoch": 0.55, + "learning_rate": 9.216721333417375e-05, + "loss": 2.6371, + "step": 31730 + }, + { + "epoch": 0.55, + "learning_rate": 9.211015820769054e-05, + "loss": 2.5843, + "step": 31740 + }, + { + "epoch": 0.55, + "learning_rate": 9.205310566554895e-05, + "loss": 2.6576, + "step": 31750 + }, + { + "epoch": 0.55, + "learning_rate": 9.199605572643671e-05, + "loss": 2.599, + "step": 31760 + }, + { + "epoch": 0.55, + "learning_rate": 9.193900840904068e-05, + "loss": 2.5656, + "step": 31770 + }, + { + "epoch": 0.55, + "learning_rate": 9.188196373204687e-05, + "loss": 2.6718, + "step": 31780 + }, + { + "epoch": 0.55, + "learning_rate": 9.182492171414048e-05, + "loss": 2.6519, + "step": 31790 + }, + { + "epoch": 0.55, + "learning_rate": 9.176788237400578e-05, + "loss": 2.5336, + "step": 31800 + }, + { + "epoch": 0.55, + "learning_rate": 9.171084573032614e-05, + "loss": 2.6469, + "step": 31810 + }, + { + "epoch": 0.55, + "learning_rate": 9.165381180178415e-05, + "loss": 2.5766, + "step": 31820 + }, + { + "epoch": 0.55, + "learning_rate": 9.15967806070614e-05, + "loss": 2.7171, + "step": 31830 + }, + { + "epoch": 0.55, + "learning_rate": 9.153975216483864e-05, + "loss": 2.5885, + "step": 31840 + }, + { + "epoch": 0.55, + "learning_rate": 9.14827264937957e-05, + "loss": 2.6437, + "step": 31850 + }, + { + "epoch": 0.55, + "learning_rate": 9.142570361261153e-05, + "loss": 2.6123, + "step": 31860 + }, + { + "epoch": 0.55, + "learning_rate": 9.136868353996416e-05, + "loss": 2.6152, + "step": 31870 + }, + { + "epoch": 0.55, + "learning_rate": 9.131166629453062e-05, + "loss": 2.5959, + "step": 31880 + }, + { + "epoch": 0.55, + "learning_rate": 9.125465189498719e-05, + "loss": 2.6702, + "step": 31890 + }, + { + "epoch": 0.55, + "learning_rate": 9.119764036000901e-05, + "loss": 2.65, + "step": 31900 + }, + { + "epoch": 0.55, + "learning_rate": 9.114063170827045e-05, + "loss": 2.6002, + "step": 31910 + }, + { + "epoch": 0.55, + "learning_rate": 9.10836259584448e-05, + "loss": 2.6223, + "step": 31920 + }, + { + "epoch": 0.55, + "learning_rate": 9.10266231292045e-05, + "loss": 2.6062, + "step": 31930 + }, + { + "epoch": 0.55, + "learning_rate": 9.096962323922104e-05, + "loss": 2.594, + "step": 31940 + }, + { + "epoch": 0.55, + "learning_rate": 9.091262630716488e-05, + "loss": 2.6731, + "step": 31950 + }, + { + "epoch": 0.55, + "learning_rate": 9.085563235170549e-05, + "loss": 2.5561, + "step": 31960 + }, + { + "epoch": 0.55, + "learning_rate": 9.079864139151147e-05, + "loss": 2.6397, + "step": 31970 + }, + { + "epoch": 0.55, + "learning_rate": 9.074165344525037e-05, + "loss": 2.5176, + "step": 31980 + }, + { + "epoch": 0.55, + "learning_rate": 9.068466853158877e-05, + "loss": 2.6687, + "step": 31990 + }, + { + "epoch": 0.55, + "learning_rate": 9.062768666919221e-05, + "loss": 2.579, + "step": 32000 + }, + { + "epoch": 0.55, + "learning_rate": 9.057070787672532e-05, + "loss": 2.6778, + "step": 32010 + }, + { + "epoch": 0.55, + "learning_rate": 9.051373217285165e-05, + "loss": 2.6837, + "step": 32020 + }, + { + "epoch": 0.55, + "learning_rate": 9.045675957623375e-05, + "loss": 2.5402, + "step": 32030 + }, + { + "epoch": 0.55, + "learning_rate": 9.039979010553322e-05, + "loss": 2.6572, + "step": 32040 + }, + { + "epoch": 0.55, + "learning_rate": 9.034282377941054e-05, + "loss": 2.5599, + "step": 32050 + }, + { + "epoch": 0.55, + "learning_rate": 9.028586061652524e-05, + "loss": 2.6042, + "step": 32060 + }, + { + "epoch": 0.56, + "learning_rate": 9.02289006355357e-05, + "loss": 2.5916, + "step": 32070 + }, + { + "epoch": 0.56, + "learning_rate": 9.017194385509941e-05, + "loss": 2.5841, + "step": 32080 + }, + { + "epoch": 0.56, + "learning_rate": 9.011499029387274e-05, + "loss": 2.6411, + "step": 32090 + }, + { + "epoch": 0.56, + "learning_rate": 9.005803997051092e-05, + "loss": 2.4999, + "step": 32100 + }, + { + "epoch": 0.56, + "learning_rate": 9.000109290366825e-05, + "loss": 2.6101, + "step": 32110 + }, + { + "epoch": 0.56, + "learning_rate": 8.994414911199795e-05, + "loss": 2.6506, + "step": 32120 + }, + { + "epoch": 0.56, + "learning_rate": 8.98872086141521e-05, + "loss": 2.5075, + "step": 32130 + }, + { + "epoch": 0.56, + "learning_rate": 8.983027142878175e-05, + "loss": 2.643, + "step": 32140 + }, + { + "epoch": 0.56, + "learning_rate": 8.97733375745368e-05, + "loss": 2.6404, + "step": 32150 + }, + { + "epoch": 0.56, + "learning_rate": 8.971640707006615e-05, + "loss": 2.5382, + "step": 32160 + }, + { + "epoch": 0.56, + "learning_rate": 8.965947993401757e-05, + "loss": 2.6133, + "step": 32170 + }, + { + "epoch": 0.56, + "learning_rate": 8.960255618503764e-05, + "loss": 2.6604, + "step": 32180 + }, + { + "epoch": 0.56, + "learning_rate": 8.9545635841772e-05, + "loss": 2.616, + "step": 32190 + }, + { + "epoch": 0.56, + "learning_rate": 8.948871892286504e-05, + "loss": 2.603, + "step": 32200 + }, + { + "epoch": 0.56, + "learning_rate": 8.943749663908416e-05, + "loss": 2.6277, + "step": 32210 + }, + { + "epoch": 0.56, + "learning_rate": 8.938058627782008e-05, + "loss": 2.6372, + "step": 32220 + }, + { + "epoch": 0.56, + "learning_rate": 8.932367939497712e-05, + "loss": 2.6144, + "step": 32230 + }, + { + "epoch": 0.56, + "learning_rate": 8.926677600919539e-05, + "loss": 2.6459, + "step": 32240 + }, + { + "epoch": 0.56, + "learning_rate": 8.920987613911373e-05, + "loss": 2.6651, + "step": 32250 + }, + { + "epoch": 0.56, + "learning_rate": 8.915297980336982e-05, + "loss": 2.5374, + "step": 32260 + }, + { + "epoch": 0.56, + "learning_rate": 8.909608702060031e-05, + "loss": 2.633, + "step": 32270 + }, + { + "epoch": 0.56, + "learning_rate": 8.903919780944054e-05, + "loss": 2.5547, + "step": 32280 + }, + { + "epoch": 0.56, + "learning_rate": 8.898231218852476e-05, + "loss": 2.6508, + "step": 32290 + }, + { + "epoch": 0.56, + "learning_rate": 8.892543017648603e-05, + "loss": 2.6811, + "step": 32300 + }, + { + "epoch": 0.56, + "learning_rate": 8.886855179195625e-05, + "loss": 2.5617, + "step": 32310 + }, + { + "epoch": 0.56, + "learning_rate": 8.881167705356606e-05, + "loss": 2.578, + "step": 32320 + }, + { + "epoch": 0.56, + "learning_rate": 8.875480597994499e-05, + "loss": 2.5723, + "step": 32330 + }, + { + "epoch": 0.56, + "learning_rate": 8.869793858972132e-05, + "loss": 2.622, + "step": 32340 + }, + { + "epoch": 0.56, + "learning_rate": 8.864107490152214e-05, + "loss": 2.6354, + "step": 32350 + }, + { + "epoch": 0.56, + "learning_rate": 8.858421493397329e-05, + "loss": 2.569, + "step": 32360 + }, + { + "epoch": 0.56, + "learning_rate": 8.852735870569948e-05, + "loss": 2.5618, + "step": 32370 + }, + { + "epoch": 0.56, + "learning_rate": 8.847050623532411e-05, + "loss": 2.5059, + "step": 32380 + }, + { + "epoch": 0.56, + "learning_rate": 8.841365754146938e-05, + "loss": 2.6164, + "step": 32390 + }, + { + "epoch": 0.56, + "learning_rate": 8.835681264275626e-05, + "loss": 2.5865, + "step": 32400 + }, + { + "epoch": 0.56, + "learning_rate": 8.829997155780447e-05, + "loss": 2.5607, + "step": 32410 + }, + { + "epoch": 0.56, + "learning_rate": 8.824313430523242e-05, + "loss": 2.6653, + "step": 32420 + }, + { + "epoch": 0.56, + "learning_rate": 8.818630090365743e-05, + "loss": 2.4724, + "step": 32430 + }, + { + "epoch": 0.56, + "learning_rate": 8.812947137169539e-05, + "loss": 2.6331, + "step": 32440 + }, + { + "epoch": 0.56, + "learning_rate": 8.8072645727961e-05, + "loss": 2.642, + "step": 32450 + }, + { + "epoch": 0.56, + "learning_rate": 8.801582399106767e-05, + "loss": 2.5862, + "step": 32460 + }, + { + "epoch": 0.56, + "learning_rate": 8.795900617962752e-05, + "loss": 2.5325, + "step": 32470 + }, + { + "epoch": 0.56, + "learning_rate": 8.790219231225144e-05, + "loss": 2.5955, + "step": 32480 + }, + { + "epoch": 0.56, + "learning_rate": 8.784538240754893e-05, + "loss": 2.6089, + "step": 32490 + }, + { + "epoch": 0.56, + "learning_rate": 8.778857648412828e-05, + "loss": 2.6552, + "step": 32500 + }, + { + "epoch": 0.56, + "learning_rate": 8.77317745605964e-05, + "loss": 2.5352, + "step": 32510 + }, + { + "epoch": 0.56, + "learning_rate": 8.767497665555899e-05, + "loss": 2.5628, + "step": 32520 + }, + { + "epoch": 0.56, + "learning_rate": 8.761818278762034e-05, + "loss": 2.5662, + "step": 32530 + }, + { + "epoch": 0.56, + "learning_rate": 8.756139297538345e-05, + "loss": 2.6669, + "step": 32540 + }, + { + "epoch": 0.56, + "learning_rate": 8.750460723745003e-05, + "loss": 2.4996, + "step": 32550 + }, + { + "epoch": 0.56, + "learning_rate": 8.74478255924204e-05, + "loss": 2.5825, + "step": 32560 + }, + { + "epoch": 0.56, + "learning_rate": 8.739104805889354e-05, + "loss": 2.6205, + "step": 32570 + }, + { + "epoch": 0.56, + "learning_rate": 8.733427465546714e-05, + "loss": 2.6435, + "step": 32580 + }, + { + "epoch": 0.56, + "learning_rate": 8.727750540073748e-05, + "loss": 2.5927, + "step": 32590 + }, + { + "epoch": 0.56, + "learning_rate": 8.722074031329947e-05, + "loss": 2.6716, + "step": 32600 + }, + { + "epoch": 0.56, + "learning_rate": 8.716397941174674e-05, + "loss": 2.6018, + "step": 32610 + }, + { + "epoch": 0.56, + "learning_rate": 8.710722271467147e-05, + "loss": 2.6438, + "step": 32620 + }, + { + "epoch": 0.56, + "learning_rate": 8.705047024066452e-05, + "loss": 2.5614, + "step": 32630 + }, + { + "epoch": 0.56, + "learning_rate": 8.699372200831532e-05, + "loss": 2.6428, + "step": 32640 + }, + { + "epoch": 0.57, + "learning_rate": 8.69369780362119e-05, + "loss": 2.6942, + "step": 32650 + }, + { + "epoch": 0.57, + "learning_rate": 8.68802383429409e-05, + "loss": 2.5096, + "step": 32660 + }, + { + "epoch": 0.57, + "learning_rate": 8.682350294708766e-05, + "loss": 2.57, + "step": 32670 + }, + { + "epoch": 0.57, + "learning_rate": 8.676677186723597e-05, + "loss": 2.5938, + "step": 32680 + }, + { + "epoch": 0.57, + "learning_rate": 8.671004512196827e-05, + "loss": 2.6658, + "step": 32690 + }, + { + "epoch": 0.57, + "learning_rate": 8.665332272986559e-05, + "loss": 2.6253, + "step": 32700 + }, + { + "epoch": 0.57, + "learning_rate": 8.659660470950755e-05, + "loss": 2.5927, + "step": 32710 + }, + { + "epoch": 0.57, + "learning_rate": 8.653989107947223e-05, + "loss": 2.6732, + "step": 32720 + }, + { + "epoch": 0.57, + "learning_rate": 8.648318185833643e-05, + "loss": 2.571, + "step": 32730 + }, + { + "epoch": 0.57, + "learning_rate": 8.642647706467542e-05, + "loss": 2.6967, + "step": 32740 + }, + { + "epoch": 0.57, + "learning_rate": 8.6369776717063e-05, + "loss": 2.6214, + "step": 32750 + }, + { + "epoch": 0.57, + "learning_rate": 8.63130808340715e-05, + "loss": 2.542, + "step": 32760 + }, + { + "epoch": 0.57, + "learning_rate": 8.625638943427191e-05, + "loss": 2.5753, + "step": 32770 + }, + { + "epoch": 0.57, + "learning_rate": 8.61997025362336e-05, + "loss": 2.5876, + "step": 32780 + }, + { + "epoch": 0.57, + "learning_rate": 8.61430201585246e-05, + "loss": 2.4853, + "step": 32790 + }, + { + "epoch": 0.57, + "learning_rate": 8.608634231971137e-05, + "loss": 2.5875, + "step": 32800 + }, + { + "epoch": 0.57, + "learning_rate": 8.602966903835889e-05, + "loss": 2.671, + "step": 32810 + }, + { + "epoch": 0.57, + "learning_rate": 8.597300033303071e-05, + "loss": 2.6479, + "step": 32820 + }, + { + "epoch": 0.57, + "learning_rate": 8.591633622228879e-05, + "loss": 2.6245, + "step": 32830 + }, + { + "epoch": 0.57, + "learning_rate": 8.585967672469362e-05, + "loss": 2.5774, + "step": 32840 + }, + { + "epoch": 0.57, + "learning_rate": 8.580302185880424e-05, + "loss": 2.6292, + "step": 32850 + }, + { + "epoch": 0.57, + "learning_rate": 8.574637164317811e-05, + "loss": 2.6067, + "step": 32860 + }, + { + "epoch": 0.57, + "learning_rate": 8.568972609637112e-05, + "loss": 2.6024, + "step": 32870 + }, + { + "epoch": 0.57, + "learning_rate": 8.563308523693779e-05, + "loss": 2.6396, + "step": 32880 + }, + { + "epoch": 0.57, + "learning_rate": 8.557644908343095e-05, + "loss": 2.5759, + "step": 32890 + }, + { + "epoch": 0.57, + "learning_rate": 8.551981765440194e-05, + "loss": 2.5823, + "step": 32900 + }, + { + "epoch": 0.57, + "learning_rate": 8.546319096840054e-05, + "loss": 2.5526, + "step": 32910 + }, + { + "epoch": 0.57, + "learning_rate": 8.540656904397506e-05, + "loss": 2.6044, + "step": 32920 + }, + { + "epoch": 0.57, + "learning_rate": 8.534995189967212e-05, + "loss": 2.574, + "step": 32930 + }, + { + "epoch": 0.57, + "learning_rate": 8.529333955403685e-05, + "loss": 2.6452, + "step": 32940 + }, + { + "epoch": 0.57, + "learning_rate": 8.523673202561281e-05, + "loss": 2.5687, + "step": 32950 + }, + { + "epoch": 0.57, + "learning_rate": 8.518012933294197e-05, + "loss": 2.5957, + "step": 32960 + }, + { + "epoch": 0.57, + "learning_rate": 8.512353149456467e-05, + "loss": 2.6516, + "step": 32970 + }, + { + "epoch": 0.57, + "learning_rate": 8.506693852901977e-05, + "loss": 2.6285, + "step": 32980 + }, + { + "epoch": 0.57, + "learning_rate": 8.501035045484441e-05, + "loss": 2.5496, + "step": 32990 + }, + { + "epoch": 0.57, + "learning_rate": 8.495376729057426e-05, + "loss": 2.5975, + "step": 33000 + }, + { + "epoch": 0.57, + "learning_rate": 8.489718905474323e-05, + "loss": 2.5877, + "step": 33010 + }, + { + "epoch": 0.57, + "learning_rate": 8.48406157658837e-05, + "loss": 2.5408, + "step": 33020 + }, + { + "epoch": 0.57, + "learning_rate": 8.478404744252648e-05, + "loss": 2.5708, + "step": 33030 + }, + { + "epoch": 0.57, + "learning_rate": 8.472748410320065e-05, + "loss": 2.7015, + "step": 33040 + }, + { + "epoch": 0.57, + "learning_rate": 8.46709257664337e-05, + "loss": 2.6317, + "step": 33050 + }, + { + "epoch": 0.57, + "learning_rate": 8.46143724507515e-05, + "loss": 2.5631, + "step": 33060 + }, + { + "epoch": 0.57, + "learning_rate": 8.455782417467826e-05, + "loss": 2.6817, + "step": 33070 + }, + { + "epoch": 0.57, + "learning_rate": 8.450128095673655e-05, + "loss": 2.6192, + "step": 33080 + }, + { + "epoch": 0.57, + "learning_rate": 8.444474281544723e-05, + "loss": 2.7026, + "step": 33090 + }, + { + "epoch": 0.57, + "learning_rate": 8.43882097693296e-05, + "loss": 2.5941, + "step": 33100 + }, + { + "epoch": 0.57, + "learning_rate": 8.433168183690119e-05, + "loss": 2.5896, + "step": 33110 + }, + { + "epoch": 0.57, + "learning_rate": 8.427515903667789e-05, + "loss": 2.6453, + "step": 33120 + }, + { + "epoch": 0.57, + "learning_rate": 8.421864138717395e-05, + "loss": 2.6121, + "step": 33130 + }, + { + "epoch": 0.57, + "learning_rate": 8.416212890690186e-05, + "loss": 2.4978, + "step": 33140 + }, + { + "epoch": 0.57, + "learning_rate": 8.410562161437244e-05, + "loss": 2.6365, + "step": 33150 + }, + { + "epoch": 0.57, + "learning_rate": 8.404911952809491e-05, + "loss": 2.6208, + "step": 33160 + }, + { + "epoch": 0.57, + "learning_rate": 8.399262266657662e-05, + "loss": 2.5717, + "step": 33170 + }, + { + "epoch": 0.57, + "learning_rate": 8.393613104832332e-05, + "loss": 2.5737, + "step": 33180 + }, + { + "epoch": 0.57, + "learning_rate": 8.387964469183902e-05, + "loss": 2.5346, + "step": 33190 + }, + { + "epoch": 0.57, + "learning_rate": 8.382316361562597e-05, + "loss": 2.6137, + "step": 33200 + }, + { + "epoch": 0.57, + "learning_rate": 8.376668783818472e-05, + "loss": 2.5206, + "step": 33210 + }, + { + "epoch": 0.57, + "learning_rate": 8.37102173780141e-05, + "loss": 2.5274, + "step": 33220 + }, + { + "epoch": 0.58, + "learning_rate": 8.365375225361119e-05, + "loss": 2.5671, + "step": 33230 + }, + { + "epoch": 0.58, + "learning_rate": 8.359729248347127e-05, + "loss": 2.5631, + "step": 33240 + }, + { + "epoch": 0.58, + "learning_rate": 8.354083808608796e-05, + "loss": 2.641, + "step": 33250 + }, + { + "epoch": 0.58, + "learning_rate": 8.348438907995303e-05, + "loss": 2.5801, + "step": 33260 + }, + { + "epoch": 0.58, + "learning_rate": 8.342794548355652e-05, + "loss": 2.5264, + "step": 33270 + }, + { + "epoch": 0.58, + "learning_rate": 8.337150731538675e-05, + "loss": 2.6721, + "step": 33280 + }, + { + "epoch": 0.58, + "learning_rate": 8.331507459393017e-05, + "loss": 2.5475, + "step": 33290 + }, + { + "epoch": 0.58, + "learning_rate": 8.32586473376715e-05, + "loss": 2.564, + "step": 33300 + }, + { + "epoch": 0.58, + "learning_rate": 8.320222556509364e-05, + "loss": 2.4893, + "step": 33310 + }, + { + "epoch": 0.58, + "learning_rate": 8.314580929467776e-05, + "loss": 2.5932, + "step": 33320 + }, + { + "epoch": 0.58, + "learning_rate": 8.30893985449031e-05, + "loss": 2.6126, + "step": 33330 + }, + { + "epoch": 0.58, + "learning_rate": 8.303299333424727e-05, + "loss": 2.6231, + "step": 33340 + }, + { + "epoch": 0.58, + "learning_rate": 8.29765936811859e-05, + "loss": 2.6864, + "step": 33350 + }, + { + "epoch": 0.58, + "learning_rate": 8.29201996041929e-05, + "loss": 2.5874, + "step": 33360 + }, + { + "epoch": 0.58, + "learning_rate": 8.286381112174033e-05, + "loss": 2.4811, + "step": 33370 + }, + { + "epoch": 0.58, + "learning_rate": 8.280742825229836e-05, + "loss": 2.5521, + "step": 33380 + }, + { + "epoch": 0.58, + "learning_rate": 8.27510510143354e-05, + "loss": 2.5402, + "step": 33390 + }, + { + "epoch": 0.58, + "learning_rate": 8.269467942631801e-05, + "loss": 2.6696, + "step": 33400 + }, + { + "epoch": 0.58, + "learning_rate": 8.263831350671084e-05, + "loss": 2.6744, + "step": 33410 + }, + { + "epoch": 0.58, + "learning_rate": 8.25819532739767e-05, + "loss": 2.5722, + "step": 33420 + }, + { + "epoch": 0.58, + "learning_rate": 8.252559874657661e-05, + "loss": 2.5401, + "step": 33430 + }, + { + "epoch": 0.58, + "learning_rate": 8.246924994296963e-05, + "loss": 2.7006, + "step": 33440 + }, + { + "epoch": 0.58, + "learning_rate": 8.2412906881613e-05, + "loss": 2.5845, + "step": 33450 + }, + { + "epoch": 0.58, + "learning_rate": 8.235656958096202e-05, + "loss": 2.6693, + "step": 33460 + }, + { + "epoch": 0.58, + "learning_rate": 8.230023805947019e-05, + "loss": 2.5728, + "step": 33470 + }, + { + "epoch": 0.58, + "learning_rate": 8.224391233558905e-05, + "loss": 2.6132, + "step": 33480 + }, + { + "epoch": 0.58, + "learning_rate": 8.218759242776826e-05, + "loss": 2.5298, + "step": 33490 + }, + { + "epoch": 0.58, + "learning_rate": 8.21312783544556e-05, + "loss": 2.6541, + "step": 33500 + }, + { + "epoch": 0.58, + "learning_rate": 8.20749701340969e-05, + "loss": 2.5383, + "step": 33510 + }, + { + "epoch": 0.58, + "learning_rate": 8.201866778513604e-05, + "loss": 2.6517, + "step": 33520 + }, + { + "epoch": 0.58, + "learning_rate": 8.196237132601511e-05, + "loss": 2.5983, + "step": 33530 + }, + { + "epoch": 0.58, + "learning_rate": 8.190608077517413e-05, + "loss": 2.6201, + "step": 33540 + }, + { + "epoch": 0.58, + "learning_rate": 8.184979615105129e-05, + "loss": 2.6422, + "step": 33550 + }, + { + "epoch": 0.58, + "learning_rate": 8.179351747208274e-05, + "loss": 2.5995, + "step": 33560 + }, + { + "epoch": 0.58, + "learning_rate": 8.173724475670273e-05, + "loss": 2.6187, + "step": 33570 + }, + { + "epoch": 0.58, + "learning_rate": 8.16809780233436e-05, + "loss": 2.6193, + "step": 33580 + }, + { + "epoch": 0.58, + "learning_rate": 8.162471729043567e-05, + "loss": 2.6017, + "step": 33590 + }, + { + "epoch": 0.58, + "learning_rate": 8.156846257640732e-05, + "loss": 2.6517, + "step": 33600 + }, + { + "epoch": 0.58, + "learning_rate": 8.151221389968491e-05, + "loss": 2.582, + "step": 33610 + }, + { + "epoch": 0.58, + "learning_rate": 8.145597127869292e-05, + "loss": 2.6222, + "step": 33620 + }, + { + "epoch": 0.58, + "learning_rate": 8.139973473185377e-05, + "loss": 2.6, + "step": 33630 + }, + { + "epoch": 0.58, + "learning_rate": 8.134350427758786e-05, + "loss": 2.6229, + "step": 33640 + }, + { + "epoch": 0.58, + "learning_rate": 8.128727993431375e-05, + "loss": 2.64, + "step": 33650 + }, + { + "epoch": 0.58, + "learning_rate": 8.123106172044783e-05, + "loss": 2.6772, + "step": 33660 + }, + { + "epoch": 0.58, + "learning_rate": 8.117484965440454e-05, + "loss": 2.5327, + "step": 33670 + }, + { + "epoch": 0.58, + "learning_rate": 8.111864375459635e-05, + "loss": 2.617, + "step": 33680 + }, + { + "epoch": 0.58, + "learning_rate": 8.106244403943364e-05, + "loss": 2.5626, + "step": 33690 + }, + { + "epoch": 0.58, + "learning_rate": 8.100625052732477e-05, + "loss": 2.5397, + "step": 33700 + }, + { + "epoch": 0.58, + "learning_rate": 8.095006323667619e-05, + "loss": 2.561, + "step": 33710 + }, + { + "epoch": 0.58, + "learning_rate": 8.089388218589213e-05, + "loss": 2.5349, + "step": 33720 + }, + { + "epoch": 0.58, + "learning_rate": 8.08377073933749e-05, + "loss": 2.5757, + "step": 33730 + }, + { + "epoch": 0.58, + "learning_rate": 8.078153887752474e-05, + "loss": 2.6291, + "step": 33740 + }, + { + "epoch": 0.58, + "learning_rate": 8.07253766567398e-05, + "loss": 2.7031, + "step": 33750 + }, + { + "epoch": 0.58, + "learning_rate": 8.066922074941615e-05, + "loss": 2.5763, + "step": 33760 + }, + { + "epoch": 0.58, + "learning_rate": 8.061307117394787e-05, + "loss": 2.4859, + "step": 33770 + }, + { + "epoch": 0.58, + "learning_rate": 8.055692794872694e-05, + "loss": 2.5904, + "step": 33780 + }, + { + "epoch": 0.58, + "learning_rate": 8.050079109214317e-05, + "loss": 2.6739, + "step": 33790 + }, + { + "epoch": 0.58, + "learning_rate": 8.044466062258445e-05, + "loss": 2.5431, + "step": 33800 + }, + { + "epoch": 0.59, + "learning_rate": 8.038853655843642e-05, + "loss": 2.6639, + "step": 33810 + }, + { + "epoch": 0.59, + "learning_rate": 8.03324189180827e-05, + "loss": 2.536, + "step": 33820 + }, + { + "epoch": 0.59, + "learning_rate": 8.02763077199048e-05, + "loss": 2.6177, + "step": 33830 + }, + { + "epoch": 0.59, + "learning_rate": 8.022020298228215e-05, + "loss": 2.5787, + "step": 33840 + }, + { + "epoch": 0.59, + "learning_rate": 8.016410472359195e-05, + "loss": 2.5849, + "step": 33850 + }, + { + "epoch": 0.59, + "learning_rate": 8.01080129622094e-05, + "loss": 2.6167, + "step": 33860 + }, + { + "epoch": 0.59, + "learning_rate": 8.005192771650755e-05, + "loss": 2.6675, + "step": 33870 + }, + { + "epoch": 0.59, + "learning_rate": 7.99958490048572e-05, + "loss": 2.637, + "step": 33880 + }, + { + "epoch": 0.59, + "learning_rate": 7.993977684562722e-05, + "loss": 2.5651, + "step": 33890 + }, + { + "epoch": 0.59, + "learning_rate": 7.988371125718417e-05, + "loss": 2.6825, + "step": 33900 + }, + { + "epoch": 0.59, + "learning_rate": 7.982765225789247e-05, + "loss": 2.5359, + "step": 33910 + }, + { + "epoch": 0.59, + "learning_rate": 7.977159986611444e-05, + "loss": 2.6403, + "step": 33920 + }, + { + "epoch": 0.59, + "learning_rate": 7.971555410021023e-05, + "loss": 2.57, + "step": 33930 + }, + { + "epoch": 0.59, + "learning_rate": 7.965951497853776e-05, + "loss": 2.6122, + "step": 33940 + }, + { + "epoch": 0.59, + "learning_rate": 7.960348251945286e-05, + "loss": 2.5871, + "step": 33950 + }, + { + "epoch": 0.59, + "learning_rate": 7.954745674130911e-05, + "loss": 2.6274, + "step": 33960 + }, + { + "epoch": 0.59, + "learning_rate": 7.94914376624579e-05, + "loss": 2.5834, + "step": 33970 + }, + { + "epoch": 0.59, + "learning_rate": 7.943542530124848e-05, + "loss": 2.5951, + "step": 33980 + }, + { + "epoch": 0.59, + "learning_rate": 7.937941967602787e-05, + "loss": 2.4752, + "step": 33990 + }, + { + "epoch": 0.59, + "learning_rate": 7.932342080514088e-05, + "loss": 2.5861, + "step": 34000 + }, + { + "epoch": 0.59, + "learning_rate": 7.926742870693008e-05, + "loss": 2.6264, + "step": 34010 + }, + { + "epoch": 0.59, + "learning_rate": 7.921144339973588e-05, + "loss": 2.5943, + "step": 34020 + }, + { + "epoch": 0.59, + "learning_rate": 7.915546490189645e-05, + "loss": 2.5767, + "step": 34030 + }, + { + "epoch": 0.59, + "learning_rate": 7.909949323174765e-05, + "loss": 2.558, + "step": 34040 + }, + { + "epoch": 0.59, + "learning_rate": 7.904352840762326e-05, + "loss": 2.5735, + "step": 34050 + }, + { + "epoch": 0.59, + "learning_rate": 7.898757044785469e-05, + "loss": 2.5535, + "step": 34060 + }, + { + "epoch": 0.59, + "learning_rate": 7.893161937077107e-05, + "loss": 2.5727, + "step": 34070 + }, + { + "epoch": 0.59, + "learning_rate": 7.887567519469947e-05, + "loss": 2.5929, + "step": 34080 + }, + { + "epoch": 0.59, + "learning_rate": 7.881973793796448e-05, + "loss": 2.5686, + "step": 34090 + }, + { + "epoch": 0.59, + "learning_rate": 7.87638076188886e-05, + "loss": 2.6726, + "step": 34100 + }, + { + "epoch": 0.59, + "learning_rate": 7.870788425579191e-05, + "loss": 2.6218, + "step": 34110 + }, + { + "epoch": 0.59, + "learning_rate": 7.865196786699227e-05, + "loss": 2.6214, + "step": 34120 + }, + { + "epoch": 0.59, + "learning_rate": 7.859605847080531e-05, + "loss": 2.569, + "step": 34130 + }, + { + "epoch": 0.59, + "learning_rate": 7.854015608554431e-05, + "loss": 2.5777, + "step": 34140 + }, + { + "epoch": 0.59, + "learning_rate": 7.848426072952025e-05, + "loss": 2.7291, + "step": 34150 + }, + { + "epoch": 0.59, + "learning_rate": 7.842837242104182e-05, + "loss": 2.6412, + "step": 34160 + }, + { + "epoch": 0.59, + "learning_rate": 7.837249117841542e-05, + "loss": 2.58, + "step": 34170 + }, + { + "epoch": 0.59, + "learning_rate": 7.831661701994512e-05, + "loss": 2.503, + "step": 34180 + }, + { + "epoch": 0.59, + "learning_rate": 7.826074996393264e-05, + "loss": 2.5948, + "step": 34190 + }, + { + "epoch": 0.59, + "learning_rate": 7.820489002867745e-05, + "loss": 2.58, + "step": 34200 + }, + { + "epoch": 0.59, + "learning_rate": 7.814903723247662e-05, + "loss": 2.5378, + "step": 34210 + }, + { + "epoch": 0.59, + "learning_rate": 7.809319159362489e-05, + "loss": 2.5758, + "step": 34220 + }, + { + "epoch": 0.59, + "learning_rate": 7.80373531304147e-05, + "loss": 2.5994, + "step": 34230 + }, + { + "epoch": 0.59, + "learning_rate": 7.798152186113609e-05, + "loss": 2.4807, + "step": 34240 + }, + { + "epoch": 0.59, + "learning_rate": 7.792569780407671e-05, + "loss": 2.535, + "step": 34250 + }, + { + "epoch": 0.59, + "learning_rate": 7.7869880977522e-05, + "loss": 2.596, + "step": 34260 + }, + { + "epoch": 0.59, + "learning_rate": 7.781407139975485e-05, + "loss": 2.5779, + "step": 34270 + }, + { + "epoch": 0.59, + "learning_rate": 7.775826908905591e-05, + "loss": 2.597, + "step": 34280 + }, + { + "epoch": 0.59, + "learning_rate": 7.770247406370338e-05, + "loss": 2.5891, + "step": 34290 + }, + { + "epoch": 0.59, + "learning_rate": 7.764668634197308e-05, + "loss": 2.5256, + "step": 34300 + }, + { + "epoch": 0.59, + "learning_rate": 7.759090594213843e-05, + "loss": 2.588, + "step": 34310 + }, + { + "epoch": 0.59, + "learning_rate": 7.75351328824705e-05, + "loss": 2.6114, + "step": 34320 + }, + { + "epoch": 0.59, + "learning_rate": 7.747936718123791e-05, + "loss": 2.6108, + "step": 34330 + }, + { + "epoch": 0.59, + "learning_rate": 7.742360885670688e-05, + "loss": 2.6, + "step": 34340 + }, + { + "epoch": 0.59, + "learning_rate": 7.736785792714125e-05, + "loss": 2.5836, + "step": 34350 + }, + { + "epoch": 0.59, + "learning_rate": 7.731211441080239e-05, + "loss": 2.6074, + "step": 34360 + }, + { + "epoch": 0.59, + "learning_rate": 7.72563783259492e-05, + "loss": 2.5523, + "step": 34370 + }, + { + "epoch": 0.59, + "learning_rate": 7.72006496908383e-05, + "loss": 2.599, + "step": 34380 + }, + { + "epoch": 0.6, + "learning_rate": 7.714492852372372e-05, + "loss": 2.595, + "step": 34390 + }, + { + "epoch": 0.6, + "learning_rate": 7.708921484285712e-05, + "loss": 2.6365, + "step": 34400 + }, + { + "epoch": 0.6, + "learning_rate": 7.703350866648763e-05, + "loss": 2.5226, + "step": 34410 + }, + { + "epoch": 0.6, + "learning_rate": 7.697781001286203e-05, + "loss": 2.6495, + "step": 34420 + }, + { + "epoch": 0.6, + "learning_rate": 7.692211890022455e-05, + "loss": 2.5864, + "step": 34430 + }, + { + "epoch": 0.6, + "learning_rate": 7.686643534681703e-05, + "loss": 2.5863, + "step": 34440 + }, + { + "epoch": 0.6, + "learning_rate": 7.681075937087876e-05, + "loss": 2.6029, + "step": 34450 + }, + { + "epoch": 0.6, + "learning_rate": 7.675509099064656e-05, + "loss": 2.4836, + "step": 34460 + }, + { + "epoch": 0.6, + "learning_rate": 7.66994302243548e-05, + "loss": 2.5914, + "step": 34470 + }, + { + "epoch": 0.6, + "learning_rate": 7.664377709023533e-05, + "loss": 2.6024, + "step": 34480 + }, + { + "epoch": 0.6, + "learning_rate": 7.658813160651749e-05, + "loss": 2.6303, + "step": 34490 + }, + { + "epoch": 0.6, + "learning_rate": 7.653249379142813e-05, + "loss": 2.5376, + "step": 34500 + }, + { + "epoch": 0.6, + "learning_rate": 7.647686366319161e-05, + "loss": 2.5722, + "step": 34510 + }, + { + "epoch": 0.6, + "learning_rate": 7.64212412400297e-05, + "loss": 2.5708, + "step": 34520 + }, + { + "epoch": 0.6, + "learning_rate": 7.636562654016173e-05, + "loss": 2.5913, + "step": 34530 + }, + { + "epoch": 0.6, + "learning_rate": 7.631001958180446e-05, + "loss": 2.6042, + "step": 34540 + }, + { + "epoch": 0.6, + "learning_rate": 7.625442038317211e-05, + "loss": 2.5326, + "step": 34550 + }, + { + "epoch": 0.6, + "learning_rate": 7.620438775401977e-05, + "loss": 2.6521, + "step": 34560 + }, + { + "epoch": 0.6, + "learning_rate": 7.614880334903584e-05, + "loss": 2.6514, + "step": 34570 + }, + { + "epoch": 0.6, + "learning_rate": 7.609322675658373e-05, + "loss": 2.5918, + "step": 34580 + }, + { + "epoch": 0.6, + "learning_rate": 7.603765799486766e-05, + "loss": 2.5627, + "step": 34590 + }, + { + "epoch": 0.6, + "learning_rate": 7.598209708208936e-05, + "loss": 2.6356, + "step": 34600 + }, + { + "epoch": 0.6, + "learning_rate": 7.592654403644802e-05, + "loss": 2.6433, + "step": 34610 + }, + { + "epoch": 0.6, + "learning_rate": 7.587099887614017e-05, + "loss": 2.5665, + "step": 34620 + }, + { + "epoch": 0.6, + "learning_rate": 7.581546161935978e-05, + "loss": 2.6039, + "step": 34630 + }, + { + "epoch": 0.6, + "learning_rate": 7.57599322842983e-05, + "loss": 2.6104, + "step": 34640 + }, + { + "epoch": 0.6, + "learning_rate": 7.57044108891445e-05, + "loss": 2.5843, + "step": 34650 + }, + { + "epoch": 0.6, + "learning_rate": 7.564889745208459e-05, + "loss": 2.6189, + "step": 34660 + }, + { + "epoch": 0.6, + "learning_rate": 7.559339199130212e-05, + "loss": 2.594, + "step": 34670 + }, + { + "epoch": 0.6, + "learning_rate": 7.553789452497814e-05, + "loss": 2.6001, + "step": 34680 + }, + { + "epoch": 0.6, + "learning_rate": 7.5482405071291e-05, + "loss": 2.6883, + "step": 34690 + }, + { + "epoch": 0.6, + "learning_rate": 7.542692364841638e-05, + "loss": 2.5357, + "step": 34700 + }, + { + "epoch": 0.6, + "learning_rate": 7.537145027452746e-05, + "loss": 2.6797, + "step": 34710 + }, + { + "epoch": 0.6, + "learning_rate": 7.531598496779468e-05, + "loss": 2.5746, + "step": 34720 + }, + { + "epoch": 0.6, + "learning_rate": 7.526052774638585e-05, + "loss": 2.5722, + "step": 34730 + }, + { + "epoch": 0.6, + "learning_rate": 7.52050786284662e-05, + "loss": 2.6161, + "step": 34740 + }, + { + "epoch": 0.6, + "learning_rate": 7.51496376321982e-05, + "loss": 2.5552, + "step": 34750 + }, + { + "epoch": 0.6, + "learning_rate": 7.509420477574177e-05, + "loss": 2.5169, + "step": 34760 + }, + { + "epoch": 0.6, + "learning_rate": 7.503878007725409e-05, + "loss": 2.566, + "step": 34770 + }, + { + "epoch": 0.6, + "learning_rate": 7.498336355488967e-05, + "loss": 2.6428, + "step": 34780 + }, + { + "epoch": 0.6, + "learning_rate": 7.492795522680038e-05, + "loss": 2.5193, + "step": 34790 + }, + { + "epoch": 0.6, + "learning_rate": 7.48725551111354e-05, + "loss": 2.5901, + "step": 34800 + }, + { + "epoch": 0.6, + "learning_rate": 7.481716322604118e-05, + "loss": 2.5946, + "step": 34810 + }, + { + "epoch": 0.6, + "learning_rate": 7.476177958966149e-05, + "loss": 2.573, + "step": 34820 + }, + { + "epoch": 0.6, + "learning_rate": 7.470640422013744e-05, + "loss": 2.6475, + "step": 34830 + }, + { + "epoch": 0.6, + "learning_rate": 7.465103713560739e-05, + "loss": 2.6166, + "step": 34840 + }, + { + "epoch": 0.6, + "learning_rate": 7.459567835420695e-05, + "loss": 2.5889, + "step": 34850 + }, + { + "epoch": 0.6, + "learning_rate": 7.454032789406914e-05, + "loss": 2.5249, + "step": 34860 + }, + { + "epoch": 0.6, + "learning_rate": 7.448498577332412e-05, + "loss": 2.5442, + "step": 34870 + }, + { + "epoch": 0.6, + "learning_rate": 7.442965201009937e-05, + "loss": 2.6616, + "step": 34880 + }, + { + "epoch": 0.6, + "learning_rate": 7.437432662251964e-05, + "loss": 2.5627, + "step": 34890 + }, + { + "epoch": 0.6, + "learning_rate": 7.431900962870695e-05, + "loss": 2.6493, + "step": 34900 + }, + { + "epoch": 0.6, + "learning_rate": 7.426370104678051e-05, + "loss": 2.5267, + "step": 34910 + }, + { + "epoch": 0.6, + "learning_rate": 7.420840089485683e-05, + "loss": 2.6105, + "step": 34920 + }, + { + "epoch": 0.6, + "learning_rate": 7.415310919104961e-05, + "loss": 2.6312, + "step": 34930 + }, + { + "epoch": 0.6, + "learning_rate": 7.409782595346988e-05, + "loss": 2.637, + "step": 34940 + }, + { + "epoch": 0.6, + "learning_rate": 7.404255120022582e-05, + "loss": 2.5854, + "step": 34950 + }, + { + "epoch": 0.61, + "learning_rate": 7.398728494942282e-05, + "loss": 2.6318, + "step": 34960 + }, + { + "epoch": 0.61, + "learning_rate": 7.393202721916348e-05, + "loss": 2.6505, + "step": 34970 + }, + { + "epoch": 0.61, + "learning_rate": 7.387677802754771e-05, + "loss": 2.588, + "step": 34980 + }, + { + "epoch": 0.61, + "learning_rate": 7.38215373926725e-05, + "loss": 2.6385, + "step": 34990 + }, + { + "epoch": 0.61, + "learning_rate": 7.376630533263207e-05, + "loss": 2.684, + "step": 35000 + }, + { + "epoch": 0.61, + "learning_rate": 7.37110818655179e-05, + "loss": 2.5807, + "step": 35010 + }, + { + "epoch": 0.61, + "learning_rate": 7.365586700941858e-05, + "loss": 2.5373, + "step": 35020 + }, + { + "epoch": 0.61, + "learning_rate": 7.36006607824199e-05, + "loss": 2.5346, + "step": 35030 + }, + { + "epoch": 0.61, + "learning_rate": 7.354546320260483e-05, + "loss": 2.5268, + "step": 35040 + }, + { + "epoch": 0.61, + "learning_rate": 7.349027428805351e-05, + "loss": 2.6294, + "step": 35050 + }, + { + "epoch": 0.61, + "learning_rate": 7.343509405684324e-05, + "loss": 2.5228, + "step": 35060 + }, + { + "epoch": 0.61, + "learning_rate": 7.337992252704842e-05, + "loss": 2.5283, + "step": 35070 + }, + { + "epoch": 0.61, + "learning_rate": 7.332475971674075e-05, + "loss": 2.4926, + "step": 35080 + }, + { + "epoch": 0.61, + "learning_rate": 7.32696056439889e-05, + "loss": 2.6291, + "step": 35090 + }, + { + "epoch": 0.61, + "learning_rate": 7.321446032685875e-05, + "loss": 2.5814, + "step": 35100 + }, + { + "epoch": 0.61, + "learning_rate": 7.315932378341335e-05, + "loss": 2.6037, + "step": 35110 + }, + { + "epoch": 0.61, + "learning_rate": 7.310419603171283e-05, + "loss": 2.6279, + "step": 35120 + }, + { + "epoch": 0.61, + "learning_rate": 7.304907708981448e-05, + "loss": 2.5788, + "step": 35130 + }, + { + "epoch": 0.61, + "learning_rate": 7.299396697577266e-05, + "loss": 2.5565, + "step": 35140 + }, + { + "epoch": 0.61, + "learning_rate": 7.293886570763882e-05, + "loss": 2.5908, + "step": 35150 + }, + { + "epoch": 0.61, + "learning_rate": 7.288377330346158e-05, + "loss": 2.5817, + "step": 35160 + }, + { + "epoch": 0.61, + "learning_rate": 7.282868978128663e-05, + "loss": 2.6622, + "step": 35170 + }, + { + "epoch": 0.61, + "learning_rate": 7.27736151591567e-05, + "loss": 2.5478, + "step": 35180 + }, + { + "epoch": 0.61, + "learning_rate": 7.271854945511172e-05, + "loss": 2.5753, + "step": 35190 + }, + { + "epoch": 0.61, + "learning_rate": 7.266349268718859e-05, + "loss": 2.6001, + "step": 35200 + }, + { + "epoch": 0.61, + "learning_rate": 7.260844487342131e-05, + "loss": 2.5585, + "step": 35210 + }, + { + "epoch": 0.61, + "learning_rate": 7.255340603184096e-05, + "loss": 2.5817, + "step": 35220 + }, + { + "epoch": 0.61, + "learning_rate": 7.249837618047571e-05, + "loss": 2.5876, + "step": 35230 + }, + { + "epoch": 0.61, + "learning_rate": 7.244335533735073e-05, + "loss": 2.5905, + "step": 35240 + }, + { + "epoch": 0.61, + "learning_rate": 7.238834352048824e-05, + "loss": 2.676, + "step": 35250 + }, + { + "epoch": 0.61, + "learning_rate": 7.233334074790759e-05, + "loss": 2.4738, + "step": 35260 + }, + { + "epoch": 0.61, + "learning_rate": 7.227834703762507e-05, + "loss": 2.5569, + "step": 35270 + }, + { + "epoch": 0.61, + "learning_rate": 7.2223362407654e-05, + "loss": 2.6139, + "step": 35280 + }, + { + "epoch": 0.61, + "learning_rate": 7.216838687600484e-05, + "loss": 2.596, + "step": 35290 + }, + { + "epoch": 0.61, + "learning_rate": 7.21134204606849e-05, + "loss": 2.602, + "step": 35300 + }, + { + "epoch": 0.61, + "learning_rate": 7.205846317969869e-05, + "loss": 2.6326, + "step": 35310 + }, + { + "epoch": 0.61, + "learning_rate": 7.20035150510476e-05, + "loss": 2.5807, + "step": 35320 + }, + { + "epoch": 0.61, + "learning_rate": 7.194857609273002e-05, + "loss": 2.7027, + "step": 35330 + }, + { + "epoch": 0.61, + "learning_rate": 7.189364632274143e-05, + "loss": 2.5356, + "step": 35340 + }, + { + "epoch": 0.61, + "learning_rate": 7.18387257590742e-05, + "loss": 2.6058, + "step": 35350 + }, + { + "epoch": 0.61, + "learning_rate": 7.178381441971774e-05, + "loss": 2.5583, + "step": 35360 + }, + { + "epoch": 0.61, + "learning_rate": 7.172891232265842e-05, + "loss": 2.6578, + "step": 35370 + }, + { + "epoch": 0.61, + "learning_rate": 7.167401948587959e-05, + "loss": 2.4681, + "step": 35380 + }, + { + "epoch": 0.61, + "learning_rate": 7.161913592736159e-05, + "loss": 2.5298, + "step": 35390 + }, + { + "epoch": 0.61, + "learning_rate": 7.156426166508166e-05, + "loss": 2.5655, + "step": 35400 + }, + { + "epoch": 0.61, + "learning_rate": 7.150939671701404e-05, + "loss": 2.5263, + "step": 35410 + }, + { + "epoch": 0.61, + "learning_rate": 7.145454110112995e-05, + "loss": 2.505, + "step": 35420 + }, + { + "epoch": 0.61, + "learning_rate": 7.139969483539743e-05, + "loss": 2.5579, + "step": 35430 + }, + { + "epoch": 0.61, + "learning_rate": 7.134485793778162e-05, + "loss": 2.7049, + "step": 35440 + }, + { + "epoch": 0.61, + "learning_rate": 7.12900304262445e-05, + "loss": 2.6231, + "step": 35450 + }, + { + "epoch": 0.61, + "learning_rate": 7.123521231874495e-05, + "loss": 2.5835, + "step": 35460 + }, + { + "epoch": 0.61, + "learning_rate": 7.118040363323882e-05, + "loss": 2.4762, + "step": 35470 + }, + { + "epoch": 0.61, + "learning_rate": 7.112560438767884e-05, + "loss": 2.5857, + "step": 35480 + }, + { + "epoch": 0.61, + "learning_rate": 7.107081460001473e-05, + "loss": 2.5826, + "step": 35490 + }, + { + "epoch": 0.61, + "learning_rate": 7.101603428819304e-05, + "loss": 2.62, + "step": 35500 + }, + { + "epoch": 0.61, + "learning_rate": 7.096126347015719e-05, + "loss": 2.5589, + "step": 35510 + }, + { + "epoch": 0.61, + "learning_rate": 7.090650216384753e-05, + "loss": 2.6172, + "step": 35520 + }, + { + "epoch": 0.61, + "learning_rate": 7.085175038720133e-05, + "loss": 2.5633, + "step": 35530 + }, + { + "epoch": 0.62, + "learning_rate": 7.079700815815267e-05, + "loss": 2.5318, + "step": 35540 + }, + { + "epoch": 0.62, + "learning_rate": 7.07422754946325e-05, + "loss": 2.5164, + "step": 35550 + }, + { + "epoch": 0.62, + "learning_rate": 7.068755241456876e-05, + "loss": 2.6291, + "step": 35560 + }, + { + "epoch": 0.62, + "learning_rate": 7.06328389358861e-05, + "loss": 2.5749, + "step": 35570 + }, + { + "epoch": 0.62, + "learning_rate": 7.057813507650607e-05, + "loss": 2.5037, + "step": 35580 + }, + { + "epoch": 0.62, + "learning_rate": 7.052344085434713e-05, + "loss": 2.6721, + "step": 35590 + }, + { + "epoch": 0.62, + "learning_rate": 7.046875628732452e-05, + "loss": 2.6177, + "step": 35600 + }, + { + "epoch": 0.62, + "learning_rate": 7.041408139335035e-05, + "loss": 2.5694, + "step": 35610 + }, + { + "epoch": 0.62, + "learning_rate": 7.03594161903335e-05, + "loss": 2.549, + "step": 35620 + }, + { + "epoch": 0.62, + "learning_rate": 7.030476069617978e-05, + "loss": 2.5129, + "step": 35630 + }, + { + "epoch": 0.62, + "learning_rate": 7.025011492879172e-05, + "loss": 2.5913, + "step": 35640 + }, + { + "epoch": 0.62, + "learning_rate": 7.019547890606872e-05, + "loss": 2.5305, + "step": 35650 + }, + { + "epoch": 0.62, + "learning_rate": 7.014085264590698e-05, + "loss": 2.5755, + "step": 35660 + }, + { + "epoch": 0.62, + "learning_rate": 7.008623616619949e-05, + "loss": 2.6259, + "step": 35670 + }, + { + "epoch": 0.62, + "learning_rate": 7.003162948483608e-05, + "loss": 2.6205, + "step": 35680 + }, + { + "epoch": 0.62, + "learning_rate": 6.99770326197033e-05, + "loss": 2.5844, + "step": 35690 + }, + { + "epoch": 0.62, + "learning_rate": 6.99224455886845e-05, + "loss": 2.5366, + "step": 35700 + }, + { + "epoch": 0.62, + "learning_rate": 6.986786840965987e-05, + "loss": 2.6098, + "step": 35710 + }, + { + "epoch": 0.62, + "learning_rate": 6.981330110050632e-05, + "loss": 2.4961, + "step": 35720 + }, + { + "epoch": 0.62, + "learning_rate": 6.975874367909749e-05, + "loss": 2.6201, + "step": 35730 + }, + { + "epoch": 0.62, + "learning_rate": 6.97041961633039e-05, + "loss": 2.5613, + "step": 35740 + }, + { + "epoch": 0.62, + "learning_rate": 6.964965857099273e-05, + "loss": 2.5434, + "step": 35750 + }, + { + "epoch": 0.62, + "learning_rate": 6.959513092002791e-05, + "loss": 2.5496, + "step": 35760 + }, + { + "epoch": 0.62, + "learning_rate": 6.954061322827014e-05, + "loss": 2.5976, + "step": 35770 + }, + { + "epoch": 0.62, + "learning_rate": 6.94861055135769e-05, + "loss": 2.5743, + "step": 35780 + }, + { + "epoch": 0.62, + "learning_rate": 6.943160779380231e-05, + "loss": 2.6126, + "step": 35790 + }, + { + "epoch": 0.62, + "learning_rate": 6.937712008679726e-05, + "loss": 2.5781, + "step": 35800 + }, + { + "epoch": 0.62, + "learning_rate": 6.932264241040941e-05, + "loss": 2.6386, + "step": 35810 + }, + { + "epoch": 0.62, + "learning_rate": 6.926817478248306e-05, + "loss": 2.6004, + "step": 35820 + }, + { + "epoch": 0.62, + "learning_rate": 6.92137172208592e-05, + "loss": 2.5086, + "step": 35830 + }, + { + "epoch": 0.62, + "learning_rate": 6.915926974337567e-05, + "loss": 2.6832, + "step": 35840 + }, + { + "epoch": 0.62, + "learning_rate": 6.91048323678668e-05, + "loss": 2.5596, + "step": 35850 + }, + { + "epoch": 0.62, + "learning_rate": 6.905040511216382e-05, + "loss": 2.5465, + "step": 35860 + }, + { + "epoch": 0.62, + "learning_rate": 6.899598799409448e-05, + "loss": 2.5644, + "step": 35870 + }, + { + "epoch": 0.62, + "learning_rate": 6.894158103148326e-05, + "loss": 2.5368, + "step": 35880 + }, + { + "epoch": 0.62, + "learning_rate": 6.88871842421514e-05, + "loss": 2.5657, + "step": 35890 + }, + { + "epoch": 0.62, + "learning_rate": 6.883279764391666e-05, + "loss": 2.5646, + "step": 35900 + }, + { + "epoch": 0.62, + "learning_rate": 6.877842125459357e-05, + "loss": 2.6155, + "step": 35910 + }, + { + "epoch": 0.62, + "learning_rate": 6.872405509199324e-05, + "loss": 2.5773, + "step": 35920 + }, + { + "epoch": 0.62, + "learning_rate": 6.866969917392353e-05, + "loss": 2.5641, + "step": 35930 + }, + { + "epoch": 0.62, + "learning_rate": 6.861535351818885e-05, + "loss": 2.5506, + "step": 35940 + }, + { + "epoch": 0.62, + "learning_rate": 6.856101814259028e-05, + "loss": 2.4932, + "step": 35950 + }, + { + "epoch": 0.62, + "learning_rate": 6.850669306492557e-05, + "loss": 2.541, + "step": 35960 + }, + { + "epoch": 0.62, + "learning_rate": 6.845237830298903e-05, + "loss": 2.5564, + "step": 35970 + }, + { + "epoch": 0.62, + "learning_rate": 6.839807387457163e-05, + "loss": 2.555, + "step": 35980 + }, + { + "epoch": 0.62, + "learning_rate": 6.834377979746096e-05, + "loss": 2.5823, + "step": 35990 + }, + { + "epoch": 0.62, + "learning_rate": 6.828949608944122e-05, + "loss": 2.5998, + "step": 36000 + }, + { + "epoch": 0.62, + "learning_rate": 6.823522276829319e-05, + "loss": 2.5527, + "step": 36010 + }, + { + "epoch": 0.62, + "learning_rate": 6.818095985179425e-05, + "loss": 2.6562, + "step": 36020 + }, + { + "epoch": 0.62, + "learning_rate": 6.812670735771837e-05, + "loss": 2.5415, + "step": 36030 + }, + { + "epoch": 0.62, + "learning_rate": 6.807246530383617e-05, + "loss": 2.7046, + "step": 36040 + }, + { + "epoch": 0.62, + "learning_rate": 6.801823370791478e-05, + "loss": 2.5594, + "step": 36050 + }, + { + "epoch": 0.62, + "learning_rate": 6.796401258771792e-05, + "loss": 2.6434, + "step": 36060 + }, + { + "epoch": 0.62, + "learning_rate": 6.790980196100587e-05, + "loss": 2.5518, + "step": 36070 + }, + { + "epoch": 0.62, + "learning_rate": 6.78556018455355e-05, + "loss": 2.5493, + "step": 36080 + }, + { + "epoch": 0.62, + "learning_rate": 6.780141225906022e-05, + "loss": 2.5773, + "step": 36090 + }, + { + "epoch": 0.62, + "learning_rate": 6.774723321932997e-05, + "loss": 2.5247, + "step": 36100 + }, + { + "epoch": 0.62, + "learning_rate": 6.769306474409129e-05, + "loss": 2.5798, + "step": 36110 + }, + { + "epoch": 0.63, + "learning_rate": 6.763890685108723e-05, + "loss": 2.5597, + "step": 36120 + }, + { + "epoch": 0.63, + "learning_rate": 6.758475955805731e-05, + "loss": 2.5723, + "step": 36130 + }, + { + "epoch": 0.63, + "learning_rate": 6.753062288273774e-05, + "loss": 2.549, + "step": 36140 + }, + { + "epoch": 0.63, + "learning_rate": 6.747649684286109e-05, + "loss": 2.6087, + "step": 36150 + }, + { + "epoch": 0.63, + "learning_rate": 6.742238145615652e-05, + "loss": 2.5242, + "step": 36160 + }, + { + "epoch": 0.63, + "learning_rate": 6.736827674034967e-05, + "loss": 2.6114, + "step": 36170 + }, + { + "epoch": 0.63, + "learning_rate": 6.731418271316271e-05, + "loss": 2.596, + "step": 36180 + }, + { + "epoch": 0.63, + "learning_rate": 6.726009939231437e-05, + "loss": 2.5613, + "step": 36190 + }, + { + "epoch": 0.63, + "learning_rate": 6.72060267955197e-05, + "loss": 2.5341, + "step": 36200 + }, + { + "epoch": 0.63, + "learning_rate": 6.715196494049038e-05, + "loss": 2.6262, + "step": 36210 + }, + { + "epoch": 0.63, + "learning_rate": 6.709791384493458e-05, + "loss": 2.5625, + "step": 36220 + }, + { + "epoch": 0.63, + "learning_rate": 6.704387352655685e-05, + "loss": 2.5614, + "step": 36230 + }, + { + "epoch": 0.63, + "learning_rate": 6.698984400305832e-05, + "loss": 2.5329, + "step": 36240 + }, + { + "epoch": 0.63, + "learning_rate": 6.693582529213644e-05, + "loss": 2.5221, + "step": 36250 + }, + { + "epoch": 0.63, + "learning_rate": 6.688181741148527e-05, + "loss": 2.6425, + "step": 36260 + }, + { + "epoch": 0.63, + "learning_rate": 6.682782037879523e-05, + "loss": 2.6439, + "step": 36270 + }, + { + "epoch": 0.63, + "learning_rate": 6.677383421175319e-05, + "loss": 2.6683, + "step": 36280 + }, + { + "epoch": 0.63, + "learning_rate": 6.671985892804251e-05, + "loss": 2.5385, + "step": 36290 + }, + { + "epoch": 0.63, + "learning_rate": 6.666589454534298e-05, + "loss": 2.5179, + "step": 36300 + }, + { + "epoch": 0.63, + "learning_rate": 6.661194108133076e-05, + "loss": 2.6157, + "step": 36310 + }, + { + "epoch": 0.63, + "learning_rate": 6.655799855367844e-05, + "loss": 2.5276, + "step": 36320 + }, + { + "epoch": 0.63, + "learning_rate": 6.650406698005514e-05, + "loss": 2.584, + "step": 36330 + }, + { + "epoch": 0.63, + "learning_rate": 6.645014637812627e-05, + "loss": 2.6152, + "step": 36340 + }, + { + "epoch": 0.63, + "learning_rate": 6.639623676555365e-05, + "loss": 2.4669, + "step": 36350 + }, + { + "epoch": 0.63, + "learning_rate": 6.634233815999561e-05, + "loss": 2.5811, + "step": 36360 + }, + { + "epoch": 0.63, + "learning_rate": 6.628845057910675e-05, + "loss": 2.5907, + "step": 36370 + }, + { + "epoch": 0.63, + "learning_rate": 6.623457404053812e-05, + "loss": 2.5852, + "step": 36380 + }, + { + "epoch": 0.63, + "learning_rate": 6.618070856193714e-05, + "loss": 2.5621, + "step": 36390 + }, + { + "epoch": 0.63, + "learning_rate": 6.612685416094763e-05, + "loss": 2.5695, + "step": 36400 + }, + { + "epoch": 0.63, + "learning_rate": 6.607301085520976e-05, + "loss": 2.6433, + "step": 36410 + }, + { + "epoch": 0.63, + "learning_rate": 6.601917866236007e-05, + "loss": 2.573, + "step": 36420 + }, + { + "epoch": 0.63, + "learning_rate": 6.596535760003142e-05, + "loss": 2.5371, + "step": 36430 + }, + { + "epoch": 0.63, + "learning_rate": 6.591154768585312e-05, + "loss": 2.6182, + "step": 36440 + }, + { + "epoch": 0.63, + "learning_rate": 6.585774893745074e-05, + "loss": 2.5921, + "step": 36450 + }, + { + "epoch": 0.63, + "learning_rate": 6.580396137244622e-05, + "loss": 2.5038, + "step": 36460 + }, + { + "epoch": 0.63, + "learning_rate": 6.575018500845781e-05, + "loss": 2.5698, + "step": 36470 + }, + { + "epoch": 0.63, + "learning_rate": 6.56964198631002e-05, + "loss": 2.5809, + "step": 36480 + }, + { + "epoch": 0.63, + "learning_rate": 6.564266595398425e-05, + "loss": 2.5646, + "step": 36490 + }, + { + "epoch": 0.63, + "learning_rate": 6.558892329871725e-05, + "loss": 2.5122, + "step": 36500 + }, + { + "epoch": 0.63, + "learning_rate": 6.553519191490278e-05, + "loss": 2.564, + "step": 36510 + }, + { + "epoch": 0.63, + "learning_rate": 6.548147182014072e-05, + "loss": 2.581, + "step": 36520 + }, + { + "epoch": 0.63, + "learning_rate": 6.54277630320272e-05, + "loss": 2.5356, + "step": 36530 + }, + { + "epoch": 0.63, + "learning_rate": 6.537406556815475e-05, + "loss": 2.5634, + "step": 36540 + }, + { + "epoch": 0.63, + "learning_rate": 6.532037944611214e-05, + "loss": 2.6559, + "step": 36550 + }, + { + "epoch": 0.63, + "learning_rate": 6.526670468348437e-05, + "loss": 2.5757, + "step": 36560 + }, + { + "epoch": 0.63, + "learning_rate": 6.521304129785279e-05, + "loss": 2.5909, + "step": 36570 + }, + { + "epoch": 0.63, + "learning_rate": 6.5159389306795e-05, + "loss": 2.607, + "step": 36580 + }, + { + "epoch": 0.63, + "learning_rate": 6.510574872788493e-05, + "loss": 2.5227, + "step": 36590 + }, + { + "epoch": 0.63, + "learning_rate": 6.505211957869268e-05, + "loss": 2.5489, + "step": 36600 + }, + { + "epoch": 0.63, + "learning_rate": 6.499850187678463e-05, + "loss": 2.4845, + "step": 36610 + }, + { + "epoch": 0.63, + "learning_rate": 6.494489563972339e-05, + "loss": 2.6684, + "step": 36620 + }, + { + "epoch": 0.63, + "learning_rate": 6.489130088506792e-05, + "loss": 2.5734, + "step": 36630 + }, + { + "epoch": 0.63, + "learning_rate": 6.483771763037329e-05, + "loss": 2.6273, + "step": 36640 + }, + { + "epoch": 0.63, + "learning_rate": 6.478414589319085e-05, + "loss": 2.5202, + "step": 36650 + }, + { + "epoch": 0.63, + "learning_rate": 6.473058569106823e-05, + "loss": 2.578, + "step": 36660 + }, + { + "epoch": 0.63, + "learning_rate": 6.467703704154922e-05, + "loss": 2.6493, + "step": 36670 + }, + { + "epoch": 0.63, + "learning_rate": 6.46234999621738e-05, + "loss": 2.5492, + "step": 36680 + }, + { + "epoch": 0.63, + "learning_rate": 6.456997447047828e-05, + "loss": 2.5267, + "step": 36690 + }, + { + "epoch": 0.64, + "learning_rate": 6.451646058399505e-05, + "loss": 2.5268, + "step": 36700 + }, + { + "epoch": 0.64, + "learning_rate": 6.446295832025274e-05, + "loss": 2.5746, + "step": 36710 + }, + { + "epoch": 0.64, + "learning_rate": 6.440946769677618e-05, + "loss": 2.6034, + "step": 36720 + }, + { + "epoch": 0.64, + "learning_rate": 6.435598873108641e-05, + "loss": 2.595, + "step": 36730 + }, + { + "epoch": 0.64, + "learning_rate": 6.430252144070062e-05, + "loss": 2.5384, + "step": 36740 + }, + { + "epoch": 0.64, + "learning_rate": 6.424906584313215e-05, + "loss": 2.5901, + "step": 36750 + }, + { + "epoch": 0.64, + "learning_rate": 6.419562195589055e-05, + "loss": 2.6025, + "step": 36760 + }, + { + "epoch": 0.64, + "learning_rate": 6.414218979648159e-05, + "loss": 2.6463, + "step": 36770 + }, + { + "epoch": 0.64, + "learning_rate": 6.408876938240708e-05, + "loss": 2.4858, + "step": 36780 + }, + { + "epoch": 0.64, + "learning_rate": 6.403536073116508e-05, + "loss": 2.6421, + "step": 36790 + }, + { + "epoch": 0.64, + "learning_rate": 6.398196386024968e-05, + "loss": 2.5706, + "step": 36800 + }, + { + "epoch": 0.64, + "learning_rate": 6.392857878715126e-05, + "loss": 2.556, + "step": 36810 + }, + { + "epoch": 0.64, + "learning_rate": 6.387520552935624e-05, + "loss": 2.5829, + "step": 36820 + }, + { + "epoch": 0.64, + "learning_rate": 6.382184410434717e-05, + "loss": 2.6171, + "step": 36830 + }, + { + "epoch": 0.64, + "learning_rate": 6.376849452960279e-05, + "loss": 2.6515, + "step": 36840 + }, + { + "epoch": 0.64, + "learning_rate": 6.371515682259789e-05, + "loss": 2.5681, + "step": 36850 + }, + { + "epoch": 0.64, + "learning_rate": 6.366183100080338e-05, + "loss": 2.5628, + "step": 36860 + }, + { + "epoch": 0.64, + "learning_rate": 6.360851708168631e-05, + "loss": 2.5906, + "step": 36870 + }, + { + "epoch": 0.64, + "learning_rate": 6.355521508270985e-05, + "loss": 2.6443, + "step": 36880 + }, + { + "epoch": 0.64, + "learning_rate": 6.350192502133319e-05, + "loss": 2.5287, + "step": 36890 + }, + { + "epoch": 0.64, + "learning_rate": 6.344864691501162e-05, + "loss": 2.6674, + "step": 36900 + }, + { + "epoch": 0.64, + "learning_rate": 6.339538078119663e-05, + "loss": 2.5651, + "step": 36910 + }, + { + "epoch": 0.64, + "learning_rate": 6.334212663733566e-05, + "loss": 2.5261, + "step": 36920 + }, + { + "epoch": 0.64, + "learning_rate": 6.328888450087222e-05, + "loss": 2.6076, + "step": 36930 + }, + { + "epoch": 0.64, + "learning_rate": 6.323565438924601e-05, + "loss": 2.6554, + "step": 36940 + }, + { + "epoch": 0.64, + "learning_rate": 6.318243631989267e-05, + "loss": 2.4875, + "step": 36950 + }, + { + "epoch": 0.64, + "learning_rate": 6.312923031024398e-05, + "loss": 2.5971, + "step": 36960 + }, + { + "epoch": 0.64, + "learning_rate": 6.30760363777277e-05, + "loss": 2.5634, + "step": 36970 + }, + { + "epoch": 0.64, + "learning_rate": 6.302285453976765e-05, + "loss": 2.5727, + "step": 36980 + }, + { + "epoch": 0.64, + "learning_rate": 6.296968481378374e-05, + "loss": 2.5701, + "step": 36990 + }, + { + "epoch": 0.64, + "learning_rate": 6.291652721719185e-05, + "loss": 2.6221, + "step": 37000 + }, + { + "epoch": 0.64, + "learning_rate": 6.286338176740392e-05, + "loss": 2.496, + "step": 37010 + }, + { + "epoch": 0.64, + "learning_rate": 6.281024848182788e-05, + "loss": 2.5735, + "step": 37020 + }, + { + "epoch": 0.64, + "learning_rate": 6.275712737786774e-05, + "loss": 2.5896, + "step": 37030 + }, + { + "epoch": 0.64, + "learning_rate": 6.270401847292347e-05, + "loss": 2.5677, + "step": 37040 + }, + { + "epoch": 0.64, + "learning_rate": 6.265092178439101e-05, + "loss": 2.5314, + "step": 37050 + }, + { + "epoch": 0.64, + "learning_rate": 6.259783732966238e-05, + "loss": 2.5964, + "step": 37060 + }, + { + "epoch": 0.64, + "learning_rate": 6.254476512612557e-05, + "loss": 2.5033, + "step": 37070 + }, + { + "epoch": 0.64, + "learning_rate": 6.249170519116448e-05, + "loss": 2.5392, + "step": 37080 + }, + { + "epoch": 0.64, + "learning_rate": 6.243865754215912e-05, + "loss": 2.6184, + "step": 37090 + }, + { + "epoch": 0.64, + "learning_rate": 6.238562219648537e-05, + "loss": 2.637, + "step": 37100 + }, + { + "epoch": 0.64, + "learning_rate": 6.233259917151512e-05, + "loss": 2.6307, + "step": 37110 + }, + { + "epoch": 0.64, + "learning_rate": 6.227958848461623e-05, + "loss": 2.5424, + "step": 37120 + }, + { + "epoch": 0.64, + "learning_rate": 6.222659015315246e-05, + "loss": 2.5822, + "step": 37130 + }, + { + "epoch": 0.64, + "learning_rate": 6.217360419448369e-05, + "loss": 2.5492, + "step": 37140 + }, + { + "epoch": 0.64, + "learning_rate": 6.212063062596556e-05, + "loss": 2.5797, + "step": 37150 + }, + { + "epoch": 0.64, + "learning_rate": 6.206766946494973e-05, + "loss": 2.5748, + "step": 37160 + }, + { + "epoch": 0.64, + "learning_rate": 6.201472072878376e-05, + "loss": 2.6574, + "step": 37170 + }, + { + "epoch": 0.64, + "learning_rate": 6.196178443481123e-05, + "loss": 2.5532, + "step": 37180 + }, + { + "epoch": 0.64, + "learning_rate": 6.190886060037154e-05, + "loss": 2.579, + "step": 37190 + }, + { + "epoch": 0.64, + "learning_rate": 6.185594924280004e-05, + "loss": 2.6252, + "step": 37200 + }, + { + "epoch": 0.64, + "learning_rate": 6.180305037942806e-05, + "loss": 2.6023, + "step": 37210 + }, + { + "epoch": 0.64, + "learning_rate": 6.175016402758275e-05, + "loss": 2.6692, + "step": 37220 + }, + { + "epoch": 0.64, + "learning_rate": 6.169729020458717e-05, + "loss": 2.5516, + "step": 37230 + }, + { + "epoch": 0.64, + "learning_rate": 6.164442892776034e-05, + "loss": 2.5852, + "step": 37240 + }, + { + "epoch": 0.64, + "learning_rate": 6.159158021441715e-05, + "loss": 2.6332, + "step": 37250 + }, + { + "epoch": 0.64, + "learning_rate": 6.15387440818683e-05, + "loss": 2.557, + "step": 37260 + }, + { + "epoch": 0.65, + "learning_rate": 6.148592054742043e-05, + "loss": 2.527, + "step": 37270 + }, + { + "epoch": 0.65, + "learning_rate": 6.14331096283761e-05, + "loss": 2.5868, + "step": 37280 + }, + { + "epoch": 0.65, + "learning_rate": 6.138031134203364e-05, + "loss": 2.6475, + "step": 37290 + }, + { + "epoch": 0.65, + "learning_rate": 6.13275257056873e-05, + "loss": 2.5719, + "step": 37300 + }, + { + "epoch": 0.65, + "learning_rate": 6.127475273662716e-05, + "loss": 2.5748, + "step": 37310 + }, + { + "epoch": 0.65, + "learning_rate": 6.122199245213922e-05, + "loss": 2.6365, + "step": 37320 + }, + { + "epoch": 0.65, + "learning_rate": 6.116924486950524e-05, + "loss": 2.6182, + "step": 37330 + }, + { + "epoch": 0.65, + "learning_rate": 6.111651000600286e-05, + "loss": 2.5192, + "step": 37340 + }, + { + "epoch": 0.65, + "learning_rate": 6.10637878789055e-05, + "loss": 2.6013, + "step": 37350 + }, + { + "epoch": 0.65, + "learning_rate": 6.101107850548251e-05, + "loss": 2.6308, + "step": 37360 + }, + { + "epoch": 0.65, + "learning_rate": 6.0958381902998986e-05, + "loss": 2.6071, + "step": 37370 + }, + { + "epoch": 0.65, + "learning_rate": 6.090569808871582e-05, + "loss": 2.6105, + "step": 37380 + }, + { + "epoch": 0.65, + "learning_rate": 6.085302707988981e-05, + "loss": 2.6545, + "step": 37390 + }, + { + "epoch": 0.65, + "learning_rate": 6.080036889377349e-05, + "loss": 2.6144, + "step": 37400 + }, + { + "epoch": 0.65, + "learning_rate": 6.074772354761521e-05, + "loss": 2.5806, + "step": 37410 + }, + { + "epoch": 0.65, + "learning_rate": 6.069509105865907e-05, + "loss": 2.5822, + "step": 37420 + }, + { + "epoch": 0.65, + "learning_rate": 6.0642471444145075e-05, + "loss": 2.5347, + "step": 37430 + }, + { + "epoch": 0.65, + "learning_rate": 6.0589864721308886e-05, + "loss": 2.5384, + "step": 37440 + }, + { + "epoch": 0.65, + "learning_rate": 6.053727090738199e-05, + "loss": 2.6357, + "step": 37450 + }, + { + "epoch": 0.65, + "learning_rate": 6.0484690019591696e-05, + "loss": 2.4609, + "step": 37460 + }, + { + "epoch": 0.65, + "learning_rate": 6.0432122075161015e-05, + "loss": 2.4859, + "step": 37470 + }, + { + "epoch": 0.65, + "learning_rate": 6.03795670913087e-05, + "loss": 2.4926, + "step": 37480 + }, + { + "epoch": 0.65, + "learning_rate": 6.032702508524932e-05, + "loss": 2.6422, + "step": 37490 + }, + { + "epoch": 0.65, + "learning_rate": 6.027449607419318e-05, + "loss": 2.485, + "step": 37500 + }, + { + "epoch": 0.65, + "learning_rate": 6.022198007534632e-05, + "loss": 2.5795, + "step": 37510 + }, + { + "epoch": 0.65, + "learning_rate": 6.016947710591051e-05, + "loss": 2.6305, + "step": 37520 + }, + { + "epoch": 0.65, + "learning_rate": 6.011698718308323e-05, + "loss": 2.614, + "step": 37530 + }, + { + "epoch": 0.65, + "learning_rate": 6.006451032405775e-05, + "loss": 2.559, + "step": 37540 + }, + { + "epoch": 0.65, + "learning_rate": 6.001204654602302e-05, + "loss": 2.5698, + "step": 37550 + }, + { + "epoch": 0.65, + "learning_rate": 5.995959586616369e-05, + "loss": 2.5074, + "step": 37560 + }, + { + "epoch": 0.65, + "learning_rate": 5.9907158301660125e-05, + "loss": 2.5574, + "step": 37570 + }, + { + "epoch": 0.65, + "learning_rate": 5.985473386968845e-05, + "loss": 2.6022, + "step": 37580 + }, + { + "epoch": 0.65, + "learning_rate": 5.980232258742045e-05, + "loss": 2.5784, + "step": 37590 + }, + { + "epoch": 0.65, + "learning_rate": 5.9749924472023524e-05, + "loss": 2.5776, + "step": 37600 + }, + { + "epoch": 0.65, + "learning_rate": 5.969753954066093e-05, + "loss": 2.6277, + "step": 37610 + }, + { + "epoch": 0.65, + "learning_rate": 5.964516781049145e-05, + "loss": 2.6681, + "step": 37620 + }, + { + "epoch": 0.65, + "learning_rate": 5.9592809298669605e-05, + "loss": 2.5396, + "step": 37630 + }, + { + "epoch": 0.65, + "learning_rate": 5.9540464022345636e-05, + "loss": 2.5541, + "step": 37640 + }, + { + "epoch": 0.65, + "learning_rate": 5.9488131998665345e-05, + "loss": 2.5115, + "step": 37650 + }, + { + "epoch": 0.65, + "learning_rate": 5.9435813244770255e-05, + "loss": 2.4931, + "step": 37660 + }, + { + "epoch": 0.65, + "learning_rate": 5.938350777779753e-05, + "loss": 2.4796, + "step": 37670 + }, + { + "epoch": 0.65, + "learning_rate": 5.9331215614879966e-05, + "loss": 2.5555, + "step": 37680 + }, + { + "epoch": 0.65, + "learning_rate": 5.9278936773146085e-05, + "loss": 2.4983, + "step": 37690 + }, + { + "epoch": 0.65, + "learning_rate": 5.922667126971995e-05, + "loss": 2.5692, + "step": 37700 + }, + { + "epoch": 0.65, + "learning_rate": 5.917441912172127e-05, + "loss": 2.5328, + "step": 37710 + }, + { + "epoch": 0.65, + "learning_rate": 5.9122180346265375e-05, + "loss": 2.4906, + "step": 37720 + }, + { + "epoch": 0.65, + "learning_rate": 5.906995496046329e-05, + "loss": 2.5057, + "step": 37730 + }, + { + "epoch": 0.65, + "learning_rate": 5.901774298142157e-05, + "loss": 2.5872, + "step": 37740 + }, + { + "epoch": 0.65, + "learning_rate": 5.896554442624238e-05, + "loss": 2.5632, + "step": 37750 + }, + { + "epoch": 0.65, + "learning_rate": 5.891335931202355e-05, + "loss": 2.5524, + "step": 37760 + }, + { + "epoch": 0.65, + "learning_rate": 5.886118765585849e-05, + "loss": 2.5227, + "step": 37770 + }, + { + "epoch": 0.65, + "learning_rate": 5.880902947483612e-05, + "loss": 2.4414, + "step": 37780 + }, + { + "epoch": 0.65, + "learning_rate": 5.875688478604107e-05, + "loss": 2.4927, + "step": 37790 + }, + { + "epoch": 0.65, + "learning_rate": 5.870475360655346e-05, + "loss": 2.4679, + "step": 37800 + }, + { + "epoch": 0.65, + "learning_rate": 5.865263595344904e-05, + "loss": 2.6439, + "step": 37810 + }, + { + "epoch": 0.65, + "learning_rate": 5.860053184379906e-05, + "loss": 2.5945, + "step": 37820 + }, + { + "epoch": 0.65, + "learning_rate": 5.854844129467043e-05, + "loss": 2.5555, + "step": 37830 + }, + { + "epoch": 0.65, + "learning_rate": 5.849636432312555e-05, + "loss": 2.5763, + "step": 37840 + }, + { + "epoch": 0.66, + "learning_rate": 5.8444300946222374e-05, + "loss": 2.6156, + "step": 37850 + }, + { + "epoch": 0.66, + "learning_rate": 5.839225118101439e-05, + "loss": 2.5677, + "step": 37860 + }, + { + "epoch": 0.66, + "learning_rate": 5.834021504455075e-05, + "loss": 2.5279, + "step": 37870 + }, + { + "epoch": 0.66, + "learning_rate": 5.8288192553875976e-05, + "loss": 2.5518, + "step": 37880 + }, + { + "epoch": 0.66, + "learning_rate": 5.8236183726030256e-05, + "loss": 2.516, + "step": 37890 + }, + { + "epoch": 0.66, + "learning_rate": 5.818418857804915e-05, + "loss": 2.5574, + "step": 37900 + }, + { + "epoch": 0.66, + "learning_rate": 5.8132207126963915e-05, + "loss": 2.568, + "step": 37910 + }, + { + "epoch": 0.66, + "learning_rate": 5.808023938980114e-05, + "loss": 2.6242, + "step": 37920 + }, + { + "epoch": 0.66, + "learning_rate": 5.802828538358307e-05, + "loss": 2.5777, + "step": 37930 + }, + { + "epoch": 0.66, + "learning_rate": 5.7976345125327434e-05, + "loss": 2.5008, + "step": 37940 + }, + { + "epoch": 0.66, + "learning_rate": 5.792441863204734e-05, + "loss": 2.4446, + "step": 37950 + }, + { + "epoch": 0.66, + "learning_rate": 5.7872505920751504e-05, + "loss": 2.5487, + "step": 37960 + }, + { + "epoch": 0.66, + "learning_rate": 5.782060700844414e-05, + "loss": 2.6023, + "step": 37970 + }, + { + "epoch": 0.66, + "learning_rate": 5.776872191212481e-05, + "loss": 2.555, + "step": 37980 + }, + { + "epoch": 0.66, + "learning_rate": 5.771685064878868e-05, + "loss": 2.5337, + "step": 37990 + }, + { + "epoch": 0.66, + "learning_rate": 5.766499323542637e-05, + "loss": 2.5177, + "step": 38000 + }, + { + "epoch": 0.66, + "learning_rate": 5.761314968902385e-05, + "loss": 2.5643, + "step": 38010 + }, + { + "epoch": 0.66, + "learning_rate": 5.7561320026562684e-05, + "loss": 2.5264, + "step": 38020 + }, + { + "epoch": 0.66, + "learning_rate": 5.7509504265019865e-05, + "loss": 2.5389, + "step": 38030 + }, + { + "epoch": 0.66, + "learning_rate": 5.7457702421367676e-05, + "loss": 2.4855, + "step": 38040 + }, + { + "epoch": 0.66, + "learning_rate": 5.740591451257415e-05, + "loss": 2.5738, + "step": 38050 + }, + { + "epoch": 0.66, + "learning_rate": 5.7354140555602456e-05, + "loss": 2.6184, + "step": 38060 + }, + { + "epoch": 0.66, + "learning_rate": 5.730238056741132e-05, + "loss": 2.6678, + "step": 38070 + }, + { + "epoch": 0.66, + "learning_rate": 5.725063456495494e-05, + "loss": 2.5479, + "step": 38080 + }, + { + "epoch": 0.66, + "learning_rate": 5.71989025651828e-05, + "loss": 2.5564, + "step": 38090 + }, + { + "epoch": 0.66, + "learning_rate": 5.7147184585039914e-05, + "loss": 2.6261, + "step": 38100 + }, + { + "epoch": 0.66, + "learning_rate": 5.709548064146668e-05, + "loss": 2.519, + "step": 38110 + }, + { + "epoch": 0.66, + "learning_rate": 5.704379075139884e-05, + "loss": 2.5923, + "step": 38120 + }, + { + "epoch": 0.66, + "learning_rate": 5.6992114931767593e-05, + "loss": 2.4984, + "step": 38130 + }, + { + "epoch": 0.66, + "learning_rate": 5.694045319949954e-05, + "loss": 2.5886, + "step": 38140 + }, + { + "epoch": 0.66, + "learning_rate": 5.6888805571516565e-05, + "loss": 2.5002, + "step": 38150 + }, + { + "epoch": 0.66, + "learning_rate": 5.683717206473611e-05, + "loss": 2.5459, + "step": 38160 + }, + { + "epoch": 0.66, + "learning_rate": 5.6785552696070774e-05, + "loss": 2.5428, + "step": 38170 + }, + { + "epoch": 0.66, + "learning_rate": 5.673394748242868e-05, + "loss": 2.5834, + "step": 38180 + }, + { + "epoch": 0.66, + "learning_rate": 5.668235644071335e-05, + "loss": 2.6371, + "step": 38190 + }, + { + "epoch": 0.66, + "learning_rate": 5.663077958782346e-05, + "loss": 2.557, + "step": 38200 + }, + { + "epoch": 0.66, + "learning_rate": 5.657921694065322e-05, + "loss": 2.5347, + "step": 38210 + }, + { + "epoch": 0.66, + "learning_rate": 5.652766851609219e-05, + "loss": 2.6268, + "step": 38220 + }, + { + "epoch": 0.66, + "learning_rate": 5.647613433102506e-05, + "loss": 2.568, + "step": 38230 + }, + { + "epoch": 0.66, + "learning_rate": 5.64246144023322e-05, + "loss": 2.5543, + "step": 38240 + }, + { + "epoch": 0.66, + "learning_rate": 5.6373108746888966e-05, + "loss": 2.5791, + "step": 38250 + }, + { + "epoch": 0.66, + "learning_rate": 5.632161738156627e-05, + "loss": 2.5715, + "step": 38260 + }, + { + "epoch": 0.66, + "learning_rate": 5.627014032323028e-05, + "loss": 2.6108, + "step": 38270 + }, + { + "epoch": 0.66, + "learning_rate": 5.6218677588742394e-05, + "loss": 2.5312, + "step": 38280 + }, + { + "epoch": 0.66, + "learning_rate": 5.6167229194959404e-05, + "loss": 2.5703, + "step": 38290 + }, + { + "epoch": 0.66, + "learning_rate": 5.611579515873346e-05, + "loss": 2.552, + "step": 38300 + }, + { + "epoch": 0.66, + "learning_rate": 5.6064375496911834e-05, + "loss": 2.6514, + "step": 38310 + }, + { + "epoch": 0.66, + "learning_rate": 5.601297022633724e-05, + "loss": 2.5467, + "step": 38320 + }, + { + "epoch": 0.66, + "learning_rate": 5.5961579363847663e-05, + "loss": 2.5322, + "step": 38330 + }, + { + "epoch": 0.66, + "learning_rate": 5.5910202926276265e-05, + "loss": 2.5829, + "step": 38340 + }, + { + "epoch": 0.66, + "learning_rate": 5.5858840930451595e-05, + "loss": 2.5866, + "step": 38350 + }, + { + "epoch": 0.66, + "learning_rate": 5.580749339319746e-05, + "loss": 2.5347, + "step": 38360 + }, + { + "epoch": 0.66, + "learning_rate": 5.575616033133283e-05, + "loss": 2.5481, + "step": 38370 + }, + { + "epoch": 0.66, + "learning_rate": 5.5704841761672044e-05, + "loss": 2.5428, + "step": 38380 + }, + { + "epoch": 0.66, + "learning_rate": 5.565353770102468e-05, + "loss": 2.6407, + "step": 38390 + }, + { + "epoch": 0.66, + "learning_rate": 5.56022481661955e-05, + "loss": 2.6803, + "step": 38400 + }, + { + "epoch": 0.66, + "learning_rate": 5.555097317398454e-05, + "loss": 2.6337, + "step": 38410 + }, + { + "epoch": 0.66, + "learning_rate": 5.5499712741187106e-05, + "loss": 2.4687, + "step": 38420 + }, + { + "epoch": 0.67, + "learning_rate": 5.5448466884593685e-05, + "loss": 2.5581, + "step": 38430 + }, + { + "epoch": 0.67, + "learning_rate": 5.5397235620990084e-05, + "loss": 2.5476, + "step": 38440 + }, + { + "epoch": 0.67, + "learning_rate": 5.5346018967157165e-05, + "loss": 2.5992, + "step": 38450 + }, + { + "epoch": 0.67, + "learning_rate": 5.5294816939871175e-05, + "loss": 2.508, + "step": 38460 + }, + { + "epoch": 0.67, + "learning_rate": 5.524362955590341e-05, + "loss": 2.5747, + "step": 38470 + }, + { + "epoch": 0.67, + "learning_rate": 5.519245683202049e-05, + "loss": 2.564, + "step": 38480 + }, + { + "epoch": 0.67, + "learning_rate": 5.514129878498424e-05, + "loss": 2.6326, + "step": 38490 + }, + { + "epoch": 0.67, + "learning_rate": 5.5090155431551584e-05, + "loss": 2.4538, + "step": 38500 + }, + { + "epoch": 0.67, + "learning_rate": 5.503902678847467e-05, + "loss": 2.6087, + "step": 38510 + }, + { + "epoch": 0.67, + "learning_rate": 5.498791287250091e-05, + "loss": 2.5598, + "step": 38520 + }, + { + "epoch": 0.67, + "learning_rate": 5.493681370037274e-05, + "loss": 2.5342, + "step": 38530 + }, + { + "epoch": 0.67, + "learning_rate": 5.488572928882789e-05, + "loss": 2.5022, + "step": 38540 + }, + { + "epoch": 0.67, + "learning_rate": 5.483465965459924e-05, + "loss": 2.4754, + "step": 38550 + }, + { + "epoch": 0.67, + "learning_rate": 5.4783604814414745e-05, + "loss": 2.5718, + "step": 38560 + }, + { + "epoch": 0.67, + "learning_rate": 5.473766812097825e-05, + "loss": 2.5611, + "step": 38570 + }, + { + "epoch": 0.67, + "learning_rate": 5.468664143554605e-05, + "loss": 2.5326, + "step": 38580 + }, + { + "epoch": 0.67, + "learning_rate": 5.463562959264181e-05, + "loss": 2.5991, + "step": 38590 + }, + { + "epoch": 0.67, + "learning_rate": 5.45846326089747e-05, + "loss": 2.595, + "step": 38600 + }, + { + "epoch": 0.67, + "learning_rate": 5.4533650501248856e-05, + "loss": 2.5596, + "step": 38610 + }, + { + "epoch": 0.67, + "learning_rate": 5.4482683286163664e-05, + "loss": 2.5562, + "step": 38620 + }, + { + "epoch": 0.67, + "learning_rate": 5.44317309804136e-05, + "loss": 2.527, + "step": 38630 + }, + { + "epoch": 0.67, + "learning_rate": 5.4380793600688194e-05, + "loss": 2.5473, + "step": 38640 + }, + { + "epoch": 0.67, + "learning_rate": 5.432987116367219e-05, + "loss": 2.5953, + "step": 38650 + }, + { + "epoch": 0.67, + "learning_rate": 5.427896368604539e-05, + "loss": 2.561, + "step": 38660 + }, + { + "epoch": 0.67, + "learning_rate": 5.422807118448264e-05, + "loss": 2.545, + "step": 38670 + }, + { + "epoch": 0.67, + "learning_rate": 5.417719367565397e-05, + "loss": 2.6195, + "step": 38680 + }, + { + "epoch": 0.67, + "learning_rate": 5.4126331176224495e-05, + "loss": 2.5392, + "step": 38690 + }, + { + "epoch": 0.67, + "learning_rate": 5.4075483702854314e-05, + "loss": 2.4845, + "step": 38700 + }, + { + "epoch": 0.67, + "learning_rate": 5.402465127219869e-05, + "loss": 2.5936, + "step": 38710 + }, + { + "epoch": 0.67, + "learning_rate": 5.3973833900907954e-05, + "loss": 2.5265, + "step": 38720 + }, + { + "epoch": 0.67, + "learning_rate": 5.392303160562748e-05, + "loss": 2.5953, + "step": 38730 + }, + { + "epoch": 0.67, + "learning_rate": 5.387224440299779e-05, + "loss": 2.5274, + "step": 38740 + }, + { + "epoch": 0.67, + "learning_rate": 5.382147230965424e-05, + "loss": 2.538, + "step": 38750 + }, + { + "epoch": 0.67, + "learning_rate": 5.3770715342227485e-05, + "loss": 2.5551, + "step": 38760 + }, + { + "epoch": 0.67, + "learning_rate": 5.371997351734313e-05, + "loss": 2.5216, + "step": 38770 + }, + { + "epoch": 0.67, + "learning_rate": 5.3669246851621755e-05, + "loss": 2.6477, + "step": 38780 + }, + { + "epoch": 0.67, + "learning_rate": 5.361853536167905e-05, + "loss": 2.5628, + "step": 38790 + }, + { + "epoch": 0.67, + "learning_rate": 5.356783906412577e-05, + "loss": 2.5367, + "step": 38800 + }, + { + "epoch": 0.67, + "learning_rate": 5.351715797556755e-05, + "loss": 2.5349, + "step": 38810 + }, + { + "epoch": 0.67, + "learning_rate": 5.3466492112605236e-05, + "loss": 2.5072, + "step": 38820 + }, + { + "epoch": 0.67, + "learning_rate": 5.3415841491834495e-05, + "loss": 2.5564, + "step": 38830 + }, + { + "epoch": 0.67, + "learning_rate": 5.3365206129846124e-05, + "loss": 2.5445, + "step": 38840 + }, + { + "epoch": 0.67, + "learning_rate": 5.3314586043225936e-05, + "loss": 2.5431, + "step": 38850 + }, + { + "epoch": 0.67, + "learning_rate": 5.326398124855461e-05, + "loss": 2.6477, + "step": 38860 + }, + { + "epoch": 0.67, + "learning_rate": 5.3213391762407935e-05, + "loss": 2.4726, + "step": 38870 + }, + { + "epoch": 0.67, + "learning_rate": 5.3162817601356704e-05, + "loss": 2.5858, + "step": 38880 + }, + { + "epoch": 0.67, + "learning_rate": 5.311225878196655e-05, + "loss": 2.6749, + "step": 38890 + }, + { + "epoch": 0.67, + "learning_rate": 5.306171532079822e-05, + "loss": 2.6043, + "step": 38900 + }, + { + "epoch": 0.67, + "learning_rate": 5.301118723440736e-05, + "loss": 2.599, + "step": 38910 + }, + { + "epoch": 0.67, + "learning_rate": 5.296067453934459e-05, + "loss": 2.5472, + "step": 38920 + }, + { + "epoch": 0.67, + "learning_rate": 5.2910177252155567e-05, + "loss": 2.5572, + "step": 38930 + }, + { + "epoch": 0.67, + "learning_rate": 5.2859695389380725e-05, + "loss": 2.5146, + "step": 38940 + }, + { + "epoch": 0.67, + "learning_rate": 5.2809228967555604e-05, + "loss": 2.6815, + "step": 38950 + }, + { + "epoch": 0.67, + "learning_rate": 5.2758778003210675e-05, + "loss": 2.5716, + "step": 38960 + }, + { + "epoch": 0.67, + "learning_rate": 5.27083425128712e-05, + "loss": 2.5543, + "step": 38970 + }, + { + "epoch": 0.67, + "learning_rate": 5.2657922513057556e-05, + "loss": 2.5059, + "step": 38980 + }, + { + "epoch": 0.67, + "learning_rate": 5.2607518020284986e-05, + "loss": 2.6026, + "step": 38990 + }, + { + "epoch": 0.67, + "learning_rate": 5.2557129051063535e-05, + "loss": 2.5651, + "step": 39000 + }, + { + "epoch": 0.68, + "learning_rate": 5.250675562189835e-05, + "loss": 2.5059, + "step": 39010 + }, + { + "epoch": 0.68, + "learning_rate": 5.245639774928939e-05, + "loss": 2.5109, + "step": 39020 + }, + { + "epoch": 0.68, + "learning_rate": 5.2406055449731496e-05, + "loss": 2.6496, + "step": 39030 + }, + { + "epoch": 0.68, + "learning_rate": 5.2355728739714436e-05, + "loss": 2.6393, + "step": 39040 + }, + { + "epoch": 0.68, + "learning_rate": 5.230541763572294e-05, + "loss": 2.5683, + "step": 39050 + }, + { + "epoch": 0.68, + "learning_rate": 5.2255122154236494e-05, + "loss": 2.5315, + "step": 39060 + }, + { + "epoch": 0.68, + "learning_rate": 5.22048423117296e-05, + "loss": 2.5936, + "step": 39070 + }, + { + "epoch": 0.68, + "learning_rate": 5.215457812467148e-05, + "loss": 2.5391, + "step": 39080 + }, + { + "epoch": 0.68, + "learning_rate": 5.210432960952634e-05, + "loss": 2.512, + "step": 39090 + }, + { + "epoch": 0.68, + "learning_rate": 5.205409678275335e-05, + "loss": 2.5955, + "step": 39100 + }, + { + "epoch": 0.68, + "learning_rate": 5.200387966080631e-05, + "loss": 2.6164, + "step": 39110 + }, + { + "epoch": 0.68, + "learning_rate": 5.195367826013402e-05, + "loss": 2.6131, + "step": 39120 + }, + { + "epoch": 0.68, + "learning_rate": 5.190349259718015e-05, + "loss": 2.5641, + "step": 39130 + }, + { + "epoch": 0.68, + "learning_rate": 5.1853322688383074e-05, + "loss": 2.5694, + "step": 39140 + }, + { + "epoch": 0.68, + "learning_rate": 5.180316855017618e-05, + "loss": 2.6144, + "step": 39150 + }, + { + "epoch": 0.68, + "learning_rate": 5.175303019898754e-05, + "loss": 2.5567, + "step": 39160 + }, + { + "epoch": 0.68, + "learning_rate": 5.170791919439211e-05, + "loss": 2.5719, + "step": 39170 + }, + { + "epoch": 0.68, + "learning_rate": 5.1657810883779255e-05, + "loss": 2.5176, + "step": 39180 + }, + { + "epoch": 0.68, + "learning_rate": 5.160771840779699e-05, + "loss": 2.5915, + "step": 39190 + }, + { + "epoch": 0.68, + "learning_rate": 5.15576417828533e-05, + "loss": 2.4794, + "step": 39200 + }, + { + "epoch": 0.68, + "learning_rate": 5.150758102535098e-05, + "loss": 2.5073, + "step": 39210 + }, + { + "epoch": 0.68, + "learning_rate": 5.145753615168747e-05, + "loss": 2.5807, + "step": 39220 + }, + { + "epoch": 0.68, + "learning_rate": 5.14075071782552e-05, + "loss": 2.6019, + "step": 39230 + }, + { + "epoch": 0.68, + "learning_rate": 5.135749412144135e-05, + "loss": 2.554, + "step": 39240 + }, + { + "epoch": 0.68, + "learning_rate": 5.130749699762776e-05, + "loss": 2.5807, + "step": 39250 + }, + { + "epoch": 0.68, + "learning_rate": 5.1257515823191216e-05, + "loss": 2.5399, + "step": 39260 + }, + { + "epoch": 0.68, + "learning_rate": 5.120755061450313e-05, + "loss": 2.6573, + "step": 39270 + }, + { + "epoch": 0.68, + "learning_rate": 5.1157601387929776e-05, + "loss": 2.4933, + "step": 39280 + }, + { + "epoch": 0.68, + "learning_rate": 5.1107668159832255e-05, + "loss": 2.5627, + "step": 39290 + }, + { + "epoch": 0.68, + "learning_rate": 5.105775094656622e-05, + "loss": 2.5683, + "step": 39300 + }, + { + "epoch": 0.68, + "learning_rate": 5.100784976448227e-05, + "loss": 2.5203, + "step": 39310 + }, + { + "epoch": 0.68, + "learning_rate": 5.095796462992575e-05, + "loss": 2.5224, + "step": 39320 + }, + { + "epoch": 0.68, + "learning_rate": 5.090809555923657e-05, + "loss": 2.6065, + "step": 39330 + }, + { + "epoch": 0.68, + "learning_rate": 5.085824256874954e-05, + "loss": 2.568, + "step": 39340 + }, + { + "epoch": 0.68, + "learning_rate": 5.080840567479421e-05, + "loss": 2.5081, + "step": 39350 + }, + { + "epoch": 0.68, + "learning_rate": 5.075858489369474e-05, + "loss": 2.5722, + "step": 39360 + }, + { + "epoch": 0.68, + "learning_rate": 5.070878024177008e-05, + "loss": 2.5219, + "step": 39370 + }, + { + "epoch": 0.68, + "learning_rate": 5.065899173533397e-05, + "loss": 2.5827, + "step": 39380 + }, + { + "epoch": 0.68, + "learning_rate": 5.0609219390694694e-05, + "loss": 2.5009, + "step": 39390 + }, + { + "epoch": 0.68, + "learning_rate": 5.0559463224155365e-05, + "loss": 2.4868, + "step": 39400 + }, + { + "epoch": 0.68, + "learning_rate": 5.0509723252013774e-05, + "loss": 2.4914, + "step": 39410 + }, + { + "epoch": 0.68, + "learning_rate": 5.0459999490562416e-05, + "loss": 2.5647, + "step": 39420 + }, + { + "epoch": 0.68, + "learning_rate": 5.041029195608846e-05, + "loss": 2.5804, + "step": 39430 + }, + { + "epoch": 0.68, + "learning_rate": 5.0360600664873715e-05, + "loss": 2.5962, + "step": 39440 + }, + { + "epoch": 0.68, + "learning_rate": 5.031092563319474e-05, + "loss": 2.6384, + "step": 39450 + }, + { + "epoch": 0.68, + "learning_rate": 5.026126687732278e-05, + "loss": 2.6219, + "step": 39460 + }, + { + "epoch": 0.68, + "learning_rate": 5.021162441352365e-05, + "loss": 2.5512, + "step": 39470 + }, + { + "epoch": 0.68, + "learning_rate": 5.01619982580579e-05, + "loss": 2.4981, + "step": 39480 + }, + { + "epoch": 0.68, + "learning_rate": 5.011238842718078e-05, + "loss": 2.5452, + "step": 39490 + }, + { + "epoch": 0.68, + "learning_rate": 5.0062794937142075e-05, + "loss": 2.5068, + "step": 39500 + }, + { + "epoch": 0.68, + "learning_rate": 5.001321780418635e-05, + "loss": 2.5089, + "step": 39510 + }, + { + "epoch": 0.68, + "learning_rate": 4.996365704455266e-05, + "loss": 2.539, + "step": 39520 + }, + { + "epoch": 0.68, + "learning_rate": 4.9914112674474836e-05, + "loss": 2.5271, + "step": 39530 + }, + { + "epoch": 0.68, + "learning_rate": 4.986458471018132e-05, + "loss": 2.5514, + "step": 39540 + }, + { + "epoch": 0.68, + "learning_rate": 4.981507316789505e-05, + "loss": 2.5272, + "step": 39550 + }, + { + "epoch": 0.68, + "learning_rate": 4.976557806383376e-05, + "loss": 2.5713, + "step": 39560 + }, + { + "epoch": 0.68, + "learning_rate": 4.971609941420973e-05, + "loss": 2.5701, + "step": 39570 + }, + { + "epoch": 0.68, + "learning_rate": 4.966663723522973e-05, + "loss": 2.5218, + "step": 39580 + }, + { + "epoch": 0.69, + "learning_rate": 4.961719154309541e-05, + "loss": 2.5632, + "step": 39590 + }, + { + "epoch": 0.69, + "learning_rate": 4.9567762354002736e-05, + "loss": 2.5626, + "step": 39600 + }, + { + "epoch": 0.69, + "learning_rate": 4.951834968414242e-05, + "loss": 2.5008, + "step": 39610 + }, + { + "epoch": 0.69, + "learning_rate": 4.946895354969978e-05, + "loss": 2.5908, + "step": 39620 + }, + { + "epoch": 0.69, + "learning_rate": 4.941957396685459e-05, + "loss": 2.5044, + "step": 39630 + }, + { + "epoch": 0.69, + "learning_rate": 4.937021095178133e-05, + "loss": 2.6042, + "step": 39640 + }, + { + "epoch": 0.69, + "learning_rate": 4.932086452064903e-05, + "loss": 2.5444, + "step": 39650 + }, + { + "epoch": 0.69, + "learning_rate": 4.927153468962119e-05, + "loss": 2.5784, + "step": 39660 + }, + { + "epoch": 0.69, + "learning_rate": 4.9222221474855976e-05, + "loss": 2.6056, + "step": 39670 + }, + { + "epoch": 0.69, + "learning_rate": 4.917292489250613e-05, + "loss": 2.5466, + "step": 39680 + }, + { + "epoch": 0.69, + "learning_rate": 4.912364495871883e-05, + "loss": 2.5618, + "step": 39690 + }, + { + "epoch": 0.69, + "learning_rate": 4.907438168963588e-05, + "loss": 2.4998, + "step": 39700 + }, + { + "epoch": 0.69, + "learning_rate": 4.902513510139368e-05, + "loss": 2.6429, + "step": 39710 + }, + { + "epoch": 0.69, + "learning_rate": 4.897590521012301e-05, + "loss": 2.6028, + "step": 39720 + }, + { + "epoch": 0.69, + "learning_rate": 4.892669203194933e-05, + "loss": 2.4407, + "step": 39730 + }, + { + "epoch": 0.69, + "learning_rate": 4.887749558299252e-05, + "loss": 2.5261, + "step": 39740 + }, + { + "epoch": 0.69, + "learning_rate": 4.8828315879367037e-05, + "loss": 2.5019, + "step": 39750 + }, + { + "epoch": 0.69, + "learning_rate": 4.8779152937181896e-05, + "loss": 2.6259, + "step": 39760 + }, + { + "epoch": 0.69, + "learning_rate": 4.873000677254045e-05, + "loss": 2.5693, + "step": 39770 + }, + { + "epoch": 0.69, + "learning_rate": 4.8680877401540794e-05, + "loss": 2.5027, + "step": 39780 + }, + { + "epoch": 0.69, + "learning_rate": 4.8631764840275385e-05, + "loss": 2.5702, + "step": 39790 + }, + { + "epoch": 0.69, + "learning_rate": 4.8582669104831135e-05, + "loss": 2.6094, + "step": 39800 + }, + { + "epoch": 0.69, + "learning_rate": 4.853359021128957e-05, + "loss": 2.5109, + "step": 39810 + }, + { + "epoch": 0.69, + "learning_rate": 4.848452817572655e-05, + "loss": 2.5858, + "step": 39820 + }, + { + "epoch": 0.69, + "learning_rate": 4.8435483014212536e-05, + "loss": 2.6025, + "step": 39830 + }, + { + "epoch": 0.69, + "learning_rate": 4.838645474281245e-05, + "loss": 2.6243, + "step": 39840 + }, + { + "epoch": 0.69, + "learning_rate": 4.833744337758559e-05, + "loss": 2.6158, + "step": 39850 + }, + { + "epoch": 0.69, + "learning_rate": 4.8288448934585804e-05, + "loss": 2.4409, + "step": 39860 + }, + { + "epoch": 0.69, + "learning_rate": 4.8239471429861406e-05, + "loss": 2.4969, + "step": 39870 + }, + { + "epoch": 0.69, + "learning_rate": 4.8190510879455064e-05, + "loss": 2.5068, + "step": 39880 + }, + { + "epoch": 0.69, + "learning_rate": 4.814156729940398e-05, + "loss": 2.5942, + "step": 39890 + }, + { + "epoch": 0.69, + "learning_rate": 4.809264070573981e-05, + "loss": 2.5064, + "step": 39900 + }, + { + "epoch": 0.69, + "learning_rate": 4.804373111448853e-05, + "loss": 2.5615, + "step": 39910 + }, + { + "epoch": 0.69, + "learning_rate": 4.7994838541670686e-05, + "loss": 2.5664, + "step": 39920 + }, + { + "epoch": 0.69, + "learning_rate": 4.7945963003301196e-05, + "loss": 2.5089, + "step": 39930 + }, + { + "epoch": 0.69, + "learning_rate": 4.7897104515389337e-05, + "loss": 2.6441, + "step": 39940 + }, + { + "epoch": 0.69, + "learning_rate": 4.7848263093938874e-05, + "loss": 2.4369, + "step": 39950 + }, + { + "epoch": 0.69, + "learning_rate": 4.7799438754947976e-05, + "loss": 2.5307, + "step": 39960 + }, + { + "epoch": 0.69, + "learning_rate": 4.77506315144092e-05, + "loss": 2.5907, + "step": 39970 + }, + { + "epoch": 0.69, + "learning_rate": 4.770184138830955e-05, + "loss": 2.5584, + "step": 39980 + }, + { + "epoch": 0.69, + "learning_rate": 4.76530683926303e-05, + "loss": 2.5969, + "step": 39990 + }, + { + "epoch": 0.69, + "learning_rate": 4.760431254334722e-05, + "loss": 2.5448, + "step": 40000 + }, + { + "epoch": 0.69, + "learning_rate": 4.755557385643048e-05, + "loss": 2.6246, + "step": 40010 + }, + { + "epoch": 0.69, + "learning_rate": 4.750685234784451e-05, + "loss": 2.4884, + "step": 40020 + }, + { + "epoch": 0.69, + "learning_rate": 4.7458148033548234e-05, + "loss": 2.5273, + "step": 40030 + }, + { + "epoch": 0.69, + "learning_rate": 4.740946092949492e-05, + "loss": 2.5347, + "step": 40040 + }, + { + "epoch": 0.69, + "learning_rate": 4.736079105163212e-05, + "loss": 2.4027, + "step": 40050 + }, + { + "epoch": 0.69, + "learning_rate": 4.7312138415901865e-05, + "loss": 2.6107, + "step": 40060 + }, + { + "epoch": 0.69, + "learning_rate": 4.726350303824039e-05, + "loss": 2.4698, + "step": 40070 + }, + { + "epoch": 0.69, + "learning_rate": 4.721488493457841e-05, + "loss": 2.4835, + "step": 40080 + }, + { + "epoch": 0.69, + "learning_rate": 4.7166284120840965e-05, + "loss": 2.5226, + "step": 40090 + }, + { + "epoch": 0.69, + "learning_rate": 4.7117700612947326e-05, + "loss": 2.5353, + "step": 40100 + }, + { + "epoch": 0.69, + "learning_rate": 4.70691344268112e-05, + "loss": 2.5832, + "step": 40110 + }, + { + "epoch": 0.69, + "learning_rate": 4.7020585578340626e-05, + "loss": 2.5831, + "step": 40120 + }, + { + "epoch": 0.69, + "learning_rate": 4.697205408343783e-05, + "loss": 2.5876, + "step": 40130 + }, + { + "epoch": 0.69, + "learning_rate": 4.692353995799958e-05, + "loss": 2.5869, + "step": 40140 + }, + { + "epoch": 0.69, + "learning_rate": 4.6875043217916734e-05, + "loss": 2.5864, + "step": 40150 + }, + { + "epoch": 0.7, + "learning_rate": 4.682656387907456e-05, + "loss": 2.5448, + "step": 40160 + }, + { + "epoch": 0.7, + "learning_rate": 4.6778101957352675e-05, + "loss": 2.6073, + "step": 40170 + }, + { + "epoch": 0.7, + "learning_rate": 4.6729657468624823e-05, + "loss": 2.5901, + "step": 40180 + }, + { + "epoch": 0.7, + "learning_rate": 4.6681230428759206e-05, + "loss": 2.5684, + "step": 40190 + }, + { + "epoch": 0.7, + "learning_rate": 4.663282085361826e-05, + "loss": 2.5544, + "step": 40200 + }, + { + "epoch": 0.7, + "learning_rate": 4.658442875905865e-05, + "loss": 2.4368, + "step": 40210 + }, + { + "epoch": 0.7, + "learning_rate": 4.6536054160931365e-05, + "loss": 2.5639, + "step": 40220 + }, + { + "epoch": 0.7, + "learning_rate": 4.6487697075081696e-05, + "loss": 2.5146, + "step": 40230 + }, + { + "epoch": 0.7, + "learning_rate": 4.6439357517349066e-05, + "loss": 2.5331, + "step": 40240 + }, + { + "epoch": 0.7, + "learning_rate": 4.639103550356731e-05, + "loss": 2.627, + "step": 40250 + }, + { + "epoch": 0.7, + "learning_rate": 4.634273104956445e-05, + "loss": 2.6719, + "step": 40260 + }, + { + "epoch": 0.7, + "learning_rate": 4.629444417116271e-05, + "loss": 2.5787, + "step": 40270 + }, + { + "epoch": 0.7, + "learning_rate": 4.624617488417863e-05, + "loss": 2.5577, + "step": 40280 + }, + { + "epoch": 0.7, + "learning_rate": 4.6197923204423e-05, + "loss": 2.5863, + "step": 40290 + }, + { + "epoch": 0.7, + "learning_rate": 4.6149689147700726e-05, + "loss": 2.5451, + "step": 40300 + }, + { + "epoch": 0.7, + "learning_rate": 4.6101472729811105e-05, + "loss": 2.5393, + "step": 40310 + }, + { + "epoch": 0.7, + "learning_rate": 4.605327396654745e-05, + "loss": 2.528, + "step": 40320 + }, + { + "epoch": 0.7, + "learning_rate": 4.600509287369752e-05, + "loss": 2.6322, + "step": 40330 + }, + { + "epoch": 0.7, + "learning_rate": 4.5956929467043186e-05, + "loss": 2.5989, + "step": 40340 + }, + { + "epoch": 0.7, + "learning_rate": 4.590878376236044e-05, + "loss": 2.5601, + "step": 40350 + }, + { + "epoch": 0.7, + "learning_rate": 4.586065577541963e-05, + "loss": 2.4454, + "step": 40360 + }, + { + "epoch": 0.7, + "learning_rate": 4.581254552198515e-05, + "loss": 2.5779, + "step": 40370 + }, + { + "epoch": 0.7, + "learning_rate": 4.576445301781569e-05, + "loss": 2.5565, + "step": 40380 + }, + { + "epoch": 0.7, + "learning_rate": 4.571637827866414e-05, + "loss": 2.5499, + "step": 40390 + }, + { + "epoch": 0.7, + "learning_rate": 4.5668321320277464e-05, + "loss": 2.6262, + "step": 40400 + }, + { + "epoch": 0.7, + "learning_rate": 4.562028215839687e-05, + "loss": 2.5089, + "step": 40410 + }, + { + "epoch": 0.7, + "learning_rate": 4.557226080875779e-05, + "loss": 2.5647, + "step": 40420 + }, + { + "epoch": 0.7, + "learning_rate": 4.552425728708969e-05, + "loss": 2.4954, + "step": 40430 + }, + { + "epoch": 0.7, + "learning_rate": 4.54762716091163e-05, + "loss": 2.6293, + "step": 40440 + }, + { + "epoch": 0.7, + "learning_rate": 4.54283037905555e-05, + "loss": 2.5226, + "step": 40450 + }, + { + "epoch": 0.7, + "learning_rate": 4.538035384711925e-05, + "loss": 2.5026, + "step": 40460 + }, + { + "epoch": 0.7, + "learning_rate": 4.533242179451371e-05, + "loss": 2.4615, + "step": 40470 + }, + { + "epoch": 0.7, + "learning_rate": 4.528450764843921e-05, + "loss": 2.582, + "step": 40480 + }, + { + "epoch": 0.7, + "learning_rate": 4.5236611424590115e-05, + "loss": 2.6032, + "step": 40490 + }, + { + "epoch": 0.7, + "learning_rate": 4.5188733138655004e-05, + "loss": 2.4671, + "step": 40500 + }, + { + "epoch": 0.7, + "learning_rate": 4.5140872806316545e-05, + "loss": 2.546, + "step": 40510 + }, + { + "epoch": 0.7, + "learning_rate": 4.509303044325156e-05, + "loss": 2.6072, + "step": 40520 + }, + { + "epoch": 0.7, + "learning_rate": 4.5045206065130976e-05, + "loss": 2.5282, + "step": 40530 + }, + { + "epoch": 0.7, + "learning_rate": 4.499739968761975e-05, + "loss": 2.5678, + "step": 40540 + }, + { + "epoch": 0.7, + "learning_rate": 4.4949611326377025e-05, + "loss": 2.5644, + "step": 40550 + }, + { + "epoch": 0.7, + "learning_rate": 4.490184099705609e-05, + "loss": 2.6047, + "step": 40560 + }, + { + "epoch": 0.7, + "learning_rate": 4.485408871530417e-05, + "loss": 2.5434, + "step": 40570 + }, + { + "epoch": 0.7, + "learning_rate": 4.4806354496762705e-05, + "loss": 2.5188, + "step": 40580 + }, + { + "epoch": 0.7, + "learning_rate": 4.4758638357067226e-05, + "loss": 2.6543, + "step": 40590 + }, + { + "epoch": 0.7, + "learning_rate": 4.4710940311847216e-05, + "loss": 2.5529, + "step": 40600 + }, + { + "epoch": 0.7, + "learning_rate": 4.46632603767264e-05, + "loss": 2.4907, + "step": 40610 + }, + { + "epoch": 0.7, + "learning_rate": 4.4615598567322415e-05, + "loss": 2.6039, + "step": 40620 + }, + { + "epoch": 0.7, + "learning_rate": 4.4567954899247045e-05, + "loss": 2.607, + "step": 40630 + }, + { + "epoch": 0.7, + "learning_rate": 4.4520329388106174e-05, + "loss": 2.5726, + "step": 40640 + }, + { + "epoch": 0.7, + "learning_rate": 4.4472722049499615e-05, + "loss": 2.5993, + "step": 40650 + }, + { + "epoch": 0.7, + "learning_rate": 4.442513289902131e-05, + "loss": 2.5562, + "step": 40660 + }, + { + "epoch": 0.7, + "learning_rate": 4.437756195225928e-05, + "loss": 2.545, + "step": 40670 + }, + { + "epoch": 0.7, + "learning_rate": 4.4330009224795424e-05, + "loss": 2.5415, + "step": 40680 + }, + { + "epoch": 0.7, + "learning_rate": 4.4282474732205926e-05, + "loss": 2.5465, + "step": 40690 + }, + { + "epoch": 0.7, + "learning_rate": 4.423495849006074e-05, + "loss": 2.5231, + "step": 40700 + }, + { + "epoch": 0.7, + "learning_rate": 4.418746051392401e-05, + "loss": 2.5101, + "step": 40710 + }, + { + "epoch": 0.7, + "learning_rate": 4.413998081935384e-05, + "loss": 2.5986, + "step": 40720 + }, + { + "epoch": 0.7, + "learning_rate": 4.409251942190231e-05, + "loss": 2.497, + "step": 40730 + }, + { + "epoch": 0.71, + "learning_rate": 4.404507633711557e-05, + "loss": 2.5457, + "step": 40740 + }, + { + "epoch": 0.71, + "learning_rate": 4.39976515805338e-05, + "loss": 2.6033, + "step": 40750 + }, + { + "epoch": 0.71, + "learning_rate": 4.3950245167691e-05, + "loss": 2.624, + "step": 40760 + }, + { + "epoch": 0.71, + "learning_rate": 4.390285711411538e-05, + "loss": 2.512, + "step": 40770 + }, + { + "epoch": 0.71, + "learning_rate": 4.385548743532904e-05, + "loss": 2.4653, + "step": 40780 + }, + { + "epoch": 0.71, + "learning_rate": 4.3808136146848e-05, + "loss": 2.5625, + "step": 40790 + }, + { + "epoch": 0.71, + "learning_rate": 4.376080326418235e-05, + "loss": 2.5819, + "step": 40800 + }, + { + "epoch": 0.71, + "learning_rate": 4.371348880283616e-05, + "loss": 2.571, + "step": 40810 + }, + { + "epoch": 0.71, + "learning_rate": 4.366619277830736e-05, + "loss": 2.4933, + "step": 40820 + }, + { + "epoch": 0.71, + "learning_rate": 4.361891520608793e-05, + "loss": 2.5741, + "step": 40830 + }, + { + "epoch": 0.71, + "learning_rate": 4.3571656101663816e-05, + "loss": 2.5211, + "step": 40840 + }, + { + "epoch": 0.71, + "learning_rate": 4.3524415480514826e-05, + "loss": 2.5517, + "step": 40850 + }, + { + "epoch": 0.71, + "learning_rate": 4.347719335811482e-05, + "loss": 2.5616, + "step": 40860 + }, + { + "epoch": 0.71, + "learning_rate": 4.3429989749931454e-05, + "loss": 2.5276, + "step": 40870 + }, + { + "epoch": 0.71, + "learning_rate": 4.338280467142652e-05, + "loss": 2.4806, + "step": 40880 + }, + { + "epoch": 0.71, + "learning_rate": 4.333563813805563e-05, + "loss": 2.4652, + "step": 40890 + }, + { + "epoch": 0.71, + "learning_rate": 4.328849016526826e-05, + "loss": 2.475, + "step": 40900 + }, + { + "epoch": 0.71, + "learning_rate": 4.324136076850793e-05, + "loss": 2.4632, + "step": 40910 + }, + { + "epoch": 0.71, + "learning_rate": 4.3194249963211954e-05, + "loss": 2.4914, + "step": 40920 + }, + { + "epoch": 0.71, + "learning_rate": 4.3147157764811644e-05, + "loss": 2.5548, + "step": 40930 + }, + { + "epoch": 0.71, + "learning_rate": 4.310008418873225e-05, + "loss": 2.5067, + "step": 40940 + }, + { + "epoch": 0.71, + "learning_rate": 4.3053029250392774e-05, + "loss": 2.5514, + "step": 40950 + }, + { + "epoch": 0.71, + "learning_rate": 4.300599296520624e-05, + "loss": 2.5586, + "step": 40960 + }, + { + "epoch": 0.71, + "learning_rate": 4.2958975348579575e-05, + "loss": 2.518, + "step": 40970 + }, + { + "epoch": 0.71, + "learning_rate": 4.291197641591346e-05, + "loss": 2.5352, + "step": 40980 + }, + { + "epoch": 0.71, + "learning_rate": 4.2864996182602576e-05, + "loss": 2.4868, + "step": 40990 + }, + { + "epoch": 0.71, + "learning_rate": 4.281803466403548e-05, + "loss": 2.4602, + "step": 41000 + }, + { + "epoch": 0.71, + "learning_rate": 4.277109187559448e-05, + "loss": 2.4807, + "step": 41010 + }, + { + "epoch": 0.71, + "learning_rate": 4.272416783265587e-05, + "loss": 2.4934, + "step": 41020 + }, + { + "epoch": 0.71, + "learning_rate": 4.26772625505898e-05, + "loss": 2.4467, + "step": 41030 + }, + { + "epoch": 0.71, + "learning_rate": 4.263037604476017e-05, + "loss": 2.4611, + "step": 41040 + }, + { + "epoch": 0.71, + "learning_rate": 4.258350833052481e-05, + "loss": 2.4866, + "step": 41050 + }, + { + "epoch": 0.71, + "learning_rate": 4.253665942323541e-05, + "loss": 2.5404, + "step": 41060 + }, + { + "epoch": 0.71, + "learning_rate": 4.248982933823746e-05, + "loss": 2.5712, + "step": 41070 + }, + { + "epoch": 0.71, + "learning_rate": 4.2443018090870334e-05, + "loss": 2.598, + "step": 41080 + }, + { + "epoch": 0.71, + "learning_rate": 4.2396225696467116e-05, + "loss": 2.5122, + "step": 41090 + }, + { + "epoch": 0.71, + "learning_rate": 4.234945217035485e-05, + "loss": 2.5433, + "step": 41100 + }, + { + "epoch": 0.71, + "learning_rate": 4.2302697527854374e-05, + "loss": 2.5835, + "step": 41110 + }, + { + "epoch": 0.71, + "learning_rate": 4.225596178428024e-05, + "loss": 2.4946, + "step": 41120 + }, + { + "epoch": 0.71, + "learning_rate": 4.2209244954940916e-05, + "loss": 2.5683, + "step": 41130 + }, + { + "epoch": 0.71, + "learning_rate": 4.216254705513868e-05, + "loss": 2.5374, + "step": 41140 + }, + { + "epoch": 0.71, + "learning_rate": 4.21158681001695e-05, + "loss": 2.5115, + "step": 41150 + }, + { + "epoch": 0.71, + "learning_rate": 4.206920810532328e-05, + "loss": 2.5109, + "step": 41160 + }, + { + "epoch": 0.71, + "learning_rate": 4.202256708588357e-05, + "loss": 2.5624, + "step": 41170 + }, + { + "epoch": 0.71, + "learning_rate": 4.197594505712782e-05, + "loss": 2.5716, + "step": 41180 + }, + { + "epoch": 0.71, + "learning_rate": 4.192934203432724e-05, + "loss": 2.4938, + "step": 41190 + }, + { + "epoch": 0.71, + "learning_rate": 4.1882758032746724e-05, + "loss": 2.5062, + "step": 41200 + }, + { + "epoch": 0.71, + "learning_rate": 4.1836193067645046e-05, + "loss": 2.5256, + "step": 41210 + }, + { + "epoch": 0.71, + "learning_rate": 4.178964715427474e-05, + "loss": 2.5038, + "step": 41220 + }, + { + "epoch": 0.71, + "learning_rate": 4.174312030788193e-05, + "loss": 2.6479, + "step": 41230 + }, + { + "epoch": 0.71, + "learning_rate": 4.169661254370679e-05, + "loss": 2.5062, + "step": 41240 + }, + { + "epoch": 0.71, + "learning_rate": 4.1650123876982974e-05, + "loss": 2.5452, + "step": 41250 + }, + { + "epoch": 0.71, + "learning_rate": 4.160365432293801e-05, + "loss": 2.522, + "step": 41260 + }, + { + "epoch": 0.71, + "learning_rate": 4.155720389679318e-05, + "loss": 2.4831, + "step": 41270 + }, + { + "epoch": 0.71, + "learning_rate": 4.151077261376341e-05, + "loss": 2.4916, + "step": 41280 + }, + { + "epoch": 0.71, + "learning_rate": 4.146436048905741e-05, + "loss": 2.6026, + "step": 41290 + }, + { + "epoch": 0.71, + "learning_rate": 4.1417967537877657e-05, + "loss": 2.6227, + "step": 41300 + }, + { + "epoch": 0.71, + "learning_rate": 4.1371593775420245e-05, + "loss": 2.5314, + "step": 41310 + }, + { + "epoch": 0.72, + "learning_rate": 4.132523921687507e-05, + "loss": 2.5105, + "step": 41320 + }, + { + "epoch": 0.72, + "learning_rate": 4.127890387742575e-05, + "loss": 2.5371, + "step": 41330 + }, + { + "epoch": 0.72, + "learning_rate": 4.123258777224948e-05, + "loss": 2.5588, + "step": 41340 + }, + { + "epoch": 0.72, + "learning_rate": 4.118629091651728e-05, + "loss": 2.5319, + "step": 41350 + }, + { + "epoch": 0.72, + "learning_rate": 4.1140013325393864e-05, + "loss": 2.5258, + "step": 41360 + }, + { + "epoch": 0.72, + "learning_rate": 4.1093755014037524e-05, + "loss": 2.5627, + "step": 41370 + }, + { + "epoch": 0.72, + "learning_rate": 4.104751599760036e-05, + "loss": 2.5694, + "step": 41380 + }, + { + "epoch": 0.72, + "learning_rate": 4.100129629122812e-05, + "loss": 2.5746, + "step": 41390 + }, + { + "epoch": 0.72, + "learning_rate": 4.095509591006015e-05, + "loss": 2.5987, + "step": 41400 + }, + { + "epoch": 0.72, + "learning_rate": 4.0908914869229583e-05, + "loss": 2.6081, + "step": 41410 + }, + { + "epoch": 0.72, + "learning_rate": 4.0862753183863066e-05, + "loss": 2.4699, + "step": 41420 + }, + { + "epoch": 0.72, + "learning_rate": 4.08166108690811e-05, + "loss": 2.5514, + "step": 41430 + }, + { + "epoch": 0.72, + "learning_rate": 4.077048793999774e-05, + "loss": 2.4442, + "step": 41440 + }, + { + "epoch": 0.72, + "learning_rate": 4.072438441172061e-05, + "loss": 2.5195, + "step": 41450 + }, + { + "epoch": 0.72, + "learning_rate": 4.067830029935115e-05, + "loss": 2.5111, + "step": 41460 + }, + { + "epoch": 0.72, + "learning_rate": 4.063223561798426e-05, + "loss": 2.5682, + "step": 41470 + }, + { + "epoch": 0.72, + "learning_rate": 4.058619038270862e-05, + "loss": 2.5892, + "step": 41480 + }, + { + "epoch": 0.72, + "learning_rate": 4.054016460860651e-05, + "loss": 2.4836, + "step": 41490 + }, + { + "epoch": 0.72, + "learning_rate": 4.049415831075375e-05, + "loss": 2.5167, + "step": 41500 + }, + { + "epoch": 0.72, + "learning_rate": 4.044817150421988e-05, + "loss": 2.3881, + "step": 41510 + }, + { + "epoch": 0.72, + "learning_rate": 4.040220420406804e-05, + "loss": 2.5733, + "step": 41520 + }, + { + "epoch": 0.72, + "learning_rate": 4.0356256425354886e-05, + "loss": 2.5212, + "step": 41530 + }, + { + "epoch": 0.72, + "learning_rate": 4.031032818313081e-05, + "loss": 2.6043, + "step": 41540 + }, + { + "epoch": 0.72, + "learning_rate": 4.0264419492439753e-05, + "loss": 2.4941, + "step": 41550 + }, + { + "epoch": 0.72, + "learning_rate": 4.0218530368319196e-05, + "loss": 2.486, + "step": 41560 + }, + { + "epoch": 0.72, + "learning_rate": 4.017266082580029e-05, + "loss": 2.5237, + "step": 41570 + }, + { + "epoch": 0.72, + "learning_rate": 4.0126810879907784e-05, + "loss": 2.5369, + "step": 41580 + }, + { + "epoch": 0.72, + "learning_rate": 4.008098054565989e-05, + "loss": 2.5052, + "step": 41590 + }, + { + "epoch": 0.72, + "learning_rate": 4.00351698380685e-05, + "loss": 2.632, + "step": 41600 + }, + { + "epoch": 0.72, + "learning_rate": 3.998937877213907e-05, + "loss": 2.5443, + "step": 41610 + }, + { + "epoch": 0.72, + "learning_rate": 3.994360736287058e-05, + "loss": 2.5009, + "step": 41620 + }, + { + "epoch": 0.72, + "learning_rate": 3.989785562525565e-05, + "loss": 2.4294, + "step": 41630 + }, + { + "epoch": 0.72, + "learning_rate": 3.985212357428032e-05, + "loss": 2.5329, + "step": 41640 + }, + { + "epoch": 0.72, + "learning_rate": 3.980641122492429e-05, + "loss": 2.553, + "step": 41650 + }, + { + "epoch": 0.72, + "learning_rate": 3.976071859216083e-05, + "loss": 2.4872, + "step": 41660 + }, + { + "epoch": 0.72, + "learning_rate": 3.971504569095662e-05, + "loss": 2.5605, + "step": 41670 + }, + { + "epoch": 0.72, + "learning_rate": 3.966939253627199e-05, + "loss": 2.4913, + "step": 41680 + }, + { + "epoch": 0.72, + "learning_rate": 3.9623759143060815e-05, + "loss": 2.5338, + "step": 41690 + }, + { + "epoch": 0.72, + "learning_rate": 3.957814552627037e-05, + "loss": 2.5516, + "step": 41700 + }, + { + "epoch": 0.72, + "learning_rate": 3.9532551700841614e-05, + "loss": 2.6489, + "step": 41710 + }, + { + "epoch": 0.72, + "learning_rate": 3.948697768170886e-05, + "loss": 2.5624, + "step": 41720 + }, + { + "epoch": 0.72, + "learning_rate": 3.9441423483800054e-05, + "loss": 2.5125, + "step": 41730 + }, + { + "epoch": 0.72, + "learning_rate": 3.9395889122036655e-05, + "loss": 2.5168, + "step": 41740 + }, + { + "epoch": 0.72, + "learning_rate": 3.935037461133352e-05, + "loss": 2.5346, + "step": 41750 + }, + { + "epoch": 0.72, + "learning_rate": 3.9304879966599074e-05, + "loss": 2.5768, + "step": 41760 + }, + { + "epoch": 0.72, + "learning_rate": 3.925940520273527e-05, + "loss": 2.5788, + "step": 41770 + }, + { + "epoch": 0.72, + "learning_rate": 3.9213950334637406e-05, + "loss": 2.5487, + "step": 41780 + }, + { + "epoch": 0.72, + "learning_rate": 3.91685153771945e-05, + "loss": 2.5972, + "step": 41790 + }, + { + "epoch": 0.72, + "learning_rate": 3.912310034528881e-05, + "loss": 2.55, + "step": 41800 + }, + { + "epoch": 0.72, + "learning_rate": 3.9077705253796205e-05, + "loss": 2.4776, + "step": 41810 + }, + { + "epoch": 0.72, + "learning_rate": 3.903233011758599e-05, + "loss": 2.5435, + "step": 41820 + }, + { + "epoch": 0.72, + "learning_rate": 3.898697495152089e-05, + "loss": 2.5646, + "step": 41830 + }, + { + "epoch": 0.72, + "learning_rate": 3.8941639770457164e-05, + "loss": 2.5006, + "step": 41840 + }, + { + "epoch": 0.72, + "learning_rate": 3.88963245892445e-05, + "loss": 2.5832, + "step": 41850 + }, + { + "epoch": 0.72, + "learning_rate": 3.885102942272597e-05, + "loss": 2.5452, + "step": 41860 + }, + { + "epoch": 0.72, + "learning_rate": 3.880575428573817e-05, + "loss": 2.5623, + "step": 41870 + }, + { + "epoch": 0.72, + "learning_rate": 3.8760499193111145e-05, + "loss": 2.4972, + "step": 41880 + }, + { + "epoch": 0.72, + "learning_rate": 3.8715264159668265e-05, + "loss": 2.4585, + "step": 41890 + }, + { + "epoch": 0.73, + "learning_rate": 3.867004920022644e-05, + "loss": 2.524, + "step": 41900 + }, + { + "epoch": 0.73, + "learning_rate": 3.8624854329596015e-05, + "loss": 2.5401, + "step": 41910 + }, + { + "epoch": 0.73, + "learning_rate": 3.857967956258062e-05, + "loss": 2.6167, + "step": 41920 + }, + { + "epoch": 0.73, + "learning_rate": 3.853452491397743e-05, + "loss": 2.5592, + "step": 41930 + }, + { + "epoch": 0.73, + "learning_rate": 3.848939039857701e-05, + "loss": 2.5763, + "step": 41940 + }, + { + "epoch": 0.73, + "learning_rate": 3.844427603116325e-05, + "loss": 2.5027, + "step": 41950 + }, + { + "epoch": 0.73, + "learning_rate": 3.839918182651357e-05, + "loss": 2.5501, + "step": 41960 + }, + { + "epoch": 0.73, + "learning_rate": 3.83541077993986e-05, + "loss": 2.5798, + "step": 41970 + }, + { + "epoch": 0.73, + "learning_rate": 3.830905396458259e-05, + "loss": 2.5428, + "step": 41980 + }, + { + "epoch": 0.73, + "learning_rate": 3.826402033682304e-05, + "loss": 2.5047, + "step": 41990 + }, + { + "epoch": 0.73, + "learning_rate": 3.8219006930870795e-05, + "loss": 2.5302, + "step": 42000 + }, + { + "epoch": 0.73, + "learning_rate": 3.8174013761470205e-05, + "loss": 2.5816, + "step": 42010 + }, + { + "epoch": 0.73, + "learning_rate": 3.812904084335884e-05, + "loss": 2.5802, + "step": 42020 + }, + { + "epoch": 0.73, + "learning_rate": 3.808408819126776e-05, + "loss": 2.5459, + "step": 42030 + }, + { + "epoch": 0.73, + "learning_rate": 3.803915581992138e-05, + "loss": 2.582, + "step": 42040 + }, + { + "epoch": 0.73, + "learning_rate": 3.799424374403736e-05, + "loss": 2.5856, + "step": 42050 + }, + { + "epoch": 0.73, + "learning_rate": 3.794935197832681e-05, + "loss": 2.5668, + "step": 42060 + }, + { + "epoch": 0.73, + "learning_rate": 3.790448053749421e-05, + "loss": 2.4686, + "step": 42070 + }, + { + "epoch": 0.73, + "learning_rate": 3.785962943623727e-05, + "loss": 2.5662, + "step": 42080 + }, + { + "epoch": 0.73, + "learning_rate": 3.781479868924713e-05, + "loss": 2.5257, + "step": 42090 + }, + { + "epoch": 0.73, + "learning_rate": 3.776998831120828e-05, + "loss": 2.5115, + "step": 42100 + }, + { + "epoch": 0.73, + "learning_rate": 3.772519831679842e-05, + "loss": 2.5581, + "step": 42110 + }, + { + "epoch": 0.73, + "learning_rate": 3.768042872068869e-05, + "loss": 2.5704, + "step": 42120 + }, + { + "epoch": 0.73, + "learning_rate": 3.763567953754352e-05, + "loss": 2.5948, + "step": 42130 + }, + { + "epoch": 0.73, + "learning_rate": 3.75909507820206e-05, + "loss": 2.509, + "step": 42140 + }, + { + "epoch": 0.73, + "learning_rate": 3.7550712379776254e-05, + "loss": 2.5495, + "step": 42150 + }, + { + "epoch": 0.73, + "learning_rate": 3.750602247709373e-05, + "loss": 2.5668, + "step": 42160 + }, + { + "epoch": 0.73, + "learning_rate": 3.746135304450304e-05, + "loss": 2.5256, + "step": 42170 + }, + { + "epoch": 0.73, + "learning_rate": 3.7416704096635835e-05, + "loss": 2.5821, + "step": 42180 + }, + { + "epoch": 0.73, + "learning_rate": 3.737207564811694e-05, + "loss": 2.6688, + "step": 42190 + }, + { + "epoch": 0.73, + "learning_rate": 3.732746771356457e-05, + "loss": 2.5864, + "step": 42200 + }, + { + "epoch": 0.73, + "learning_rate": 3.728288030759022e-05, + "loss": 2.5649, + "step": 42210 + }, + { + "epoch": 0.73, + "learning_rate": 3.7238313444798564e-05, + "loss": 2.5278, + "step": 42220 + }, + { + "epoch": 0.73, + "learning_rate": 3.719376713978766e-05, + "loss": 2.5238, + "step": 42230 + }, + { + "epoch": 0.73, + "learning_rate": 3.714924140714881e-05, + "loss": 2.5411, + "step": 42240 + }, + { + "epoch": 0.73, + "learning_rate": 3.710473626146647e-05, + "loss": 2.5614, + "step": 42250 + }, + { + "epoch": 0.73, + "learning_rate": 3.7060251717318485e-05, + "loss": 2.5058, + "step": 42260 + }, + { + "epoch": 0.73, + "learning_rate": 3.701578778927595e-05, + "loss": 2.5624, + "step": 42270 + }, + { + "epoch": 0.73, + "learning_rate": 3.6971344491903024e-05, + "loss": 2.5472, + "step": 42280 + }, + { + "epoch": 0.73, + "learning_rate": 3.69269218397574e-05, + "loss": 2.6085, + "step": 42290 + }, + { + "epoch": 0.73, + "learning_rate": 3.6882519847389755e-05, + "loss": 2.5912, + "step": 42300 + }, + { + "epoch": 0.73, + "learning_rate": 3.683813852934411e-05, + "loss": 2.5217, + "step": 42310 + }, + { + "epoch": 0.73, + "learning_rate": 3.6793777900157767e-05, + "loss": 2.5269, + "step": 42320 + }, + { + "epoch": 0.73, + "learning_rate": 3.674943797436109e-05, + "loss": 2.5583, + "step": 42330 + }, + { + "epoch": 0.73, + "learning_rate": 3.670511876647779e-05, + "loss": 2.505, + "step": 42340 + }, + { + "epoch": 0.73, + "learning_rate": 3.66608202910248e-05, + "loss": 2.5887, + "step": 42350 + }, + { + "epoch": 0.73, + "learning_rate": 3.661654256251216e-05, + "loss": 2.6308, + "step": 42360 + }, + { + "epoch": 0.73, + "learning_rate": 3.6572285595443224e-05, + "loss": 2.4946, + "step": 42370 + }, + { + "epoch": 0.73, + "learning_rate": 3.652804940431445e-05, + "loss": 2.5464, + "step": 42380 + }, + { + "epoch": 0.73, + "learning_rate": 3.648383400361555e-05, + "loss": 2.4897, + "step": 42390 + }, + { + "epoch": 0.73, + "learning_rate": 3.643963940782945e-05, + "loss": 2.5281, + "step": 42400 + }, + { + "epoch": 0.73, + "learning_rate": 3.639546563143216e-05, + "loss": 2.5476, + "step": 42410 + }, + { + "epoch": 0.73, + "learning_rate": 3.635131268889297e-05, + "loss": 2.6139, + "step": 42420 + }, + { + "epoch": 0.73, + "learning_rate": 3.630718059467435e-05, + "loss": 2.5136, + "step": 42430 + }, + { + "epoch": 0.73, + "learning_rate": 3.6263069363231814e-05, + "loss": 2.4971, + "step": 42440 + }, + { + "epoch": 0.73, + "learning_rate": 3.621897900901419e-05, + "loss": 2.5659, + "step": 42450 + }, + { + "epoch": 0.73, + "learning_rate": 3.6174909546463377e-05, + "loss": 2.5226, + "step": 42460 + }, + { + "epoch": 0.73, + "learning_rate": 3.613086099001448e-05, + "loss": 2.5552, + "step": 42470 + }, + { + "epoch": 0.74, + "learning_rate": 3.608683335409577e-05, + "loss": 2.4933, + "step": 42480 + }, + { + "epoch": 0.74, + "learning_rate": 3.604282665312857e-05, + "loss": 2.4952, + "step": 42490 + }, + { + "epoch": 0.74, + "learning_rate": 3.599884090152743e-05, + "loss": 2.5154, + "step": 42500 + }, + { + "epoch": 0.74, + "learning_rate": 3.595487611370006e-05, + "loss": 2.5272, + "step": 42510 + }, + { + "epoch": 0.74, + "learning_rate": 3.591093230404718e-05, + "loss": 2.5415, + "step": 42520 + }, + { + "epoch": 0.74, + "learning_rate": 3.5867009486962764e-05, + "loss": 2.5929, + "step": 42530 + }, + { + "epoch": 0.74, + "learning_rate": 3.582310767683389e-05, + "loss": 2.5775, + "step": 42540 + }, + { + "epoch": 0.74, + "learning_rate": 3.577922688804066e-05, + "loss": 2.5516, + "step": 42550 + }, + { + "epoch": 0.74, + "learning_rate": 3.573536713495641e-05, + "loss": 2.5205, + "step": 42560 + }, + { + "epoch": 0.74, + "learning_rate": 3.5691528431947554e-05, + "loss": 2.5192, + "step": 42570 + }, + { + "epoch": 0.74, + "learning_rate": 3.564771079337351e-05, + "loss": 2.6706, + "step": 42580 + }, + { + "epoch": 0.74, + "learning_rate": 3.5603914233586945e-05, + "loss": 2.5791, + "step": 42590 + }, + { + "epoch": 0.74, + "learning_rate": 3.556013876693357e-05, + "loss": 2.4661, + "step": 42600 + }, + { + "epoch": 0.74, + "learning_rate": 3.5516384407752114e-05, + "loss": 2.5346, + "step": 42610 + }, + { + "epoch": 0.74, + "learning_rate": 3.547265117037449e-05, + "loss": 2.5704, + "step": 42620 + }, + { + "epoch": 0.74, + "learning_rate": 3.542893906912562e-05, + "loss": 2.4463, + "step": 42630 + }, + { + "epoch": 0.74, + "learning_rate": 3.5385248118323556e-05, + "loss": 2.5976, + "step": 42640 + }, + { + "epoch": 0.74, + "learning_rate": 3.5341578332279404e-05, + "loss": 2.6044, + "step": 42650 + }, + { + "epoch": 0.74, + "learning_rate": 3.529792972529732e-05, + "loss": 2.5707, + "step": 42660 + }, + { + "epoch": 0.74, + "learning_rate": 3.525430231167459e-05, + "loss": 2.5339, + "step": 42670 + }, + { + "epoch": 0.74, + "learning_rate": 3.5210696105701425e-05, + "loss": 2.5022, + "step": 42680 + }, + { + "epoch": 0.74, + "learning_rate": 3.516711112166121e-05, + "loss": 2.5007, + "step": 42690 + }, + { + "epoch": 0.74, + "learning_rate": 3.5123547373830367e-05, + "loss": 2.5258, + "step": 42700 + }, + { + "epoch": 0.74, + "learning_rate": 3.508000487647828e-05, + "loss": 2.5698, + "step": 42710 + }, + { + "epoch": 0.74, + "learning_rate": 3.503648364386745e-05, + "loss": 2.4969, + "step": 42720 + }, + { + "epoch": 0.74, + "learning_rate": 3.4992983690253426e-05, + "loss": 2.6348, + "step": 42730 + }, + { + "epoch": 0.74, + "learning_rate": 3.4949505029884666e-05, + "loss": 2.5165, + "step": 42740 + }, + { + "epoch": 0.74, + "learning_rate": 3.4906047677002795e-05, + "loss": 2.5093, + "step": 42750 + }, + { + "epoch": 0.74, + "learning_rate": 3.486261164584241e-05, + "loss": 2.5023, + "step": 42760 + }, + { + "epoch": 0.74, + "learning_rate": 3.481919695063108e-05, + "loss": 2.5034, + "step": 42770 + }, + { + "epoch": 0.74, + "learning_rate": 3.477580360558942e-05, + "loss": 2.5827, + "step": 42780 + }, + { + "epoch": 0.74, + "learning_rate": 3.473243162493111e-05, + "loss": 2.5877, + "step": 42790 + }, + { + "epoch": 0.74, + "learning_rate": 3.468908102286269e-05, + "loss": 2.6437, + "step": 42800 + }, + { + "epoch": 0.74, + "learning_rate": 3.4645751813583825e-05, + "loss": 2.5462, + "step": 42810 + }, + { + "epoch": 0.74, + "learning_rate": 3.460244401128716e-05, + "loss": 2.5516, + "step": 42820 + }, + { + "epoch": 0.74, + "learning_rate": 3.455915763015819e-05, + "loss": 2.4767, + "step": 42830 + }, + { + "epoch": 0.74, + "learning_rate": 3.451589268437564e-05, + "loss": 2.559, + "step": 42840 + }, + { + "epoch": 0.74, + "learning_rate": 3.4472649188110974e-05, + "loss": 2.5323, + "step": 42850 + }, + { + "epoch": 0.74, + "learning_rate": 3.442942715552876e-05, + "loss": 2.4977, + "step": 42860 + }, + { + "epoch": 0.74, + "learning_rate": 3.4386226600786555e-05, + "loss": 2.5363, + "step": 42870 + }, + { + "epoch": 0.74, + "learning_rate": 3.434304753803473e-05, + "loss": 2.5781, + "step": 42880 + }, + { + "epoch": 0.74, + "learning_rate": 3.429988998141679e-05, + "loss": 2.5571, + "step": 42890 + }, + { + "epoch": 0.74, + "learning_rate": 3.425675394506914e-05, + "loss": 2.4743, + "step": 42900 + }, + { + "epoch": 0.74, + "learning_rate": 3.4213639443121045e-05, + "loss": 2.5738, + "step": 42910 + }, + { + "epoch": 0.74, + "learning_rate": 3.417054648969488e-05, + "loss": 2.5596, + "step": 42920 + }, + { + "epoch": 0.74, + "learning_rate": 3.412747509890577e-05, + "loss": 2.5569, + "step": 42930 + }, + { + "epoch": 0.74, + "learning_rate": 3.4084425284861954e-05, + "loss": 2.444, + "step": 42940 + }, + { + "epoch": 0.74, + "learning_rate": 3.4041397061664535e-05, + "loss": 2.5074, + "step": 42950 + }, + { + "epoch": 0.74, + "learning_rate": 3.399839044340748e-05, + "loss": 2.5952, + "step": 42960 + }, + { + "epoch": 0.74, + "learning_rate": 3.3955405444177766e-05, + "loss": 2.5129, + "step": 42970 + }, + { + "epoch": 0.74, + "learning_rate": 3.3912442078055305e-05, + "loss": 2.5912, + "step": 42980 + }, + { + "epoch": 0.74, + "learning_rate": 3.3869500359112796e-05, + "loss": 2.5266, + "step": 42990 + }, + { + "epoch": 0.74, + "learning_rate": 3.382658030141598e-05, + "loss": 2.5444, + "step": 43000 + }, + { + "epoch": 0.74, + "learning_rate": 3.3783681919023424e-05, + "loss": 2.4989, + "step": 43010 + }, + { + "epoch": 0.74, + "learning_rate": 3.374080522598666e-05, + "loss": 2.4919, + "step": 43020 + }, + { + "epoch": 0.74, + "learning_rate": 3.369795023635008e-05, + "loss": 2.4714, + "step": 43030 + }, + { + "epoch": 0.74, + "learning_rate": 3.3655116964150925e-05, + "loss": 2.5057, + "step": 43040 + }, + { + "epoch": 0.75, + "learning_rate": 3.361230542341937e-05, + "loss": 2.5288, + "step": 43050 + }, + { + "epoch": 0.75, + "learning_rate": 3.356951562817852e-05, + "loss": 2.5934, + "step": 43060 + }, + { + "epoch": 0.75, + "learning_rate": 3.352674759244423e-05, + "loss": 2.5628, + "step": 43070 + }, + { + "epoch": 0.75, + "learning_rate": 3.3484001330225325e-05, + "loss": 2.5307, + "step": 43080 + }, + { + "epoch": 0.75, + "learning_rate": 3.3441276855523506e-05, + "loss": 2.544, + "step": 43090 + }, + { + "epoch": 0.75, + "learning_rate": 3.339857418233325e-05, + "loss": 2.5073, + "step": 43100 + }, + { + "epoch": 0.75, + "learning_rate": 3.3355893324641983e-05, + "loss": 2.4149, + "step": 43110 + }, + { + "epoch": 0.75, + "learning_rate": 3.3313234296429965e-05, + "loss": 2.5447, + "step": 43120 + }, + { + "epoch": 0.75, + "learning_rate": 3.327059711167023e-05, + "loss": 2.5465, + "step": 43130 + }, + { + "epoch": 0.75, + "learning_rate": 3.322798178432877e-05, + "loss": 2.4919, + "step": 43140 + }, + { + "epoch": 0.75, + "learning_rate": 3.3185388328364366e-05, + "loss": 2.6218, + "step": 43150 + }, + { + "epoch": 0.75, + "learning_rate": 3.314281675772858e-05, + "loss": 2.5094, + "step": 43160 + }, + { + "epoch": 0.75, + "learning_rate": 3.310026708636592e-05, + "loss": 2.5572, + "step": 43170 + }, + { + "epoch": 0.75, + "learning_rate": 3.305773932821361e-05, + "loss": 2.5184, + "step": 43180 + }, + { + "epoch": 0.75, + "learning_rate": 3.301523349720178e-05, + "loss": 2.5419, + "step": 43190 + }, + { + "epoch": 0.75, + "learning_rate": 3.297274960725332e-05, + "loss": 2.5569, + "step": 43200 + }, + { + "epoch": 0.75, + "learning_rate": 3.2930287672283975e-05, + "loss": 2.6031, + "step": 43210 + }, + { + "epoch": 0.75, + "learning_rate": 3.288784770620231e-05, + "loss": 2.4799, + "step": 43220 + }, + { + "epoch": 0.75, + "learning_rate": 3.2845429722909595e-05, + "loss": 2.4468, + "step": 43230 + }, + { + "epoch": 0.75, + "learning_rate": 3.2803033736300006e-05, + "loss": 2.5641, + "step": 43240 + }, + { + "epoch": 0.75, + "learning_rate": 3.2760659760260514e-05, + "loss": 2.5145, + "step": 43250 + }, + { + "epoch": 0.75, + "learning_rate": 3.2718307808670765e-05, + "loss": 2.5873, + "step": 43260 + }, + { + "epoch": 0.75, + "learning_rate": 3.267597789540332e-05, + "loss": 2.3454, + "step": 43270 + }, + { + "epoch": 0.75, + "learning_rate": 3.263367003432348e-05, + "loss": 2.517, + "step": 43280 + }, + { + "epoch": 0.75, + "learning_rate": 3.259138423928927e-05, + "loss": 2.4778, + "step": 43290 + }, + { + "epoch": 0.75, + "learning_rate": 3.254912052415154e-05, + "loss": 2.4511, + "step": 43300 + }, + { + "epoch": 0.75, + "learning_rate": 3.250687890275396e-05, + "loss": 2.6253, + "step": 43310 + }, + { + "epoch": 0.75, + "learning_rate": 3.246465938893282e-05, + "loss": 2.5494, + "step": 43320 + }, + { + "epoch": 0.75, + "learning_rate": 3.242246199651727e-05, + "loss": 2.4524, + "step": 43330 + }, + { + "epoch": 0.75, + "learning_rate": 3.2380286739329246e-05, + "loss": 2.4994, + "step": 43340 + }, + { + "epoch": 0.75, + "learning_rate": 3.233813363118331e-05, + "loss": 2.5236, + "step": 43350 + }, + { + "epoch": 0.75, + "learning_rate": 3.229600268588687e-05, + "loss": 2.5285, + "step": 43360 + }, + { + "epoch": 0.75, + "learning_rate": 3.225389391724007e-05, + "loss": 2.5124, + "step": 43370 + }, + { + "epoch": 0.75, + "learning_rate": 3.221180733903569e-05, + "loss": 2.6231, + "step": 43380 + }, + { + "epoch": 0.75, + "learning_rate": 3.216974296505943e-05, + "loss": 2.5323, + "step": 43390 + }, + { + "epoch": 0.75, + "learning_rate": 3.212770080908951e-05, + "loss": 2.4653, + "step": 43400 + }, + { + "epoch": 0.75, + "learning_rate": 3.2085680884896996e-05, + "loss": 2.5722, + "step": 43410 + }, + { + "epoch": 0.75, + "learning_rate": 3.204368320624568e-05, + "loss": 2.592, + "step": 43420 + }, + { + "epoch": 0.75, + "learning_rate": 3.2001707786891956e-05, + "loss": 2.5583, + "step": 43430 + }, + { + "epoch": 0.75, + "learning_rate": 3.1959754640585024e-05, + "loss": 2.5034, + "step": 43440 + }, + { + "epoch": 0.75, + "learning_rate": 3.1917823781066825e-05, + "loss": 2.4903, + "step": 43450 + }, + { + "epoch": 0.75, + "learning_rate": 3.187591522207185e-05, + "loss": 2.5095, + "step": 43460 + }, + { + "epoch": 0.75, + "learning_rate": 3.183402897732745e-05, + "loss": 2.5087, + "step": 43470 + }, + { + "epoch": 0.75, + "learning_rate": 3.179216506055351e-05, + "loss": 2.4659, + "step": 43480 + }, + { + "epoch": 0.75, + "learning_rate": 3.175032348546274e-05, + "loss": 2.5296, + "step": 43490 + }, + { + "epoch": 0.75, + "learning_rate": 3.17085042657605e-05, + "loss": 2.4899, + "step": 43500 + }, + { + "epoch": 0.75, + "learning_rate": 3.1666707415144724e-05, + "loss": 2.601, + "step": 43510 + }, + { + "epoch": 0.75, + "learning_rate": 3.1624932947306155e-05, + "loss": 2.588, + "step": 43520 + }, + { + "epoch": 0.75, + "learning_rate": 3.158318087592814e-05, + "loss": 2.6258, + "step": 43530 + }, + { + "epoch": 0.75, + "learning_rate": 3.1541451214686676e-05, + "loss": 2.5327, + "step": 43540 + }, + { + "epoch": 0.75, + "learning_rate": 3.149974397725044e-05, + "loss": 2.4026, + "step": 43550 + }, + { + "epoch": 0.75, + "learning_rate": 3.145805917728079e-05, + "loss": 2.4658, + "step": 43560 + }, + { + "epoch": 0.75, + "learning_rate": 3.141639682843167e-05, + "loss": 2.5559, + "step": 43570 + }, + { + "epoch": 0.75, + "learning_rate": 3.137475694434978e-05, + "loss": 2.5742, + "step": 43580 + }, + { + "epoch": 0.75, + "learning_rate": 3.13331395386743e-05, + "loss": 2.5556, + "step": 43590 + }, + { + "epoch": 0.75, + "learning_rate": 3.1291544625037196e-05, + "loss": 2.5352, + "step": 43600 + }, + { + "epoch": 0.75, + "learning_rate": 3.124997221706302e-05, + "loss": 2.6444, + "step": 43610 + }, + { + "epoch": 0.75, + "learning_rate": 3.120842232836888e-05, + "loss": 2.5285, + "step": 43620 + }, + { + "epoch": 0.76, + "learning_rate": 3.11668949725646e-05, + "loss": 2.5036, + "step": 43630 + }, + { + "epoch": 0.76, + "learning_rate": 3.112539016325262e-05, + "loss": 2.5238, + "step": 43640 + }, + { + "epoch": 0.76, + "learning_rate": 3.108390791402791e-05, + "loss": 2.4695, + "step": 43650 + }, + { + "epoch": 0.76, + "learning_rate": 3.104244823847814e-05, + "loss": 2.5193, + "step": 43660 + }, + { + "epoch": 0.76, + "learning_rate": 3.100101115018357e-05, + "loss": 2.5707, + "step": 43670 + }, + { + "epoch": 0.76, + "learning_rate": 3.0959596662716996e-05, + "loss": 2.4961, + "step": 43680 + }, + { + "epoch": 0.76, + "learning_rate": 3.091820478964388e-05, + "loss": 2.5322, + "step": 43690 + }, + { + "epoch": 0.76, + "learning_rate": 3.0876835544522284e-05, + "loss": 2.4882, + "step": 43700 + }, + { + "epoch": 0.76, + "learning_rate": 3.083548894090279e-05, + "loss": 2.5493, + "step": 43710 + }, + { + "epoch": 0.76, + "learning_rate": 3.079416499232865e-05, + "loss": 2.5787, + "step": 43720 + }, + { + "epoch": 0.76, + "learning_rate": 3.0752863712335566e-05, + "loss": 2.4588, + "step": 43730 + }, + { + "epoch": 0.76, + "learning_rate": 3.071158511445194e-05, + "loss": 2.4853, + "step": 43740 + }, + { + "epoch": 0.76, + "learning_rate": 3.067032921219876e-05, + "loss": 2.6028, + "step": 43750 + }, + { + "epoch": 0.76, + "learning_rate": 3.062909601908944e-05, + "loss": 2.5888, + "step": 43760 + }, + { + "epoch": 0.76, + "learning_rate": 3.0587885548630116e-05, + "loss": 2.4672, + "step": 43770 + }, + { + "epoch": 0.76, + "learning_rate": 3.054669781431932e-05, + "loss": 2.5515, + "step": 43780 + }, + { + "epoch": 0.76, + "learning_rate": 3.0505532829648242e-05, + "loss": 2.4605, + "step": 43790 + }, + { + "epoch": 0.76, + "learning_rate": 3.0464390608100655e-05, + "loss": 2.5269, + "step": 43800 + }, + { + "epoch": 0.76, + "learning_rate": 3.0423271163152743e-05, + "loss": 2.4804, + "step": 43810 + }, + { + "epoch": 0.76, + "learning_rate": 3.0382174508273342e-05, + "loss": 2.6437, + "step": 43820 + }, + { + "epoch": 0.76, + "learning_rate": 3.0341100656923805e-05, + "loss": 2.5205, + "step": 43830 + }, + { + "epoch": 0.76, + "learning_rate": 3.0300049622557936e-05, + "loss": 2.5718, + "step": 43840 + }, + { + "epoch": 0.76, + "learning_rate": 3.025902141862217e-05, + "loss": 2.5009, + "step": 43850 + }, + { + "epoch": 0.76, + "learning_rate": 3.0218016058555443e-05, + "loss": 2.5628, + "step": 43860 + }, + { + "epoch": 0.76, + "learning_rate": 3.0177033555789125e-05, + "loss": 2.5749, + "step": 43870 + }, + { + "epoch": 0.76, + "learning_rate": 3.0136073923747177e-05, + "loss": 2.533, + "step": 43880 + }, + { + "epoch": 0.76, + "learning_rate": 3.00951371758461e-05, + "loss": 2.5292, + "step": 43890 + }, + { + "epoch": 0.76, + "learning_rate": 3.005422332549478e-05, + "loss": 2.53, + "step": 43900 + }, + { + "epoch": 0.76, + "learning_rate": 3.0013332386094707e-05, + "loss": 2.6632, + "step": 43910 + }, + { + "epoch": 0.76, + "learning_rate": 2.9972464371039855e-05, + "loss": 2.548, + "step": 43920 + }, + { + "epoch": 0.76, + "learning_rate": 2.9931619293716584e-05, + "loss": 2.4937, + "step": 43930 + }, + { + "epoch": 0.76, + "learning_rate": 2.9890797167503937e-05, + "loss": 2.5871, + "step": 43940 + }, + { + "epoch": 0.76, + "learning_rate": 2.984999800577324e-05, + "loss": 2.542, + "step": 43950 + }, + { + "epoch": 0.76, + "learning_rate": 2.98092218218884e-05, + "loss": 2.4845, + "step": 43960 + }, + { + "epoch": 0.76, + "learning_rate": 2.9768468629205825e-05, + "loss": 2.5843, + "step": 43970 + }, + { + "epoch": 0.76, + "learning_rate": 2.972773844107429e-05, + "loss": 2.5331, + "step": 43980 + }, + { + "epoch": 0.76, + "learning_rate": 2.968703127083511e-05, + "loss": 2.5705, + "step": 43990 + }, + { + "epoch": 0.76, + "learning_rate": 2.9646347131822074e-05, + "loss": 2.519, + "step": 44000 + }, + { + "epoch": 0.76, + "learning_rate": 2.9605686037361324e-05, + "loss": 2.5676, + "step": 44010 + }, + { + "epoch": 0.76, + "learning_rate": 2.956504800077161e-05, + "loss": 2.586, + "step": 44020 + }, + { + "epoch": 0.76, + "learning_rate": 2.9524433035363973e-05, + "loss": 2.5514, + "step": 44030 + }, + { + "epoch": 0.76, + "learning_rate": 2.9483841154441993e-05, + "loss": 2.4804, + "step": 44040 + }, + { + "epoch": 0.76, + "learning_rate": 2.944327237130169e-05, + "loss": 2.566, + "step": 44050 + }, + { + "epoch": 0.76, + "learning_rate": 2.940272669923144e-05, + "loss": 2.5655, + "step": 44060 + }, + { + "epoch": 0.76, + "learning_rate": 2.936220415151214e-05, + "loss": 2.471, + "step": 44070 + }, + { + "epoch": 0.76, + "learning_rate": 2.932170474141709e-05, + "loss": 2.5666, + "step": 44080 + }, + { + "epoch": 0.76, + "learning_rate": 2.928122848221193e-05, + "loss": 2.5233, + "step": 44090 + }, + { + "epoch": 0.76, + "learning_rate": 2.9240775387154818e-05, + "loss": 2.6073, + "step": 44100 + }, + { + "epoch": 0.76, + "learning_rate": 2.9200345469496294e-05, + "loss": 2.4669, + "step": 44110 + }, + { + "epoch": 0.76, + "learning_rate": 2.91599387424793e-05, + "loss": 2.3816, + "step": 44120 + }, + { + "epoch": 0.76, + "learning_rate": 2.91195552193392e-05, + "loss": 2.5423, + "step": 44130 + }, + { + "epoch": 0.76, + "learning_rate": 2.9079194913303688e-05, + "loss": 2.4817, + "step": 44140 + }, + { + "epoch": 0.76, + "learning_rate": 2.9038857837592938e-05, + "loss": 2.576, + "step": 44150 + }, + { + "epoch": 0.76, + "learning_rate": 2.8998544005419504e-05, + "loss": 2.5877, + "step": 44160 + }, + { + "epoch": 0.76, + "learning_rate": 2.8958253429988237e-05, + "loss": 2.6272, + "step": 44170 + }, + { + "epoch": 0.76, + "learning_rate": 2.8917986124496488e-05, + "loss": 2.5159, + "step": 44180 + }, + { + "epoch": 0.76, + "learning_rate": 2.8877742102133953e-05, + "loss": 2.5218, + "step": 44190 + }, + { + "epoch": 0.76, + "learning_rate": 2.8837521376082622e-05, + "loss": 2.5569, + "step": 44200 + }, + { + "epoch": 0.77, + "learning_rate": 2.8797323959516943e-05, + "loss": 2.5387, + "step": 44210 + }, + { + "epoch": 0.77, + "learning_rate": 2.8757149865603738e-05, + "loss": 2.5285, + "step": 44220 + }, + { + "epoch": 0.77, + "learning_rate": 2.8716999107502095e-05, + "loss": 2.6267, + "step": 44230 + }, + { + "epoch": 0.77, + "learning_rate": 2.867687169836355e-05, + "loss": 2.5713, + "step": 44240 + }, + { + "epoch": 0.77, + "learning_rate": 2.8636767651331975e-05, + "loss": 2.5927, + "step": 44250 + }, + { + "epoch": 0.77, + "learning_rate": 2.859668697954352e-05, + "loss": 2.5991, + "step": 44260 + }, + { + "epoch": 0.77, + "learning_rate": 2.855662969612679e-05, + "loss": 2.5914, + "step": 44270 + }, + { + "epoch": 0.77, + "learning_rate": 2.8516595814202594e-05, + "loss": 2.4942, + "step": 44280 + }, + { + "epoch": 0.77, + "learning_rate": 2.8476585346884187e-05, + "loss": 2.5438, + "step": 44290 + }, + { + "epoch": 0.77, + "learning_rate": 2.8436598307277173e-05, + "loss": 2.5113, + "step": 44300 + }, + { + "epoch": 0.77, + "learning_rate": 2.8396634708479365e-05, + "loss": 2.6933, + "step": 44310 + }, + { + "epoch": 0.77, + "learning_rate": 2.8356694563580997e-05, + "loss": 2.497, + "step": 44320 + }, + { + "epoch": 0.77, + "learning_rate": 2.8316777885664548e-05, + "loss": 2.491, + "step": 44330 + }, + { + "epoch": 0.77, + "learning_rate": 2.827688468780485e-05, + "loss": 2.4912, + "step": 44340 + }, + { + "epoch": 0.77, + "learning_rate": 2.8237014983069097e-05, + "loss": 2.481, + "step": 44350 + }, + { + "epoch": 0.77, + "learning_rate": 2.8197168784516658e-05, + "loss": 2.5018, + "step": 44360 + }, + { + "epoch": 0.77, + "learning_rate": 2.815734610519931e-05, + "loss": 2.5099, + "step": 44370 + }, + { + "epoch": 0.77, + "learning_rate": 2.8117546958161124e-05, + "loss": 2.6286, + "step": 44380 + }, + { + "epoch": 0.77, + "learning_rate": 2.8077771356438364e-05, + "loss": 2.5131, + "step": 44390 + }, + { + "epoch": 0.77, + "learning_rate": 2.8038019313059695e-05, + "loss": 2.5061, + "step": 44400 + }, + { + "epoch": 0.77, + "learning_rate": 2.7998290841046025e-05, + "loss": 2.5672, + "step": 44410 + }, + { + "epoch": 0.77, + "learning_rate": 2.79585859534105e-05, + "loss": 2.5632, + "step": 44420 + }, + { + "epoch": 0.77, + "learning_rate": 2.7918904663158583e-05, + "loss": 2.4887, + "step": 44430 + }, + { + "epoch": 0.77, + "learning_rate": 2.7879246983288044e-05, + "loss": 2.5168, + "step": 44440 + }, + { + "epoch": 0.77, + "learning_rate": 2.7839612926788827e-05, + "loss": 2.4818, + "step": 44450 + }, + { + "epoch": 0.77, + "learning_rate": 2.7800002506643196e-05, + "loss": 2.5514, + "step": 44460 + }, + { + "epoch": 0.77, + "learning_rate": 2.7760415735825707e-05, + "loss": 2.4487, + "step": 44470 + }, + { + "epoch": 0.77, + "learning_rate": 2.772085262730303e-05, + "loss": 2.4403, + "step": 44480 + }, + { + "epoch": 0.77, + "learning_rate": 2.7681313194034307e-05, + "loss": 2.5543, + "step": 44490 + }, + { + "epoch": 0.77, + "learning_rate": 2.7641797448970707e-05, + "loss": 2.5828, + "step": 44500 + }, + { + "epoch": 0.77, + "learning_rate": 2.7602305405055762e-05, + "loss": 2.4763, + "step": 44510 + }, + { + "epoch": 0.77, + "learning_rate": 2.7562837075225234e-05, + "loss": 2.509, + "step": 44520 + }, + { + "epoch": 0.77, + "learning_rate": 2.752339247240705e-05, + "loss": 2.5391, + "step": 44530 + }, + { + "epoch": 0.77, + "learning_rate": 2.748397160952143e-05, + "loss": 2.4682, + "step": 44540 + }, + { + "epoch": 0.77, + "learning_rate": 2.7444574499480825e-05, + "loss": 2.5257, + "step": 44550 + }, + { + "epoch": 0.77, + "learning_rate": 2.7405201155189818e-05, + "loss": 2.6108, + "step": 44560 + }, + { + "epoch": 0.77, + "learning_rate": 2.7365851589545334e-05, + "loss": 2.5159, + "step": 44570 + }, + { + "epoch": 0.77, + "learning_rate": 2.7326525815436377e-05, + "loss": 2.5781, + "step": 44580 + }, + { + "epoch": 0.77, + "learning_rate": 2.728722384574426e-05, + "loss": 2.5066, + "step": 44590 + }, + { + "epoch": 0.77, + "learning_rate": 2.7247945693342468e-05, + "loss": 2.5795, + "step": 44600 + }, + { + "epoch": 0.77, + "learning_rate": 2.7208691371096662e-05, + "loss": 2.4539, + "step": 44610 + }, + { + "epoch": 0.77, + "learning_rate": 2.7169460891864706e-05, + "loss": 2.4463, + "step": 44620 + }, + { + "epoch": 0.77, + "learning_rate": 2.7130254268496713e-05, + "loss": 2.5823, + "step": 44630 + }, + { + "epoch": 0.77, + "learning_rate": 2.7091071513834854e-05, + "loss": 2.5011, + "step": 44640 + }, + { + "epoch": 0.77, + "learning_rate": 2.7051912640713604e-05, + "loss": 2.5509, + "step": 44650 + }, + { + "epoch": 0.77, + "learning_rate": 2.7012777661959576e-05, + "loss": 2.6694, + "step": 44660 + }, + { + "epoch": 0.77, + "learning_rate": 2.697366659039153e-05, + "loss": 2.5975, + "step": 44670 + }, + { + "epoch": 0.77, + "learning_rate": 2.6934579438820472e-05, + "loss": 2.5635, + "step": 44680 + }, + { + "epoch": 0.77, + "learning_rate": 2.6895516220049433e-05, + "loss": 2.5176, + "step": 44690 + }, + { + "epoch": 0.77, + "learning_rate": 2.685647694687373e-05, + "loss": 2.5055, + "step": 44700 + }, + { + "epoch": 0.77, + "learning_rate": 2.6817461632080832e-05, + "loss": 2.4566, + "step": 44710 + }, + { + "epoch": 0.77, + "learning_rate": 2.6778470288450242e-05, + "loss": 2.5184, + "step": 44720 + }, + { + "epoch": 0.77, + "learning_rate": 2.6739502928753747e-05, + "loss": 2.5605, + "step": 44730 + }, + { + "epoch": 0.77, + "learning_rate": 2.6700559565755234e-05, + "loss": 2.5384, + "step": 44740 + }, + { + "epoch": 0.77, + "learning_rate": 2.666164021221067e-05, + "loss": 2.4972, + "step": 44750 + }, + { + "epoch": 0.77, + "learning_rate": 2.6622744880868243e-05, + "loss": 2.4705, + "step": 44760 + }, + { + "epoch": 0.77, + "learning_rate": 2.6583873584468256e-05, + "loss": 2.4942, + "step": 44770 + }, + { + "epoch": 0.77, + "learning_rate": 2.6545026335743062e-05, + "loss": 2.5046, + "step": 44780 + }, + { + "epoch": 0.78, + "learning_rate": 2.650620314741723e-05, + "loss": 2.4633, + "step": 44790 + }, + { + "epoch": 0.78, + "learning_rate": 2.6467404032207443e-05, + "loss": 2.5678, + "step": 44800 + }, + { + "epoch": 0.78, + "learning_rate": 2.6428629002822404e-05, + "loss": 2.5394, + "step": 44810 + }, + { + "epoch": 0.78, + "learning_rate": 2.6389878071963048e-05, + "loss": 2.5427, + "step": 44820 + }, + { + "epoch": 0.78, + "learning_rate": 2.6351151252322305e-05, + "loss": 2.5705, + "step": 44830 + }, + { + "epoch": 0.78, + "learning_rate": 2.631244855658527e-05, + "loss": 2.4733, + "step": 44840 + }, + { + "epoch": 0.78, + "learning_rate": 2.6273769997429208e-05, + "loss": 2.506, + "step": 44850 + }, + { + "epoch": 0.78, + "learning_rate": 2.6235115587523306e-05, + "loss": 2.5642, + "step": 44860 + }, + { + "epoch": 0.78, + "learning_rate": 2.6196485339529008e-05, + "loss": 2.5215, + "step": 44870 + }, + { + "epoch": 0.78, + "learning_rate": 2.6157879266099695e-05, + "loss": 2.562, + "step": 44880 + }, + { + "epoch": 0.78, + "learning_rate": 2.6119297379880935e-05, + "loss": 2.5564, + "step": 44890 + }, + { + "epoch": 0.78, + "learning_rate": 2.608073969351037e-05, + "loss": 2.639, + "step": 44900 + }, + { + "epoch": 0.78, + "learning_rate": 2.6042206219617638e-05, + "loss": 2.528, + "step": 44910 + }, + { + "epoch": 0.78, + "learning_rate": 2.600369697082452e-05, + "loss": 2.4309, + "step": 44920 + }, + { + "epoch": 0.78, + "learning_rate": 2.596521195974486e-05, + "loss": 2.5085, + "step": 44930 + }, + { + "epoch": 0.78, + "learning_rate": 2.592675119898449e-05, + "loss": 2.4627, + "step": 44940 + }, + { + "epoch": 0.78, + "learning_rate": 2.588831470114137e-05, + "loss": 2.5464, + "step": 44950 + }, + { + "epoch": 0.78, + "learning_rate": 2.5849902478805533e-05, + "loss": 2.5323, + "step": 44960 + }, + { + "epoch": 0.78, + "learning_rate": 2.5811514544558946e-05, + "loss": 2.5573, + "step": 44970 + }, + { + "epoch": 0.78, + "learning_rate": 2.577315091097574e-05, + "loss": 2.6269, + "step": 44980 + }, + { + "epoch": 0.78, + "learning_rate": 2.5734811590622054e-05, + "loss": 2.5644, + "step": 44990 + }, + { + "epoch": 0.78, + "learning_rate": 2.5696496596055987e-05, + "loss": 2.5012, + "step": 45000 + }, + { + "epoch": 0.78, + "learning_rate": 2.5658205939827773e-05, + "loss": 2.5699, + "step": 45010 + }, + { + "epoch": 0.78, + "learning_rate": 2.561993963447965e-05, + "loss": 2.5635, + "step": 45020 + }, + { + "epoch": 0.78, + "learning_rate": 2.558169769254579e-05, + "loss": 2.527, + "step": 45030 + }, + { + "epoch": 0.78, + "learning_rate": 2.5543480126552566e-05, + "loss": 2.5896, + "step": 45040 + }, + { + "epoch": 0.78, + "learning_rate": 2.550528694901817e-05, + "loss": 2.5352, + "step": 45050 + }, + { + "epoch": 0.78, + "learning_rate": 2.546711817245292e-05, + "loss": 2.4838, + "step": 45060 + }, + { + "epoch": 0.78, + "learning_rate": 2.5428973809359147e-05, + "loss": 2.5489, + "step": 45070 + }, + { + "epoch": 0.78, + "learning_rate": 2.539085387223109e-05, + "loss": 2.5443, + "step": 45080 + }, + { + "epoch": 0.78, + "learning_rate": 2.5352758373555073e-05, + "loss": 2.557, + "step": 45090 + }, + { + "epoch": 0.78, + "learning_rate": 2.531468732580945e-05, + "loss": 2.4796, + "step": 45100 + }, + { + "epoch": 0.78, + "learning_rate": 2.527664074146442e-05, + "loss": 2.4994, + "step": 45110 + }, + { + "epoch": 0.78, + "learning_rate": 2.5238618632982326e-05, + "loss": 2.5368, + "step": 45120 + }, + { + "epoch": 0.78, + "learning_rate": 2.520062101281736e-05, + "loss": 2.4717, + "step": 45130 + }, + { + "epoch": 0.78, + "learning_rate": 2.51626478934158e-05, + "loss": 2.5297, + "step": 45140 + }, + { + "epoch": 0.78, + "learning_rate": 2.512469928721588e-05, + "loss": 2.4936, + "step": 45150 + }, + { + "epoch": 0.78, + "learning_rate": 2.5086775206647718e-05, + "loss": 2.6056, + "step": 45160 + }, + { + "epoch": 0.78, + "learning_rate": 2.5048875664133497e-05, + "loss": 2.5431, + "step": 45170 + }, + { + "epoch": 0.78, + "learning_rate": 2.5011000672087347e-05, + "loss": 2.4713, + "step": 45180 + }, + { + "epoch": 0.78, + "learning_rate": 2.497315024291529e-05, + "loss": 2.5066, + "step": 45190 + }, + { + "epoch": 0.78, + "learning_rate": 2.493532438901538e-05, + "loss": 2.4532, + "step": 45200 + }, + { + "epoch": 0.78, + "learning_rate": 2.4897523122777568e-05, + "loss": 2.527, + "step": 45210 + }, + { + "epoch": 0.78, + "learning_rate": 2.48597464565838e-05, + "loss": 2.5489, + "step": 45220 + }, + { + "epoch": 0.78, + "learning_rate": 2.4821994402807945e-05, + "loss": 2.5762, + "step": 45230 + }, + { + "epoch": 0.78, + "learning_rate": 2.478426697381576e-05, + "loss": 2.5533, + "step": 45240 + }, + { + "epoch": 0.78, + "learning_rate": 2.4746564181965004e-05, + "loss": 2.5348, + "step": 45250 + }, + { + "epoch": 0.78, + "learning_rate": 2.470888603960536e-05, + "loss": 2.5625, + "step": 45260 + }, + { + "epoch": 0.78, + "learning_rate": 2.4671232559078374e-05, + "loss": 2.4244, + "step": 45270 + }, + { + "epoch": 0.78, + "learning_rate": 2.4633603752717572e-05, + "loss": 2.5248, + "step": 45280 + }, + { + "epoch": 0.78, + "learning_rate": 2.4595999632848432e-05, + "loss": 2.502, + "step": 45290 + }, + { + "epoch": 0.78, + "learning_rate": 2.4558420211788225e-05, + "loss": 2.52, + "step": 45300 + }, + { + "epoch": 0.78, + "learning_rate": 2.4520865501846234e-05, + "loss": 2.5576, + "step": 45310 + }, + { + "epoch": 0.78, + "learning_rate": 2.4483335515323646e-05, + "loss": 2.5337, + "step": 45320 + }, + { + "epoch": 0.78, + "learning_rate": 2.444583026451348e-05, + "loss": 2.538, + "step": 45330 + }, + { + "epoch": 0.78, + "learning_rate": 2.44083497617007e-05, + "loss": 2.5528, + "step": 45340 + }, + { + "epoch": 0.78, + "learning_rate": 2.4370894019162204e-05, + "loss": 2.6211, + "step": 45350 + }, + { + "epoch": 0.79, + "learning_rate": 2.433346304916667e-05, + "loss": 2.5674, + "step": 45360 + }, + { + "epoch": 0.79, + "learning_rate": 2.429605686397479e-05, + "loss": 2.4276, + "step": 45370 + }, + { + "epoch": 0.79, + "learning_rate": 2.4258675475839e-05, + "loss": 2.5568, + "step": 45380 + }, + { + "epoch": 0.79, + "learning_rate": 2.4221318897003707e-05, + "loss": 2.52, + "step": 45390 + }, + { + "epoch": 0.79, + "learning_rate": 2.4183987139705246e-05, + "loss": 2.6002, + "step": 45400 + }, + { + "epoch": 0.79, + "learning_rate": 2.4146680216171657e-05, + "loss": 2.5582, + "step": 45410 + }, + { + "epoch": 0.79, + "learning_rate": 2.4109398138622998e-05, + "loss": 2.4476, + "step": 45420 + }, + { + "epoch": 0.79, + "learning_rate": 2.407586552223956e-05, + "loss": 2.5274, + "step": 45430 + }, + { + "epoch": 0.79, + "learning_rate": 2.4038630685699127e-05, + "loss": 2.5803, + "step": 45440 + }, + { + "epoch": 0.79, + "learning_rate": 2.400142073053555e-05, + "loss": 2.5087, + "step": 45450 + }, + { + "epoch": 0.79, + "learning_rate": 2.3964235668937097e-05, + "loss": 2.5109, + "step": 45460 + }, + { + "epoch": 0.79, + "learning_rate": 2.3927075513083775e-05, + "loss": 2.4902, + "step": 45470 + }, + { + "epoch": 0.79, + "learning_rate": 2.388994027514757e-05, + "loss": 2.5645, + "step": 45480 + }, + { + "epoch": 0.79, + "learning_rate": 2.385282996729219e-05, + "loss": 2.5272, + "step": 45490 + }, + { + "epoch": 0.79, + "learning_rate": 2.3815744601673252e-05, + "loss": 2.5292, + "step": 45500 + }, + { + "epoch": 0.79, + "learning_rate": 2.3778684190438204e-05, + "loss": 2.5316, + "step": 45510 + }, + { + "epoch": 0.79, + "learning_rate": 2.374164874572622e-05, + "loss": 2.6085, + "step": 45520 + }, + { + "epoch": 0.79, + "learning_rate": 2.3704638279668457e-05, + "loss": 2.5362, + "step": 45530 + }, + { + "epoch": 0.79, + "learning_rate": 2.3667652804387795e-05, + "loss": 2.5266, + "step": 45540 + }, + { + "epoch": 0.79, + "learning_rate": 2.3630692331998892e-05, + "loss": 2.6989, + "step": 45550 + }, + { + "epoch": 0.79, + "learning_rate": 2.359375687460832e-05, + "loss": 2.5881, + "step": 45560 + }, + { + "epoch": 0.79, + "learning_rate": 2.355684644431434e-05, + "loss": 2.4495, + "step": 45570 + }, + { + "epoch": 0.79, + "learning_rate": 2.3519961053207108e-05, + "loss": 2.5047, + "step": 45580 + }, + { + "epoch": 0.79, + "learning_rate": 2.3483100713368555e-05, + "loss": 2.5724, + "step": 45590 + }, + { + "epoch": 0.79, + "learning_rate": 2.344626543687236e-05, + "loss": 2.506, + "step": 45600 + }, + { + "epoch": 0.79, + "learning_rate": 2.3409455235784062e-05, + "loss": 2.5196, + "step": 45610 + }, + { + "epoch": 0.79, + "learning_rate": 2.3372670122160957e-05, + "loss": 2.546, + "step": 45620 + }, + { + "epoch": 0.79, + "learning_rate": 2.3335910108052095e-05, + "loss": 2.485, + "step": 45630 + }, + { + "epoch": 0.79, + "learning_rate": 2.3299175205498335e-05, + "loss": 2.5376, + "step": 45640 + }, + { + "epoch": 0.79, + "learning_rate": 2.3262465426532342e-05, + "loss": 2.4808, + "step": 45650 + }, + { + "epoch": 0.79, + "learning_rate": 2.3225780783178463e-05, + "loss": 2.5199, + "step": 45660 + }, + { + "epoch": 0.79, + "learning_rate": 2.3189121287452888e-05, + "loss": 2.507, + "step": 45670 + }, + { + "epoch": 0.79, + "learning_rate": 2.315248695136357e-05, + "loss": 2.5011, + "step": 45680 + }, + { + "epoch": 0.79, + "learning_rate": 2.3115877786910146e-05, + "loss": 2.4669, + "step": 45690 + }, + { + "epoch": 0.79, + "learning_rate": 2.307929380608409e-05, + "loss": 2.5593, + "step": 45700 + }, + { + "epoch": 0.79, + "learning_rate": 2.304273502086859e-05, + "loss": 2.4753, + "step": 45710 + }, + { + "epoch": 0.79, + "learning_rate": 2.3006201443238585e-05, + "loss": 2.4593, + "step": 45720 + }, + { + "epoch": 0.79, + "learning_rate": 2.2969693085160803e-05, + "loss": 2.5658, + "step": 45730 + }, + { + "epoch": 0.79, + "learning_rate": 2.2933209958593594e-05, + "loss": 2.5008, + "step": 45740 + }, + { + "epoch": 0.79, + "learning_rate": 2.289675207548715e-05, + "loss": 2.5614, + "step": 45750 + }, + { + "epoch": 0.79, + "learning_rate": 2.2860319447783385e-05, + "loss": 2.4891, + "step": 45760 + }, + { + "epoch": 0.79, + "learning_rate": 2.2823912087415867e-05, + "loss": 2.5366, + "step": 45770 + }, + { + "epoch": 0.79, + "learning_rate": 2.2787530006309986e-05, + "loss": 2.4776, + "step": 45780 + }, + { + "epoch": 0.79, + "learning_rate": 2.2751173216382772e-05, + "loss": 2.4869, + "step": 45790 + }, + { + "epoch": 0.79, + "learning_rate": 2.271484172954299e-05, + "loss": 2.5097, + "step": 45800 + }, + { + "epoch": 0.79, + "learning_rate": 2.2678535557691184e-05, + "loss": 2.5385, + "step": 45810 + }, + { + "epoch": 0.79, + "learning_rate": 2.264225471271949e-05, + "loss": 2.5438, + "step": 45820 + }, + { + "epoch": 0.79, + "learning_rate": 2.260599920651183e-05, + "loss": 2.5339, + "step": 45830 + }, + { + "epoch": 0.79, + "learning_rate": 2.2569769050943845e-05, + "loss": 2.5376, + "step": 45840 + }, + { + "epoch": 0.79, + "learning_rate": 2.2533564257882754e-05, + "loss": 2.503, + "step": 45850 + }, + { + "epoch": 0.79, + "learning_rate": 2.2497384839187608e-05, + "loss": 2.4333, + "step": 45860 + }, + { + "epoch": 0.79, + "learning_rate": 2.2461230806709076e-05, + "loss": 2.4657, + "step": 45870 + }, + { + "epoch": 0.79, + "learning_rate": 2.2425102172289502e-05, + "loss": 2.5299, + "step": 45880 + }, + { + "epoch": 0.79, + "learning_rate": 2.238899894776293e-05, + "loss": 2.4558, + "step": 45890 + }, + { + "epoch": 0.79, + "learning_rate": 2.2352921144955084e-05, + "loss": 2.574, + "step": 45900 + }, + { + "epoch": 0.79, + "learning_rate": 2.2316868775683377e-05, + "loss": 2.4309, + "step": 45910 + }, + { + "epoch": 0.79, + "learning_rate": 2.2280841851756874e-05, + "loss": 2.4905, + "step": 45920 + }, + { + "epoch": 0.79, + "learning_rate": 2.224484038497626e-05, + "loss": 2.4555, + "step": 45930 + }, + { + "epoch": 0.8, + "learning_rate": 2.2208864387133964e-05, + "loss": 2.4848, + "step": 45940 + }, + { + "epoch": 0.8, + "learning_rate": 2.2172913870014043e-05, + "loss": 2.4735, + "step": 45950 + }, + { + "epoch": 0.8, + "learning_rate": 2.213698884539215e-05, + "loss": 2.5167, + "step": 45960 + }, + { + "epoch": 0.8, + "learning_rate": 2.2101089325035664e-05, + "loss": 2.4934, + "step": 45970 + }, + { + "epoch": 0.8, + "learning_rate": 2.2065215320703602e-05, + "loss": 2.5275, + "step": 45980 + }, + { + "epoch": 0.8, + "learning_rate": 2.202936684414656e-05, + "loss": 2.5345, + "step": 45990 + }, + { + "epoch": 0.8, + "learning_rate": 2.1993543907106828e-05, + "loss": 2.6249, + "step": 46000 + }, + { + "epoch": 0.8, + "learning_rate": 2.1957746521318358e-05, + "loss": 2.5658, + "step": 46010 + }, + { + "epoch": 0.8, + "learning_rate": 2.192197469850663e-05, + "loss": 2.5489, + "step": 46020 + }, + { + "epoch": 0.8, + "learning_rate": 2.188622845038888e-05, + "loss": 2.5934, + "step": 46030 + }, + { + "epoch": 0.8, + "learning_rate": 2.1850507788673823e-05, + "loss": 2.4515, + "step": 46040 + }, + { + "epoch": 0.8, + "learning_rate": 2.1814812725061917e-05, + "loss": 2.5006, + "step": 46050 + }, + { + "epoch": 0.8, + "learning_rate": 2.1779143271245216e-05, + "loss": 2.538, + "step": 46060 + }, + { + "epoch": 0.8, + "learning_rate": 2.1743499438907255e-05, + "loss": 2.4532, + "step": 46070 + }, + { + "epoch": 0.8, + "learning_rate": 2.1707881239723383e-05, + "loss": 2.4935, + "step": 46080 + }, + { + "epoch": 0.8, + "learning_rate": 2.1672288685360432e-05, + "loss": 2.5318, + "step": 46090 + }, + { + "epoch": 0.8, + "learning_rate": 2.163672178747682e-05, + "loss": 2.5548, + "step": 46100 + }, + { + "epoch": 0.8, + "learning_rate": 2.160118055772262e-05, + "loss": 2.495, + "step": 46110 + }, + { + "epoch": 0.8, + "learning_rate": 2.156566500773943e-05, + "loss": 2.5218, + "step": 46120 + }, + { + "epoch": 0.8, + "learning_rate": 2.1530175149160504e-05, + "loss": 2.5489, + "step": 46130 + }, + { + "epoch": 0.8, + "learning_rate": 2.1494710993610677e-05, + "loss": 2.4966, + "step": 46140 + }, + { + "epoch": 0.8, + "learning_rate": 2.1459272552706277e-05, + "loss": 2.4817, + "step": 46150 + }, + { + "epoch": 0.8, + "learning_rate": 2.1423859838055317e-05, + "loss": 2.5772, + "step": 46160 + }, + { + "epoch": 0.8, + "learning_rate": 2.1388472861257358e-05, + "loss": 2.5615, + "step": 46170 + }, + { + "epoch": 0.8, + "learning_rate": 2.1353111633903456e-05, + "loss": 2.633, + "step": 46180 + }, + { + "epoch": 0.8, + "learning_rate": 2.131777616757632e-05, + "loss": 2.5249, + "step": 46190 + }, + { + "epoch": 0.8, + "learning_rate": 2.1282466473850206e-05, + "loss": 2.5948, + "step": 46200 + }, + { + "epoch": 0.8, + "learning_rate": 2.124718256429087e-05, + "loss": 2.5282, + "step": 46210 + }, + { + "epoch": 0.8, + "learning_rate": 2.121192445045569e-05, + "loss": 2.5079, + "step": 46220 + }, + { + "epoch": 0.8, + "learning_rate": 2.1176692143893572e-05, + "loss": 2.5068, + "step": 46230 + }, + { + "epoch": 0.8, + "learning_rate": 2.1141485656144945e-05, + "loss": 2.4683, + "step": 46240 + }, + { + "epoch": 0.8, + "learning_rate": 2.110630499874181e-05, + "loss": 2.5872, + "step": 46250 + }, + { + "epoch": 0.8, + "learning_rate": 2.107115018320769e-05, + "loss": 2.53, + "step": 46260 + }, + { + "epoch": 0.8, + "learning_rate": 2.1036021221057666e-05, + "loss": 2.5642, + "step": 46270 + }, + { + "epoch": 0.8, + "learning_rate": 2.1000918123798365e-05, + "loss": 2.4543, + "step": 46280 + }, + { + "epoch": 0.8, + "learning_rate": 2.0965840902927846e-05, + "loss": 2.4975, + "step": 46290 + }, + { + "epoch": 0.8, + "learning_rate": 2.0930789569935795e-05, + "loss": 2.4953, + "step": 46300 + }, + { + "epoch": 0.8, + "learning_rate": 2.08957641363034e-05, + "loss": 2.5204, + "step": 46310 + }, + { + "epoch": 0.8, + "learning_rate": 2.0860764613503304e-05, + "loss": 2.5749, + "step": 46320 + }, + { + "epoch": 0.8, + "learning_rate": 2.0825791012999752e-05, + "loss": 2.4214, + "step": 46330 + }, + { + "epoch": 0.8, + "learning_rate": 2.0790843346248412e-05, + "loss": 2.555, + "step": 46340 + }, + { + "epoch": 0.8, + "learning_rate": 2.0755921624696515e-05, + "loss": 2.5707, + "step": 46350 + }, + { + "epoch": 0.8, + "learning_rate": 2.0721025859782804e-05, + "loss": 2.4581, + "step": 46360 + }, + { + "epoch": 0.8, + "learning_rate": 2.0686156062937446e-05, + "loss": 2.4691, + "step": 46370 + }, + { + "epoch": 0.8, + "learning_rate": 2.0651312245582166e-05, + "loss": 2.5005, + "step": 46380 + }, + { + "epoch": 0.8, + "learning_rate": 2.0616494419130193e-05, + "loss": 2.5222, + "step": 46390 + }, + { + "epoch": 0.8, + "learning_rate": 2.0581702594986163e-05, + "loss": 2.5501, + "step": 46400 + }, + { + "epoch": 0.8, + "learning_rate": 2.0546936784546266e-05, + "loss": 2.3928, + "step": 46410 + }, + { + "epoch": 0.8, + "learning_rate": 2.0512196999198173e-05, + "loss": 2.5047, + "step": 46420 + }, + { + "epoch": 0.8, + "learning_rate": 2.0477483250320952e-05, + "loss": 2.5531, + "step": 46430 + }, + { + "epoch": 0.8, + "learning_rate": 2.044279554928522e-05, + "loss": 2.5472, + "step": 46440 + }, + { + "epoch": 0.8, + "learning_rate": 2.0408133907453053e-05, + "loss": 2.5784, + "step": 46450 + }, + { + "epoch": 0.8, + "learning_rate": 2.0373498336177977e-05, + "loss": 2.5483, + "step": 46460 + }, + { + "epoch": 0.8, + "learning_rate": 2.0338888846804983e-05, + "loss": 2.5909, + "step": 46470 + }, + { + "epoch": 0.8, + "learning_rate": 2.030430545067048e-05, + "loss": 2.5005, + "step": 46480 + }, + { + "epoch": 0.8, + "learning_rate": 2.0269748159102388e-05, + "loss": 2.4953, + "step": 46490 + }, + { + "epoch": 0.8, + "learning_rate": 2.0235216983420067e-05, + "loss": 2.4482, + "step": 46500 + }, + { + "epoch": 0.8, + "learning_rate": 2.020071193493427e-05, + "loss": 2.5316, + "step": 46510 + }, + { + "epoch": 0.81, + "learning_rate": 2.0166233024947255e-05, + "loss": 2.5154, + "step": 46520 + }, + { + "epoch": 0.81, + "learning_rate": 2.013178026475271e-05, + "loss": 2.5296, + "step": 46530 + }, + { + "epoch": 0.81, + "learning_rate": 2.0097353665635698e-05, + "loss": 2.4914, + "step": 46540 + }, + { + "epoch": 0.81, + "learning_rate": 2.0062953238872784e-05, + "loss": 2.5747, + "step": 46550 + }, + { + "epoch": 0.81, + "learning_rate": 2.002857899573195e-05, + "loss": 2.4536, + "step": 46560 + }, + { + "epoch": 0.81, + "learning_rate": 1.9994230947472526e-05, + "loss": 2.5603, + "step": 46570 + }, + { + "epoch": 0.81, + "learning_rate": 1.99599091053454e-05, + "loss": 2.5688, + "step": 46580 + }, + { + "epoch": 0.81, + "learning_rate": 1.9925613480592708e-05, + "loss": 2.5443, + "step": 46590 + }, + { + "epoch": 0.81, + "learning_rate": 1.989134408444813e-05, + "loss": 2.505, + "step": 46600 + }, + { + "epoch": 0.81, + "learning_rate": 1.9857100928136728e-05, + "loss": 2.511, + "step": 46610 + }, + { + "epoch": 0.81, + "learning_rate": 1.982288402287489e-05, + "loss": 2.543, + "step": 46620 + }, + { + "epoch": 0.81, + "learning_rate": 1.9788693379870536e-05, + "loss": 2.5477, + "step": 46630 + }, + { + "epoch": 0.81, + "learning_rate": 1.9754529010322907e-05, + "loss": 2.5597, + "step": 46640 + }, + { + "epoch": 0.81, + "learning_rate": 1.9720390925422605e-05, + "loss": 2.5216, + "step": 46650 + }, + { + "epoch": 0.81, + "learning_rate": 1.9686279136351705e-05, + "loss": 2.5307, + "step": 46660 + }, + { + "epoch": 0.81, + "learning_rate": 1.965219365428359e-05, + "loss": 2.5298, + "step": 46670 + }, + { + "epoch": 0.81, + "learning_rate": 1.9618134490383068e-05, + "loss": 2.5805, + "step": 46680 + }, + { + "epoch": 0.81, + "learning_rate": 1.9584101655806363e-05, + "loss": 2.5044, + "step": 46690 + }, + { + "epoch": 0.81, + "learning_rate": 1.955009516170099e-05, + "loss": 2.4197, + "step": 46700 + }, + { + "epoch": 0.81, + "learning_rate": 1.951611501920588e-05, + "loss": 2.549, + "step": 46710 + }, + { + "epoch": 0.81, + "learning_rate": 1.9482161239451367e-05, + "loss": 2.4986, + "step": 46720 + }, + { + "epoch": 0.81, + "learning_rate": 1.944823383355907e-05, + "loss": 2.6847, + "step": 46730 + }, + { + "epoch": 0.81, + "learning_rate": 1.9414332812642043e-05, + "loss": 2.5001, + "step": 46740 + }, + { + "epoch": 0.81, + "learning_rate": 1.9380458187804674e-05, + "loss": 2.4404, + "step": 46750 + }, + { + "epoch": 0.81, + "learning_rate": 1.934660997014267e-05, + "loss": 2.4275, + "step": 46760 + }, + { + "epoch": 0.81, + "learning_rate": 1.9312788170743136e-05, + "loss": 2.5151, + "step": 46770 + }, + { + "epoch": 0.81, + "learning_rate": 1.9278992800684515e-05, + "loss": 2.5229, + "step": 46780 + }, + { + "epoch": 0.81, + "learning_rate": 1.9245223871036556e-05, + "loss": 2.473, + "step": 46790 + }, + { + "epoch": 0.81, + "learning_rate": 1.9211481392860375e-05, + "loss": 2.5356, + "step": 46800 + }, + { + "epoch": 0.81, + "learning_rate": 1.9177765377208446e-05, + "loss": 2.546, + "step": 46810 + }, + { + "epoch": 0.81, + "learning_rate": 1.9144075835124542e-05, + "loss": 2.4378, + "step": 46820 + }, + { + "epoch": 0.81, + "learning_rate": 1.9110412777643793e-05, + "loss": 2.422, + "step": 46830 + }, + { + "epoch": 0.81, + "learning_rate": 1.90767762157926e-05, + "loss": 2.4726, + "step": 46840 + }, + { + "epoch": 0.81, + "learning_rate": 1.9043166160588734e-05, + "loss": 2.4896, + "step": 46850 + }, + { + "epoch": 0.81, + "learning_rate": 1.900958262304131e-05, + "loss": 2.5855, + "step": 46860 + }, + { + "epoch": 0.81, + "learning_rate": 1.8976025614150648e-05, + "loss": 2.5565, + "step": 46870 + }, + { + "epoch": 0.81, + "learning_rate": 1.8942495144908513e-05, + "loss": 2.4976, + "step": 46880 + }, + { + "epoch": 0.81, + "learning_rate": 1.8908991226297845e-05, + "loss": 2.4944, + "step": 46890 + }, + { + "epoch": 0.81, + "learning_rate": 1.8875513869293005e-05, + "loss": 2.5116, + "step": 46900 + }, + { + "epoch": 0.81, + "learning_rate": 1.8842063084859605e-05, + "loss": 2.4379, + "step": 46910 + }, + { + "epoch": 0.81, + "learning_rate": 1.880863888395451e-05, + "loss": 2.5218, + "step": 46920 + }, + { + "epoch": 0.81, + "learning_rate": 1.8775241277525946e-05, + "loss": 2.5143, + "step": 46930 + }, + { + "epoch": 0.81, + "learning_rate": 1.8741870276513417e-05, + "loss": 2.4415, + "step": 46940 + }, + { + "epoch": 0.81, + "learning_rate": 1.8708525891847652e-05, + "loss": 2.4104, + "step": 46950 + }, + { + "epoch": 0.81, + "learning_rate": 1.8675208134450728e-05, + "loss": 2.573, + "step": 46960 + }, + { + "epoch": 0.81, + "learning_rate": 1.864191701523601e-05, + "loss": 2.5403, + "step": 46970 + }, + { + "epoch": 0.81, + "learning_rate": 1.8608652545108042e-05, + "loss": 2.5791, + "step": 46980 + }, + { + "epoch": 0.81, + "learning_rate": 1.8575414734962736e-05, + "loss": 2.5238, + "step": 46990 + }, + { + "epoch": 0.81, + "learning_rate": 1.8542203595687235e-05, + "loss": 2.5465, + "step": 47000 + }, + { + "epoch": 0.81, + "learning_rate": 1.8509019138159944e-05, + "loss": 2.5392, + "step": 47010 + }, + { + "epoch": 0.81, + "learning_rate": 1.8475861373250568e-05, + "loss": 2.4853, + "step": 47020 + }, + { + "epoch": 0.81, + "learning_rate": 1.8442730311819968e-05, + "loss": 2.5202, + "step": 47030 + }, + { + "epoch": 0.81, + "learning_rate": 1.8409625964720368e-05, + "loss": 2.5706, + "step": 47040 + }, + { + "epoch": 0.81, + "learning_rate": 1.8376548342795207e-05, + "loss": 2.49, + "step": 47050 + }, + { + "epoch": 0.81, + "learning_rate": 1.834349745687911e-05, + "loss": 2.4857, + "step": 47060 + }, + { + "epoch": 0.81, + "learning_rate": 1.8310473317798037e-05, + "loss": 2.4578, + "step": 47070 + }, + { + "epoch": 0.81, + "learning_rate": 1.827747593636916e-05, + "loss": 2.509, + "step": 47080 + }, + { + "epoch": 0.81, + "learning_rate": 1.8244505323400806e-05, + "loss": 2.5089, + "step": 47090 + }, + { + "epoch": 0.82, + "learning_rate": 1.821156148969265e-05, + "loss": 2.5338, + "step": 47100 + }, + { + "epoch": 0.82, + "learning_rate": 1.8178644446035552e-05, + "loss": 2.4574, + "step": 47110 + }, + { + "epoch": 0.82, + "learning_rate": 1.814575420321154e-05, + "loss": 2.5145, + "step": 47120 + }, + { + "epoch": 0.82, + "learning_rate": 1.8112890771993973e-05, + "loss": 2.5978, + "step": 47130 + }, + { + "epoch": 0.82, + "learning_rate": 1.8080054163147318e-05, + "loss": 2.4177, + "step": 47140 + }, + { + "epoch": 0.82, + "learning_rate": 1.804724438742732e-05, + "loss": 2.5606, + "step": 47150 + }, + { + "epoch": 0.82, + "learning_rate": 1.8014461455580955e-05, + "loss": 2.5167, + "step": 47160 + }, + { + "epoch": 0.82, + "learning_rate": 1.798170537834628e-05, + "loss": 2.4608, + "step": 47170 + }, + { + "epoch": 0.82, + "learning_rate": 1.7948976166452746e-05, + "loss": 2.4138, + "step": 47180 + }, + { + "epoch": 0.82, + "learning_rate": 1.791627383062089e-05, + "loss": 2.4895, + "step": 47190 + }, + { + "epoch": 0.82, + "learning_rate": 1.7883598381562417e-05, + "loss": 2.5031, + "step": 47200 + }, + { + "epoch": 0.82, + "learning_rate": 1.7850949829980324e-05, + "loss": 2.5539, + "step": 47210 + }, + { + "epoch": 0.82, + "learning_rate": 1.7818328186568678e-05, + "loss": 2.4521, + "step": 47220 + }, + { + "epoch": 0.82, + "learning_rate": 1.7785733462012832e-05, + "loss": 2.4766, + "step": 47230 + }, + { + "epoch": 0.82, + "learning_rate": 1.7753165666989314e-05, + "loss": 2.5322, + "step": 47240 + }, + { + "epoch": 0.82, + "learning_rate": 1.7720624812165754e-05, + "loss": 2.5655, + "step": 47250 + }, + { + "epoch": 0.82, + "learning_rate": 1.7688110908201027e-05, + "loss": 2.4969, + "step": 47260 + }, + { + "epoch": 0.82, + "learning_rate": 1.7655623965745195e-05, + "loss": 2.4679, + "step": 47270 + }, + { + "epoch": 0.82, + "learning_rate": 1.7623163995439384e-05, + "loss": 2.4941, + "step": 47280 + }, + { + "epoch": 0.82, + "learning_rate": 1.7590731007916006e-05, + "loss": 2.5208, + "step": 47290 + }, + { + "epoch": 0.82, + "learning_rate": 1.7558325013798603e-05, + "loss": 2.557, + "step": 47300 + }, + { + "epoch": 0.82, + "learning_rate": 1.7525946023701788e-05, + "loss": 2.4897, + "step": 47310 + }, + { + "epoch": 0.82, + "learning_rate": 1.7493594048231443e-05, + "loss": 2.5451, + "step": 47320 + }, + { + "epoch": 0.82, + "learning_rate": 1.746126909798458e-05, + "loss": 2.5248, + "step": 47330 + }, + { + "epoch": 0.82, + "learning_rate": 1.742897118354927e-05, + "loss": 2.5054, + "step": 47340 + }, + { + "epoch": 0.82, + "learning_rate": 1.7396700315504834e-05, + "loss": 2.5184, + "step": 47350 + }, + { + "epoch": 0.82, + "learning_rate": 1.736445650442168e-05, + "loss": 2.5579, + "step": 47360 + }, + { + "epoch": 0.82, + "learning_rate": 1.7332239760861368e-05, + "loss": 2.4726, + "step": 47370 + }, + { + "epoch": 0.82, + "learning_rate": 1.730005009537663e-05, + "loss": 2.5119, + "step": 47380 + }, + { + "epoch": 0.82, + "learning_rate": 1.7267887518511218e-05, + "loss": 2.6059, + "step": 47390 + }, + { + "epoch": 0.82, + "learning_rate": 1.7235752040800114e-05, + "loss": 2.4903, + "step": 47400 + }, + { + "epoch": 0.82, + "learning_rate": 1.720364367276942e-05, + "loss": 2.4533, + "step": 47410 + }, + { + "epoch": 0.82, + "learning_rate": 1.7171562424936273e-05, + "loss": 2.5459, + "step": 47420 + }, + { + "epoch": 0.82, + "learning_rate": 1.7139508307809038e-05, + "loss": 2.5567, + "step": 47430 + }, + { + "epoch": 0.82, + "learning_rate": 1.71074813318871e-05, + "loss": 2.5515, + "step": 47440 + }, + { + "epoch": 0.82, + "learning_rate": 1.7075481507660985e-05, + "loss": 2.4951, + "step": 47450 + }, + { + "epoch": 0.82, + "learning_rate": 1.7043508845612387e-05, + "loss": 2.4426, + "step": 47460 + }, + { + "epoch": 0.82, + "learning_rate": 1.7011563356213987e-05, + "loss": 2.5018, + "step": 47470 + }, + { + "epoch": 0.82, + "learning_rate": 1.697964504992965e-05, + "loss": 2.522, + "step": 47480 + }, + { + "epoch": 0.82, + "learning_rate": 1.6947753937214352e-05, + "loss": 2.5495, + "step": 47490 + }, + { + "epoch": 0.82, + "learning_rate": 1.6915890028514047e-05, + "loss": 2.5229, + "step": 47500 + }, + { + "epoch": 0.82, + "learning_rate": 1.6884053334265904e-05, + "loss": 2.5123, + "step": 47510 + }, + { + "epoch": 0.82, + "learning_rate": 1.6852243864898155e-05, + "loss": 2.5311, + "step": 47520 + }, + { + "epoch": 0.82, + "learning_rate": 1.682046163083002e-05, + "loss": 2.5249, + "step": 47530 + }, + { + "epoch": 0.82, + "learning_rate": 1.6788706642471896e-05, + "loss": 2.463, + "step": 47540 + }, + { + "epoch": 0.82, + "learning_rate": 1.675697891022524e-05, + "loss": 2.4819, + "step": 47550 + }, + { + "epoch": 0.82, + "learning_rate": 1.6725278444482552e-05, + "loss": 2.4526, + "step": 47560 + }, + { + "epoch": 0.82, + "learning_rate": 1.6693605255627455e-05, + "loss": 2.5096, + "step": 47570 + }, + { + "epoch": 0.82, + "learning_rate": 1.666195935403453e-05, + "loss": 2.4901, + "step": 47580 + }, + { + "epoch": 0.82, + "learning_rate": 1.6630340750069516e-05, + "loss": 2.4723, + "step": 47590 + }, + { + "epoch": 0.82, + "learning_rate": 1.6598749454089225e-05, + "loss": 2.578, + "step": 47600 + }, + { + "epoch": 0.82, + "learning_rate": 1.6567185476441415e-05, + "loss": 2.542, + "step": 47610 + }, + { + "epoch": 0.82, + "learning_rate": 1.6535648827465e-05, + "loss": 2.4866, + "step": 47620 + }, + { + "epoch": 0.82, + "learning_rate": 1.6504139517489914e-05, + "loss": 2.4678, + "step": 47630 + }, + { + "epoch": 0.82, + "learning_rate": 1.647265755683709e-05, + "loss": 2.4989, + "step": 47640 + }, + { + "epoch": 0.82, + "learning_rate": 1.6441202955818557e-05, + "loss": 2.5396, + "step": 47650 + }, + { + "epoch": 0.82, + "learning_rate": 1.6409775724737397e-05, + "loss": 2.4865, + "step": 47660 + }, + { + "epoch": 0.82, + "learning_rate": 1.6378375873887643e-05, + "loss": 2.5655, + "step": 47670 + }, + { + "epoch": 0.83, + "learning_rate": 1.634700341355446e-05, + "loss": 2.5263, + "step": 47680 + }, + { + "epoch": 0.83, + "learning_rate": 1.631565835401394e-05, + "loss": 2.4884, + "step": 47690 + }, + { + "epoch": 0.83, + "learning_rate": 1.6284340705533273e-05, + "loss": 2.5125, + "step": 47700 + }, + { + "epoch": 0.83, + "learning_rate": 1.6253050478370692e-05, + "loss": 2.4956, + "step": 47710 + }, + { + "epoch": 0.83, + "learning_rate": 1.622491272762242e-05, + "loss": 2.4779, + "step": 47720 + }, + { + "epoch": 0.83, + "learning_rate": 1.6193674629193244e-05, + "loss": 2.5453, + "step": 47730 + }, + { + "epoch": 0.83, + "learning_rate": 1.6162463981780096e-05, + "loss": 2.5055, + "step": 47740 + }, + { + "epoch": 0.83, + "learning_rate": 1.613128079560606e-05, + "loss": 2.5322, + "step": 47750 + }, + { + "epoch": 0.83, + "learning_rate": 1.610012508088532e-05, + "loss": 2.5337, + "step": 47760 + }, + { + "epoch": 0.83, + "learning_rate": 1.6068996847823036e-05, + "loss": 2.4255, + "step": 47770 + }, + { + "epoch": 0.83, + "learning_rate": 1.6037896106615312e-05, + "loss": 2.4172, + "step": 47780 + }, + { + "epoch": 0.83, + "learning_rate": 1.6006822867449322e-05, + "loss": 2.4624, + "step": 47790 + }, + { + "epoch": 0.83, + "learning_rate": 1.597577714050321e-05, + "loss": 2.473, + "step": 47800 + }, + { + "epoch": 0.83, + "learning_rate": 1.594475893594606e-05, + "loss": 2.5448, + "step": 47810 + }, + { + "epoch": 0.83, + "learning_rate": 1.591376826393801e-05, + "loss": 2.6252, + "step": 47820 + }, + { + "epoch": 0.83, + "learning_rate": 1.5882805134630117e-05, + "loss": 2.4294, + "step": 47830 + }, + { + "epoch": 0.83, + "learning_rate": 1.5851869558164478e-05, + "loss": 2.579, + "step": 47840 + }, + { + "epoch": 0.83, + "learning_rate": 1.5820961544674118e-05, + "loss": 2.5872, + "step": 47850 + }, + { + "epoch": 0.83, + "learning_rate": 1.5790081104283062e-05, + "loss": 2.5012, + "step": 47860 + }, + { + "epoch": 0.83, + "learning_rate": 1.5759228247106307e-05, + "loss": 2.5293, + "step": 47870 + }, + { + "epoch": 0.83, + "learning_rate": 1.5728402983249756e-05, + "loss": 2.5059, + "step": 47880 + }, + { + "epoch": 0.83, + "learning_rate": 1.569760532281034e-05, + "loss": 2.5112, + "step": 47890 + }, + { + "epoch": 0.83, + "learning_rate": 1.5666835275875957e-05, + "loss": 2.5532, + "step": 47900 + }, + { + "epoch": 0.83, + "learning_rate": 1.5636092852525376e-05, + "loss": 2.459, + "step": 47910 + }, + { + "epoch": 0.83, + "learning_rate": 1.5605378062828392e-05, + "loss": 2.4963, + "step": 47920 + }, + { + "epoch": 0.83, + "learning_rate": 1.5574690916845756e-05, + "loss": 2.5077, + "step": 47930 + }, + { + "epoch": 0.83, + "learning_rate": 1.554403142462908e-05, + "loss": 2.5688, + "step": 47940 + }, + { + "epoch": 0.83, + "learning_rate": 1.5513399596221013e-05, + "loss": 2.5913, + "step": 47950 + }, + { + "epoch": 0.83, + "learning_rate": 1.5482795441655118e-05, + "loss": 2.4515, + "step": 47960 + }, + { + "epoch": 0.83, + "learning_rate": 1.5452218970955833e-05, + "loss": 2.5822, + "step": 47970 + }, + { + "epoch": 0.83, + "learning_rate": 1.5421670194138582e-05, + "loss": 2.5282, + "step": 47980 + }, + { + "epoch": 0.83, + "learning_rate": 1.5391149121209757e-05, + "loss": 2.6064, + "step": 47990 + }, + { + "epoch": 0.83, + "learning_rate": 1.5360655762166575e-05, + "loss": 2.5353, + "step": 48000 + }, + { + "epoch": 0.83, + "learning_rate": 1.5330190126997246e-05, + "loss": 2.4505, + "step": 48010 + }, + { + "epoch": 0.83, + "learning_rate": 1.5299752225680898e-05, + "loss": 2.5093, + "step": 48020 + }, + { + "epoch": 0.83, + "learning_rate": 1.52693420681875e-05, + "loss": 2.5375, + "step": 48030 + }, + { + "epoch": 0.83, + "learning_rate": 1.5238959664478081e-05, + "loss": 2.4839, + "step": 48040 + }, + { + "epoch": 0.83, + "learning_rate": 1.5208605024504418e-05, + "loss": 2.5704, + "step": 48050 + }, + { + "epoch": 0.83, + "learning_rate": 1.5178278158209291e-05, + "loss": 2.4686, + "step": 48060 + }, + { + "epoch": 0.83, + "learning_rate": 1.5147979075526374e-05, + "loss": 2.4342, + "step": 48070 + }, + { + "epoch": 0.83, + "learning_rate": 1.5117707786380186e-05, + "loss": 2.5121, + "step": 48080 + }, + { + "epoch": 0.83, + "learning_rate": 1.5087464300686206e-05, + "loss": 2.5265, + "step": 48090 + }, + { + "epoch": 0.83, + "learning_rate": 1.5057248628350784e-05, + "loss": 2.4519, + "step": 48100 + }, + { + "epoch": 0.83, + "learning_rate": 1.5027060779271119e-05, + "loss": 2.5077, + "step": 48110 + }, + { + "epoch": 0.83, + "learning_rate": 1.4996900763335386e-05, + "loss": 2.5558, + "step": 48120 + }, + { + "epoch": 0.83, + "learning_rate": 1.4966768590422532e-05, + "loss": 2.4916, + "step": 48130 + }, + { + "epoch": 0.83, + "learning_rate": 1.4936664270402479e-05, + "loss": 2.5275, + "step": 48140 + }, + { + "epoch": 0.83, + "learning_rate": 1.4906587813135986e-05, + "loss": 2.5077, + "step": 48150 + }, + { + "epoch": 0.83, + "learning_rate": 1.4876539228474673e-05, + "loss": 2.5265, + "step": 48160 + }, + { + "epoch": 0.83, + "learning_rate": 1.4846518526261043e-05, + "loss": 2.599, + "step": 48170 + }, + { + "epoch": 0.83, + "learning_rate": 1.481652571632851e-05, + "loss": 2.573, + "step": 48180 + }, + { + "epoch": 0.83, + "learning_rate": 1.4786560808501248e-05, + "loss": 2.5243, + "step": 48190 + }, + { + "epoch": 0.83, + "learning_rate": 1.4756623812594383e-05, + "loss": 2.5333, + "step": 48200 + }, + { + "epoch": 0.83, + "learning_rate": 1.4726714738413905e-05, + "loss": 2.4746, + "step": 48210 + }, + { + "epoch": 0.83, + "learning_rate": 1.469683359575652e-05, + "loss": 2.4178, + "step": 48220 + }, + { + "epoch": 0.83, + "learning_rate": 1.4666980394410013e-05, + "loss": 2.4978, + "step": 48230 + }, + { + "epoch": 0.83, + "learning_rate": 1.4637155144152803e-05, + "loss": 2.5781, + "step": 48240 + }, + { + "epoch": 0.84, + "learning_rate": 1.4607357854754267e-05, + "loss": 2.4974, + "step": 48250 + }, + { + "epoch": 0.84, + "learning_rate": 1.4577588535974629e-05, + "loss": 2.4957, + "step": 48260 + }, + { + "epoch": 0.84, + "learning_rate": 1.4547847197564856e-05, + "loss": 2.535, + "step": 48270 + }, + { + "epoch": 0.84, + "learning_rate": 1.4518133849266857e-05, + "loss": 2.5558, + "step": 48280 + }, + { + "epoch": 0.84, + "learning_rate": 1.4488448500813334e-05, + "loss": 2.5067, + "step": 48290 + }, + { + "epoch": 0.84, + "learning_rate": 1.4458791161927786e-05, + "loss": 2.5502, + "step": 48300 + }, + { + "epoch": 0.84, + "learning_rate": 1.4429161842324568e-05, + "loss": 2.4576, + "step": 48310 + }, + { + "epoch": 0.84, + "learning_rate": 1.439956055170889e-05, + "loss": 2.4175, + "step": 48320 + }, + { + "epoch": 0.84, + "learning_rate": 1.4369987299776689e-05, + "loss": 2.5984, + "step": 48330 + }, + { + "epoch": 0.84, + "learning_rate": 1.4340442096214802e-05, + "loss": 2.5909, + "step": 48340 + }, + { + "epoch": 0.84, + "learning_rate": 1.4310924950700877e-05, + "loss": 2.334, + "step": 48350 + }, + { + "epoch": 0.84, + "learning_rate": 1.4281435872903293e-05, + "loss": 2.5335, + "step": 48360 + }, + { + "epoch": 0.84, + "learning_rate": 1.4251974872481333e-05, + "loss": 2.5722, + "step": 48370 + }, + { + "epoch": 0.84, + "learning_rate": 1.422254195908499e-05, + "loss": 2.5308, + "step": 48380 + }, + { + "epoch": 0.84, + "learning_rate": 1.4193137142355128e-05, + "loss": 2.5538, + "step": 48390 + }, + { + "epoch": 0.84, + "learning_rate": 1.416376043192339e-05, + "loss": 2.5012, + "step": 48400 + }, + { + "epoch": 0.84, + "learning_rate": 1.4134411837412188e-05, + "loss": 2.4503, + "step": 48410 + }, + { + "epoch": 0.84, + "learning_rate": 1.4105091368434787e-05, + "loss": 2.5163, + "step": 48420 + }, + { + "epoch": 0.84, + "learning_rate": 1.407579903459515e-05, + "loss": 2.5544, + "step": 48430 + }, + { + "epoch": 0.84, + "learning_rate": 1.4046534845488069e-05, + "loss": 2.4609, + "step": 48440 + }, + { + "epoch": 0.84, + "learning_rate": 1.4017298810699164e-05, + "loss": 2.4856, + "step": 48450 + }, + { + "epoch": 0.84, + "learning_rate": 1.3988090939804743e-05, + "loss": 2.5197, + "step": 48460 + }, + { + "epoch": 0.84, + "learning_rate": 1.395891124237193e-05, + "loss": 2.4552, + "step": 48470 + }, + { + "epoch": 0.84, + "learning_rate": 1.392975972795867e-05, + "loss": 2.4796, + "step": 48480 + }, + { + "epoch": 0.84, + "learning_rate": 1.3900636406113576e-05, + "loss": 2.4535, + "step": 48490 + }, + { + "epoch": 0.84, + "learning_rate": 1.38715412863761e-05, + "loss": 2.5788, + "step": 48500 + }, + { + "epoch": 0.84, + "learning_rate": 1.3842474378276472e-05, + "loss": 2.6014, + "step": 48510 + }, + { + "epoch": 0.84, + "learning_rate": 1.3813435691335586e-05, + "loss": 2.4451, + "step": 48520 + }, + { + "epoch": 0.84, + "learning_rate": 1.378442523506518e-05, + "loss": 2.5426, + "step": 48530 + }, + { + "epoch": 0.84, + "learning_rate": 1.3755443018967739e-05, + "loss": 2.5106, + "step": 48540 + }, + { + "epoch": 0.84, + "learning_rate": 1.3726489052536429e-05, + "loss": 2.453, + "step": 48550 + }, + { + "epoch": 0.84, + "learning_rate": 1.3697563345255238e-05, + "loss": 2.5374, + "step": 48560 + }, + { + "epoch": 0.84, + "learning_rate": 1.3668665906598887e-05, + "loss": 2.4687, + "step": 48570 + }, + { + "epoch": 0.84, + "learning_rate": 1.363979674603275e-05, + "loss": 2.5745, + "step": 48580 + }, + { + "epoch": 0.84, + "learning_rate": 1.3610955873013109e-05, + "loss": 2.5078, + "step": 48590 + }, + { + "epoch": 0.84, + "learning_rate": 1.3582143296986793e-05, + "loss": 2.3621, + "step": 48600 + }, + { + "epoch": 0.84, + "learning_rate": 1.3553359027391488e-05, + "loss": 2.5373, + "step": 48610 + }, + { + "epoch": 0.84, + "learning_rate": 1.3524603073655584e-05, + "loss": 2.5549, + "step": 48620 + }, + { + "epoch": 0.84, + "learning_rate": 1.3495875445198148e-05, + "loss": 2.5551, + "step": 48630 + }, + { + "epoch": 0.84, + "learning_rate": 1.3467176151428996e-05, + "loss": 2.5041, + "step": 48640 + }, + { + "epoch": 0.84, + "learning_rate": 1.3438505201748719e-05, + "loss": 2.5193, + "step": 48650 + }, + { + "epoch": 0.84, + "learning_rate": 1.3409862605548517e-05, + "loss": 2.5457, + "step": 48660 + }, + { + "epoch": 0.84, + "learning_rate": 1.3381248372210409e-05, + "loss": 2.4669, + "step": 48670 + }, + { + "epoch": 0.84, + "learning_rate": 1.3352662511107017e-05, + "loss": 2.39, + "step": 48680 + }, + { + "epoch": 0.84, + "learning_rate": 1.3324105031601763e-05, + "loss": 2.51, + "step": 48690 + }, + { + "epoch": 0.84, + "learning_rate": 1.3295575943048755e-05, + "loss": 2.3754, + "step": 48700 + }, + { + "epoch": 0.84, + "learning_rate": 1.3267075254792739e-05, + "loss": 2.4566, + "step": 48710 + }, + { + "epoch": 0.84, + "learning_rate": 1.3238602976169211e-05, + "loss": 2.4808, + "step": 48720 + }, + { + "epoch": 0.84, + "learning_rate": 1.3210159116504395e-05, + "loss": 2.5548, + "step": 48730 + }, + { + "epoch": 0.84, + "learning_rate": 1.31817436851151e-05, + "loss": 2.5511, + "step": 48740 + }, + { + "epoch": 0.84, + "learning_rate": 1.3153356691308916e-05, + "loss": 2.4522, + "step": 48750 + }, + { + "epoch": 0.84, + "learning_rate": 1.3124998144384115e-05, + "loss": 2.4619, + "step": 48760 + }, + { + "epoch": 0.84, + "learning_rate": 1.3096668053629558e-05, + "loss": 2.4771, + "step": 48770 + }, + { + "epoch": 0.84, + "learning_rate": 1.3068366428324941e-05, + "loss": 2.5325, + "step": 48780 + }, + { + "epoch": 0.84, + "learning_rate": 1.3040093277740472e-05, + "loss": 2.5046, + "step": 48790 + }, + { + "epoch": 0.84, + "learning_rate": 1.3011848611137134e-05, + "loss": 2.5942, + "step": 48800 + }, + { + "epoch": 0.84, + "learning_rate": 1.298363243776658e-05, + "loss": 2.5246, + "step": 48810 + }, + { + "epoch": 0.84, + "learning_rate": 1.295544476687106e-05, + "loss": 2.5173, + "step": 48820 + }, + { + "epoch": 0.85, + "learning_rate": 1.2927285607683549e-05, + "loss": 2.5275, + "step": 48830 + }, + { + "epoch": 0.85, + "learning_rate": 1.2899154969427684e-05, + "loss": 2.4934, + "step": 48840 + }, + { + "epoch": 0.85, + "learning_rate": 1.28710528613177e-05, + "loss": 2.4827, + "step": 48850 + }, + { + "epoch": 0.85, + "learning_rate": 1.2842979292558544e-05, + "loss": 2.5016, + "step": 48860 + }, + { + "epoch": 0.85, + "learning_rate": 1.2814934272345836e-05, + "loss": 2.4563, + "step": 48870 + }, + { + "epoch": 0.85, + "learning_rate": 1.278691780986575e-05, + "loss": 2.4724, + "step": 48880 + }, + { + "epoch": 0.85, + "learning_rate": 1.2758929914295192e-05, + "loss": 2.4508, + "step": 48890 + }, + { + "epoch": 0.85, + "learning_rate": 1.2730970594801694e-05, + "loss": 2.4539, + "step": 48900 + }, + { + "epoch": 0.85, + "learning_rate": 1.2703039860543387e-05, + "loss": 2.6043, + "step": 48910 + }, + { + "epoch": 0.85, + "learning_rate": 1.2675137720669095e-05, + "loss": 2.4805, + "step": 48920 + }, + { + "epoch": 0.85, + "learning_rate": 1.264726418431822e-05, + "loss": 2.4925, + "step": 48930 + }, + { + "epoch": 0.85, + "learning_rate": 1.2619419260620835e-05, + "loss": 2.4963, + "step": 48940 + }, + { + "epoch": 0.85, + "learning_rate": 1.2591602958697635e-05, + "loss": 2.4975, + "step": 48950 + }, + { + "epoch": 0.85, + "learning_rate": 1.256381528765993e-05, + "loss": 2.4697, + "step": 48960 + }, + { + "epoch": 0.85, + "learning_rate": 1.2536056256609675e-05, + "loss": 2.4954, + "step": 48970 + }, + { + "epoch": 0.85, + "learning_rate": 1.2508325874639393e-05, + "loss": 2.4859, + "step": 48980 + }, + { + "epoch": 0.85, + "learning_rate": 1.2480624150832266e-05, + "loss": 2.5084, + "step": 48990 + }, + { + "epoch": 0.85, + "learning_rate": 1.2452951094262099e-05, + "loss": 2.416, + "step": 49000 + }, + { + "epoch": 0.85, + "learning_rate": 1.2425306713993246e-05, + "loss": 2.4958, + "step": 49010 + }, + { + "epoch": 0.85, + "learning_rate": 1.2397691019080737e-05, + "loss": 2.4938, + "step": 49020 + }, + { + "epoch": 0.85, + "learning_rate": 1.2370104018570184e-05, + "loss": 2.4997, + "step": 49030 + }, + { + "epoch": 0.85, + "learning_rate": 1.2342545721497756e-05, + "loss": 2.5336, + "step": 49040 + }, + { + "epoch": 0.85, + "learning_rate": 1.2315016136890279e-05, + "loss": 2.5574, + "step": 49050 + }, + { + "epoch": 0.85, + "learning_rate": 1.2287515273765172e-05, + "loss": 2.5439, + "step": 49060 + }, + { + "epoch": 0.85, + "learning_rate": 1.2260043141130384e-05, + "loss": 2.5115, + "step": 49070 + }, + { + "epoch": 0.85, + "learning_rate": 1.2232599747984509e-05, + "loss": 2.4628, + "step": 49080 + }, + { + "epoch": 0.85, + "learning_rate": 1.2205185103316753e-05, + "loss": 2.509, + "step": 49090 + }, + { + "epoch": 0.85, + "learning_rate": 1.21777992161068e-05, + "loss": 2.4924, + "step": 49100 + }, + { + "epoch": 0.85, + "learning_rate": 1.2150442095325032e-05, + "loss": 2.4964, + "step": 49110 + }, + { + "epoch": 0.85, + "learning_rate": 1.2123113749932346e-05, + "loss": 2.5316, + "step": 49120 + }, + { + "epoch": 0.85, + "learning_rate": 1.2095814188880184e-05, + "loss": 2.4942, + "step": 49130 + }, + { + "epoch": 0.85, + "learning_rate": 1.2068543421110667e-05, + "loss": 2.4776, + "step": 49140 + }, + { + "epoch": 0.85, + "learning_rate": 1.2041301455556364e-05, + "loss": 2.4401, + "step": 49150 + }, + { + "epoch": 0.85, + "learning_rate": 1.2014088301140492e-05, + "loss": 2.5714, + "step": 49160 + }, + { + "epoch": 0.85, + "learning_rate": 1.1986903966776808e-05, + "loss": 2.5306, + "step": 49170 + }, + { + "epoch": 0.85, + "learning_rate": 1.1962462714353707e-05, + "loss": 2.5619, + "step": 49180 + }, + { + "epoch": 0.85, + "learning_rate": 1.1935333162612705e-05, + "loss": 2.5539, + "step": 49190 + }, + { + "epoch": 0.85, + "learning_rate": 1.190823245672038e-05, + "loss": 2.5177, + "step": 49200 + }, + { + "epoch": 0.85, + "learning_rate": 1.188116060555362e-05, + "loss": 2.5042, + "step": 49210 + }, + { + "epoch": 0.85, + "learning_rate": 1.1854117617979899e-05, + "loss": 2.4575, + "step": 49220 + }, + { + "epoch": 0.85, + "learning_rate": 1.182710350285725e-05, + "loss": 2.4445, + "step": 49230 + }, + { + "epoch": 0.85, + "learning_rate": 1.18001182690342e-05, + "loss": 2.4278, + "step": 49240 + }, + { + "epoch": 0.85, + "learning_rate": 1.1773161925349852e-05, + "loss": 2.6486, + "step": 49250 + }, + { + "epoch": 0.85, + "learning_rate": 1.1746234480633844e-05, + "loss": 2.5331, + "step": 49260 + }, + { + "epoch": 0.85, + "learning_rate": 1.1719335943706344e-05, + "loss": 2.4793, + "step": 49270 + }, + { + "epoch": 0.85, + "learning_rate": 1.169246632337807e-05, + "loss": 2.5264, + "step": 49280 + }, + { + "epoch": 0.85, + "learning_rate": 1.1665625628450194e-05, + "loss": 2.4717, + "step": 49290 + }, + { + "epoch": 0.85, + "learning_rate": 1.1638813867714503e-05, + "loss": 2.5463, + "step": 49300 + } + ], + "logging_steps": 10, + "max_steps": 57782, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "total_flos": 1.671676728138793e+19, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}