{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.25438041080051527, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0003999999840334324, "loss": 8.1004, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.000399999936133732, "loss": 6.2987, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.0003999998563009066, "loss": 6.0792, "step": 30 }, { "epoch": 0.0, "learning_rate": 0.00039999974453496884, "loss": 5.8398, "step": 40 }, { "epoch": 0.0, "learning_rate": 0.0003999996008359366, "loss": 5.5406, "step": 50 }, { "epoch": 0.0, "learning_rate": 0.0003999994252038328, "loss": 5.2281, "step": 60 }, { "epoch": 0.0, "learning_rate": 0.0003999992176386855, "loss": 5.0488, "step": 70 }, { "epoch": 0.0, "learning_rate": 0.00039999897814052787, "loss": 4.8797, "step": 80 }, { "epoch": 0.0, "learning_rate": 0.00039999870670939813, "loss": 4.726, "step": 90 }, { "epoch": 0.0, "learning_rate": 0.00039999840334533965, "loss": 4.5608, "step": 100 }, { "epoch": 0.0, "learning_rate": 0.0003999980680484007, "loss": 4.4583, "step": 110 }, { "epoch": 0.0, "learning_rate": 0.0003999977008186351, "loss": 4.4164, "step": 120 }, { "epoch": 0.0, "learning_rate": 0.0003999973016561012, "loss": 4.3347, "step": 130 }, { "epoch": 0.0, "learning_rate": 0.00039999687056086294, "loss": 4.2288, "step": 140 }, { "epoch": 0.0, "learning_rate": 0.00039999640753298903, "loss": 4.1609, "step": 150 }, { "epoch": 0.0, "learning_rate": 0.0003999959125725535, "loss": 4.0503, "step": 160 }, { "epoch": 0.0, "learning_rate": 0.00039999538567963525, "loss": 4.0077, "step": 170 }, { "epoch": 0.0, "learning_rate": 0.0003999948268543185, "loss": 3.9593, "step": 180 }, { "epoch": 0.0, "learning_rate": 0.00039999423609669247, "loss": 3.9097, "step": 190 }, { "epoch": 0.01, "learning_rate": 0.0003999936134068514, "loss": 3.8487, "step": 200 }, { "epoch": 0.01, "learning_rate": 0.0003999929587848948, "loss": 3.7907, "step": 210 }, { "epoch": 0.01, "learning_rate": 0.00039999227223092726, "loss": 3.7483, "step": 220 }, { "epoch": 0.01, "learning_rate": 0.00039999155374505826, "loss": 3.6961, "step": 230 }, { "epoch": 0.01, "learning_rate": 0.0003999908033274025, "loss": 3.5971, "step": 240 }, { "epoch": 0.01, "learning_rate": 0.0003999900209780799, "loss": 3.594, "step": 250 }, { "epoch": 0.01, "learning_rate": 0.0003999892066972154, "loss": 3.5695, "step": 260 }, { "epoch": 0.01, "learning_rate": 0.0003999883604849389, "loss": 3.4888, "step": 270 }, { "epoch": 0.01, "learning_rate": 0.0003999874823413855, "loss": 3.4582, "step": 280 }, { "epoch": 0.01, "learning_rate": 0.0003999865722666956, "loss": 3.4195, "step": 290 }, { "epoch": 0.01, "learning_rate": 0.0003999856302610142, "loss": 3.3806, "step": 300 }, { "epoch": 0.01, "learning_rate": 0.000399984656324492, "loss": 3.3137, "step": 310 }, { "epoch": 0.01, "learning_rate": 0.0003999836504572844, "loss": 3.2644, "step": 320 }, { "epoch": 0.01, "learning_rate": 0.00039998261265955195, "loss": 3.21, "step": 330 }, { "epoch": 0.01, "learning_rate": 0.00039998154293146036, "loss": 3.1669, "step": 340 }, { "epoch": 0.01, "learning_rate": 0.0003999804412731805, "loss": 3.129, "step": 350 }, { "epoch": 0.01, "learning_rate": 0.00039997930768488827, "loss": 3.0799, "step": 360 }, { "epoch": 0.01, "learning_rate": 0.0003999781421667645, "loss": 3.0365, "step": 370 }, { "epoch": 0.01, "learning_rate": 0.0003999769447189955, "loss": 3.0261, "step": 380 }, { "epoch": 0.01, "learning_rate": 0.0003999757153417723, "loss": 2.9473, "step": 390 }, { "epoch": 0.01, "learning_rate": 0.00039997445403529134, "loss": 2.9481, "step": 400 }, { "epoch": 0.01, "learning_rate": 0.00039997316079975386, "loss": 2.9903, "step": 410 }, { "epoch": 0.01, "learning_rate": 0.0003999718356353664, "loss": 2.9129, "step": 420 }, { "epoch": 0.01, "learning_rate": 0.0003999704785423406, "loss": 2.9139, "step": 430 }, { "epoch": 0.01, "learning_rate": 0.00039996908952089305, "loss": 2.8713, "step": 440 }, { "epoch": 0.01, "learning_rate": 0.0003999676685712456, "loss": 2.8558, "step": 450 }, { "epoch": 0.01, "learning_rate": 0.00039996621569362504, "loss": 2.8262, "step": 460 }, { "epoch": 0.01, "learning_rate": 0.0003999647308882635, "loss": 2.7535, "step": 470 }, { "epoch": 0.01, "learning_rate": 0.0003999632141553979, "loss": 2.7011, "step": 480 }, { "epoch": 0.01, "learning_rate": 0.00039996166549527044, "loss": 2.7204, "step": 490 }, { "epoch": 0.01, "learning_rate": 0.0003999600849081285, "loss": 2.6671, "step": 500 }, { "epoch": 0.01, "learning_rate": 0.00039995847239422417, "loss": 2.6731, "step": 510 }, { "epoch": 0.01, "learning_rate": 0.0003999568279538153, "loss": 2.673, "step": 520 }, { "epoch": 0.01, "learning_rate": 0.00039995515158716417, "loss": 2.6212, "step": 530 }, { "epoch": 0.01, "learning_rate": 0.00039995344329453854, "loss": 2.6267, "step": 540 }, { "epoch": 0.01, "learning_rate": 0.00039995170307621114, "loss": 2.5705, "step": 550 }, { "epoch": 0.01, "learning_rate": 0.0003999499309324598, "loss": 2.539, "step": 560 }, { "epoch": 0.01, "learning_rate": 0.0003999481268635675, "loss": 2.6173, "step": 570 }, { "epoch": 0.01, "learning_rate": 0.0003999462908698223, "loss": 2.5528, "step": 580 }, { "epoch": 0.02, "learning_rate": 0.00039994442295151735, "loss": 2.5721, "step": 590 }, { "epoch": 0.02, "learning_rate": 0.00039994252310895093, "loss": 2.5522, "step": 600 }, { "epoch": 0.02, "learning_rate": 0.0003999405913424262, "loss": 2.5407, "step": 610 }, { "epoch": 0.02, "learning_rate": 0.0003999386276522518, "loss": 2.4878, "step": 620 }, { "epoch": 0.02, "learning_rate": 0.00039993663203874116, "loss": 2.4971, "step": 630 }, { "epoch": 0.02, "learning_rate": 0.00039993460450221294, "loss": 2.5823, "step": 640 }, { "epoch": 0.02, "learning_rate": 0.00039993254504299086, "loss": 2.4845, "step": 650 }, { "epoch": 0.02, "learning_rate": 0.00039993045366140375, "loss": 2.4013, "step": 660 }, { "epoch": 0.02, "learning_rate": 0.0003999283303577856, "loss": 2.4794, "step": 670 }, { "epoch": 0.02, "learning_rate": 0.00039992617513247525, "loss": 2.3949, "step": 680 }, { "epoch": 0.02, "learning_rate": 0.00039992398798581696, "loss": 2.4262, "step": 690 }, { "epoch": 0.02, "learning_rate": 0.00039992176891815996, "loss": 2.3461, "step": 700 }, { "epoch": 0.02, "learning_rate": 0.00039991951792985844, "loss": 2.3657, "step": 710 }, { "epoch": 0.02, "learning_rate": 0.00039991723502127193, "loss": 2.3655, "step": 720 }, { "epoch": 0.02, "learning_rate": 0.00039991492019276483, "loss": 2.407, "step": 730 }, { "epoch": 0.02, "learning_rate": 0.0003999125734447068, "loss": 2.3978, "step": 740 }, { "epoch": 0.02, "learning_rate": 0.00039991019477747254, "loss": 2.3706, "step": 750 }, { "epoch": 0.02, "learning_rate": 0.0003999077841914418, "loss": 2.3245, "step": 760 }, { "epoch": 0.02, "learning_rate": 0.0003999053416869995, "loss": 2.3302, "step": 770 }, { "epoch": 0.02, "learning_rate": 0.00039990286726453557, "loss": 2.3197, "step": 780 }, { "epoch": 0.02, "learning_rate": 0.0003999003609244452, "loss": 2.3013, "step": 790 }, { "epoch": 0.02, "learning_rate": 0.0003998978226671284, "loss": 2.3447, "step": 800 }, { "epoch": 0.02, "learning_rate": 0.0003998952524929906, "loss": 2.3617, "step": 810 }, { "epoch": 0.02, "learning_rate": 0.0003998926504024421, "loss": 2.3264, "step": 820 }, { "epoch": 0.02, "learning_rate": 0.0003998900163958984, "loss": 2.2853, "step": 830 }, { "epoch": 0.02, "learning_rate": 0.00039988735047377996, "loss": 2.3353, "step": 840 }, { "epoch": 0.02, "learning_rate": 0.00039988465263651263, "loss": 2.3734, "step": 850 }, { "epoch": 0.02, "learning_rate": 0.0003998819228845269, "loss": 2.2988, "step": 860 }, { "epoch": 0.02, "learning_rate": 0.0003998791612182589, "loss": 2.2447, "step": 870 }, { "epoch": 0.02, "learning_rate": 0.0003998763676381493, "loss": 2.2734, "step": 880 }, { "epoch": 0.02, "learning_rate": 0.0003998735421446444, "loss": 2.3543, "step": 890 }, { "epoch": 0.02, "learning_rate": 0.00039987068473819516, "loss": 2.3313, "step": 900 }, { "epoch": 0.02, "learning_rate": 0.00039986779541925784, "loss": 2.2926, "step": 910 }, { "epoch": 0.02, "learning_rate": 0.0003998648741882938, "loss": 2.2219, "step": 920 }, { "epoch": 0.02, "learning_rate": 0.0003998619210457695, "loss": 2.1825, "step": 930 }, { "epoch": 0.02, "learning_rate": 0.0003998589359921563, "loss": 2.2279, "step": 940 }, { "epoch": 0.02, "learning_rate": 0.0003998559190279309, "loss": 2.18, "step": 950 }, { "epoch": 0.02, "learning_rate": 0.0003998528701535751, "loss": 2.2806, "step": 960 }, { "epoch": 0.02, "learning_rate": 0.00039984978936957554, "loss": 2.2602, "step": 970 }, { "epoch": 0.02, "learning_rate": 0.00039984667667642424, "loss": 2.2162, "step": 980 }, { "epoch": 0.03, "learning_rate": 0.0003998435320746181, "loss": 2.2006, "step": 990 }, { "epoch": 0.03, "learning_rate": 0.00039984035556465925, "loss": 2.2247, "step": 1000 }, { "epoch": 0.03, "learning_rate": 0.00039983714714705485, "loss": 2.2148, "step": 1010 }, { "epoch": 0.03, "learning_rate": 0.00039983390682231726, "loss": 2.1869, "step": 1020 }, { "epoch": 0.03, "learning_rate": 0.00039983063459096376, "loss": 2.1773, "step": 1030 }, { "epoch": 0.03, "learning_rate": 0.00039982733045351677, "loss": 2.1144, "step": 1040 }, { "epoch": 0.03, "learning_rate": 0.00039982399441050397, "loss": 2.1769, "step": 1050 }, { "epoch": 0.03, "learning_rate": 0.0003998206264624579, "loss": 2.1617, "step": 1060 }, { "epoch": 0.03, "learning_rate": 0.00039981722660991634, "loss": 2.1872, "step": 1070 }, { "epoch": 0.03, "learning_rate": 0.00039981379485342223, "loss": 2.1406, "step": 1080 }, { "epoch": 0.03, "learning_rate": 0.00039981033119352335, "loss": 2.1804, "step": 1090 }, { "epoch": 0.03, "learning_rate": 0.00039980683563077286, "loss": 2.1605, "step": 1100 }, { "epoch": 0.03, "learning_rate": 0.0003998033081657288, "loss": 2.159, "step": 1110 }, { "epoch": 0.03, "learning_rate": 0.0003997997487989543, "loss": 2.1817, "step": 1120 }, { "epoch": 0.03, "learning_rate": 0.0003997961575310179, "loss": 2.1418, "step": 1130 }, { "epoch": 0.03, "learning_rate": 0.00039979253436249284, "loss": 2.166, "step": 1140 }, { "epoch": 0.03, "learning_rate": 0.0003997888792939576, "loss": 2.1097, "step": 1150 }, { "epoch": 0.03, "learning_rate": 0.00039978519232599584, "loss": 2.1546, "step": 1160 }, { "epoch": 0.03, "learning_rate": 0.00039978147345919626, "loss": 2.1352, "step": 1170 }, { "epoch": 0.03, "learning_rate": 0.00039977772269415255, "loss": 2.1723, "step": 1180 }, { "epoch": 0.03, "learning_rate": 0.00039977394003146366, "loss": 2.1614, "step": 1190 }, { "epoch": 0.03, "learning_rate": 0.00039977012547173346, "loss": 2.0975, "step": 1200 }, { "epoch": 0.03, "learning_rate": 0.00039976627901557114, "loss": 2.1734, "step": 1210 }, { "epoch": 0.03, "learning_rate": 0.0003997624006635907, "loss": 2.1047, "step": 1220 }, { "epoch": 0.03, "learning_rate": 0.00039975849041641153, "loss": 2.1343, "step": 1230 }, { "epoch": 0.03, "learning_rate": 0.00039975454827465777, "loss": 2.0768, "step": 1240 }, { "epoch": 0.03, "learning_rate": 0.00039975057423895905, "loss": 2.1033, "step": 1250 }, { "epoch": 0.03, "learning_rate": 0.0003997465683099497, "loss": 2.1109, "step": 1260 }, { "epoch": 0.03, "learning_rate": 0.00039974253048826944, "loss": 2.1695, "step": 1270 }, { "epoch": 0.03, "learning_rate": 0.00039973846077456305, "loss": 2.1097, "step": 1280 }, { "epoch": 0.03, "learning_rate": 0.00039973435916948013, "loss": 2.0771, "step": 1290 }, { "epoch": 0.03, "learning_rate": 0.0003997302256736757, "loss": 2.1272, "step": 1300 }, { "epoch": 0.03, "learning_rate": 0.00039972606028780967, "loss": 2.1463, "step": 1310 }, { "epoch": 0.03, "learning_rate": 0.00039972186301254713, "loss": 2.0335, "step": 1320 }, { "epoch": 0.03, "learning_rate": 0.00039971763384855823, "loss": 2.0344, "step": 1330 }, { "epoch": 0.03, "learning_rate": 0.0003997133727965183, "loss": 2.145, "step": 1340 }, { "epoch": 0.03, "learning_rate": 0.0003997090798571076, "loss": 2.1313, "step": 1350 }, { "epoch": 0.03, "learning_rate": 0.00039970475503101164, "loss": 2.1041, "step": 1360 }, { "epoch": 0.03, "learning_rate": 0.00039970039831892083, "loss": 2.0418, "step": 1370 }, { "epoch": 0.04, "learning_rate": 0.0003996960097215309, "loss": 2.0691, "step": 1380 }, { "epoch": 0.04, "learning_rate": 0.0003996915892395425, "loss": 2.0488, "step": 1390 }, { "epoch": 0.04, "learning_rate": 0.00039968713687366135, "loss": 2.104, "step": 1400 }, { "epoch": 0.04, "learning_rate": 0.0003996826526245985, "loss": 2.107, "step": 1410 }, { "epoch": 0.04, "learning_rate": 0.00039967813649306993, "loss": 2.023, "step": 1420 }, { "epoch": 0.04, "learning_rate": 0.00039967358847979654, "loss": 2.1037, "step": 1430 }, { "epoch": 0.04, "learning_rate": 0.00039966900858550466, "loss": 2.1022, "step": 1440 }, { "epoch": 0.04, "learning_rate": 0.0003996643968109254, "loss": 2.1065, "step": 1450 }, { "epoch": 0.04, "learning_rate": 0.0003996597531567953, "loss": 2.0959, "step": 1460 }, { "epoch": 0.04, "learning_rate": 0.00039965507762385554, "loss": 2.0508, "step": 1470 }, { "epoch": 0.04, "learning_rate": 0.00039965037021285287, "loss": 2.0902, "step": 1480 }, { "epoch": 0.04, "learning_rate": 0.00039964563092453876, "loss": 2.0, "step": 1490 }, { "epoch": 0.04, "learning_rate": 0.0003996408597596699, "loss": 2.0425, "step": 1500 }, { "epoch": 0.04, "learning_rate": 0.0003996360567190082, "loss": 2.0393, "step": 1510 }, { "epoch": 0.04, "learning_rate": 0.0003996312218033205, "loss": 2.0449, "step": 1520 }, { "epoch": 0.04, "learning_rate": 0.00039962635501337876, "loss": 2.1379, "step": 1530 }, { "epoch": 0.04, "learning_rate": 0.00039962145634995995, "loss": 2.1077, "step": 1540 }, { "epoch": 0.04, "learning_rate": 0.00039961652581384634, "loss": 2.0143, "step": 1550 }, { "epoch": 0.04, "learning_rate": 0.00039961156340582516, "loss": 2.0115, "step": 1560 }, { "epoch": 0.04, "learning_rate": 0.00039960656912668864, "loss": 1.9883, "step": 1570 }, { "epoch": 0.04, "learning_rate": 0.0003996015429772343, "loss": 2.0018, "step": 1580 }, { "epoch": 0.04, "learning_rate": 0.0003995964849582646, "loss": 1.9963, "step": 1590 }, { "epoch": 0.04, "learning_rate": 0.00039959139507058706, "loss": 1.9747, "step": 1600 }, { "epoch": 0.04, "learning_rate": 0.0003995862733150145, "loss": 2.0209, "step": 1610 }, { "epoch": 0.04, "learning_rate": 0.00039958111969236456, "loss": 2.05, "step": 1620 }, { "epoch": 0.04, "learning_rate": 0.00039957593420346024, "loss": 2.0429, "step": 1630 }, { "epoch": 0.04, "learning_rate": 0.0003995707168491293, "loss": 2.0028, "step": 1640 }, { "epoch": 0.04, "learning_rate": 0.0003995654676302049, "loss": 1.9768, "step": 1650 }, { "epoch": 0.04, "learning_rate": 0.0003995601865475252, "loss": 1.9517, "step": 1660 }, { "epoch": 0.04, "learning_rate": 0.0003995548736019333, "loss": 1.9689, "step": 1670 }, { "epoch": 0.04, "learning_rate": 0.00039954952879427754, "loss": 2.0619, "step": 1680 }, { "epoch": 0.04, "learning_rate": 0.0003995441521254113, "loss": 2.0244, "step": 1690 }, { "epoch": 0.04, "learning_rate": 0.000399538743596193, "loss": 2.0409, "step": 1700 }, { "epoch": 0.04, "learning_rate": 0.00039953330320748623, "loss": 2.1296, "step": 1710 }, { "epoch": 0.04, "learning_rate": 0.00039952783096015973, "loss": 2.0191, "step": 1720 }, { "epoch": 0.04, "learning_rate": 0.00039952232685508713, "loss": 2.036, "step": 1730 }, { "epoch": 0.04, "learning_rate": 0.00039951679089314724, "loss": 2.0275, "step": 1740 }, { "epoch": 0.04, "learning_rate": 0.000399511223075224, "loss": 2.0407, "step": 1750 }, { "epoch": 0.04, "learning_rate": 0.0003995056234022064, "loss": 1.9921, "step": 1760 }, { "epoch": 0.05, "learning_rate": 0.00039949999187498844, "loss": 1.9966, "step": 1770 }, { "epoch": 0.05, "learning_rate": 0.0003994943284944694, "loss": 1.9928, "step": 1780 }, { "epoch": 0.05, "learning_rate": 0.0003994886332615535, "loss": 1.9647, "step": 1790 }, { "epoch": 0.05, "learning_rate": 0.0003994829061771499, "loss": 1.9395, "step": 1800 }, { "epoch": 0.05, "learning_rate": 0.0003994771472421733, "loss": 1.9352, "step": 1810 }, { "epoch": 0.05, "learning_rate": 0.00039947135645754305, "loss": 1.9727, "step": 1820 }, { "epoch": 0.05, "learning_rate": 0.00039946553382418375, "loss": 2.0184, "step": 1830 }, { "epoch": 0.05, "learning_rate": 0.00039945967934302504, "loss": 1.9454, "step": 1840 }, { "epoch": 0.05, "learning_rate": 0.0003994537930150018, "loss": 1.9974, "step": 1850 }, { "epoch": 0.05, "learning_rate": 0.00039944787484105375, "loss": 1.9923, "step": 1860 }, { "epoch": 0.05, "learning_rate": 0.00039944192482212584, "loss": 1.9374, "step": 1870 }, { "epoch": 0.05, "learning_rate": 0.0003994359429591681, "loss": 2.035, "step": 1880 }, { "epoch": 0.05, "learning_rate": 0.0003994299292531357, "loss": 1.9926, "step": 1890 }, { "epoch": 0.05, "learning_rate": 0.00039942388370498873, "loss": 1.9723, "step": 1900 }, { "epoch": 0.05, "learning_rate": 0.0003994178063156925, "loss": 1.9967, "step": 1910 }, { "epoch": 0.05, "learning_rate": 0.0003994116970862173, "loss": 2.0542, "step": 1920 }, { "epoch": 0.05, "learning_rate": 0.0003994055560175387, "loss": 2.0183, "step": 1930 }, { "epoch": 0.05, "learning_rate": 0.000399399383110637, "loss": 2.0496, "step": 1940 }, { "epoch": 0.05, "learning_rate": 0.000399393178366498, "loss": 1.9737, "step": 1950 }, { "epoch": 0.05, "learning_rate": 0.0003993869417861123, "loss": 1.9379, "step": 1960 }, { "epoch": 0.05, "learning_rate": 0.0003993806733704757, "loss": 2.0235, "step": 1970 }, { "epoch": 0.05, "learning_rate": 0.00039937437312058903, "loss": 2.0181, "step": 1980 }, { "epoch": 0.05, "learning_rate": 0.00039936804103745825, "loss": 2.0143, "step": 1990 }, { "epoch": 0.05, "learning_rate": 0.0003993616771220944, "loss": 1.9753, "step": 2000 }, { "epoch": 0.05, "learning_rate": 0.0003993552813755134, "loss": 1.9484, "step": 2010 }, { "epoch": 0.05, "learning_rate": 0.0003993488537987366, "loss": 1.964, "step": 2020 }, { "epoch": 0.05, "learning_rate": 0.00039934239439279025, "loss": 1.9522, "step": 2030 }, { "epoch": 0.05, "learning_rate": 0.00039933590315870566, "loss": 2.0053, "step": 2040 }, { "epoch": 0.05, "learning_rate": 0.0003993293800975193, "loss": 1.9453, "step": 2050 }, { "epoch": 0.05, "learning_rate": 0.00039932282521027264, "loss": 1.8729, "step": 2060 }, { "epoch": 0.05, "learning_rate": 0.00039931623849801223, "loss": 2.0346, "step": 2070 }, { "epoch": 0.05, "learning_rate": 0.0003993096199617899, "loss": 1.9505, "step": 2080 }, { "epoch": 0.05, "learning_rate": 0.00039930296960266224, "loss": 1.9834, "step": 2090 }, { "epoch": 0.05, "learning_rate": 0.0003992962874216911, "loss": 1.9864, "step": 2100 }, { "epoch": 0.05, "learning_rate": 0.0003992895734199434, "loss": 1.9417, "step": 2110 }, { "epoch": 0.05, "learning_rate": 0.00039928282759849125, "loss": 1.9021, "step": 2120 }, { "epoch": 0.05, "learning_rate": 0.0003992760499584116, "loss": 1.9185, "step": 2130 }, { "epoch": 0.05, "learning_rate": 0.0003992692405007867, "loss": 1.9042, "step": 2140 }, { "epoch": 0.05, "learning_rate": 0.00039926239922670367, "loss": 1.9236, "step": 2150 }, { "epoch": 0.05, "learning_rate": 0.00039925552613725496, "loss": 2.0219, "step": 2160 }, { "epoch": 0.06, "learning_rate": 0.00039924862123353786, "loss": 1.9175, "step": 2170 }, { "epoch": 0.06, "learning_rate": 0.0003992416845166549, "loss": 1.9339, "step": 2180 }, { "epoch": 0.06, "learning_rate": 0.0003992347159877136, "loss": 1.8682, "step": 2190 }, { "epoch": 0.06, "learning_rate": 0.00039922771564782663, "loss": 1.9337, "step": 2200 }, { "epoch": 0.06, "learning_rate": 0.0003992206834981118, "loss": 1.9015, "step": 2210 }, { "epoch": 0.06, "learning_rate": 0.0003992136195396916, "loss": 1.9687, "step": 2220 }, { "epoch": 0.06, "learning_rate": 0.00039920652377369427, "loss": 1.9354, "step": 2230 }, { "epoch": 0.06, "learning_rate": 0.0003991993962012525, "loss": 1.9228, "step": 2240 }, { "epoch": 0.06, "learning_rate": 0.00039919223682350437, "loss": 1.9972, "step": 2250 }, { "epoch": 0.06, "learning_rate": 0.000399185045641593, "loss": 1.9626, "step": 2260 }, { "epoch": 0.06, "learning_rate": 0.0003991778226566667, "loss": 1.9724, "step": 2270 }, { "epoch": 0.06, "learning_rate": 0.00039917056786987863, "loss": 1.8766, "step": 2280 }, { "epoch": 0.06, "learning_rate": 0.00039916328128238704, "loss": 1.8419, "step": 2290 }, { "epoch": 0.06, "learning_rate": 0.0003991559628953555, "loss": 1.855, "step": 2300 }, { "epoch": 0.06, "learning_rate": 0.0003991486127099525, "loss": 1.966, "step": 2310 }, { "epoch": 0.06, "learning_rate": 0.0003991412307273515, "loss": 1.9684, "step": 2320 }, { "epoch": 0.06, "learning_rate": 0.00039913381694873113, "loss": 1.9364, "step": 2330 }, { "epoch": 0.06, "learning_rate": 0.0003991263713752753, "loss": 1.9438, "step": 2340 }, { "epoch": 0.06, "learning_rate": 0.0003991188940081727, "loss": 1.9081, "step": 2350 }, { "epoch": 0.06, "learning_rate": 0.00039911138484861707, "loss": 1.8837, "step": 2360 }, { "epoch": 0.06, "learning_rate": 0.00039910384389780764, "loss": 1.8935, "step": 2370 }, { "epoch": 0.06, "learning_rate": 0.0003990962711569483, "loss": 1.8873, "step": 2380 }, { "epoch": 0.06, "learning_rate": 0.0003990886666272481, "loss": 1.8894, "step": 2390 }, { "epoch": 0.06, "learning_rate": 0.0003990810303099213, "loss": 1.9223, "step": 2400 }, { "epoch": 0.06, "learning_rate": 0.0003990733622061872, "loss": 1.8759, "step": 2410 }, { "epoch": 0.06, "learning_rate": 0.0003990656623172701, "loss": 1.9005, "step": 2420 }, { "epoch": 0.06, "learning_rate": 0.0003990579306443993, "loss": 1.9132, "step": 2430 }, { "epoch": 0.06, "learning_rate": 0.00039905016718880937, "loss": 1.8808, "step": 2440 }, { "epoch": 0.06, "learning_rate": 0.0003990423719517399, "loss": 1.891, "step": 2450 }, { "epoch": 0.06, "learning_rate": 0.0003990345449344355, "loss": 1.8811, "step": 2460 }, { "epoch": 0.06, "learning_rate": 0.0003990266861381459, "loss": 1.8724, "step": 2470 }, { "epoch": 0.06, "learning_rate": 0.00039901879556412576, "loss": 1.9121, "step": 2480 }, { "epoch": 0.06, "learning_rate": 0.00039901087321363507, "loss": 1.9112, "step": 2490 }, { "epoch": 0.06, "learning_rate": 0.0003990029190879387, "loss": 1.931, "step": 2500 }, { "epoch": 0.06, "learning_rate": 0.00039899493318830664, "loss": 1.8886, "step": 2510 }, { "epoch": 0.06, "learning_rate": 0.00039898691551601396, "loss": 1.953, "step": 2520 }, { "epoch": 0.06, "learning_rate": 0.0003989788660723408, "loss": 1.9476, "step": 2530 }, { "epoch": 0.06, "learning_rate": 0.00039897078485857257, "loss": 1.916, "step": 2540 }, { "epoch": 0.06, "learning_rate": 0.0003989626718759993, "loss": 1.9513, "step": 2550 }, { "epoch": 0.07, "learning_rate": 0.00039895452712591646, "loss": 1.9378, "step": 2560 }, { "epoch": 0.07, "learning_rate": 0.0003989463506096244, "loss": 1.9231, "step": 2570 }, { "epoch": 0.07, "learning_rate": 0.00039893814232842877, "loss": 1.8928, "step": 2580 }, { "epoch": 0.07, "learning_rate": 0.0003989299022836401, "loss": 1.8642, "step": 2590 }, { "epoch": 0.07, "learning_rate": 0.00039892163047657405, "loss": 1.8858, "step": 2600 }, { "epoch": 0.07, "learning_rate": 0.0003989133269085513, "loss": 1.9193, "step": 2610 }, { "epoch": 0.07, "learning_rate": 0.0003989049915808977, "loss": 1.8847, "step": 2620 }, { "epoch": 0.07, "learning_rate": 0.0003988966244949441, "loss": 1.9073, "step": 2630 }, { "epoch": 0.07, "learning_rate": 0.0003988882256520264, "loss": 1.9436, "step": 2640 }, { "epoch": 0.07, "learning_rate": 0.00039887979505348565, "loss": 1.9591, "step": 2650 }, { "epoch": 0.07, "learning_rate": 0.0003988713327006679, "loss": 1.8816, "step": 2660 }, { "epoch": 0.07, "learning_rate": 0.0003988628385949242, "loss": 1.9548, "step": 2670 }, { "epoch": 0.07, "learning_rate": 0.00039885431273761095, "loss": 1.9138, "step": 2680 }, { "epoch": 0.07, "learning_rate": 0.0003988457551300894, "loss": 2.158, "step": 2690 }, { "epoch": 0.07, "learning_rate": 0.00039883716577372583, "loss": 2.0845, "step": 2700 }, { "epoch": 0.07, "learning_rate": 0.00039882854466989176, "loss": 2.0201, "step": 2710 }, { "epoch": 0.07, "learning_rate": 0.00039881989181996354, "loss": 1.9172, "step": 2720 }, { "epoch": 0.07, "learning_rate": 0.00039881120722532284, "loss": 1.9177, "step": 2730 }, { "epoch": 0.07, "learning_rate": 0.0003988024908873563, "loss": 1.9243, "step": 2740 }, { "epoch": 0.07, "learning_rate": 0.0003987937428074556, "loss": 1.9129, "step": 2750 }, { "epoch": 0.07, "learning_rate": 0.00039878496298701745, "loss": 1.8875, "step": 2760 }, { "epoch": 0.07, "learning_rate": 0.00039877615142744376, "loss": 1.8674, "step": 2770 }, { "epoch": 0.07, "learning_rate": 0.0003987673081301414, "loss": 1.913, "step": 2780 }, { "epoch": 0.07, "learning_rate": 0.00039875843309652233, "loss": 1.9136, "step": 2790 }, { "epoch": 0.07, "learning_rate": 0.0003987495263280037, "loss": 1.8634, "step": 2800 }, { "epoch": 0.07, "learning_rate": 0.00039874058782600745, "loss": 1.8743, "step": 2810 }, { "epoch": 0.07, "learning_rate": 0.00039873161759196085, "loss": 1.7969, "step": 2820 }, { "epoch": 0.07, "learning_rate": 0.00039872261562729606, "loss": 1.9213, "step": 2830 }, { "epoch": 0.07, "learning_rate": 0.00039871358193345046, "loss": 1.8824, "step": 2840 }, { "epoch": 0.07, "learning_rate": 0.0003987045165118664, "loss": 1.9052, "step": 2850 }, { "epoch": 0.07, "learning_rate": 0.00039869541936399136, "loss": 1.8925, "step": 2860 }, { "epoch": 0.07, "learning_rate": 0.0003986862904912777, "loss": 1.8846, "step": 2870 }, { "epoch": 0.07, "learning_rate": 0.0003986771298951832, "loss": 1.8653, "step": 2880 }, { "epoch": 0.07, "learning_rate": 0.0003986679375771703, "loss": 1.8782, "step": 2890 }, { "epoch": 0.07, "learning_rate": 0.00039865871353870683, "loss": 1.959, "step": 2900 }, { "epoch": 0.07, "learning_rate": 0.0003986494577812655, "loss": 1.8541, "step": 2910 }, { "epoch": 0.07, "learning_rate": 0.0003986401703063241, "loss": 1.8953, "step": 2920 }, { "epoch": 0.07, "learning_rate": 0.0003986308511153656, "loss": 1.9349, "step": 2930 }, { "epoch": 0.07, "learning_rate": 0.0003986215002098778, "loss": 1.8454, "step": 2940 }, { "epoch": 0.08, "learning_rate": 0.00039861211759135393, "loss": 1.8725, "step": 2950 }, { "epoch": 0.08, "learning_rate": 0.000398602703261292, "loss": 1.9361, "step": 2960 }, { "epoch": 0.08, "learning_rate": 0.000398593257221195, "loss": 1.886, "step": 2970 }, { "epoch": 0.08, "learning_rate": 0.00039858377947257133, "loss": 1.9202, "step": 2980 }, { "epoch": 0.08, "learning_rate": 0.0003985742700169342, "loss": 1.9123, "step": 2990 }, { "epoch": 0.08, "learning_rate": 0.00039856472885580195, "loss": 1.9366, "step": 3000 }, { "epoch": 0.08, "learning_rate": 0.000398555155990698, "loss": 1.8829, "step": 3010 }, { "epoch": 0.08, "learning_rate": 0.00039854555142315067, "loss": 1.9275, "step": 3020 }, { "epoch": 0.08, "learning_rate": 0.00039853591515469366, "loss": 1.9413, "step": 3030 }, { "epoch": 0.08, "learning_rate": 0.00039852624718686544, "loss": 1.8638, "step": 3040 }, { "epoch": 0.08, "learning_rate": 0.00039851654752120976, "loss": 1.9021, "step": 3050 }, { "epoch": 0.08, "learning_rate": 0.00039850681615927517, "loss": 1.9018, "step": 3060 }, { "epoch": 0.08, "learning_rate": 0.00039849705310261556, "loss": 1.8719, "step": 3070 }, { "epoch": 0.08, "learning_rate": 0.00039848725835278973, "loss": 1.8713, "step": 3080 }, { "epoch": 0.08, "learning_rate": 0.00039847743191136144, "loss": 1.8903, "step": 3090 }, { "epoch": 0.08, "learning_rate": 0.00039846757377989985, "loss": 1.8186, "step": 3100 }, { "epoch": 0.08, "learning_rate": 0.00039845768395997883, "loss": 1.8552, "step": 3110 }, { "epoch": 0.08, "learning_rate": 0.00039844776245317743, "loss": 1.8805, "step": 3120 }, { "epoch": 0.08, "learning_rate": 0.0003984378092610799, "loss": 1.9612, "step": 3130 }, { "epoch": 0.08, "learning_rate": 0.00039842782438527524, "loss": 1.8515, "step": 3140 }, { "epoch": 0.08, "learning_rate": 0.0003984178078273579, "loss": 1.8468, "step": 3150 }, { "epoch": 0.08, "learning_rate": 0.00039840775958892695, "loss": 1.8468, "step": 3160 }, { "epoch": 0.08, "learning_rate": 0.00039839767967158696, "loss": 1.8479, "step": 3170 }, { "epoch": 0.08, "learning_rate": 0.00039838756807694724, "loss": 1.8419, "step": 3180 }, { "epoch": 0.08, "learning_rate": 0.0003983774248066222, "loss": 1.8238, "step": 3190 }, { "epoch": 0.08, "learning_rate": 0.0003983672498622316, "loss": 1.8248, "step": 3200 }, { "epoch": 0.08, "learning_rate": 0.0003983570432453998, "loss": 1.8572, "step": 3210 }, { "epoch": 0.08, "learning_rate": 0.00039834680495775654, "loss": 1.9093, "step": 3220 }, { "epoch": 0.08, "learning_rate": 0.00039833653500093646, "loss": 1.898, "step": 3230 }, { "epoch": 0.08, "learning_rate": 0.00039832623337657946, "loss": 1.847, "step": 3240 }, { "epoch": 0.08, "learning_rate": 0.00039831590008633023, "loss": 1.8168, "step": 3250 }, { "epoch": 0.08, "learning_rate": 0.0003983055351318387, "loss": 1.8661, "step": 3260 }, { "epoch": 0.08, "learning_rate": 0.00039829513851475973, "loss": 1.882, "step": 3270 }, { "epoch": 0.08, "learning_rate": 0.00039828471023675336, "loss": 1.8127, "step": 3280 }, { "epoch": 0.08, "learning_rate": 0.0003982742502994847, "loss": 1.8136, "step": 3290 }, { "epoch": 0.08, "learning_rate": 0.00039826375870462364, "loss": 1.8412, "step": 3300 }, { "epoch": 0.08, "learning_rate": 0.00039825323545384555, "loss": 1.8997, "step": 3310 }, { "epoch": 0.08, "learning_rate": 0.00039824268054883044, "loss": 1.8213, "step": 3320 }, { "epoch": 0.08, "learning_rate": 0.0003982320939912637, "loss": 1.8443, "step": 3330 }, { "epoch": 0.08, "learning_rate": 0.00039822147578283564, "loss": 1.8576, "step": 3340 }, { "epoch": 0.09, "learning_rate": 0.0003982108259252415, "loss": 1.8235, "step": 3350 }, { "epoch": 0.09, "learning_rate": 0.0003982001444201818, "loss": 1.8112, "step": 3360 }, { "epoch": 0.09, "learning_rate": 0.00039818943126936204, "loss": 1.8501, "step": 3370 }, { "epoch": 0.09, "learning_rate": 0.0003981786864744926, "loss": 1.9042, "step": 3380 }, { "epoch": 0.09, "learning_rate": 0.00039816791003728923, "loss": 1.8704, "step": 3390 }, { "epoch": 0.09, "learning_rate": 0.00039815710195947244, "loss": 1.8226, "step": 3400 }, { "epoch": 0.09, "learning_rate": 0.00039814626224276797, "loss": 1.8682, "step": 3410 }, { "epoch": 0.09, "learning_rate": 0.00039813539088890646, "loss": 1.822, "step": 3420 }, { "epoch": 0.09, "learning_rate": 0.0003981244878996238, "loss": 1.8546, "step": 3430 }, { "epoch": 0.09, "learning_rate": 0.0003981135532766607, "loss": 1.8375, "step": 3440 }, { "epoch": 0.09, "learning_rate": 0.00039810258702176323, "loss": 1.9318, "step": 3450 }, { "epoch": 0.09, "learning_rate": 0.00039809158913668214, "loss": 1.8846, "step": 3460 }, { "epoch": 0.09, "learning_rate": 0.0003980805596231735, "loss": 1.841, "step": 3470 }, { "epoch": 0.09, "learning_rate": 0.0003980694984829984, "loss": 1.7964, "step": 3480 }, { "epoch": 0.09, "learning_rate": 0.00039805840571792276, "loss": 1.8562, "step": 3490 }, { "epoch": 0.09, "learning_rate": 0.0003980472813297179, "loss": 1.8546, "step": 3500 }, { "epoch": 0.09, "learning_rate": 0.00039803612532015984, "loss": 1.8304, "step": 3510 }, { "epoch": 0.09, "learning_rate": 0.00039802493769102994, "loss": 1.8223, "step": 3520 }, { "epoch": 0.09, "learning_rate": 0.0003980137184441143, "loss": 1.8645, "step": 3530 }, { "epoch": 0.09, "learning_rate": 0.0003980024675812046, "loss": 1.8605, "step": 3540 }, { "epoch": 0.09, "learning_rate": 0.0003979911851040968, "loss": 1.8072, "step": 3550 }, { "epoch": 0.09, "learning_rate": 0.0003979798710145926, "loss": 1.8886, "step": 3560 }, { "epoch": 0.09, "learning_rate": 0.0003979685253144984, "loss": 1.7982, "step": 3570 }, { "epoch": 0.09, "learning_rate": 0.0003979571480056257, "loss": 1.8072, "step": 3580 }, { "epoch": 0.09, "learning_rate": 0.0003979457390897911, "loss": 1.8435, "step": 3590 }, { "epoch": 0.09, "learning_rate": 0.0003979342985688161, "loss": 1.832, "step": 3600 }, { "epoch": 0.09, "learning_rate": 0.0003979228264445275, "loss": 1.8205, "step": 3610 }, { "epoch": 0.09, "learning_rate": 0.0003979113227187569, "loss": 1.8369, "step": 3620 }, { "epoch": 0.09, "learning_rate": 0.0003978997873933412, "loss": 1.8123, "step": 3630 }, { "epoch": 0.09, "learning_rate": 0.000397888220470122, "loss": 1.7854, "step": 3640 }, { "epoch": 0.09, "learning_rate": 0.0003978766219509463, "loss": 1.7781, "step": 3650 }, { "epoch": 0.09, "learning_rate": 0.0003978649918376658, "loss": 1.8043, "step": 3660 }, { "epoch": 0.09, "learning_rate": 0.00039785333013213765, "loss": 1.8317, "step": 3670 }, { "epoch": 0.09, "learning_rate": 0.00039784163683622374, "loss": 1.7834, "step": 3680 }, { "epoch": 0.09, "learning_rate": 0.0003978299119517911, "loss": 1.8493, "step": 3690 }, { "epoch": 0.09, "learning_rate": 0.00039781815548071167, "loss": 1.7785, "step": 3700 }, { "epoch": 0.09, "learning_rate": 0.0003978063674248627, "loss": 1.8433, "step": 3710 }, { "epoch": 0.09, "learning_rate": 0.0003977945477861263, "loss": 1.7909, "step": 3720 }, { "epoch": 0.09, "learning_rate": 0.0003977826965663896, "loss": 1.83, "step": 3730 }, { "epoch": 0.1, "learning_rate": 0.00039777081376754487, "loss": 1.7919, "step": 3740 }, { "epoch": 0.1, "learning_rate": 0.00039775889939148946, "loss": 1.7883, "step": 3750 }, { "epoch": 0.1, "learning_rate": 0.0003977469534401256, "loss": 1.7908, "step": 3760 }, { "epoch": 0.1, "learning_rate": 0.00039773497591536063, "loss": 1.7429, "step": 3770 }, { "epoch": 0.1, "learning_rate": 0.000397722966819107, "loss": 1.8088, "step": 3780 }, { "epoch": 0.1, "learning_rate": 0.00039771092615328217, "loss": 1.8726, "step": 3790 }, { "epoch": 0.1, "learning_rate": 0.0003976988539198086, "loss": 1.8244, "step": 3800 }, { "epoch": 0.1, "learning_rate": 0.0003976867501206138, "loss": 1.8047, "step": 3810 }, { "epoch": 0.1, "learning_rate": 0.0003976746147576303, "loss": 1.8664, "step": 3820 }, { "epoch": 0.1, "learning_rate": 0.0003976624478327958, "loss": 1.8955, "step": 3830 }, { "epoch": 0.1, "learning_rate": 0.00039765024934805283, "loss": 1.8355, "step": 3840 }, { "epoch": 0.1, "learning_rate": 0.00039763801930534917, "loss": 1.8964, "step": 3850 }, { "epoch": 0.1, "learning_rate": 0.00039762575770663737, "loss": 1.9149, "step": 3860 }, { "epoch": 0.1, "learning_rate": 0.00039761346455387535, "loss": 1.878, "step": 3870 }, { "epoch": 0.1, "learning_rate": 0.0003976011398490259, "loss": 1.7572, "step": 3880 }, { "epoch": 0.1, "learning_rate": 0.00039758878359405676, "loss": 1.786, "step": 3890 }, { "epoch": 0.1, "learning_rate": 0.00039757639579094084, "loss": 1.8305, "step": 3900 }, { "epoch": 0.1, "learning_rate": 0.0003975639764416561, "loss": 1.7545, "step": 3910 }, { "epoch": 0.1, "learning_rate": 0.00039755152554818543, "loss": 1.7633, "step": 3920 }, { "epoch": 0.1, "learning_rate": 0.0003975390431125168, "loss": 1.7908, "step": 3930 }, { "epoch": 0.1, "learning_rate": 0.0003975265291366432, "loss": 1.8037, "step": 3940 }, { "epoch": 0.1, "learning_rate": 0.0003975139836225628, "loss": 1.8299, "step": 3950 }, { "epoch": 0.1, "learning_rate": 0.00039750140657227853, "loss": 1.7984, "step": 3960 }, { "epoch": 0.1, "learning_rate": 0.00039748879798779866, "loss": 1.8757, "step": 3970 }, { "epoch": 0.1, "learning_rate": 0.0003974761578711363, "loss": 1.764, "step": 3980 }, { "epoch": 0.1, "learning_rate": 0.0003974634862243096, "loss": 1.7753, "step": 3990 }, { "epoch": 0.1, "learning_rate": 0.0003974507830493418, "loss": 1.7783, "step": 4000 }, { "epoch": 0.1, "learning_rate": 0.0003974380483482612, "loss": 1.8552, "step": 4010 }, { "epoch": 0.1, "learning_rate": 0.0003974252821231011, "loss": 1.8819, "step": 4020 }, { "epoch": 0.1, "learning_rate": 0.0003974124843758998, "loss": 1.8257, "step": 4030 }, { "epoch": 0.1, "learning_rate": 0.0003973996551087006, "loss": 1.8212, "step": 4040 }, { "epoch": 0.1, "learning_rate": 0.00039738679432355193, "loss": 1.84, "step": 4050 }, { "epoch": 0.1, "learning_rate": 0.0003973739020225073, "loss": 1.8493, "step": 4060 }, { "epoch": 0.1, "learning_rate": 0.0003973609782076251, "loss": 1.8039, "step": 4070 }, { "epoch": 0.1, "learning_rate": 0.0003973480228809689, "loss": 1.7981, "step": 4080 }, { "epoch": 0.1, "learning_rate": 0.00039733503604460704, "loss": 1.8088, "step": 4090 }, { "epoch": 0.1, "learning_rate": 0.0003973220177006132, "loss": 1.7446, "step": 4100 }, { "epoch": 0.1, "learning_rate": 0.00039730896785106597, "loss": 1.8402, "step": 4110 }, { "epoch": 0.1, "learning_rate": 0.00039729588649804894, "loss": 1.799, "step": 4120 }, { "epoch": 0.11, "learning_rate": 0.0003972827736436507, "loss": 1.8064, "step": 4130 }, { "epoch": 0.11, "learning_rate": 0.00039726962928996503, "loss": 1.7946, "step": 4140 }, { "epoch": 0.11, "learning_rate": 0.00039725645343909055, "loss": 1.8193, "step": 4150 }, { "epoch": 0.11, "learning_rate": 0.00039724324609313104, "loss": 1.7728, "step": 4160 }, { "epoch": 0.11, "learning_rate": 0.00039723000725419517, "loss": 1.8646, "step": 4170 }, { "epoch": 0.11, "learning_rate": 0.00039721673692439687, "loss": 1.8326, "step": 4180 }, { "epoch": 0.11, "learning_rate": 0.00039720343510585483, "loss": 1.8098, "step": 4190 }, { "epoch": 0.11, "learning_rate": 0.00039719010180069294, "loss": 1.8529, "step": 4200 }, { "epoch": 0.11, "learning_rate": 0.0003971767370110401, "loss": 1.811, "step": 4210 }, { "epoch": 0.11, "learning_rate": 0.00039716334073903016, "loss": 1.8112, "step": 4220 }, { "epoch": 0.11, "learning_rate": 0.0003971499129868021, "loss": 1.7949, "step": 4230 }, { "epoch": 0.11, "learning_rate": 0.00039713645375649985, "loss": 1.8224, "step": 4240 }, { "epoch": 0.11, "learning_rate": 0.0003971229630502723, "loss": 1.8517, "step": 4250 }, { "epoch": 0.11, "learning_rate": 0.0003971094408702736, "loss": 1.8168, "step": 4260 }, { "epoch": 0.11, "learning_rate": 0.00039709588721866267, "loss": 1.7842, "step": 4270 }, { "epoch": 0.11, "learning_rate": 0.00039708230209760365, "loss": 1.8217, "step": 4280 }, { "epoch": 0.11, "learning_rate": 0.0003970686855092655, "loss": 1.7968, "step": 4290 }, { "epoch": 0.11, "learning_rate": 0.0003970550374558224, "loss": 1.7345, "step": 4300 }, { "epoch": 0.11, "learning_rate": 0.0003970413579394535, "loss": 1.8, "step": 4310 }, { "epoch": 0.11, "learning_rate": 0.0003970276469623429, "loss": 1.808, "step": 4320 }, { "epoch": 0.11, "learning_rate": 0.0003970139045266798, "loss": 1.8049, "step": 4330 }, { "epoch": 0.11, "learning_rate": 0.0003970001306346583, "loss": 1.8013, "step": 4340 }, { "epoch": 0.11, "learning_rate": 0.0003969863252884778, "loss": 1.8282, "step": 4350 }, { "epoch": 0.11, "learning_rate": 0.0003969724884903424, "loss": 1.8122, "step": 4360 }, { "epoch": 0.11, "learning_rate": 0.0003969586202424614, "loss": 1.8082, "step": 4370 }, { "epoch": 0.11, "learning_rate": 0.0003969447205470491, "loss": 1.7345, "step": 4380 }, { "epoch": 0.11, "learning_rate": 0.00039693078940632474, "loss": 1.7867, "step": 4390 }, { "epoch": 0.11, "learning_rate": 0.0003969168268225127, "loss": 1.7596, "step": 4400 }, { "epoch": 0.11, "learning_rate": 0.0003969028327978424, "loss": 1.8147, "step": 4410 }, { "epoch": 0.11, "learning_rate": 0.0003968888073345481, "loss": 1.8479, "step": 4420 }, { "epoch": 0.11, "learning_rate": 0.00039687475043486916, "loss": 1.7887, "step": 4430 }, { "epoch": 0.11, "learning_rate": 0.00039686066210105006, "loss": 1.8487, "step": 4440 }, { "epoch": 0.11, "learning_rate": 0.00039684654233534017, "loss": 1.8783, "step": 4450 }, { "epoch": 0.11, "learning_rate": 0.00039683239113999394, "loss": 1.7634, "step": 4460 }, { "epoch": 0.11, "learning_rate": 0.0003968182085172709, "loss": 1.7534, "step": 4470 }, { "epoch": 0.11, "learning_rate": 0.0003968039944694355, "loss": 1.7874, "step": 4480 }, { "epoch": 0.11, "learning_rate": 0.00039678974899875715, "loss": 1.7343, "step": 4490 }, { "epoch": 0.11, "learning_rate": 0.0003967754721075105, "loss": 1.7943, "step": 4500 }, { "epoch": 0.11, "learning_rate": 0.00039676116379797494, "loss": 1.7694, "step": 4510 }, { "epoch": 0.11, "learning_rate": 0.0003967468240724351, "loss": 1.823, "step": 4520 }, { "epoch": 0.12, "learning_rate": 0.0003967324529331805, "loss": 1.8147, "step": 4530 }, { "epoch": 0.12, "learning_rate": 0.0003967180503825058, "loss": 1.779, "step": 4540 }, { "epoch": 0.12, "learning_rate": 0.0003967036164227105, "loss": 1.7434, "step": 4550 }, { "epoch": 0.12, "learning_rate": 0.00039668915105609925, "loss": 1.8157, "step": 4560 }, { "epoch": 0.12, "learning_rate": 0.0003966746542849816, "loss": 1.7972, "step": 4570 }, { "epoch": 0.12, "learning_rate": 0.0003966601261116723, "loss": 1.768, "step": 4580 }, { "epoch": 0.12, "learning_rate": 0.000396645566538491, "loss": 1.7521, "step": 4590 }, { "epoch": 0.12, "learning_rate": 0.0003966309755677623, "loss": 1.8067, "step": 4600 }, { "epoch": 0.12, "learning_rate": 0.0003966163532018158, "loss": 1.7907, "step": 4610 }, { "epoch": 0.12, "learning_rate": 0.0003966016994429864, "loss": 1.7259, "step": 4620 }, { "epoch": 0.12, "learning_rate": 0.0003965870142936136, "loss": 1.8267, "step": 4630 }, { "epoch": 0.12, "learning_rate": 0.0003965722977560422, "loss": 1.7433, "step": 4640 }, { "epoch": 0.12, "learning_rate": 0.000396557549832622, "loss": 1.7433, "step": 4650 }, { "epoch": 0.12, "learning_rate": 0.0003965427705257076, "loss": 1.8058, "step": 4660 }, { "epoch": 0.12, "learning_rate": 0.0003965279598376588, "loss": 1.8024, "step": 4670 }, { "epoch": 0.12, "learning_rate": 0.0003965131177708404, "loss": 1.7884, "step": 4680 }, { "epoch": 0.12, "learning_rate": 0.0003964982443276221, "loss": 1.7835, "step": 4690 }, { "epoch": 0.12, "learning_rate": 0.0003964833395103788, "loss": 1.8312, "step": 4700 }, { "epoch": 0.12, "learning_rate": 0.0003964684033214901, "loss": 1.7961, "step": 4710 }, { "epoch": 0.12, "learning_rate": 0.000396453435763341, "loss": 1.7441, "step": 4720 }, { "epoch": 0.12, "learning_rate": 0.00039643843683832115, "loss": 1.7585, "step": 4730 }, { "epoch": 0.12, "learning_rate": 0.00039642340654882544, "loss": 1.7572, "step": 4740 }, { "epoch": 0.12, "learning_rate": 0.00039640834489725366, "loss": 1.8212, "step": 4750 }, { "epoch": 0.12, "learning_rate": 0.0003963932518860106, "loss": 1.741, "step": 4760 }, { "epoch": 0.12, "learning_rate": 0.00039637812751750623, "loss": 1.7815, "step": 4770 }, { "epoch": 0.12, "learning_rate": 0.0003963629717941553, "loss": 1.7903, "step": 4780 }, { "epoch": 0.12, "learning_rate": 0.00039634778471837764, "loss": 1.768, "step": 4790 }, { "epoch": 0.12, "learning_rate": 0.0003963325662925981, "loss": 1.7375, "step": 4800 }, { "epoch": 0.12, "learning_rate": 0.00039631731651924666, "loss": 1.7954, "step": 4810 }, { "epoch": 0.12, "learning_rate": 0.000396302035400758, "loss": 1.7816, "step": 4820 }, { "epoch": 0.12, "learning_rate": 0.0003962867229395721, "loss": 1.8233, "step": 4830 }, { "epoch": 0.12, "learning_rate": 0.0003962713791381338, "loss": 1.7717, "step": 4840 }, { "epoch": 0.12, "learning_rate": 0.0003962560039988931, "loss": 1.8, "step": 4850 }, { "epoch": 0.12, "learning_rate": 0.00039624059752430473, "loss": 1.7848, "step": 4860 }, { "epoch": 0.12, "learning_rate": 0.0003962251597168286, "loss": 1.7773, "step": 4870 }, { "epoch": 0.12, "learning_rate": 0.00039620969057892967, "loss": 1.8012, "step": 4880 }, { "epoch": 0.12, "learning_rate": 0.0003961941901130778, "loss": 1.7854, "step": 4890 }, { "epoch": 0.12, "learning_rate": 0.0003961786583217478, "loss": 1.7833, "step": 4900 }, { "epoch": 0.12, "learning_rate": 0.0003961630952074197, "loss": 1.7826, "step": 4910 }, { "epoch": 0.13, "learning_rate": 0.00039614750077257826, "loss": 1.8489, "step": 4920 }, { "epoch": 0.13, "learning_rate": 0.00039613187501971346, "loss": 1.8329, "step": 4930 }, { "epoch": 0.13, "learning_rate": 0.0003961162179513202, "loss": 1.7486, "step": 4940 }, { "epoch": 0.13, "learning_rate": 0.0003961005295698984, "loss": 1.7631, "step": 4950 }, { "epoch": 0.13, "learning_rate": 0.00039608480987795284, "loss": 1.7728, "step": 4960 }, { "epoch": 0.13, "learning_rate": 0.0003960690588779935, "loss": 1.7824, "step": 4970 }, { "epoch": 0.13, "learning_rate": 0.0003960532765725352, "loss": 1.7664, "step": 4980 }, { "epoch": 0.13, "learning_rate": 0.000396037462964098, "loss": 1.7976, "step": 4990 }, { "epoch": 0.13, "learning_rate": 0.00039602161805520666, "loss": 1.7426, "step": 5000 }, { "epoch": 0.13, "learning_rate": 0.0003960057418483911, "loss": 1.806, "step": 5010 }, { "epoch": 0.13, "learning_rate": 0.0003959898343461862, "loss": 1.7553, "step": 5020 }, { "epoch": 0.13, "learning_rate": 0.0003959738955511318, "loss": 1.7616, "step": 5030 }, { "epoch": 0.13, "learning_rate": 0.00039595792546577276, "loss": 1.767, "step": 5040 }, { "epoch": 0.13, "learning_rate": 0.00039594192409265913, "loss": 1.762, "step": 5050 }, { "epoch": 0.13, "learning_rate": 0.00039592589143434565, "loss": 1.8521, "step": 5060 }, { "epoch": 0.13, "learning_rate": 0.0003959098274933921, "loss": 2.1704, "step": 5070 }, { "epoch": 0.13, "learning_rate": 0.00039589373227236354, "loss": 1.8074, "step": 5080 }, { "epoch": 0.13, "learning_rate": 0.0003958776057738297, "loss": 1.854, "step": 5090 }, { "epoch": 0.13, "learning_rate": 0.00039586144800036544, "loss": 1.7474, "step": 5100 }, { "epoch": 0.13, "learning_rate": 0.0003958452589545506, "loss": 1.7693, "step": 5110 }, { "epoch": 0.13, "learning_rate": 0.00039582903863897, "loss": 1.7798, "step": 5120 }, { "epoch": 0.13, "learning_rate": 0.00039581278705621355, "loss": 1.7063, "step": 5130 }, { "epoch": 0.13, "learning_rate": 0.000395796504208876, "loss": 1.7747, "step": 5140 }, { "epoch": 0.13, "learning_rate": 0.00039578019009955717, "loss": 1.7526, "step": 5150 }, { "epoch": 0.13, "learning_rate": 0.0003957638447308619, "loss": 1.7272, "step": 5160 }, { "epoch": 0.13, "learning_rate": 0.0003957474681053999, "loss": 1.727, "step": 5170 }, { "epoch": 0.13, "learning_rate": 0.000395731060225786, "loss": 1.7858, "step": 5180 }, { "epoch": 0.13, "learning_rate": 0.00039571462109464005, "loss": 1.7879, "step": 5190 }, { "epoch": 0.13, "learning_rate": 0.0003956981507145867, "loss": 1.7494, "step": 5200 }, { "epoch": 0.13, "learning_rate": 0.0003956816490882558, "loss": 1.7716, "step": 5210 }, { "epoch": 0.13, "learning_rate": 0.00039566511621828203, "loss": 1.7585, "step": 5220 }, { "epoch": 0.13, "learning_rate": 0.00039564855210730515, "loss": 1.7309, "step": 5230 }, { "epoch": 0.13, "learning_rate": 0.0003956319567579698, "loss": 1.7503, "step": 5240 }, { "epoch": 0.13, "learning_rate": 0.0003956153301729258, "loss": 1.7631, "step": 5250 }, { "epoch": 0.13, "learning_rate": 0.00039559867235482784, "loss": 1.6883, "step": 5260 }, { "epoch": 0.13, "learning_rate": 0.00039558198330633555, "loss": 1.7946, "step": 5270 }, { "epoch": 0.13, "learning_rate": 0.00039556526303011354, "loss": 1.7962, "step": 5280 }, { "epoch": 0.13, "learning_rate": 0.00039554851152883157, "loss": 1.7869, "step": 5290 }, { "epoch": 0.13, "learning_rate": 0.0003955317288051643, "loss": 1.7507, "step": 5300 }, { "epoch": 0.14, "learning_rate": 0.0003955149148617912, "loss": 1.7614, "step": 5310 }, { "epoch": 0.14, "learning_rate": 0.0003954980697013971, "loss": 1.7288, "step": 5320 }, { "epoch": 0.14, "learning_rate": 0.0003954811933266714, "loss": 1.8662, "step": 5330 }, { "epoch": 0.14, "learning_rate": 0.0003954642857403088, "loss": 1.7724, "step": 5340 }, { "epoch": 0.14, "learning_rate": 0.00039544734694500874, "loss": 1.8028, "step": 5350 }, { "epoch": 0.14, "learning_rate": 0.0003954303769434759, "loss": 1.7507, "step": 5360 }, { "epoch": 0.14, "learning_rate": 0.0003954133757384197, "loss": 1.74, "step": 5370 }, { "epoch": 0.14, "learning_rate": 0.0003953963433325547, "loss": 1.7622, "step": 5380 }, { "epoch": 0.14, "learning_rate": 0.00039537927972860043, "loss": 1.7755, "step": 5390 }, { "epoch": 0.14, "learning_rate": 0.0003953621849292813, "loss": 1.7695, "step": 5400 }, { "epoch": 0.14, "learning_rate": 0.00039534505893732677, "loss": 1.7455, "step": 5410 }, { "epoch": 0.14, "learning_rate": 0.0003953279017554713, "loss": 1.8059, "step": 5420 }, { "epoch": 0.14, "learning_rate": 0.0003953107133864542, "loss": 1.7896, "step": 5430 }, { "epoch": 0.14, "learning_rate": 0.00039529349383302006, "loss": 1.7552, "step": 5440 }, { "epoch": 0.14, "learning_rate": 0.00039527624309791806, "loss": 1.7444, "step": 5450 }, { "epoch": 0.14, "learning_rate": 0.00039525896118390266, "loss": 1.714, "step": 5460 }, { "epoch": 0.14, "learning_rate": 0.00039524164809373315, "loss": 1.8, "step": 5470 }, { "epoch": 0.14, "learning_rate": 0.0003952243038301738, "loss": 1.7523, "step": 5480 }, { "epoch": 0.14, "learning_rate": 0.000395206928395994, "loss": 1.7239, "step": 5490 }, { "epoch": 0.14, "learning_rate": 0.00039518952179396795, "loss": 1.7658, "step": 5500 }, { "epoch": 0.14, "learning_rate": 0.00039517208402687487, "loss": 1.7425, "step": 5510 }, { "epoch": 0.14, "learning_rate": 0.00039515461509749897, "loss": 1.7497, "step": 5520 }, { "epoch": 0.14, "learning_rate": 0.00039513711500862946, "loss": 1.7329, "step": 5530 }, { "epoch": 0.14, "learning_rate": 0.00039511958376306055, "loss": 1.7289, "step": 5540 }, { "epoch": 0.14, "learning_rate": 0.0003951020213635912, "loss": 1.7602, "step": 5550 }, { "epoch": 0.14, "learning_rate": 0.00039508442781302576, "loss": 1.8221, "step": 5560 }, { "epoch": 0.14, "learning_rate": 0.0003950668031141732, "loss": 1.8125, "step": 5570 }, { "epoch": 0.14, "learning_rate": 0.00039504914726984754, "loss": 1.7633, "step": 5580 }, { "epoch": 0.14, "learning_rate": 0.00039503146028286787, "loss": 1.7667, "step": 5590 }, { "epoch": 0.14, "learning_rate": 0.0003950137421560582, "loss": 1.7723, "step": 5600 }, { "epoch": 0.14, "learning_rate": 0.0003949959928922475, "loss": 1.7784, "step": 5610 }, { "epoch": 0.14, "learning_rate": 0.0003949782124942697, "loss": 1.7889, "step": 5620 }, { "epoch": 0.14, "learning_rate": 0.00039496040096496373, "loss": 1.776, "step": 5630 }, { "epoch": 0.14, "learning_rate": 0.00039494255830717346, "loss": 1.7307, "step": 5640 }, { "epoch": 0.14, "learning_rate": 0.00039492468452374775, "loss": 1.7699, "step": 5650 }, { "epoch": 0.14, "learning_rate": 0.00039490677961754044, "loss": 1.7956, "step": 5660 }, { "epoch": 0.14, "learning_rate": 0.0003948888435914104, "loss": 1.7993, "step": 5670 }, { "epoch": 0.14, "learning_rate": 0.00039487087644822126, "loss": 1.7359, "step": 5680 }, { "epoch": 0.14, "learning_rate": 0.0003948528781908419, "loss": 1.7289, "step": 5690 }, { "epoch": 0.14, "learning_rate": 0.0003948348488221459, "loss": 1.8222, "step": 5700 }, { "epoch": 0.15, "learning_rate": 0.00039481678834501203, "loss": 1.768, "step": 5710 }, { "epoch": 0.15, "learning_rate": 0.00039479869676232386, "loss": 1.7411, "step": 5720 }, { "epoch": 0.15, "learning_rate": 0.00039478057407697, "loss": 1.7199, "step": 5730 }, { "epoch": 0.15, "learning_rate": 0.000394762420291844, "loss": 1.7037, "step": 5740 }, { "epoch": 0.15, "learning_rate": 0.0003947442354098445, "loss": 1.7107, "step": 5750 }, { "epoch": 0.15, "learning_rate": 0.00039472601943387495, "loss": 1.7102, "step": 5760 }, { "epoch": 0.15, "learning_rate": 0.00039470777236684377, "loss": 1.7865, "step": 5770 }, { "epoch": 0.15, "learning_rate": 0.00039468949421166436, "loss": 1.7583, "step": 5780 }, { "epoch": 0.15, "learning_rate": 0.0003946711849712553, "loss": 1.7925, "step": 5790 }, { "epoch": 0.15, "learning_rate": 0.0003946528446485398, "loss": 1.7322, "step": 5800 }, { "epoch": 0.15, "learning_rate": 0.00039463447324644614, "loss": 1.7248, "step": 5810 }, { "epoch": 0.15, "learning_rate": 0.00039461607076790773, "loss": 1.7555, "step": 5820 }, { "epoch": 0.15, "learning_rate": 0.00039459763721586265, "loss": 1.7371, "step": 5830 }, { "epoch": 0.15, "learning_rate": 0.0003945791725932543, "loss": 1.713, "step": 5840 }, { "epoch": 0.15, "learning_rate": 0.0003945606769030307, "loss": 1.7754, "step": 5850 }, { "epoch": 0.15, "learning_rate": 0.00039454215014814506, "loss": 1.7139, "step": 5860 }, { "epoch": 0.15, "learning_rate": 0.0003945235923315554, "loss": 1.7106, "step": 5870 }, { "epoch": 0.15, "learning_rate": 0.00039450500345622485, "loss": 1.7615, "step": 5880 }, { "epoch": 0.15, "learning_rate": 0.0003944863835251214, "loss": 1.7612, "step": 5890 }, { "epoch": 0.15, "learning_rate": 0.0003944677325412179, "loss": 1.7308, "step": 5900 }, { "epoch": 0.15, "learning_rate": 0.0003944490505074924, "loss": 1.7304, "step": 5910 }, { "epoch": 0.15, "learning_rate": 0.00039443033742692774, "loss": 1.7037, "step": 5920 }, { "epoch": 0.15, "learning_rate": 0.00039441159330251167, "loss": 1.725, "step": 5930 }, { "epoch": 0.15, "learning_rate": 0.0003943928181372372, "loss": 1.7284, "step": 5940 }, { "epoch": 0.15, "learning_rate": 0.00039437401193410183, "loss": 1.7352, "step": 5950 }, { "epoch": 0.15, "learning_rate": 0.0003943551746961084, "loss": 1.7093, "step": 5960 }, { "epoch": 0.15, "learning_rate": 0.0003943363064262646, "loss": 1.8308, "step": 5970 }, { "epoch": 0.15, "learning_rate": 0.0003943174071275829, "loss": 2.0842, "step": 5980 }, { "epoch": 0.15, "learning_rate": 0.000394298476803081, "loss": 1.7864, "step": 5990 }, { "epoch": 0.15, "learning_rate": 0.0003942795154557814, "loss": 1.6993, "step": 6000 }, { "epoch": 0.15, "learning_rate": 0.0003942605230887116, "loss": 1.7407, "step": 6010 }, { "epoch": 0.15, "learning_rate": 0.00039424149970490396, "loss": 1.695, "step": 6020 }, { "epoch": 0.15, "learning_rate": 0.00039422244530739584, "loss": 1.7297, "step": 6030 }, { "epoch": 0.15, "learning_rate": 0.0003942033598992297, "loss": 1.7075, "step": 6040 }, { "epoch": 0.15, "learning_rate": 0.0003941842434834527, "loss": 1.7377, "step": 6050 }, { "epoch": 0.15, "learning_rate": 0.0003941650960631172, "loss": 1.7527, "step": 6060 }, { "epoch": 0.15, "learning_rate": 0.0003941459176412802, "loss": 1.7288, "step": 6070 }, { "epoch": 0.15, "learning_rate": 0.00039412670822100405, "loss": 1.7208, "step": 6080 }, { "epoch": 0.15, "learning_rate": 0.0003941074678053557, "loss": 1.7281, "step": 6090 }, { "epoch": 0.16, "learning_rate": 0.0003940881963974072, "loss": 1.7372, "step": 6100 }, { "epoch": 0.16, "learning_rate": 0.00039406889400023557, "loss": 1.767, "step": 6110 }, { "epoch": 0.16, "learning_rate": 0.00039404956061692267, "loss": 1.6922, "step": 6120 }, { "epoch": 0.16, "learning_rate": 0.0003940301962505555, "loss": 1.6791, "step": 6130 }, { "epoch": 0.16, "learning_rate": 0.00039401080090422573, "loss": 1.7537, "step": 6140 }, { "epoch": 0.16, "learning_rate": 0.00039399137458103026, "loss": 1.7717, "step": 6150 }, { "epoch": 0.16, "learning_rate": 0.00039397191728407076, "loss": 1.7228, "step": 6160 }, { "epoch": 0.16, "learning_rate": 0.0003939524290164539, "loss": 1.7459, "step": 6170 }, { "epoch": 0.16, "learning_rate": 0.00039393290978129126, "loss": 1.7059, "step": 6180 }, { "epoch": 0.16, "learning_rate": 0.0003939133595816994, "loss": 1.7093, "step": 6190 }, { "epoch": 0.16, "learning_rate": 0.00039389377842079986, "loss": 1.737, "step": 6200 }, { "epoch": 0.16, "learning_rate": 0.00039387416630171904, "loss": 1.7162, "step": 6210 }, { "epoch": 0.16, "learning_rate": 0.00039385452322758833, "loss": 1.7225, "step": 6220 }, { "epoch": 0.16, "learning_rate": 0.00039383484920154407, "loss": 1.73, "step": 6230 }, { "epoch": 0.16, "learning_rate": 0.00039381514422672745, "loss": 1.7722, "step": 6240 }, { "epoch": 0.16, "learning_rate": 0.0003937954083062848, "loss": 1.7125, "step": 6250 }, { "epoch": 0.16, "learning_rate": 0.00039377564144336713, "loss": 1.7239, "step": 6260 }, { "epoch": 0.16, "learning_rate": 0.00039375584364113067, "loss": 1.7147, "step": 6270 }, { "epoch": 0.16, "learning_rate": 0.0003937360149027364, "loss": 1.7597, "step": 6280 }, { "epoch": 0.16, "learning_rate": 0.00039371615523135024, "loss": 1.7395, "step": 6290 }, { "epoch": 0.16, "learning_rate": 0.0003936962646301432, "loss": 1.6462, "step": 6300 }, { "epoch": 0.16, "learning_rate": 0.0003936763431022909, "loss": 1.738, "step": 6310 }, { "epoch": 0.16, "learning_rate": 0.00039365639065097445, "loss": 1.7236, "step": 6320 }, { "epoch": 0.16, "learning_rate": 0.00039363640727937927, "loss": 1.6844, "step": 6330 }, { "epoch": 0.16, "learning_rate": 0.0003936163929906963, "loss": 1.7056, "step": 6340 }, { "epoch": 0.16, "learning_rate": 0.00039359634778812086, "loss": 1.7279, "step": 6350 }, { "epoch": 0.16, "learning_rate": 0.00039357627167485365, "loss": 1.7891, "step": 6360 }, { "epoch": 0.16, "learning_rate": 0.0003935561646541001, "loss": 1.8111, "step": 6370 }, { "epoch": 0.16, "learning_rate": 0.00039353602672907067, "loss": 1.7169, "step": 6380 }, { "epoch": 0.16, "learning_rate": 0.0003935158579029806, "loss": 1.7333, "step": 6390 }, { "epoch": 0.16, "learning_rate": 0.0003934956581790501, "loss": 1.7419, "step": 6400 }, { "epoch": 0.16, "learning_rate": 0.00039347542756050453, "loss": 1.7139, "step": 6410 }, { "epoch": 0.16, "learning_rate": 0.00039345516605057397, "loss": 1.7196, "step": 6420 }, { "epoch": 0.16, "learning_rate": 0.00039343487365249346, "loss": 1.6798, "step": 6430 }, { "epoch": 0.16, "learning_rate": 0.0003934145503695031, "loss": 1.7796, "step": 6440 }, { "epoch": 0.16, "learning_rate": 0.0003933941962048476, "loss": 1.7407, "step": 6450 }, { "epoch": 0.16, "learning_rate": 0.00039337381116177705, "loss": 1.68, "step": 6460 }, { "epoch": 0.16, "learning_rate": 0.0003933533952435461, "loss": 1.7518, "step": 6470 }, { "epoch": 0.16, "learning_rate": 0.00039333294845341453, "loss": 1.7048, "step": 6480 }, { "epoch": 0.17, "learning_rate": 0.000393312470794647, "loss": 1.746, "step": 6490 }, { "epoch": 0.17, "learning_rate": 0.0003932919622705131, "loss": 1.7358, "step": 6500 }, { "epoch": 0.17, "learning_rate": 0.00039327142288428726, "loss": 1.7378, "step": 6510 }, { "epoch": 0.17, "learning_rate": 0.000393250852639249, "loss": 1.7156, "step": 6520 }, { "epoch": 0.17, "learning_rate": 0.0003932302515386826, "loss": 1.7146, "step": 6530 }, { "epoch": 0.17, "learning_rate": 0.00039320961958587745, "loss": 1.7649, "step": 6540 }, { "epoch": 0.17, "learning_rate": 0.00039318895678412766, "loss": 1.7608, "step": 6550 }, { "epoch": 0.17, "learning_rate": 0.0003931682631367324, "loss": 1.6887, "step": 6560 }, { "epoch": 0.17, "learning_rate": 0.0003931475386469958, "loss": 1.7031, "step": 6570 }, { "epoch": 0.17, "learning_rate": 0.00039312678331822684, "loss": 1.7654, "step": 6580 }, { "epoch": 0.17, "learning_rate": 0.0003931059971537394, "loss": 1.7527, "step": 6590 }, { "epoch": 0.17, "learning_rate": 0.00039308518015685227, "loss": 1.7217, "step": 6600 }, { "epoch": 0.17, "learning_rate": 0.00039306433233088925, "loss": 1.7602, "step": 6610 }, { "epoch": 0.17, "learning_rate": 0.000393043453679179, "loss": 1.7707, "step": 6620 }, { "epoch": 0.17, "learning_rate": 0.0003930225442050552, "loss": 1.7279, "step": 6630 }, { "epoch": 0.17, "learning_rate": 0.00039300160391185637, "loss": 1.7004, "step": 6640 }, { "epoch": 0.17, "learning_rate": 0.0003929806328029258, "loss": 1.7322, "step": 6650 }, { "epoch": 0.17, "learning_rate": 0.00039295963088161205, "loss": 1.7124, "step": 6660 }, { "epoch": 0.17, "learning_rate": 0.00039293859815126833, "loss": 1.7329, "step": 6670 }, { "epoch": 0.17, "learning_rate": 0.0003929175346152528, "loss": 1.7658, "step": 6680 }, { "epoch": 0.17, "learning_rate": 0.00039289644027692863, "loss": 1.6922, "step": 6690 }, { "epoch": 0.17, "learning_rate": 0.00039287531513966387, "loss": 1.7263, "step": 6700 }, { "epoch": 0.17, "learning_rate": 0.00039285415920683146, "loss": 1.7444, "step": 6710 }, { "epoch": 0.17, "learning_rate": 0.00039283297248180924, "loss": 1.7123, "step": 6720 }, { "epoch": 0.17, "learning_rate": 0.00039281175496798013, "loss": 1.7342, "step": 6730 }, { "epoch": 0.17, "learning_rate": 0.0003927905066687317, "loss": 1.7511, "step": 6740 }, { "epoch": 0.17, "learning_rate": 0.00039276922758745665, "loss": 1.6741, "step": 6750 }, { "epoch": 0.17, "learning_rate": 0.0003927479177275525, "loss": 1.6909, "step": 6760 }, { "epoch": 0.17, "learning_rate": 0.0003927265770924216, "loss": 1.7151, "step": 6770 }, { "epoch": 0.17, "learning_rate": 0.00039270520568547156, "loss": 1.7389, "step": 6780 }, { "epoch": 0.17, "learning_rate": 0.00039268380351011446, "loss": 1.7088, "step": 6790 }, { "epoch": 0.17, "learning_rate": 0.00039266237056976753, "loss": 1.7261, "step": 6800 }, { "epoch": 0.17, "learning_rate": 0.00039264090686785293, "loss": 1.7018, "step": 6810 }, { "epoch": 0.17, "learning_rate": 0.00039261941240779756, "loss": 1.6874, "step": 6820 }, { "epoch": 0.17, "learning_rate": 0.0003925978871930335, "loss": 1.7276, "step": 6830 }, { "epoch": 0.17, "learning_rate": 0.00039257633122699755, "loss": 1.7157, "step": 6840 }, { "epoch": 0.17, "learning_rate": 0.0003925547445131313, "loss": 1.7039, "step": 6850 }, { "epoch": 0.17, "learning_rate": 0.0003925331270548816, "loss": 1.7528, "step": 6860 }, { "epoch": 0.17, "learning_rate": 0.0003925114788557, "loss": 1.6982, "step": 6870 }, { "epoch": 0.18, "learning_rate": 0.0003924897999190429, "loss": 1.7167, "step": 6880 }, { "epoch": 0.18, "learning_rate": 0.00039246809024837164, "loss": 1.6949, "step": 6890 }, { "epoch": 0.18, "learning_rate": 0.00039244634984715257, "loss": 1.8046, "step": 6900 }, { "epoch": 0.18, "learning_rate": 0.00039242457871885696, "loss": 1.7028, "step": 6910 }, { "epoch": 0.18, "learning_rate": 0.0003924027768669608, "loss": 1.7132, "step": 6920 }, { "epoch": 0.18, "learning_rate": 0.0003923809442949452, "loss": 1.6916, "step": 6930 }, { "epoch": 0.18, "learning_rate": 0.0003923590810062959, "loss": 1.7176, "step": 6940 }, { "epoch": 0.18, "learning_rate": 0.00039233718700450393, "loss": 1.7359, "step": 6950 }, { "epoch": 0.18, "learning_rate": 0.00039231526229306483, "loss": 1.7271, "step": 6960 }, { "epoch": 0.18, "learning_rate": 0.00039229330687547934, "loss": 1.6948, "step": 6970 }, { "epoch": 0.18, "learning_rate": 0.00039227132075525295, "loss": 1.7418, "step": 6980 }, { "epoch": 0.18, "learning_rate": 0.0003922493039358961, "loss": 1.7207, "step": 6990 }, { "epoch": 0.18, "learning_rate": 0.0003922272564209241, "loss": 1.6874, "step": 7000 }, { "epoch": 0.18, "learning_rate": 0.00039220517821385715, "loss": 1.7025, "step": 7010 }, { "epoch": 0.18, "learning_rate": 0.00039218306931822043, "loss": 1.7748, "step": 7020 }, { "epoch": 0.18, "learning_rate": 0.000392160929737544, "loss": 1.7008, "step": 7030 }, { "epoch": 0.18, "learning_rate": 0.0003921387594753627, "loss": 1.6817, "step": 7040 }, { "epoch": 0.18, "grad_norm": 0.1620354801416397, "learning_rate": 0.0003921165585352165, "loss": 1.7891, "step": 7050 }, { "epoch": 0.18, "grad_norm": 0.148910254240036, "learning_rate": 0.00039209432692064995, "loss": 1.7461, "step": 7060 }, { "epoch": 0.18, "grad_norm": 0.15584000945091248, "learning_rate": 0.0003920720646352128, "loss": 1.7547, "step": 7070 }, { "epoch": 0.18, "grad_norm": 0.16131040453910828, "learning_rate": 0.0003920497716824596, "loss": 1.7804, "step": 7080 }, { "epoch": 0.18, "grad_norm": 0.1430855393409729, "learning_rate": 0.0003920274480659496, "loss": 1.7793, "step": 7090 }, { "epoch": 0.18, "grad_norm": 0.15294766426086426, "learning_rate": 0.0003920050937892473, "loss": 1.7156, "step": 7100 }, { "epoch": 0.18, "grad_norm": 0.13739879429340363, "learning_rate": 0.00039198270885592174, "loss": 1.7659, "step": 7110 }, { "epoch": 0.18, "grad_norm": 0.1557493805885315, "learning_rate": 0.0003919602932695472, "loss": 1.7918, "step": 7120 }, { "epoch": 0.18, "grad_norm": 0.12877637147903442, "learning_rate": 0.00039193784703370264, "loss": 1.7088, "step": 7130 }, { "epoch": 0.18, "grad_norm": 0.13669374585151672, "learning_rate": 0.00039191537015197185, "loss": 1.7093, "step": 7140 }, { "epoch": 0.18, "grad_norm": 0.12947019934654236, "learning_rate": 0.00039189286262794376, "loss": 1.6805, "step": 7150 }, { "epoch": 0.18, "grad_norm": 0.14283278584480286, "learning_rate": 0.0003918703244652119, "loss": 1.6415, "step": 7160 }, { "epoch": 0.18, "grad_norm": 0.13801805675029755, "learning_rate": 0.00039184775566737494, "loss": 1.692, "step": 7170 }, { "epoch": 0.18, "grad_norm": 0.16947269439697266, "learning_rate": 0.0003918251562380363, "loss": 1.7506, "step": 7180 }, { "epoch": 0.18, "grad_norm": 0.13640892505645752, "learning_rate": 0.0003918025261808043, "loss": 1.6777, "step": 7190 }, { "epoch": 0.18, "grad_norm": 0.1325259953737259, "learning_rate": 0.0003917798654992923, "loss": 1.6815, "step": 7200 }, { "epoch": 0.18, "grad_norm": 0.13371969759464264, "learning_rate": 0.00039175717419711833, "loss": 1.6929, "step": 7210 }, { "epoch": 0.18, "grad_norm": 0.13861477375030518, "learning_rate": 0.0003917344522779054, "loss": 1.7506, "step": 7220 }, { "epoch": 0.18, "grad_norm": 0.14447219669818878, "learning_rate": 0.00039171169974528157, "loss": 1.7032, "step": 7230 }, { "epoch": 0.18, "grad_norm": 0.13237527012825012, "learning_rate": 0.0003916889166028795, "loss": 1.6761, "step": 7240 }, { "epoch": 0.18, "grad_norm": 0.1482444852590561, "learning_rate": 0.00039166610285433685, "loss": 1.711, "step": 7250 }, { "epoch": 0.18, "grad_norm": 0.13787992298603058, "learning_rate": 0.0003916432585032963, "loss": 1.7039, "step": 7260 }, { "epoch": 0.18, "grad_norm": 0.13764281570911407, "learning_rate": 0.0003916203835534052, "loss": 1.6459, "step": 7270 }, { "epoch": 0.19, "grad_norm": 0.14277833700180054, "learning_rate": 0.000391597478008316, "loss": 1.7331, "step": 7280 }, { "epoch": 0.19, "grad_norm": 0.14394375681877136, "learning_rate": 0.0003915745418716859, "loss": 1.7245, "step": 7290 }, { "epoch": 0.19, "grad_norm": 0.12185429036617279, "learning_rate": 0.00039155157514717703, "loss": 1.6878, "step": 7300 }, { "epoch": 0.19, "grad_norm": 0.12890471518039703, "learning_rate": 0.0003915285778384563, "loss": 1.6844, "step": 7310 }, { "epoch": 0.19, "grad_norm": 0.13018891215324402, "learning_rate": 0.0003915055499491957, "loss": 1.6959, "step": 7320 }, { "epoch": 0.19, "grad_norm": 0.169634148478508, "learning_rate": 0.0003914824914830719, "loss": 1.7361, "step": 7330 }, { "epoch": 0.19, "grad_norm": 0.1414484977722168, "learning_rate": 0.00039145940244376655, "loss": 1.7753, "step": 7340 }, { "epoch": 0.19, "grad_norm": 0.14286017417907715, "learning_rate": 0.0003914362828349663, "loss": 1.6969, "step": 7350 }, { "epoch": 0.19, "grad_norm": 0.13231833279132843, "learning_rate": 0.0003914131326603624, "loss": 1.7277, "step": 7360 }, { "epoch": 0.19, "grad_norm": 0.14872434735298157, "learning_rate": 0.00039138995192365125, "loss": 1.736, "step": 7370 }, { "epoch": 0.19, "grad_norm": 0.1488579511642456, "learning_rate": 0.000391366740628534, "loss": 1.6918, "step": 7380 }, { "epoch": 0.19, "grad_norm": 0.1341143697500229, "learning_rate": 0.0003913434987787166, "loss": 1.6209, "step": 7390 }, { "epoch": 0.19, "grad_norm": 0.1347276121377945, "learning_rate": 0.0003913202263779101, "loss": 1.6827, "step": 7400 }, { "epoch": 0.19, "grad_norm": 0.12921172380447388, "learning_rate": 0.00039129692342983023, "loss": 1.6474, "step": 7410 }, { "epoch": 0.19, "grad_norm": 0.1317102164030075, "learning_rate": 0.00039127358993819777, "loss": 1.6786, "step": 7420 }, { "epoch": 0.19, "grad_norm": 0.14124788343906403, "learning_rate": 0.0003912502259067381, "loss": 1.764, "step": 7430 }, { "epoch": 0.19, "grad_norm": 0.14556260406970978, "learning_rate": 0.0003912268313391818, "loss": 1.701, "step": 7440 }, { "epoch": 0.19, "grad_norm": 0.13149979710578918, "learning_rate": 0.0003912034062392642, "loss": 1.7006, "step": 7450 }, { "epoch": 0.19, "grad_norm": 0.13681216537952423, "learning_rate": 0.0003911799506107253, "loss": 1.706, "step": 7460 }, { "epoch": 0.19, "grad_norm": 0.13194845616817474, "learning_rate": 0.0003911564644573103, "loss": 1.7716, "step": 7470 }, { "epoch": 0.19, "grad_norm": 0.13681434094905853, "learning_rate": 0.0003911329477827692, "loss": 1.7356, "step": 7480 }, { "epoch": 0.19, "grad_norm": 0.1333349198102951, "learning_rate": 0.00039110940059085665, "loss": 1.7158, "step": 7490 }, { "epoch": 0.19, "grad_norm": 0.13811297714710236, "learning_rate": 0.0003910858228853324, "loss": 1.7472, "step": 7500 }, { "epoch": 0.19, "grad_norm": 0.1425902247428894, "learning_rate": 0.000391062214669961, "loss": 1.7062, "step": 7510 }, { "epoch": 0.19, "grad_norm": 0.13320201635360718, "learning_rate": 0.0003910385759485119, "loss": 1.6833, "step": 7520 }, { "epoch": 0.19, "grad_norm": 0.13622677326202393, "learning_rate": 0.0003910149067247593, "loss": 1.7262, "step": 7530 }, { "epoch": 0.19, "grad_norm": 0.13262036442756653, "learning_rate": 0.00039099120700248247, "loss": 1.68, "step": 7540 }, { "epoch": 0.19, "grad_norm": 0.13869726657867432, "learning_rate": 0.00039096747678546537, "loss": 1.6351, "step": 7550 }, { "epoch": 0.19, "grad_norm": 0.13586406409740448, "learning_rate": 0.000390943716077497, "loss": 1.7085, "step": 7560 }, { "epoch": 0.19, "grad_norm": 0.13860741257667542, "learning_rate": 0.00039091992488237093, "loss": 1.7097, "step": 7570 }, { "epoch": 0.19, "grad_norm": 0.14486007392406464, "learning_rate": 0.00039089610320388604, "loss": 1.711, "step": 7580 }, { "epoch": 0.19, "grad_norm": 0.14704862236976624, "learning_rate": 0.00039087225104584563, "loss": 1.6913, "step": 7590 }, { "epoch": 0.19, "grad_norm": 0.15811263024806976, "learning_rate": 0.0003908483684120582, "loss": 1.6907, "step": 7600 }, { "epoch": 0.19, "grad_norm": 0.13361206650733948, "learning_rate": 0.0003908244553063369, "loss": 1.6782, "step": 7610 }, { "epoch": 0.19, "grad_norm": 0.13869741559028625, "learning_rate": 0.0003908005117324999, "loss": 1.7114, "step": 7620 }, { "epoch": 0.19, "grad_norm": 0.12840087711811066, "learning_rate": 0.0003907765376943702, "loss": 1.6481, "step": 7630 }, { "epoch": 0.19, "grad_norm": 0.14002574980258942, "learning_rate": 0.0003907525331957755, "loss": 1.6603, "step": 7640 }, { "epoch": 0.19, "grad_norm": 0.14109046757221222, "learning_rate": 0.00039072849824054866, "loss": 1.7, "step": 7650 }, { "epoch": 0.19, "grad_norm": 0.12924519181251526, "learning_rate": 0.0003907044328325271, "loss": 1.6869, "step": 7660 }, { "epoch": 0.2, "grad_norm": 0.13750408589839935, "learning_rate": 0.00039068033697555333, "loss": 1.6731, "step": 7670 }, { "epoch": 0.2, "grad_norm": 0.16169866919517517, "learning_rate": 0.0003906562106734745, "loss": 1.7461, "step": 7680 }, { "epoch": 0.2, "grad_norm": 0.13251982629299164, "learning_rate": 0.00039063205393014287, "loss": 1.6876, "step": 7690 }, { "epoch": 0.2, "grad_norm": 0.15520523488521576, "learning_rate": 0.0003906078667494154, "loss": 1.7065, "step": 7700 }, { "epoch": 0.2, "grad_norm": 0.12816987931728363, "learning_rate": 0.000390583649135154, "loss": 1.654, "step": 7710 }, { "epoch": 0.2, "grad_norm": 0.13748766481876373, "learning_rate": 0.00039055940109122535, "loss": 1.7374, "step": 7720 }, { "epoch": 0.2, "grad_norm": 0.14492934942245483, "learning_rate": 0.0003905351226215011, "loss": 1.732, "step": 7730 }, { "epoch": 0.2, "grad_norm": 0.1277233511209488, "learning_rate": 0.0003905108137298575, "loss": 1.685, "step": 7740 }, { "epoch": 0.2, "grad_norm": 0.13751693069934845, "learning_rate": 0.00039048647442017605, "loss": 1.7166, "step": 7750 }, { "epoch": 0.2, "grad_norm": 0.2509312927722931, "learning_rate": 0.00039046210469634274, "loss": 2.0125, "step": 7760 }, { "epoch": 0.2, "grad_norm": 0.14716836810112, "learning_rate": 0.00039043770456224876, "loss": 1.864, "step": 7770 }, { "epoch": 0.2, "grad_norm": 0.13424529135227203, "learning_rate": 0.00039041327402178984, "loss": 1.72, "step": 7780 }, { "epoch": 0.2, "grad_norm": 0.12159378826618195, "learning_rate": 0.00039038881307886674, "loss": 1.7137, "step": 7790 }, { "epoch": 0.2, "grad_norm": 0.12906280159950256, "learning_rate": 0.00039036432173738503, "loss": 1.7289, "step": 7800 }, { "epoch": 0.2, "grad_norm": 0.1405516117811203, "learning_rate": 0.00039033980000125515, "loss": 1.7146, "step": 7810 }, { "epoch": 0.2, "grad_norm": 0.1447688192129135, "learning_rate": 0.00039031524787439236, "loss": 1.7115, "step": 7820 }, { "epoch": 0.2, "grad_norm": 0.13648471236228943, "learning_rate": 0.00039029066536071683, "loss": 1.7023, "step": 7830 }, { "epoch": 0.2, "grad_norm": 0.1418386846780777, "learning_rate": 0.0003902660524641534, "loss": 1.7176, "step": 7840 }, { "epoch": 0.2, "grad_norm": 0.12786869704723358, "learning_rate": 0.00039024140918863214, "loss": 1.6776, "step": 7850 }, { "epoch": 0.2, "grad_norm": 0.12492866069078445, "learning_rate": 0.00039021673553808756, "loss": 1.661, "step": 7860 }, { "epoch": 0.2, "grad_norm": 0.12730900943279266, "learning_rate": 0.0003901920315164592, "loss": 1.7203, "step": 7870 }, { "epoch": 0.2, "grad_norm": 0.13647036254405975, "learning_rate": 0.00039016729712769156, "loss": 1.7012, "step": 7880 }, { "epoch": 0.2, "grad_norm": 0.14049872756004333, "learning_rate": 0.0003901425323757337, "loss": 1.6655, "step": 7890 }, { "epoch": 0.2, "grad_norm": 0.11917974799871445, "learning_rate": 0.00039011773726453994, "loss": 1.6759, "step": 7900 }, { "epoch": 0.2, "grad_norm": 0.13427309691905975, "learning_rate": 0.000390092911798069, "loss": 1.6681, "step": 7910 }, { "epoch": 0.2, "grad_norm": 0.13851284980773926, "learning_rate": 0.00039006805598028473, "loss": 1.7163, "step": 7920 }, { "epoch": 0.2, "grad_norm": 0.1267043501138687, "learning_rate": 0.0003900431698151557, "loss": 1.7098, "step": 7930 }, { "epoch": 0.2, "grad_norm": 0.22532662749290466, "learning_rate": 0.0003900182533066555, "loss": 1.6815, "step": 7940 }, { "epoch": 0.2, "grad_norm": 0.14643071591854095, "learning_rate": 0.00038999330645876233, "loss": 1.7363, "step": 7950 }, { "epoch": 0.2, "grad_norm": 0.15773718059062958, "learning_rate": 0.0003899683292754594, "loss": 1.7213, "step": 7960 }, { "epoch": 0.2, "grad_norm": 0.1535635143518448, "learning_rate": 0.00038994332176073466, "loss": 1.6854, "step": 7970 }, { "epoch": 0.2, "grad_norm": 0.12380396574735641, "learning_rate": 0.00038991828391858103, "loss": 1.6882, "step": 7980 }, { "epoch": 0.2, "grad_norm": 0.13407306373119354, "learning_rate": 0.00038989321575299613, "loss": 1.7648, "step": 7990 }, { "epoch": 0.2, "grad_norm": 0.13147538900375366, "learning_rate": 0.00038986811726798246, "loss": 1.7537, "step": 8000 }, { "epoch": 0.2, "grad_norm": 0.13497044146060944, "learning_rate": 0.00038984298846754745, "loss": 1.6978, "step": 8010 }, { "epoch": 0.2, "grad_norm": 0.13441677391529083, "learning_rate": 0.0003898178293557033, "loss": 1.6786, "step": 8020 }, { "epoch": 0.2, "grad_norm": 0.13331085443496704, "learning_rate": 0.000389792639936467, "loss": 1.6941, "step": 8030 }, { "epoch": 0.2, "grad_norm": 0.1468220353126526, "learning_rate": 0.0003897674202138605, "loss": 1.6199, "step": 8040 }, { "epoch": 0.2, "grad_norm": 0.1379866600036621, "learning_rate": 0.00038974217019191053, "loss": 1.7195, "step": 8050 }, { "epoch": 0.21, "grad_norm": 0.13483931124210358, "learning_rate": 0.0003897168898746486, "loss": 1.6968, "step": 8060 }, { "epoch": 0.21, "grad_norm": 0.13262777030467987, "learning_rate": 0.0003896915792661111, "loss": 1.6964, "step": 8070 }, { "epoch": 0.21, "grad_norm": 0.12962651252746582, "learning_rate": 0.00038966623837033936, "loss": 1.7032, "step": 8080 }, { "epoch": 0.21, "grad_norm": 0.13349364697933197, "learning_rate": 0.0003896408671913793, "loss": 1.6942, "step": 8090 }, { "epoch": 0.21, "grad_norm": 0.12977685034275055, "learning_rate": 0.000389615465733282, "loss": 1.7319, "step": 8100 }, { "epoch": 0.21, "grad_norm": 0.1472356915473938, "learning_rate": 0.0003895900340001031, "loss": 1.6924, "step": 8110 }, { "epoch": 0.21, "grad_norm": 0.1957908570766449, "learning_rate": 0.0003895645719959032, "loss": 1.6861, "step": 8120 }, { "epoch": 0.21, "grad_norm": 0.14227932691574097, "learning_rate": 0.00038953907972474764, "loss": 1.7058, "step": 8130 }, { "epoch": 0.21, "grad_norm": 0.12813404202461243, "learning_rate": 0.00038951355719070674, "loss": 1.6742, "step": 8140 }, { "epoch": 0.21, "grad_norm": 0.1380937248468399, "learning_rate": 0.00038948800439785557, "loss": 1.7324, "step": 8150 }, { "epoch": 0.21, "grad_norm": 0.13374464213848114, "learning_rate": 0.00038946242135027404, "loss": 1.6414, "step": 8160 }, { "epoch": 0.21, "grad_norm": 0.13308840990066528, "learning_rate": 0.0003894368080520468, "loss": 1.6614, "step": 8170 }, { "epoch": 0.21, "grad_norm": 0.13562729954719543, "learning_rate": 0.00038941116450726354, "loss": 1.6515, "step": 8180 }, { "epoch": 0.21, "grad_norm": 0.13918358087539673, "learning_rate": 0.0003893854907200185, "loss": 1.6598, "step": 8190 }, { "epoch": 0.21, "grad_norm": 0.15010669827461243, "learning_rate": 0.00038935978669441104, "loss": 1.7217, "step": 8200 }, { "epoch": 0.21, "grad_norm": 0.1475742906332016, "learning_rate": 0.0003893340524345452, "loss": 1.6392, "step": 8210 }, { "epoch": 0.21, "grad_norm": 0.1437012404203415, "learning_rate": 0.00038930828794452976, "loss": 1.7013, "step": 8220 }, { "epoch": 0.21, "grad_norm": 0.14719869196414948, "learning_rate": 0.00038928249322847853, "loss": 1.7308, "step": 8230 }, { "epoch": 0.21, "grad_norm": 0.16744408011436462, "learning_rate": 0.00038925666829051, "loss": 1.7185, "step": 8240 }, { "epoch": 0.21, "grad_norm": 0.1367211937904358, "learning_rate": 0.0003892308131347475, "loss": 1.7045, "step": 8250 }, { "epoch": 0.21, "grad_norm": 0.13218046724796295, "learning_rate": 0.00038920492776531925, "loss": 1.6705, "step": 8260 }, { "epoch": 0.21, "grad_norm": 0.13375000655651093, "learning_rate": 0.0003891790121863582, "loss": 1.6399, "step": 8270 }, { "epoch": 0.21, "grad_norm": 0.13972359895706177, "learning_rate": 0.00038915306640200216, "loss": 1.6861, "step": 8280 }, { "epoch": 0.21, "grad_norm": 0.13885724544525146, "learning_rate": 0.00038912709041639395, "loss": 1.6812, "step": 8290 }, { "epoch": 0.21, "grad_norm": 0.14410728216171265, "learning_rate": 0.0003891010842336809, "loss": 1.7016, "step": 8300 }, { "epoch": 0.21, "grad_norm": 0.13372208178043365, "learning_rate": 0.0003890750478580153, "loss": 1.6539, "step": 8310 }, { "epoch": 0.21, "grad_norm": 0.133956640958786, "learning_rate": 0.00038904898129355435, "loss": 1.6481, "step": 8320 }, { "epoch": 0.21, "grad_norm": 0.14632944762706757, "learning_rate": 0.00038902288454445997, "loss": 1.6756, "step": 8330 }, { "epoch": 0.21, "grad_norm": 0.1364748626947403, "learning_rate": 0.0003889967576148988, "loss": 1.6624, "step": 8340 }, { "epoch": 0.21, "grad_norm": 0.13148286938667297, "learning_rate": 0.0003889706005090425, "loss": 1.6931, "step": 8350 }, { "epoch": 0.21, "grad_norm": 0.14017271995544434, "learning_rate": 0.0003889444132310675, "loss": 1.7128, "step": 8360 }, { "epoch": 0.21, "grad_norm": 0.1335548460483551, "learning_rate": 0.00038891819578515494, "loss": 1.7638, "step": 8370 }, { "epoch": 0.21, "grad_norm": 0.13582909107208252, "learning_rate": 0.00038889194817549085, "loss": 1.6896, "step": 8380 }, { "epoch": 0.21, "grad_norm": 0.1565767079591751, "learning_rate": 0.00038886567040626616, "loss": 1.7231, "step": 8390 }, { "epoch": 0.21, "grad_norm": 0.15570221841335297, "learning_rate": 0.0003888393624816764, "loss": 1.6965, "step": 8400 }, { "epoch": 0.21, "grad_norm": 0.14114753901958466, "learning_rate": 0.0003888130244059221, "loss": 1.6967, "step": 8410 }, { "epoch": 0.21, "grad_norm": 0.13201847672462463, "learning_rate": 0.00038878665618320864, "loss": 1.6502, "step": 8420 }, { "epoch": 0.21, "grad_norm": 0.13043740391731262, "learning_rate": 0.00038876025781774603, "loss": 1.7026, "step": 8430 }, { "epoch": 0.21, "grad_norm": 0.16153882443904877, "learning_rate": 0.0003887338293137491, "loss": 1.6899, "step": 8440 }, { "epoch": 0.21, "grad_norm": 0.12798282504081726, "learning_rate": 0.0003887073706754377, "loss": 1.6431, "step": 8450 }, { "epoch": 0.22, "grad_norm": 0.13147814571857452, "learning_rate": 0.0003886808819070363, "loss": 1.6663, "step": 8460 }, { "epoch": 0.22, "grad_norm": 0.13304992020130157, "learning_rate": 0.0003886543630127743, "loss": 1.6965, "step": 8470 }, { "epoch": 0.22, "grad_norm": 0.13692429661750793, "learning_rate": 0.00038862781399688585, "loss": 1.6628, "step": 8480 }, { "epoch": 0.22, "grad_norm": 0.22576695680618286, "learning_rate": 0.00038860123486360987, "loss": 1.6373, "step": 8490 }, { "epoch": 0.22, "grad_norm": 0.15108773112297058, "learning_rate": 0.0003885746256171902, "loss": 1.655, "step": 8500 }, { "epoch": 0.22, "grad_norm": 0.1294201761484146, "learning_rate": 0.0003885479862618754, "loss": 1.6525, "step": 8510 }, { "epoch": 0.22, "grad_norm": 0.12410447001457214, "learning_rate": 0.00038852131680191875, "loss": 1.6522, "step": 8520 }, { "epoch": 0.22, "grad_norm": 0.1444026529788971, "learning_rate": 0.0003884946172415786, "loss": 1.6696, "step": 8530 }, { "epoch": 0.22, "grad_norm": 0.134349063038826, "learning_rate": 0.00038846788758511785, "loss": 1.6533, "step": 8540 }, { "epoch": 0.22, "grad_norm": 0.12954683601856232, "learning_rate": 0.00038844112783680446, "loss": 1.6767, "step": 8550 }, { "epoch": 0.22, "grad_norm": 0.13951031863689423, "learning_rate": 0.00038841433800091085, "loss": 1.7252, "step": 8560 }, { "epoch": 0.22, "grad_norm": 0.14629290997982025, "learning_rate": 0.0003883875180817146, "loss": 1.6573, "step": 8570 }, { "epoch": 0.22, "grad_norm": 0.15033113956451416, "learning_rate": 0.00038836066808349784, "loss": 1.7045, "step": 8580 }, { "epoch": 0.22, "grad_norm": 0.13196027278900146, "learning_rate": 0.00038833378801054753, "loss": 1.7253, "step": 8590 }, { "epoch": 0.22, "grad_norm": 0.1399390995502472, "learning_rate": 0.00038830687786715564, "loss": 1.6248, "step": 8600 }, { "epoch": 0.22, "grad_norm": 0.12518121302127838, "learning_rate": 0.00038827993765761876, "loss": 1.6735, "step": 8610 }, { "epoch": 0.22, "grad_norm": 0.13088391721248627, "learning_rate": 0.00038825296738623824, "loss": 1.6529, "step": 8620 }, { "epoch": 0.22, "grad_norm": 0.1249965950846672, "learning_rate": 0.00038822596705732036, "loss": 1.7086, "step": 8630 }, { "epoch": 0.22, "grad_norm": 0.13543793559074402, "learning_rate": 0.00038819893667517614, "loss": 1.6193, "step": 8640 }, { "epoch": 0.22, "grad_norm": 0.16318467259407043, "learning_rate": 0.00038817187624412146, "loss": 1.7019, "step": 8650 }, { "epoch": 0.22, "grad_norm": 0.1324823647737503, "learning_rate": 0.00038814478576847686, "loss": 1.7179, "step": 8660 }, { "epoch": 0.22, "grad_norm": 0.1390790194272995, "learning_rate": 0.0003881176652525677, "loss": 1.6679, "step": 8670 }, { "epoch": 0.22, "grad_norm": 0.12248550355434418, "learning_rate": 0.00038809051470072443, "loss": 1.6866, "step": 8680 }, { "epoch": 0.22, "grad_norm": 0.14330117404460907, "learning_rate": 0.0003880633341172818, "loss": 1.6583, "step": 8690 }, { "epoch": 0.22, "grad_norm": 0.12497661262750626, "learning_rate": 0.00038803612350657977, "loss": 1.6371, "step": 8700 }, { "epoch": 0.22, "grad_norm": 0.14796240627765656, "learning_rate": 0.0003880088828729629, "loss": 1.669, "step": 8710 }, { "epoch": 0.22, "grad_norm": 0.198830246925354, "learning_rate": 0.00038798161222078055, "loss": 1.6904, "step": 8720 }, { "epoch": 0.22, "grad_norm": 0.1437385082244873, "learning_rate": 0.00038795431155438703, "loss": 1.6763, "step": 8730 }, { "epoch": 0.22, "grad_norm": 0.1363937258720398, "learning_rate": 0.00038792698087814116, "loss": 1.6551, "step": 8740 }, { "epoch": 0.22, "grad_norm": 0.13170404732227325, "learning_rate": 0.0003878996201964068, "loss": 1.6615, "step": 8750 }, { "epoch": 0.22, "grad_norm": 0.12872740626335144, "learning_rate": 0.00038787222951355247, "loss": 1.6544, "step": 8760 }, { "epoch": 0.22, "grad_norm": 0.15284587442874908, "learning_rate": 0.0003878448088339516, "loss": 1.6436, "step": 8770 }, { "epoch": 0.22, "grad_norm": 0.14942800998687744, "learning_rate": 0.0003878173581619822, "loss": 1.6953, "step": 8780 }, { "epoch": 0.22, "grad_norm": 0.1378229409456253, "learning_rate": 0.00038778987750202735, "loss": 1.7536, "step": 8790 }, { "epoch": 0.22, "grad_norm": 0.13812203705310822, "learning_rate": 0.0003877623668584747, "loss": 1.7113, "step": 8800 }, { "epoch": 0.22, "grad_norm": 0.1441105157136917, "learning_rate": 0.0003877348262357167, "loss": 1.7004, "step": 8810 }, { "epoch": 0.22, "grad_norm": 0.1562652587890625, "learning_rate": 0.0003877072556381507, "loss": 1.7129, "step": 8820 }, { "epoch": 0.22, "grad_norm": 0.13454972207546234, "learning_rate": 0.0003876796550701788, "loss": 1.6665, "step": 8830 }, { "epoch": 0.22, "grad_norm": 0.14589405059814453, "learning_rate": 0.00038765202453620775, "loss": 1.6646, "step": 8840 }, { "epoch": 0.23, "grad_norm": 0.13198499381542206, "learning_rate": 0.0003876243640406494, "loss": 1.6491, "step": 8850 }, { "epoch": 0.23, "grad_norm": 0.16170230507850647, "learning_rate": 0.00038759667358792, "loss": 1.6992, "step": 8860 }, { "epoch": 0.23, "grad_norm": 0.12688037753105164, "learning_rate": 0.00038756895318244083, "loss": 1.651, "step": 8870 }, { "epoch": 0.23, "grad_norm": 0.1396840214729309, "learning_rate": 0.0003875412028286379, "loss": 1.6469, "step": 8880 }, { "epoch": 0.23, "grad_norm": 0.13911014795303345, "learning_rate": 0.00038751342253094196, "loss": 1.6992, "step": 8890 }, { "epoch": 0.23, "grad_norm": 0.1247856467962265, "learning_rate": 0.0003874856122937886, "loss": 1.6641, "step": 8900 }, { "epoch": 0.23, "grad_norm": 0.1389637291431427, "learning_rate": 0.0003874577721216182, "loss": 1.66, "step": 8910 }, { "epoch": 0.23, "grad_norm": 0.11789216846227646, "learning_rate": 0.0003874299020188757, "loss": 1.6096, "step": 8920 }, { "epoch": 0.23, "grad_norm": 0.1422717571258545, "learning_rate": 0.0003874020019900112, "loss": 1.6885, "step": 8930 }, { "epoch": 0.23, "grad_norm": 0.13758403062820435, "learning_rate": 0.0003873740720394793, "loss": 1.6719, "step": 8940 }, { "epoch": 0.23, "grad_norm": 0.1851162314414978, "learning_rate": 0.00038734611217173945, "loss": 1.841, "step": 8950 }, { "epoch": 0.23, "grad_norm": 0.16305282711982727, "learning_rate": 0.00038731812239125587, "loss": 1.6986, "step": 8960 }, { "epoch": 0.23, "grad_norm": 0.1490446776151657, "learning_rate": 0.00038729010270249764, "loss": 1.7077, "step": 8970 }, { "epoch": 0.23, "grad_norm": 0.13517969846725464, "learning_rate": 0.0003872620531099384, "loss": 1.6802, "step": 8980 }, { "epoch": 0.23, "grad_norm": 0.13483189046382904, "learning_rate": 0.00038723397361805685, "loss": 1.6995, "step": 8990 }, { "epoch": 0.23, "grad_norm": 0.13785387575626373, "learning_rate": 0.0003872058642313363, "loss": 1.7246, "step": 9000 }, { "epoch": 0.23, "grad_norm": 0.127422034740448, "learning_rate": 0.00038717772495426475, "loss": 1.6759, "step": 9010 }, { "epoch": 0.23, "grad_norm": 0.14029677212238312, "learning_rate": 0.00038714955579133514, "loss": 1.691, "step": 9020 }, { "epoch": 0.23, "grad_norm": 0.1330009549856186, "learning_rate": 0.0003871213567470452, "loss": 1.6435, "step": 9030 }, { "epoch": 0.23, "grad_norm": 0.12364694476127625, "learning_rate": 0.0003870931278258972, "loss": 1.6694, "step": 9040 }, { "epoch": 0.23, "grad_norm": 0.13656748831272125, "learning_rate": 0.0003870648690323984, "loss": 1.7141, "step": 9050 }, { "epoch": 0.23, "grad_norm": 0.1401910036802292, "learning_rate": 0.00038703658037106083, "loss": 1.6992, "step": 9060 }, { "epoch": 0.23, "grad_norm": 0.13535091280937195, "learning_rate": 0.0003870082618464011, "loss": 1.6243, "step": 9070 }, { "epoch": 0.23, "grad_norm": 0.13311734795570374, "learning_rate": 0.0003869799134629408, "loss": 1.6122, "step": 9080 }, { "epoch": 0.23, "grad_norm": 0.12910255789756775, "learning_rate": 0.0003869515352252061, "loss": 1.7029, "step": 9090 }, { "epoch": 0.23, "grad_norm": 0.12881791591644287, "learning_rate": 0.0003869231271377281, "loss": 1.6627, "step": 9100 }, { "epoch": 0.23, "grad_norm": 0.12921388447284698, "learning_rate": 0.0003868946892050426, "loss": 1.6297, "step": 9110 }, { "epoch": 0.23, "grad_norm": 0.12428681552410126, "learning_rate": 0.0003868662214316901, "loss": 1.6419, "step": 9120 }, { "epoch": 0.23, "grad_norm": 0.15118862688541412, "learning_rate": 0.000386837723822216, "loss": 1.7649, "step": 9130 }, { "epoch": 0.23, "grad_norm": 0.1526978313922882, "learning_rate": 0.00038680919638117033, "loss": 1.6654, "step": 9140 }, { "epoch": 0.23, "grad_norm": 0.15959547460079193, "learning_rate": 0.00038678063911310796, "loss": 1.6896, "step": 9150 }, { "epoch": 0.23, "grad_norm": 0.14070424437522888, "learning_rate": 0.0003867520520225886, "loss": 1.6588, "step": 9160 }, { "epoch": 0.23, "grad_norm": 0.15812811255455017, "learning_rate": 0.00038672343511417646, "loss": 1.6973, "step": 9170 }, { "epoch": 0.23, "grad_norm": 0.1548108160495758, "learning_rate": 0.0003866947883924408, "loss": 1.6617, "step": 9180 }, { "epoch": 0.23, "grad_norm": 0.1341230422258377, "learning_rate": 0.0003866661118619554, "loss": 1.7306, "step": 9190 }, { "epoch": 0.23, "grad_norm": 0.13387803733348846, "learning_rate": 0.0003866374055272991, "loss": 1.6905, "step": 9200 }, { "epoch": 0.23, "grad_norm": 0.17781074345111847, "learning_rate": 0.0003866086693930552, "loss": 1.6869, "step": 9210 }, { "epoch": 0.23, "grad_norm": 0.13148914277553558, "learning_rate": 0.0003865799034638118, "loss": 1.6822, "step": 9220 }, { "epoch": 0.23, "grad_norm": 0.12276733666658401, "learning_rate": 0.00038655110774416196, "loss": 1.7114, "step": 9230 }, { "epoch": 0.24, "grad_norm": 0.1315593272447586, "learning_rate": 0.00038652228223870337, "loss": 1.683, "step": 9240 }, { "epoch": 0.24, "grad_norm": 0.13443982601165771, "learning_rate": 0.0003864934269520384, "loss": 1.6736, "step": 9250 }, { "epoch": 0.24, "grad_norm": 0.132383331656456, "learning_rate": 0.0003864645418887743, "loss": 1.6831, "step": 9260 }, { "epoch": 0.24, "grad_norm": 0.12144575268030167, "learning_rate": 0.00038643562705352295, "loss": 1.6697, "step": 9270 }, { "epoch": 0.24, "grad_norm": 0.14845208823680878, "learning_rate": 0.0003864066824509012, "loss": 1.6565, "step": 9280 }, { "epoch": 0.24, "grad_norm": 0.13534559309482574, "learning_rate": 0.0003863777080855303, "loss": 1.6132, "step": 9290 }, { "epoch": 0.24, "grad_norm": 0.12392674386501312, "learning_rate": 0.0003863487039620366, "loss": 1.7073, "step": 9300 }, { "epoch": 0.24, "grad_norm": 0.12857109308242798, "learning_rate": 0.0003863196700850511, "loss": 1.6741, "step": 9310 }, { "epoch": 0.24, "grad_norm": 0.13830944895744324, "learning_rate": 0.0003862906064592094, "loss": 1.6617, "step": 9320 }, { "epoch": 0.24, "grad_norm": 0.13779057562351227, "learning_rate": 0.0003862615130891521, "loss": 1.6576, "step": 9330 }, { "epoch": 0.24, "grad_norm": 0.12795256078243256, "learning_rate": 0.0003862323899795243, "loss": 1.6418, "step": 9340 }, { "epoch": 0.24, "grad_norm": 0.12998293340206146, "learning_rate": 0.000386203237134976, "loss": 1.645, "step": 9350 }, { "epoch": 0.24, "grad_norm": 0.13535144925117493, "learning_rate": 0.00038617405456016187, "loss": 1.7063, "step": 9360 }, { "epoch": 0.24, "grad_norm": 0.14552073180675507, "learning_rate": 0.00038614484225974144, "loss": 1.7002, "step": 9370 }, { "epoch": 0.24, "grad_norm": 0.13565002381801605, "learning_rate": 0.00038611560023837883, "loss": 1.6609, "step": 9380 }, { "epoch": 0.24, "grad_norm": 0.13093070685863495, "learning_rate": 0.00038608632850074305, "loss": 1.6438, "step": 9390 }, { "epoch": 0.24, "grad_norm": 0.15767642855644226, "learning_rate": 0.0003860570270515077, "loss": 1.7212, "step": 9400 }, { "epoch": 0.24, "grad_norm": 0.12853416800498962, "learning_rate": 0.0003860276958953514, "loss": 1.6376, "step": 9410 }, { "epoch": 0.24, "grad_norm": 0.15520334243774414, "learning_rate": 0.00038599833503695713, "loss": 1.7403, "step": 9420 }, { "epoch": 0.24, "grad_norm": 0.13134929537773132, "learning_rate": 0.00038596894448101297, "loss": 1.6265, "step": 9430 }, { "epoch": 0.24, "grad_norm": 0.12647289037704468, "learning_rate": 0.0003859395242322115, "loss": 1.6685, "step": 9440 }, { "epoch": 0.24, "grad_norm": 0.1194876879453659, "learning_rate": 0.00038591007429525, "loss": 1.629, "step": 9450 }, { "epoch": 0.24, "grad_norm": 0.12292616069316864, "learning_rate": 0.0003858805946748309, "loss": 1.6883, "step": 9460 }, { "epoch": 0.24, "grad_norm": 0.1364501565694809, "learning_rate": 0.00038585108537566085, "loss": 1.6677, "step": 9470 }, { "epoch": 0.24, "grad_norm": 0.1332101970911026, "learning_rate": 0.00038582154640245156, "loss": 1.6828, "step": 9480 }, { "epoch": 0.24, "grad_norm": 0.13498814404010773, "learning_rate": 0.0003857919777599194, "loss": 1.6912, "step": 9490 }, { "epoch": 0.24, "grad_norm": 0.1320127248764038, "learning_rate": 0.00038576237945278543, "loss": 1.6613, "step": 9500 }, { "epoch": 0.24, "grad_norm": 0.12627199292182922, "learning_rate": 0.0003857327514857755, "loss": 1.6771, "step": 9510 }, { "epoch": 0.24, "grad_norm": 0.12172795087099075, "learning_rate": 0.00038570309386362015, "loss": 1.6874, "step": 9520 }, { "epoch": 0.24, "grad_norm": 0.14356915652751923, "learning_rate": 0.00038567340659105483, "loss": 1.7213, "step": 9530 }, { "epoch": 0.24, "grad_norm": 0.14241376519203186, "learning_rate": 0.00038564368967281936, "loss": 1.7341, "step": 9540 }, { "epoch": 0.24, "grad_norm": 0.15648552775382996, "learning_rate": 0.00038561394311365866, "loss": 1.7055, "step": 9550 }, { "epoch": 0.24, "grad_norm": 0.1445528119802475, "learning_rate": 0.00038558416691832217, "loss": 1.6674, "step": 9560 }, { "epoch": 0.24, "grad_norm": 0.13047567009925842, "learning_rate": 0.0003855543610915642, "loss": 1.6434, "step": 9570 }, { "epoch": 0.24, "grad_norm": 0.137510746717453, "learning_rate": 0.0003855245256381436, "loss": 1.6924, "step": 9580 }, { "epoch": 0.24, "grad_norm": 0.12807559967041016, "learning_rate": 0.00038549466056282417, "loss": 1.6805, "step": 9590 }, { "epoch": 0.24, "grad_norm": 0.13783133029937744, "learning_rate": 0.0003854647658703743, "loss": 1.6968, "step": 9600 }, { "epoch": 0.24, "grad_norm": 0.13350753486156464, "learning_rate": 0.0003854348415655671, "loss": 1.6599, "step": 9610 }, { "epoch": 0.24, "grad_norm": 0.15386106073856354, "learning_rate": 0.0003854048876531805, "loss": 1.6706, "step": 9620 }, { "epoch": 0.24, "grad_norm": 0.14253292977809906, "learning_rate": 0.0003853749041379972, "loss": 1.6567, "step": 9630 }, { "epoch": 0.25, "grad_norm": 0.1428574025630951, "learning_rate": 0.00038534489102480437, "loss": 1.6911, "step": 9640 }, { "epoch": 0.25, "grad_norm": 0.12866978347301483, "learning_rate": 0.0003853148483183942, "loss": 1.6676, "step": 9650 }, { "epoch": 0.25, "grad_norm": 0.12328783422708511, "learning_rate": 0.00038528477602356345, "loss": 1.639, "step": 9660 }, { "epoch": 0.25, "grad_norm": 0.12650468945503235, "learning_rate": 0.00038525467414511356, "loss": 1.6606, "step": 9670 }, { "epoch": 0.25, "grad_norm": 0.13679854571819305, "learning_rate": 0.0003852245426878508, "loss": 1.6582, "step": 9680 }, { "epoch": 0.25, "grad_norm": 0.13160301744937897, "learning_rate": 0.0003851943816565862, "loss": 1.6697, "step": 9690 }, { "epoch": 0.25, "grad_norm": 0.13433463871479034, "learning_rate": 0.00038516419105613544, "loss": 1.5862, "step": 9700 }, { "epoch": 0.25, "grad_norm": 0.12435674667358398, "learning_rate": 0.0003851339708913188, "loss": 1.6992, "step": 9710 }, { "epoch": 0.25, "grad_norm": 0.1282668113708496, "learning_rate": 0.0003851037211669615, "loss": 1.6415, "step": 9720 }, { "epoch": 0.25, "grad_norm": 0.13300156593322754, "learning_rate": 0.00038507344188789335, "loss": 1.6636, "step": 9730 }, { "epoch": 0.25, "grad_norm": 0.13623046875, "learning_rate": 0.00038504313305894896, "loss": 1.6138, "step": 9740 }, { "epoch": 0.25, "grad_norm": 0.1425723135471344, "learning_rate": 0.0003850127946849676, "loss": 1.6753, "step": 9750 }, { "epoch": 0.25, "grad_norm": 0.1205907091498375, "learning_rate": 0.0003849824267707932, "loss": 1.6981, "step": 9760 }, { "epoch": 0.25, "grad_norm": 0.13399383425712585, "learning_rate": 0.0003849520293212745, "loss": 1.6687, "step": 9770 }, { "epoch": 0.25, "grad_norm": 0.14110614359378815, "learning_rate": 0.000384921602341265, "loss": 1.6718, "step": 9780 }, { "epoch": 0.25, "grad_norm": 0.14635838568210602, "learning_rate": 0.0003848911458356227, "loss": 1.6424, "step": 9790 }, { "epoch": 0.25, "grad_norm": 0.12122263759374619, "learning_rate": 0.0003848606598092106, "loss": 1.6415, "step": 9800 }, { "epoch": 0.25, "grad_norm": 0.13565990328788757, "learning_rate": 0.00038483014426689624, "loss": 1.6716, "step": 9810 }, { "epoch": 0.25, "grad_norm": 0.1413806825876236, "learning_rate": 0.0003847995992135519, "loss": 1.6768, "step": 9820 }, { "epoch": 0.25, "grad_norm": 0.14945995807647705, "learning_rate": 0.0003847690246540545, "loss": 1.6907, "step": 9830 }, { "epoch": 0.25, "grad_norm": 0.1399151086807251, "learning_rate": 0.00038473842059328587, "loss": 1.7095, "step": 9840 }, { "epoch": 0.25, "grad_norm": 0.1341298669576645, "learning_rate": 0.00038470778703613226, "loss": 1.6107, "step": 9850 }, { "epoch": 0.25, "grad_norm": 0.1368744671344757, "learning_rate": 0.00038467712398748504, "loss": 1.6942, "step": 9860 }, { "epoch": 0.25, "grad_norm": 0.12841053307056427, "learning_rate": 0.00038464643145223976, "loss": 1.6767, "step": 9870 }, { "epoch": 0.25, "grad_norm": 0.12721240520477295, "learning_rate": 0.0003846157094352973, "loss": 1.6485, "step": 9880 }, { "epoch": 0.25, "grad_norm": 0.1607305407524109, "learning_rate": 0.0003845849579415626, "loss": 1.6569, "step": 9890 }, { "epoch": 0.25, "grad_norm": 0.12862354516983032, "learning_rate": 0.00038455417697594573, "loss": 1.6933, "step": 9900 }, { "epoch": 0.25, "grad_norm": 0.12964557111263275, "learning_rate": 0.0003845233665433614, "loss": 1.6559, "step": 9910 }, { "epoch": 0.25, "grad_norm": 0.13759677112102509, "learning_rate": 0.00038449252664872897, "loss": 1.6401, "step": 9920 }, { "epoch": 0.25, "grad_norm": 0.13009068369865417, "learning_rate": 0.00038446165729697244, "loss": 1.7112, "step": 9930 }, { "epoch": 0.25, "grad_norm": 0.12266763299703598, "learning_rate": 0.0003844307584930207, "loss": 1.6665, "step": 9940 }, { "epoch": 0.25, "grad_norm": 0.16423115134239197, "learning_rate": 0.0003843998302418071, "loss": 1.6358, "step": 9950 }, { "epoch": 0.25, "grad_norm": 0.1375732421875, "learning_rate": 0.0003843688725482699, "loss": 1.643, "step": 9960 }, { "epoch": 0.25, "grad_norm": 0.1389283388853073, "learning_rate": 0.000384337885417352, "loss": 1.7376, "step": 9970 }, { "epoch": 0.25, "grad_norm": 0.14045849442481995, "learning_rate": 0.00038430686885400096, "loss": 1.6542, "step": 9980 }, { "epoch": 0.25, "grad_norm": 0.1282881647348404, "learning_rate": 0.000384275822863169, "loss": 1.6977, "step": 9990 }, { "epoch": 0.25, "grad_norm": 0.14488451182842255, "learning_rate": 0.00038424474744981315, "loss": 1.6779, "step": 10000 } ], "logging_steps": 10, "max_steps": 78622, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, "total_flos": 8.458132783104e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }