{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999956610404825, "global_step": 17285, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 8.2501, "step": 1 }, { "epoch": 0.0, "learning_rate": 6.936416184971099e-07, "loss": 7.9965, "step": 10 }, { "epoch": 0.0, "learning_rate": 3.0057803468208094e-06, "loss": 8.0268, "step": 20 }, { "epoch": 0.01, "learning_rate": 5.317919075144509e-06, "loss": 7.9878, "step": 30 }, { "epoch": 0.01, "learning_rate": 7.398843930635839e-06, "loss": 7.6933, "step": 40 }, { "epoch": 0.01, "learning_rate": 9.710982658959537e-06, "loss": 7.4624, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.2023121387283238e-05, "loss": 7.3907, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.4335260115606938e-05, "loss": 7.0312, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.6647398843930635e-05, "loss": 6.6125, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.8959537572254336e-05, "loss": 6.3013, "step": 90 }, { "epoch": 0.02, "learning_rate": 2.1271676300578036e-05, "loss": 6.068, "step": 100 }, { "epoch": 0.02, "learning_rate": 2.3583815028901734e-05, "loss": 5.8308, "step": 110 }, { "epoch": 0.02, "learning_rate": 2.5895953757225434e-05, "loss": 5.7656, "step": 120 }, { "epoch": 0.02, "learning_rate": 2.8208092485549138e-05, "loss": 5.5955, "step": 130 }, { "epoch": 0.02, "learning_rate": 3.0520231213872835e-05, "loss": 5.3842, "step": 140 }, { "epoch": 0.03, "learning_rate": 3.283236994219653e-05, "loss": 5.2866, "step": 150 }, { "epoch": 0.03, "learning_rate": 3.514450867052023e-05, "loss": 5.0532, "step": 160 }, { "epoch": 0.03, "learning_rate": 3.7456647398843934e-05, "loss": 4.9624, "step": 170 }, { "epoch": 0.03, "learning_rate": 3.976878612716764e-05, "loss": 4.8342, "step": 180 }, { "epoch": 0.03, "learning_rate": 4.2080924855491335e-05, "loss": 4.6055, "step": 190 }, { "epoch": 0.03, "learning_rate": 4.439306358381503e-05, "loss": 4.5276, "step": 200 }, { "epoch": 0.04, "learning_rate": 4.670520231213873e-05, "loss": 4.3676, "step": 210 }, { "epoch": 0.04, "learning_rate": 4.9017341040462426e-05, "loss": 4.2029, "step": 220 }, { "epoch": 0.04, "learning_rate": 5.1329479768786124e-05, "loss": 4.0336, "step": 230 }, { "epoch": 0.04, "learning_rate": 5.364161849710983e-05, "loss": 3.8993, "step": 240 }, { "epoch": 0.04, "learning_rate": 5.595375722543353e-05, "loss": 3.834, "step": 250 }, { "epoch": 0.05, "learning_rate": 5.8265895953757235e-05, "loss": 3.7466, "step": 260 }, { "epoch": 0.05, "learning_rate": 6.057803468208093e-05, "loss": 3.6144, "step": 270 }, { "epoch": 0.05, "learning_rate": 6.289017341040462e-05, "loss": 3.4977, "step": 280 }, { "epoch": 0.05, "learning_rate": 6.520231213872833e-05, "loss": 3.4428, "step": 290 }, { "epoch": 0.05, "learning_rate": 6.751445086705203e-05, "loss": 3.2823, "step": 300 }, { "epoch": 0.05, "learning_rate": 6.982658959537573e-05, "loss": 3.2296, "step": 310 }, { "epoch": 0.06, "learning_rate": 7.213872832369943e-05, "loss": 3.1029, "step": 320 }, { "epoch": 0.06, "learning_rate": 7.445086705202312e-05, "loss": 3.1717, "step": 330 }, { "epoch": 0.06, "learning_rate": 7.676300578034682e-05, "loss": 3.0968, "step": 340 }, { "epoch": 0.06, "learning_rate": 7.907514450867053e-05, "loss": 2.9926, "step": 350 }, { "epoch": 0.06, "learning_rate": 8.138728323699423e-05, "loss": 3.0021, "step": 360 }, { "epoch": 0.06, "learning_rate": 8.369942196531792e-05, "loss": 2.891, "step": 370 }, { "epoch": 0.07, "learning_rate": 8.601156069364162e-05, "loss": 2.8498, "step": 380 }, { "epoch": 0.07, "learning_rate": 8.832369942196532e-05, "loss": 2.8172, "step": 390 }, { "epoch": 0.07, "learning_rate": 9.063583815028902e-05, "loss": 2.8302, "step": 400 }, { "epoch": 0.07, "learning_rate": 9.294797687861271e-05, "loss": 2.7333, "step": 410 }, { "epoch": 0.07, "learning_rate": 9.526011560693642e-05, "loss": 2.7135, "step": 420 }, { "epoch": 0.07, "learning_rate": 9.757225433526012e-05, "loss": 2.6811, "step": 430 }, { "epoch": 0.08, "learning_rate": 9.988439306358382e-05, "loss": 2.6537, "step": 440 }, { "epoch": 0.08, "learning_rate": 0.00010219653179190752, "loss": 2.6031, "step": 450 }, { "epoch": 0.08, "learning_rate": 0.00010450867052023121, "loss": 2.6037, "step": 460 }, { "epoch": 0.08, "learning_rate": 0.00010682080924855491, "loss": 2.5387, "step": 470 }, { "epoch": 0.08, "learning_rate": 0.00010913294797687861, "loss": 2.5393, "step": 480 }, { "epoch": 0.09, "learning_rate": 0.00011144508670520233, "loss": 2.5387, "step": 490 }, { "epoch": 0.09, "learning_rate": 0.00011375722543352603, "loss": 2.4848, "step": 500 }, { "epoch": 0.09, "learning_rate": 0.00011606936416184973, "loss": 2.4773, "step": 510 }, { "epoch": 0.09, "learning_rate": 0.00011838150289017342, "loss": 2.4453, "step": 520 }, { "epoch": 0.09, "learning_rate": 0.00012069364161849712, "loss": 2.3941, "step": 530 }, { "epoch": 0.09, "learning_rate": 0.00012300578034682083, "loss": 2.431, "step": 540 }, { "epoch": 0.1, "learning_rate": 0.00012531791907514453, "loss": 2.4208, "step": 550 }, { "epoch": 0.1, "learning_rate": 0.00012763005780346823, "loss": 2.4313, "step": 560 }, { "epoch": 0.1, "learning_rate": 0.00012971098265895952, "loss": 2.3427, "step": 570 }, { "epoch": 0.1, "learning_rate": 0.00013202312138728322, "loss": 2.3415, "step": 580 }, { "epoch": 0.1, "learning_rate": 0.00013433526011560694, "loss": 2.2621, "step": 590 }, { "epoch": 0.1, "learning_rate": 0.00013664739884393064, "loss": 2.3606, "step": 600 }, { "epoch": 0.11, "learning_rate": 0.00013895953757225434, "loss": 2.3175, "step": 610 }, { "epoch": 0.11, "learning_rate": 0.00014127167630057804, "loss": 2.2297, "step": 620 }, { "epoch": 0.11, "learning_rate": 0.00014358381502890176, "loss": 2.1856, "step": 630 }, { "epoch": 0.11, "learning_rate": 0.00014589595375722546, "loss": 2.2633, "step": 640 }, { "epoch": 0.11, "learning_rate": 0.00014820809248554915, "loss": 2.2474, "step": 650 }, { "epoch": 0.11, "learning_rate": 0.00015052023121387285, "loss": 2.2024, "step": 660 }, { "epoch": 0.12, "learning_rate": 0.00015260115606936415, "loss": 2.1947, "step": 670 }, { "epoch": 0.12, "learning_rate": 0.00015491329479768785, "loss": 2.234, "step": 680 }, { "epoch": 0.12, "learning_rate": 0.00015722543352601157, "loss": 2.2061, "step": 690 }, { "epoch": 0.12, "learning_rate": 0.0001593063583815029, "loss": 2.1553, "step": 700 }, { "epoch": 0.12, "learning_rate": 0.0001616184971098266, "loss": 2.2286, "step": 710 }, { "epoch": 0.12, "learning_rate": 0.0001639306358381503, "loss": 2.161, "step": 720 }, { "epoch": 0.13, "learning_rate": 0.000166242774566474, "loss": 2.1628, "step": 730 }, { "epoch": 0.13, "learning_rate": 0.00016855491329479768, "loss": 2.1371, "step": 740 }, { "epoch": 0.13, "learning_rate": 0.00017063583815028904, "loss": 2.2181, "step": 750 }, { "epoch": 0.13, "learning_rate": 0.00017271676300578033, "loss": 2.154, "step": 760 }, { "epoch": 0.13, "learning_rate": 0.00017502890173410406, "loss": 2.1695, "step": 770 }, { "epoch": 0.14, "learning_rate": 0.00017734104046242776, "loss": 2.1685, "step": 780 }, { "epoch": 0.14, "learning_rate": 0.00017965317919075145, "loss": 2.192, "step": 790 }, { "epoch": 0.14, "learning_rate": 0.00018196531791907515, "loss": 2.1567, "step": 800 }, { "epoch": 0.14, "learning_rate": 0.00018427745664739887, "loss": 2.0987, "step": 810 }, { "epoch": 0.14, "learning_rate": 0.00018658959537572257, "loss": 2.1687, "step": 820 }, { "epoch": 0.14, "learning_rate": 0.00018890173410404627, "loss": 2.0736, "step": 830 }, { "epoch": 0.15, "learning_rate": 0.00019121387283236997, "loss": 2.0683, "step": 840 }, { "epoch": 0.15, "learning_rate": 0.00019352601156069366, "loss": 2.0872, "step": 850 }, { "epoch": 0.15, "learning_rate": 0.00019583815028901736, "loss": 2.1029, "step": 860 }, { "epoch": 0.15, "learning_rate": 0.00019815028901734106, "loss": 2.0301, "step": 870 }, { "epoch": 0.15, "learning_rate": 0.00019999999267878048, "loss": 2.0957, "step": 880 }, { "epoch": 0.15, "learning_rate": 0.0001999997364362091, "loss": 2.0484, "step": 890 }, { "epoch": 0.16, "learning_rate": 0.00019999911413373273, "loss": 2.0489, "step": 900 }, { "epoch": 0.16, "learning_rate": 0.00019999812577362934, "loss": 2.0073, "step": 910 }, { "epoch": 0.16, "learning_rate": 0.0001999967713595169, "loss": 2.0241, "step": 920 }, { "epoch": 0.16, "learning_rate": 0.00019999505089635347, "loss": 2.0097, "step": 930 }, { "epoch": 0.16, "learning_rate": 0.0001999929643904369, "loss": 2.0251, "step": 940 }, { "epoch": 0.16, "learning_rate": 0.00019999051184940516, "loss": 1.9893, "step": 950 }, { "epoch": 0.17, "learning_rate": 0.00019998769328223598, "loss": 1.9893, "step": 960 }, { "epoch": 0.17, "learning_rate": 0.00019998450869924703, "loss": 1.9321, "step": 970 }, { "epoch": 0.17, "learning_rate": 0.00019998095811209587, "loss": 2.0008, "step": 980 }, { "epoch": 0.17, "learning_rate": 0.00019997704153377978, "loss": 2.0254, "step": 990 }, { "epoch": 0.17, "learning_rate": 0.0001999727589786358, "loss": 1.873, "step": 1000 }, { "epoch": 0.17, "eval_loss": 1.0050371885299683, "eval_runtime": 62.5449, "eval_samples_per_second": 8.378, "eval_steps_per_second": 0.528, "step": 1000 }, { "epoch": 0.18, "learning_rate": 0.00019996811046234077, "loss": 1.9664, "step": 1010 }, { "epoch": 0.18, "learning_rate": 0.00019996309600191098, "loss": 1.9404, "step": 1020 }, { "epoch": 0.18, "learning_rate": 0.00019995771561570248, "loss": 1.969, "step": 1030 }, { "epoch": 0.18, "learning_rate": 0.00019995196932341073, "loss": 1.9545, "step": 1040 }, { "epoch": 0.18, "learning_rate": 0.00019994585714607066, "loss": 1.9141, "step": 1050 }, { "epoch": 0.18, "learning_rate": 0.00019993937910605658, "loss": 1.9299, "step": 1060 }, { "epoch": 0.19, "learning_rate": 0.00019993253522708205, "loss": 1.9305, "step": 1070 }, { "epoch": 0.19, "learning_rate": 0.0001999253255341998, "loss": 1.902, "step": 1080 }, { "epoch": 0.19, "learning_rate": 0.00019991775005380173, "loss": 1.9416, "step": 1090 }, { "epoch": 0.19, "learning_rate": 0.00019991061939600934, "loss": 1.9164, "step": 1100 }, { "epoch": 0.19, "learning_rate": 0.00019990234899683635, "loss": 1.947, "step": 1110 }, { "epoch": 0.19, "learning_rate": 0.00019989371289425568, "loss": 1.9242, "step": 1120 }, { "epoch": 0.2, "learning_rate": 0.00019988471111988062, "loss": 1.9037, "step": 1130 }, { "epoch": 0.2, "learning_rate": 0.00019987534370666328, "loss": 1.915, "step": 1140 }, { "epoch": 0.2, "learning_rate": 0.000199865610688894, "loss": 1.9268, "step": 1150 }, { "epoch": 0.2, "learning_rate": 0.00019985551210220158, "loss": 1.9268, "step": 1160 }, { "epoch": 0.2, "learning_rate": 0.00019984611084327463, "loss": 1.9629, "step": 1170 }, { "epoch": 0.2, "learning_rate": 0.00019983531777857815, "loss": 1.854, "step": 1180 }, { "epoch": 0.21, "learning_rate": 0.00019982415925584902, "loss": 1.9051, "step": 1190 }, { "epoch": 0.21, "learning_rate": 0.00019981263531593422, "loss": 1.8801, "step": 1200 }, { "epoch": 0.21, "learning_rate": 0.00019980074600101837, "loss": 1.8322, "step": 1210 }, { "epoch": 0.21, "learning_rate": 0.00019978849135462366, "loss": 1.8857, "step": 1220 }, { "epoch": 0.21, "learning_rate": 0.00019977587142160945, "loss": 1.8805, "step": 1230 }, { "epoch": 0.22, "learning_rate": 0.00019976288624817248, "loss": 1.8511, "step": 1240 }, { "epoch": 0.22, "learning_rate": 0.00019974953588184632, "loss": 1.8872, "step": 1250 }, { "epoch": 0.22, "learning_rate": 0.00019973582037150148, "loss": 1.8636, "step": 1260 }, { "epoch": 0.22, "learning_rate": 0.00019972173976734507, "loss": 1.8701, "step": 1270 }, { "epoch": 0.22, "learning_rate": 0.00019970729412092063, "loss": 1.8454, "step": 1280 }, { "epoch": 0.22, "learning_rate": 0.00019969248348510808, "loss": 1.8941, "step": 1290 }, { "epoch": 0.23, "learning_rate": 0.00019967730791412328, "loss": 1.8561, "step": 1300 }, { "epoch": 0.23, "learning_rate": 0.00019966176746351818, "loss": 1.8992, "step": 1310 }, { "epoch": 0.23, "learning_rate": 0.00019964586219018018, "loss": 1.8372, "step": 1320 }, { "epoch": 0.23, "learning_rate": 0.0001996295921523323, "loss": 1.8278, "step": 1330 }, { "epoch": 0.23, "learning_rate": 0.00019961295740953278, "loss": 1.8311, "step": 1340 }, { "epoch": 0.23, "learning_rate": 0.00019959595802267492, "loss": 1.8281, "step": 1350 }, { "epoch": 0.24, "learning_rate": 0.0001995785940539868, "loss": 1.8188, "step": 1360 }, { "epoch": 0.24, "learning_rate": 0.00019956086556703113, "loss": 1.8156, "step": 1370 }, { "epoch": 0.24, "learning_rate": 0.00019954277262670495, "loss": 1.7751, "step": 1380 }, { "epoch": 0.24, "learning_rate": 0.00019952431529923949, "loss": 1.832, "step": 1390 }, { "epoch": 0.24, "learning_rate": 0.00019950549365219968, "loss": 1.8475, "step": 1400 }, { "epoch": 0.24, "learning_rate": 0.00019948630775448433, "loss": 1.8329, "step": 1410 }, { "epoch": 0.25, "learning_rate": 0.00019946675767632544, "loss": 1.8352, "step": 1420 }, { "epoch": 0.25, "learning_rate": 0.00019944684348928822, "loss": 1.8325, "step": 1430 }, { "epoch": 0.25, "learning_rate": 0.00019942860946808643, "loss": 1.8484, "step": 1440 }, { "epoch": 0.25, "learning_rate": 0.00019940800367611585, "loss": 1.837, "step": 1450 }, { "epoch": 0.25, "learning_rate": 0.00019938703399034234, "loss": 1.8295, "step": 1460 }, { "epoch": 0.26, "learning_rate": 0.00019936570048752775, "loss": 1.8153, "step": 1470 }, { "epoch": 0.26, "learning_rate": 0.00019934400324576564, "loss": 1.7925, "step": 1480 }, { "epoch": 0.26, "learning_rate": 0.0001993219423444811, "loss": 1.8383, "step": 1490 }, { "epoch": 0.26, "learning_rate": 0.0001992995178644305, "loss": 1.8135, "step": 1500 }, { "epoch": 0.26, "learning_rate": 0.00019927672988770105, "loss": 1.8036, "step": 1510 }, { "epoch": 0.26, "learning_rate": 0.00019925357849771066, "loss": 1.8035, "step": 1520 }, { "epoch": 0.27, "learning_rate": 0.00019923243159839809, "loss": 1.8135, "step": 1530 }, { "epoch": 0.27, "learning_rate": 0.00019920858995779232, "loss": 1.7839, "step": 1540 }, { "epoch": 0.27, "learning_rate": 0.00019918438515335927, "loss": 1.7759, "step": 1550 }, { "epoch": 0.27, "learning_rate": 0.00019915981727370316, "loss": 1.7933, "step": 1560 }, { "epoch": 0.27, "learning_rate": 0.00019913488640875744, "loss": 1.7977, "step": 1570 }, { "epoch": 0.27, "learning_rate": 0.00019910959264978422, "loss": 1.7797, "step": 1580 }, { "epoch": 0.28, "learning_rate": 0.00019908393608937406, "loss": 1.7656, "step": 1590 }, { "epoch": 0.28, "learning_rate": 0.00019905791682144557, "loss": 1.798, "step": 1600 }, { "epoch": 0.28, "learning_rate": 0.00019903153494124518, "loss": 1.7618, "step": 1610 }, { "epoch": 0.28, "learning_rate": 0.00019900479054534652, "loss": 1.7879, "step": 1620 }, { "epoch": 0.28, "learning_rate": 0.00019897768373165046, "loss": 1.7972, "step": 1630 }, { "epoch": 0.28, "learning_rate": 0.00019895297781409127, "loss": 1.7738, "step": 1640 }, { "epoch": 0.29, "learning_rate": 0.00019892518268104788, "loss": 1.7901, "step": 1650 }, { "epoch": 0.29, "learning_rate": 0.00019889702542162026, "loss": 1.7489, "step": 1660 }, { "epoch": 0.29, "learning_rate": 0.00019887137435523912, "loss": 1.8051, "step": 1670 }, { "epoch": 0.29, "learning_rate": 0.00019884252934074216, "loss": 1.7428, "step": 1680 }, { "epoch": 0.29, "learning_rate": 0.0001988133225024225, "loss": 1.7961, "step": 1690 }, { "epoch": 0.3, "learning_rate": 0.00019878375394719502, "loss": 1.7779, "step": 1700 }, { "epoch": 0.3, "learning_rate": 0.00019875382378329857, "loss": 1.8037, "step": 1710 }, { "epoch": 0.3, "learning_rate": 0.0001987235321202958, "loss": 1.7767, "step": 1720 }, { "epoch": 0.3, "learning_rate": 0.00019869287906907265, "loss": 1.8044, "step": 1730 }, { "epoch": 0.3, "learning_rate": 0.0001986618647418379, "loss": 1.7517, "step": 1740 }, { "epoch": 0.3, "learning_rate": 0.0001986304892521229, "loss": 1.8253, "step": 1750 }, { "epoch": 0.31, "learning_rate": 0.00019859875271478102, "loss": 1.7588, "step": 1760 }, { "epoch": 0.31, "learning_rate": 0.00019856665524598733, "loss": 1.7948, "step": 1770 }, { "epoch": 0.31, "learning_rate": 0.00019853419696323806, "loss": 1.8023, "step": 1780 }, { "epoch": 0.31, "learning_rate": 0.00019850467611100676, "loss": 1.7663, "step": 1790 }, { "epoch": 0.31, "learning_rate": 0.00019847153261017426, "loss": 1.7566, "step": 1800 }, { "epoch": 0.31, "learning_rate": 0.00019843802864359298, "loss": 1.7882, "step": 1810 }, { "epoch": 0.32, "learning_rate": 0.00019840416433390782, "loss": 1.782, "step": 1820 }, { "epoch": 0.32, "learning_rate": 0.00019836993980508268, "loss": 1.7849, "step": 1830 }, { "epoch": 0.32, "learning_rate": 0.00019833535518240031, "loss": 1.7793, "step": 1840 }, { "epoch": 0.32, "learning_rate": 0.0001983004105924614, "loss": 1.7761, "step": 1850 }, { "epoch": 0.32, "learning_rate": 0.00019826865279520944, "loss": 1.7676, "step": 1860 }, { "epoch": 0.32, "learning_rate": 0.0001982330246209872, "loss": 1.7274, "step": 1870 }, { "epoch": 0.33, "learning_rate": 0.00019819703685410058, "loss": 1.7513, "step": 1880 }, { "epoch": 0.33, "learning_rate": 0.0001981606896262867, "loss": 1.7478, "step": 1890 }, { "epoch": 0.33, "learning_rate": 0.00019812398307059856, "loss": 1.781, "step": 1900 }, { "epoch": 0.33, "learning_rate": 0.00019808691732140448, "loss": 1.7504, "step": 1910 }, { "epoch": 0.33, "learning_rate": 0.00019804949251438767, "loss": 1.7552, "step": 1920 }, { "epoch": 0.33, "learning_rate": 0.0001980117087865457, "loss": 1.8154, "step": 1930 }, { "epoch": 0.34, "learning_rate": 0.00019797356627619, "loss": 1.7762, "step": 1940 }, { "epoch": 0.34, "learning_rate": 0.00019793506512294542, "loss": 1.7263, "step": 1950 }, { "epoch": 0.34, "learning_rate": 0.00019789620546774956, "loss": 1.7446, "step": 1960 }, { "epoch": 0.34, "learning_rate": 0.00019786485971773587, "loss": 1.694, "step": 1970 }, { "epoch": 0.34, "learning_rate": 0.0001978253551183793, "loss": 1.7198, "step": 1980 }, { "epoch": 0.35, "learning_rate": 0.00019778549241867687, "loss": 1.7423, "step": 1990 }, { "epoch": 0.35, "learning_rate": 0.0001977452717645503, "loss": 1.7434, "step": 2000 }, { "epoch": 0.35, "eval_loss": 0.90117347240448, "eval_runtime": 61.8399, "eval_samples_per_second": 8.473, "eval_steps_per_second": 0.534, "step": 2000 }, { "epoch": 0.35, "learning_rate": 0.00019770469330323174, "loss": 1.7791, "step": 2010 }, { "epoch": 0.35, "learning_rate": 0.00019766375718326297, "loss": 1.7459, "step": 2020 }, { "epoch": 0.35, "learning_rate": 0.00019762246355449516, "loss": 1.7342, "step": 2030 }, { "epoch": 0.35, "learning_rate": 0.00019758081256808816, "loss": 1.7564, "step": 2040 }, { "epoch": 0.36, "learning_rate": 0.00019753880437650985, "loss": 1.7394, "step": 2050 }, { "epoch": 0.36, "learning_rate": 0.00019749643913353582, "loss": 1.7663, "step": 2060 }, { "epoch": 0.36, "learning_rate": 0.00019745371699424864, "loss": 1.7222, "step": 2070 }, { "epoch": 0.36, "learning_rate": 0.00019741063811503734, "loss": 1.7046, "step": 2080 }, { "epoch": 0.36, "learning_rate": 0.0001973759182648501, "loss": 1.6904, "step": 2090 }, { "epoch": 0.36, "learning_rate": 0.00019733219765204383, "loss": 1.6956, "step": 2100 }, { "epoch": 0.37, "learning_rate": 0.00019728812074414819, "loss": 1.7511, "step": 2110 }, { "epoch": 0.37, "learning_rate": 0.00019724368770251155, "loss": 1.7262, "step": 2120 }, { "epoch": 0.37, "learning_rate": 0.0001971988986897858, "loss": 1.7114, "step": 2130 }, { "epoch": 0.37, "learning_rate": 0.00019715375386992608, "loss": 1.7182, "step": 2140 }, { "epoch": 0.37, "learning_rate": 0.00019710825340818987, "loss": 1.7034, "step": 2150 }, { "epoch": 0.37, "learning_rate": 0.00019706239747113656, "loss": 1.7282, "step": 2160 }, { "epoch": 0.38, "learning_rate": 0.00019701618622662678, "loss": 1.74, "step": 2170 }, { "epoch": 0.38, "learning_rate": 0.00019696961984382182, "loss": 1.6854, "step": 2180 }, { "epoch": 0.38, "learning_rate": 0.00019692269849318303, "loss": 1.756, "step": 2190 }, { "epoch": 0.38, "learning_rate": 0.00019687542234647106, "loss": 1.7159, "step": 2200 }, { "epoch": 0.38, "learning_rate": 0.00019682779157674537, "loss": 1.7095, "step": 2210 }, { "epoch": 0.39, "learning_rate": 0.00019677980635836363, "loss": 1.7071, "step": 2220 }, { "epoch": 0.39, "learning_rate": 0.00019673146686698093, "loss": 1.7077, "step": 2230 }, { "epoch": 0.39, "learning_rate": 0.00019668277327954917, "loss": 1.7144, "step": 2240 }, { "epoch": 0.39, "learning_rate": 0.00019663372577431663, "loss": 1.6873, "step": 2250 }, { "epoch": 0.39, "learning_rate": 0.0001965892805682537, "loss": 1.7081, "step": 2260 }, { "epoch": 0.39, "learning_rate": 0.00019653956111491275, "loss": 1.6979, "step": 2270 }, { "epoch": 0.4, "learning_rate": 0.00019648948826801467, "loss": 1.698, "step": 2280 }, { "epoch": 0.4, "learning_rate": 0.00019644412070578336, "loss": 1.692, "step": 2290 }, { "epoch": 0.4, "learning_rate": 0.00019639337691717884, "loss": 1.6938, "step": 2300 }, { "epoch": 0.4, "learning_rate": 0.00019634228027014033, "loss": 1.7152, "step": 2310 }, { "epoch": 0.4, "learning_rate": 0.00019629083095171264, "loss": 1.7155, "step": 2320 }, { "epoch": 0.4, "learning_rate": 0.0001962390291502316, "loss": 1.6839, "step": 2330 }, { "epoch": 0.41, "learning_rate": 0.00019618687505532334, "loss": 1.6888, "step": 2340 }, { "epoch": 0.41, "learning_rate": 0.0001961343688579036, "loss": 1.6962, "step": 2350 }, { "epoch": 0.41, "learning_rate": 0.000196081510750177, "loss": 1.6784, "step": 2360 }, { "epoch": 0.41, "learning_rate": 0.00019602830092563643, "loss": 1.672, "step": 2370 }, { "epoch": 0.41, "learning_rate": 0.00019597473957906224, "loss": 1.6769, "step": 2380 }, { "epoch": 0.41, "learning_rate": 0.00019592082690652148, "loss": 1.6975, "step": 2390 }, { "epoch": 0.42, "learning_rate": 0.00019586656310536743, "loss": 1.7687, "step": 2400 }, { "epoch": 0.42, "learning_rate": 0.00019581194837423857, "loss": 1.685, "step": 2410 }, { "epoch": 0.42, "learning_rate": 0.00019575698291305813, "loss": 1.6858, "step": 2420 }, { "epoch": 0.42, "learning_rate": 0.0001957016669230331, "loss": 1.6883, "step": 2430 }, { "epoch": 0.42, "learning_rate": 0.00019565158299718013, "loss": 1.6471, "step": 2440 }, { "epoch": 0.43, "learning_rate": 0.0001955956015612708, "loss": 1.6831, "step": 2450 }, { "epoch": 0.43, "learning_rate": 0.0001955392701872709, "loss": 1.6686, "step": 2460 }, { "epoch": 0.43, "learning_rate": 0.00019548258908138753, "loss": 1.7006, "step": 2470 }, { "epoch": 0.43, "learning_rate": 0.00019542555845110805, "loss": 1.7317, "step": 2480 }, { "epoch": 0.43, "learning_rate": 0.00019536817850519927, "loss": 1.6572, "step": 2490 }, { "epoch": 0.43, "learning_rate": 0.0001953104494537067, "loss": 1.6916, "step": 2500 }, { "epoch": 0.44, "learning_rate": 0.0001952523715079538, "loss": 1.6533, "step": 2510 }, { "epoch": 0.44, "learning_rate": 0.00019519394488054127, "loss": 1.6463, "step": 2520 }, { "epoch": 0.44, "learning_rate": 0.00019513516978534608, "loss": 1.6984, "step": 2530 }, { "epoch": 0.44, "learning_rate": 0.00019508197443751353, "loss": 1.6643, "step": 2540 }, { "epoch": 0.44, "learning_rate": 0.0001950284971627635, "loss": 1.6853, "step": 2550 }, { "epoch": 0.44, "learning_rate": 0.00019496874750645754, "loss": 1.6864, "step": 2560 }, { "epoch": 0.45, "learning_rate": 0.00019490865020672837, "loss": 1.6562, "step": 2570 }, { "epoch": 0.45, "learning_rate": 0.00019484820548356873, "loss": 1.6825, "step": 2580 }, { "epoch": 0.45, "learning_rate": 0.00019478741355824313, "loss": 1.7296, "step": 2590 }, { "epoch": 0.45, "learning_rate": 0.00019472627465328692, "loss": 1.7077, "step": 2600 }, { "epoch": 0.45, "learning_rate": 0.0001946647889925058, "loss": 1.7098, "step": 2610 }, { "epoch": 0.45, "learning_rate": 0.00019460915560757066, "loss": 1.6647, "step": 2620 }, { "epoch": 0.46, "learning_rate": 0.000194547011731852, "loss": 1.6919, "step": 2630 }, { "epoch": 0.46, "learning_rate": 0.00019448452175651983, "loss": 1.6805, "step": 2640 }, { "epoch": 0.46, "learning_rate": 0.0001944216859103255, "loss": 1.7276, "step": 2650 }, { "epoch": 0.46, "learning_rate": 0.00019435850442328637, "loss": 1.6987, "step": 2660 }, { "epoch": 0.46, "learning_rate": 0.00019429497752668516, "loss": 1.6923, "step": 2670 }, { "epoch": 0.47, "learning_rate": 0.00019423110545306908, "loss": 1.6908, "step": 2680 }, { "epoch": 0.47, "learning_rate": 0.00019416688843624873, "loss": 1.6799, "step": 2690 }, { "epoch": 0.47, "learning_rate": 0.00019410232671129745, "loss": 1.7065, "step": 2700 }, { "epoch": 0.47, "learning_rate": 0.0001940374205145505, "loss": 1.682, "step": 2710 }, { "epoch": 0.47, "learning_rate": 0.00019397217008360404, "loss": 1.654, "step": 2720 }, { "epoch": 0.47, "learning_rate": 0.0001939065756573144, "loss": 1.6809, "step": 2730 }, { "epoch": 0.48, "learning_rate": 0.00019384063747579706, "loss": 1.6426, "step": 2740 }, { "epoch": 0.48, "learning_rate": 0.00019377435578042592, "loss": 1.6453, "step": 2750 }, { "epoch": 0.48, "learning_rate": 0.00019370773081383235, "loss": 1.6419, "step": 2760 }, { "epoch": 0.48, "learning_rate": 0.00019364076281990427, "loss": 1.7025, "step": 2770 }, { "epoch": 0.48, "learning_rate": 0.0001935734520437853, "loss": 1.6897, "step": 2780 }, { "epoch": 0.48, "learning_rate": 0.00019350579873187384, "loss": 1.652, "step": 2790 }, { "epoch": 0.49, "learning_rate": 0.0001934378031318222, "loss": 1.6461, "step": 2800 }, { "epoch": 0.49, "learning_rate": 0.00019336946549253567, "loss": 1.6482, "step": 2810 }, { "epoch": 0.49, "learning_rate": 0.00019330078606417164, "loss": 1.684, "step": 2820 }, { "epoch": 0.49, "learning_rate": 0.00019323176509813855, "loss": 1.7247, "step": 2830 }, { "epoch": 0.49, "learning_rate": 0.0001931624028470952, "loss": 1.6417, "step": 2840 }, { "epoch": 0.49, "learning_rate": 0.00019309269956494963, "loss": 1.688, "step": 2850 }, { "epoch": 0.5, "learning_rate": 0.00019302967524028727, "loss": 1.6498, "step": 2860 }, { "epoch": 0.5, "learning_rate": 0.00019295932470303454, "loss": 1.6872, "step": 2870 }, { "epoch": 0.5, "learning_rate": 0.00019289571826614754, "loss": 1.6668, "step": 2880 }, { "epoch": 0.5, "learning_rate": 0.00019283183638479643, "loss": 1.652, "step": 2890 }, { "epoch": 0.5, "learning_rate": 0.00019276053369488895, "loss": 1.6777, "step": 2900 }, { "epoch": 0.51, "learning_rate": 0.000192688891444965, "loss": 1.6466, "step": 2910 }, { "epoch": 0.51, "learning_rate": 0.00019261690989727875, "loss": 1.6432, "step": 2920 }, { "epoch": 0.51, "learning_rate": 0.00019254458931532655, "loss": 1.6499, "step": 2930 }, { "epoch": 0.51, "learning_rate": 0.00019247192996384572, "loss": 1.6599, "step": 2940 }, { "epoch": 0.51, "learning_rate": 0.00019239893210881373, "loss": 1.6458, "step": 2950 }, { "epoch": 0.51, "learning_rate": 0.00019232559601744712, "loss": 1.69, "step": 2960 }, { "epoch": 0.52, "learning_rate": 0.00019225192195820067, "loss": 1.6294, "step": 2970 }, { "epoch": 0.52, "learning_rate": 0.00019217791020076627, "loss": 1.7088, "step": 2980 }, { "epoch": 0.52, "learning_rate": 0.000192103561016072, "loss": 1.6531, "step": 2990 }, { "epoch": 0.52, "learning_rate": 0.00019202887467628115, "loss": 1.6708, "step": 3000 }, { "epoch": 0.52, "eval_loss": 0.862983226776123, "eval_runtime": 61.7517, "eval_samples_per_second": 8.486, "eval_steps_per_second": 0.534, "step": 3000 }, { "epoch": 0.52, "learning_rate": 0.00019195385145479116, "loss": 1.6732, "step": 3010 }, { "epoch": 0.52, "learning_rate": 0.0001918784916262327, "loss": 1.6961, "step": 3020 }, { "epoch": 0.53, "learning_rate": 0.0001918027954664686, "loss": 1.6361, "step": 3030 }, { "epoch": 0.53, "learning_rate": 0.00019172676325259288, "loss": 1.708, "step": 3040 }, { "epoch": 0.53, "learning_rate": 0.00019165039526292975, "loss": 1.6377, "step": 3050 }, { "epoch": 0.53, "learning_rate": 0.0001915736917770325, "loss": 1.667, "step": 3060 }, { "epoch": 0.53, "learning_rate": 0.00019149665307568263, "loss": 1.6649, "step": 3070 }, { "epoch": 0.53, "learning_rate": 0.00019141927944088863, "loss": 1.6981, "step": 3080 }, { "epoch": 0.54, "learning_rate": 0.0001913415711558851, "loss": 1.6095, "step": 3090 }, { "epoch": 0.54, "learning_rate": 0.00019127916377084718, "loss": 1.6629, "step": 3100 }, { "epoch": 0.54, "learning_rate": 0.00019120085383312737, "loss": 1.6908, "step": 3110 }, { "epoch": 0.54, "learning_rate": 0.00019112221004476872, "loss": 1.6602, "step": 3120 }, { "epoch": 0.54, "learning_rate": 0.00019104323269365537, "loss": 1.6378, "step": 3130 }, { "epoch": 0.54, "learning_rate": 0.00019096392206889248, "loss": 1.642, "step": 3140 }, { "epoch": 0.55, "learning_rate": 0.00019088427846080527, "loss": 1.6605, "step": 3150 }, { "epoch": 0.55, "learning_rate": 0.00019080430216093778, "loss": 1.6055, "step": 3160 }, { "epoch": 0.55, "learning_rate": 0.00019072399346205197, "loss": 1.6423, "step": 3170 }, { "epoch": 0.55, "learning_rate": 0.00019064335265812652, "loss": 1.6856, "step": 3180 }, { "epoch": 0.55, "learning_rate": 0.00019056238004435593, "loss": 1.6115, "step": 3190 }, { "epoch": 0.56, "learning_rate": 0.0001904810759171492, "loss": 1.6145, "step": 3200 }, { "epoch": 0.56, "learning_rate": 0.00019042396593693816, "loss": 1.6301, "step": 3210 }, { "epoch": 0.56, "learning_rate": 0.00019034209892058318, "loss": 1.6615, "step": 3220 }, { "epoch": 0.56, "learning_rate": 0.00019025990119715506, "loss": 1.6515, "step": 3230 }, { "epoch": 0.56, "learning_rate": 0.00019017737306754754, "loss": 1.7024, "step": 3240 }, { "epoch": 0.56, "learning_rate": 0.00019009451483386375, "loss": 1.6598, "step": 3250 }, { "epoch": 0.57, "learning_rate": 0.0001900113267994153, "loss": 1.6995, "step": 3260 }, { "epoch": 0.57, "learning_rate": 0.00018992780926872102, "loss": 1.684, "step": 3270 }, { "epoch": 0.57, "learning_rate": 0.00018984396254750593, "loss": 1.6553, "step": 3280 }, { "epoch": 0.57, "learning_rate": 0.00018975978694270003, "loss": 1.6515, "step": 3290 }, { "epoch": 0.57, "learning_rate": 0.00018967528276243734, "loss": 1.6754, "step": 3300 }, { "epoch": 0.57, "learning_rate": 0.00018959045031605453, "loss": 1.6483, "step": 3310 }, { "epoch": 0.58, "learning_rate": 0.00018950528991409, "loss": 1.6569, "step": 3320 }, { "epoch": 0.58, "learning_rate": 0.00018941980186828263, "loss": 1.6626, "step": 3330 }, { "epoch": 0.58, "learning_rate": 0.0001893339864915708, "loss": 1.6508, "step": 3340 }, { "epoch": 0.58, "learning_rate": 0.00018924784409809093, "loss": 1.6662, "step": 3350 }, { "epoch": 0.58, "learning_rate": 0.0001891613750031767, "loss": 1.6426, "step": 3360 }, { "epoch": 0.58, "learning_rate": 0.00018907457952335754, "loss": 1.6468, "step": 3370 }, { "epoch": 0.59, "learning_rate": 0.0001889874579763578, "loss": 1.6326, "step": 3380 }, { "epoch": 0.59, "learning_rate": 0.00018890001068109534, "loss": 1.6034, "step": 3390 }, { "epoch": 0.59, "learning_rate": 0.00018881223795768053, "loss": 1.6951, "step": 3400 }, { "epoch": 0.59, "learning_rate": 0.00018872414012741494, "loss": 1.5691, "step": 3410 }, { "epoch": 0.59, "learning_rate": 0.0001886445743803333, "loss": 1.6343, "step": 3420 }, { "epoch": 0.6, "learning_rate": 0.00018857362860914253, "loss": 1.6143, "step": 3430 }, { "epoch": 0.6, "learning_rate": 0.00018848465460459042, "loss": 1.663, "step": 3440 }, { "epoch": 0.6, "learning_rate": 0.00018839535669234195, "loss": 1.617, "step": 3450 }, { "epoch": 0.6, "learning_rate": 0.00018830573519928195, "loss": 1.6374, "step": 3460 }, { "epoch": 0.6, "learning_rate": 0.0001882157904534795, "loss": 1.6472, "step": 3470 }, { "epoch": 0.6, "learning_rate": 0.00018812552278418726, "loss": 1.6326, "step": 3480 }, { "epoch": 0.61, "learning_rate": 0.00018803493252183976, "loss": 1.6444, "step": 3490 }, { "epoch": 0.61, "learning_rate": 0.00018794401999805248, "loss": 1.6167, "step": 3500 }, { "epoch": 0.61, "learning_rate": 0.00018785278554562065, "loss": 1.6498, "step": 3510 }, { "epoch": 0.61, "learning_rate": 0.00018776122949851792, "loss": 1.6605, "step": 3520 }, { "epoch": 0.61, "learning_rate": 0.00018766935219189507, "loss": 1.6455, "step": 3530 }, { "epoch": 0.61, "learning_rate": 0.00018757715396207903, "loss": 1.671, "step": 3540 }, { "epoch": 0.62, "learning_rate": 0.00018748463514657146, "loss": 1.6176, "step": 3550 }, { "epoch": 0.62, "learning_rate": 0.00018739179608404747, "loss": 1.6459, "step": 3560 }, { "epoch": 0.62, "learning_rate": 0.00018729863711435457, "loss": 1.6481, "step": 3570 }, { "epoch": 0.62, "learning_rate": 0.00018720515857851132, "loss": 1.6823, "step": 3580 }, { "epoch": 0.62, "learning_rate": 0.00018711136081870605, "loss": 1.6239, "step": 3590 }, { "epoch": 0.62, "learning_rate": 0.00018701724417829565, "loss": 1.6209, "step": 3600 }, { "epoch": 0.63, "learning_rate": 0.0001869228090018043, "loss": 1.6381, "step": 3610 }, { "epoch": 0.63, "learning_rate": 0.00018682805563492225, "loss": 1.6498, "step": 3620 }, { "epoch": 0.63, "learning_rate": 0.00018673298442450448, "loss": 1.6377, "step": 3630 }, { "epoch": 0.63, "learning_rate": 0.00018663759571856952, "loss": 1.6513, "step": 3640 }, { "epoch": 0.63, "learning_rate": 0.0001865514747131347, "loss": 1.6385, "step": 3650 }, { "epoch": 0.64, "learning_rate": 0.00018646509707450926, "loss": 1.6137, "step": 3660 }, { "epoch": 0.64, "learning_rate": 0.00018636882124247248, "loss": 1.6402, "step": 3670 }, { "epoch": 0.64, "learning_rate": 0.0001862819026646694, "loss": 1.6949, "step": 3680 }, { "epoch": 0.64, "learning_rate": 0.0001861850264262445, "loss": 1.6283, "step": 3690 }, { "epoch": 0.64, "learning_rate": 0.00018608783469816221, "loss": 1.633, "step": 3700 }, { "epoch": 0.64, "learning_rate": 0.00018599032783620342, "loss": 1.6442, "step": 3710 }, { "epoch": 0.65, "learning_rate": 0.00018589250619730253, "loss": 1.629, "step": 3720 }, { "epoch": 0.65, "learning_rate": 0.00018580419788394125, "loss": 1.599, "step": 3730 }, { "epoch": 0.65, "learning_rate": 0.00018570577915633075, "loss": 1.6526, "step": 3740 }, { "epoch": 0.65, "learning_rate": 0.00018560704669339962, "loss": 1.6345, "step": 3750 }, { "epoch": 0.65, "learning_rate": 0.00018550800085656875, "loss": 1.6609, "step": 3760 }, { "epoch": 0.65, "learning_rate": 0.00018540864200840615, "loss": 1.6241, "step": 3770 }, { "epoch": 0.66, "learning_rate": 0.0001853089705126257, "loss": 1.6081, "step": 3780 }, { "epoch": 0.66, "learning_rate": 0.00018520898673408576, "loss": 1.6591, "step": 3790 }, { "epoch": 0.66, "learning_rate": 0.00018510869103878781, "loss": 1.6196, "step": 3800 }, { "epoch": 0.66, "learning_rate": 0.00018500808379387515, "loss": 1.6015, "step": 3810 }, { "epoch": 0.66, "learning_rate": 0.00018490716536763153, "loss": 1.6196, "step": 3820 }, { "epoch": 0.66, "learning_rate": 0.00018480593612947978, "loss": 1.6504, "step": 3830 }, { "epoch": 0.67, "learning_rate": 0.00018470439644998062, "loss": 1.6474, "step": 3840 }, { "epoch": 0.67, "learning_rate": 0.00018460254670083103, "loss": 1.6038, "step": 3850 }, { "epoch": 0.67, "learning_rate": 0.00018450038725486306, "loss": 1.712, "step": 3860 }, { "epoch": 0.67, "learning_rate": 0.00018439791848604253, "loss": 1.6311, "step": 3870 }, { "epoch": 0.67, "learning_rate": 0.00018429514076946746, "loss": 1.626, "step": 3880 }, { "epoch": 0.68, "learning_rate": 0.00018419205448136686, "loss": 1.6255, "step": 3890 }, { "epoch": 0.68, "learning_rate": 0.00018408865999909932, "loss": 1.6269, "step": 3900 }, { "epoch": 0.68, "learning_rate": 0.00018398495770115153, "loss": 1.5649, "step": 3910 }, { "epoch": 0.68, "learning_rate": 0.0001838809479671371, "loss": 1.6243, "step": 3920 }, { "epoch": 0.68, "learning_rate": 0.0001837766311777949, "loss": 1.6366, "step": 3930 }, { "epoch": 0.68, "learning_rate": 0.00018367200771498787, "loss": 1.6387, "step": 3940 }, { "epoch": 0.69, "learning_rate": 0.00018356707796170161, "loss": 1.6256, "step": 3950 }, { "epoch": 0.69, "learning_rate": 0.00018346184230204292, "loss": 1.6158, "step": 3960 }, { "epoch": 0.69, "learning_rate": 0.0001833563011212383, "loss": 1.6103, "step": 3970 }, { "epoch": 0.69, "learning_rate": 0.00018325045480563273, "loss": 1.6038, "step": 3980 }, { "epoch": 0.69, "learning_rate": 0.00018314430374268817, "loss": 1.5909, "step": 3990 }, { "epoch": 0.69, "learning_rate": 0.0001830378483209821, "loss": 1.612, "step": 4000 }, { "epoch": 0.69, "eval_loss": 0.8420035243034363, "eval_runtime": 62.0042, "eval_samples_per_second": 8.451, "eval_steps_per_second": 0.532, "step": 4000 }, { "epoch": 0.7, "learning_rate": 0.0001829310889302062, "loss": 1.6545, "step": 4010 }, { "epoch": 0.7, "learning_rate": 0.00018284546282243836, "loss": 1.6391, "step": 4020 }, { "epoch": 0.7, "learning_rate": 0.00018273815727291054, "loss": 1.6237, "step": 4030 }, { "epoch": 0.7, "learning_rate": 0.00018263054885136454, "loss": 1.6281, "step": 4040 }, { "epoch": 0.7, "learning_rate": 0.00018252263795171263, "loss": 1.6102, "step": 4050 }, { "epoch": 0.7, "learning_rate": 0.00018241442496897444, "loss": 1.6246, "step": 4060 }, { "epoch": 0.71, "learning_rate": 0.00018230591029927537, "loss": 1.5991, "step": 4070 }, { "epoch": 0.71, "learning_rate": 0.00018219709433984512, "loss": 1.6252, "step": 4080 }, { "epoch": 0.71, "learning_rate": 0.00018208797748901637, "loss": 1.6047, "step": 4090 }, { "epoch": 0.71, "learning_rate": 0.0001819785601462232, "loss": 1.6173, "step": 4100 }, { "epoch": 0.71, "learning_rate": 0.00018186884271199967, "loss": 1.5678, "step": 4110 }, { "epoch": 0.72, "learning_rate": 0.0001817588255879784, "loss": 1.6143, "step": 4120 }, { "epoch": 0.72, "learning_rate": 0.000181648509176889, "loss": 1.6248, "step": 4130 }, { "epoch": 0.72, "learning_rate": 0.00018153789388255677, "loss": 1.6552, "step": 4140 }, { "epoch": 0.72, "learning_rate": 0.0001814269801099009, "loss": 1.626, "step": 4150 }, { "epoch": 0.72, "learning_rate": 0.00018131576826493337, "loss": 1.6096, "step": 4160 }, { "epoch": 0.72, "learning_rate": 0.00018120425875475723, "loss": 1.6182, "step": 4170 }, { "epoch": 0.73, "learning_rate": 0.00018109245198756518, "loss": 1.6014, "step": 4180 }, { "epoch": 0.73, "learning_rate": 0.00018099157208059183, "loss": 1.5923, "step": 4190 }, { "epoch": 0.73, "learning_rate": 0.0001808792016535363, "loss": 1.5841, "step": 4200 }, { "epoch": 0.73, "learning_rate": 0.00018076653515937166, "loss": 1.5837, "step": 4210 }, { "epoch": 0.73, "learning_rate": 0.00018065357301052593, "loss": 1.6354, "step": 4220 }, { "epoch": 0.73, "learning_rate": 0.00018054031562050928, "loss": 1.6433, "step": 4230 }, { "epoch": 0.74, "learning_rate": 0.0001804267634039127, "loss": 1.6109, "step": 4240 }, { "epoch": 0.74, "learning_rate": 0.0001803243146757791, "loss": 1.6059, "step": 4250 }, { "epoch": 0.74, "learning_rate": 0.00018021020343474294, "loss": 1.6127, "step": 4260 }, { "epoch": 0.74, "learning_rate": 0.0001800957985755384, "loss": 1.6006, "step": 4270 }, { "epoch": 0.74, "learning_rate": 0.00017998110051695688, "loss": 1.5927, "step": 4280 }, { "epoch": 0.74, "learning_rate": 0.0001798661096788631, "loss": 1.6142, "step": 4290 }, { "epoch": 0.75, "learning_rate": 0.00017975082648219356, "loss": 1.6272, "step": 4300 }, { "epoch": 0.75, "learning_rate": 0.0001796352513489549, "loss": 1.6459, "step": 4310 }, { "epoch": 0.75, "learning_rate": 0.00017951938470222247, "loss": 1.6373, "step": 4320 }, { "epoch": 0.75, "learning_rate": 0.0001794032269661387, "loss": 1.6331, "step": 4330 }, { "epoch": 0.75, "learning_rate": 0.00017928677856591163, "loss": 1.6007, "step": 4340 }, { "epoch": 0.75, "learning_rate": 0.0001791700399278133, "loss": 1.6359, "step": 4350 }, { "epoch": 0.76, "learning_rate": 0.00017905301147917816, "loss": 1.5939, "step": 4360 }, { "epoch": 0.76, "learning_rate": 0.00017893569364840154, "loss": 1.5889, "step": 4370 }, { "epoch": 0.76, "learning_rate": 0.0001788180868649382, "loss": 1.6206, "step": 4380 }, { "epoch": 0.76, "learning_rate": 0.00017870019155930047, "loss": 1.5902, "step": 4390 }, { "epoch": 0.76, "learning_rate": 0.00017858200816305697, "loss": 1.6394, "step": 4400 }, { "epoch": 0.77, "learning_rate": 0.00017846353710883087, "loss": 1.6193, "step": 4410 }, { "epoch": 0.77, "learning_rate": 0.00017835666757086383, "loss": 1.6162, "step": 4420 }, { "epoch": 0.77, "learning_rate": 0.00017823765116211767, "loss": 1.6329, "step": 4430 }, { "epoch": 0.77, "learning_rate": 0.0001781183483559451, "loss": 1.6248, "step": 4440 }, { "epoch": 0.77, "learning_rate": 0.00017799875958906703, "loss": 1.6109, "step": 4450 }, { "epoch": 0.77, "learning_rate": 0.0001778788852992512, "loss": 1.5499, "step": 4460 }, { "epoch": 0.78, "learning_rate": 0.0001777587259253104, "loss": 1.6107, "step": 4470 }, { "epoch": 0.78, "learning_rate": 0.00017763828190710113, "loss": 1.5865, "step": 4480 }, { "epoch": 0.78, "learning_rate": 0.00017751755368552178, "loss": 1.6013, "step": 4490 }, { "epoch": 0.78, "learning_rate": 0.00017739654170251116, "loss": 1.5829, "step": 4500 }, { "epoch": 0.78, "learning_rate": 0.00017727524640104674, "loss": 1.6356, "step": 4510 }, { "epoch": 0.78, "learning_rate": 0.00017715366822514318, "loss": 1.6237, "step": 4520 }, { "epoch": 0.79, "learning_rate": 0.00017703180761985063, "loss": 1.5802, "step": 4530 }, { "epoch": 0.79, "learning_rate": 0.00017690966503125307, "loss": 1.5659, "step": 4540 }, { "epoch": 0.79, "learning_rate": 0.0001767872409064667, "loss": 1.61, "step": 4550 }, { "epoch": 0.79, "learning_rate": 0.00017666453569363836, "loss": 1.6184, "step": 4560 }, { "epoch": 0.79, "learning_rate": 0.00017654154984194382, "loss": 1.5797, "step": 4570 }, { "epoch": 0.79, "learning_rate": 0.00017641828380158612, "loss": 1.6256, "step": 4580 }, { "epoch": 0.8, "learning_rate": 0.00017629473802379403, "loss": 1.5783, "step": 4590 }, { "epoch": 0.8, "learning_rate": 0.00017617091296082032, "loss": 1.5988, "step": 4600 }, { "epoch": 0.8, "learning_rate": 0.0001760468090659401, "loss": 1.5904, "step": 4610 }, { "epoch": 0.8, "learning_rate": 0.0001759224267934491, "loss": 1.611, "step": 4620 }, { "epoch": 0.8, "learning_rate": 0.00017579776659866218, "loss": 1.6066, "step": 4630 }, { "epoch": 0.81, "learning_rate": 0.00017569783864540068, "loss": 1.5947, "step": 4640 }, { "epoch": 0.81, "learning_rate": 0.00017557267934112085, "loss": 1.576, "step": 4650 }, { "epoch": 0.81, "learning_rate": 0.00017544724339483368, "loss": 1.6143, "step": 4660 }, { "epoch": 0.81, "learning_rate": 0.00017532153126571107, "loss": 1.5985, "step": 4670 }, { "epoch": 0.81, "learning_rate": 0.00017519554341393593, "loss": 1.5992, "step": 4680 }, { "epoch": 0.81, "learning_rate": 0.00017506928030070054, "loss": 1.5891, "step": 4690 }, { "epoch": 0.82, "learning_rate": 0.00017494274238820468, "loss": 1.5622, "step": 4700 }, { "epoch": 0.82, "learning_rate": 0.00017482862369640954, "loss": 1.5646, "step": 4710 }, { "epoch": 0.82, "learning_rate": 0.00017470156494228082, "loss": 1.6121, "step": 4720 }, { "epoch": 0.82, "learning_rate": 0.0001745742327349537, "loss": 1.5766, "step": 4730 }, { "epoch": 0.82, "learning_rate": 0.00017444662754054156, "loss": 1.557, "step": 4740 }, { "epoch": 0.82, "learning_rate": 0.00017431874982615708, "loss": 1.5716, "step": 4750 }, { "epoch": 0.83, "learning_rate": 0.00017419060005991054, "loss": 1.5992, "step": 4760 }, { "epoch": 0.83, "learning_rate": 0.0001740621787109081, "loss": 1.6036, "step": 4770 }, { "epoch": 0.83, "learning_rate": 0.00017393348624925004, "loss": 1.6121, "step": 4780 }, { "epoch": 0.83, "learning_rate": 0.00017380452314602916, "loss": 1.6076, "step": 4790 }, { "epoch": 0.83, "learning_rate": 0.00017367528987332885, "loss": 1.5798, "step": 4800 }, { "epoch": 0.83, "learning_rate": 0.00017354578690422157, "loss": 1.5597, "step": 4810 }, { "epoch": 0.84, "learning_rate": 0.00017341601471276708, "loss": 1.5834, "step": 4820 }, { "epoch": 0.84, "learning_rate": 0.0001732859737740105, "loss": 1.6169, "step": 4830 }, { "epoch": 0.84, "learning_rate": 0.00017315566456398086, "loss": 1.5933, "step": 4840 }, { "epoch": 0.84, "learning_rate": 0.00017303815729724509, "loss": 1.6006, "step": 4850 }, { "epoch": 0.84, "learning_rate": 0.00017292043346556449, "loss": 1.6013, "step": 4860 }, { "epoch": 0.85, "learning_rate": 0.0001727893756367969, "loss": 1.6042, "step": 4870 }, { "epoch": 0.85, "learning_rate": 0.00017265805135460778, "loss": 1.5738, "step": 4880 }, { "epoch": 0.85, "learning_rate": 0.00017252646109972383, "loss": 1.6376, "step": 4890 }, { "epoch": 0.85, "learning_rate": 0.00017240780286177955, "loss": 1.5904, "step": 4900 }, { "epoch": 0.85, "learning_rate": 0.0001722757085866635, "loss": 1.5651, "step": 4910 }, { "epoch": 0.85, "learning_rate": 0.00017214334973845988, "loss": 1.5923, "step": 4920 }, { "epoch": 0.86, "learning_rate": 0.0001720107268016827, "loss": 1.6032, "step": 4930 }, { "epoch": 0.86, "learning_rate": 0.00017187784026181265, "loss": 1.5859, "step": 4940 }, { "epoch": 0.86, "learning_rate": 0.00017174469060529527, "loss": 1.5376, "step": 4950 }, { "epoch": 0.86, "learning_rate": 0.00017161127831953946, "loss": 1.5445, "step": 4960 }, { "epoch": 0.86, "learning_rate": 0.0001714776038929153, "loss": 1.5652, "step": 4970 }, { "epoch": 0.86, "learning_rate": 0.00017134366781475262, "loss": 1.5267, "step": 4980 }, { "epoch": 0.87, "learning_rate": 0.00017120947057533897, "loss": 1.5805, "step": 4990 }, { "epoch": 0.87, "learning_rate": 0.00017108847017299018, "loss": 1.5876, "step": 5000 }, { "epoch": 0.87, "eval_loss": 0.826651930809021, "eval_runtime": 61.8972, "eval_samples_per_second": 8.466, "eval_steps_per_second": 0.533, "step": 5000 }, { "epoch": 0.87, "learning_rate": 0.00017095377808136445, "loss": 1.6163, "step": 5010 }, { "epoch": 0.87, "learning_rate": 0.00017083233311224484, "loss": 1.629, "step": 5020 }, { "epoch": 0.87, "learning_rate": 0.0001706971479483343, "loss": 1.5993, "step": 5030 }, { "epoch": 0.87, "learning_rate": 0.00017056170398982906, "loss": 1.6056, "step": 5040 }, { "epoch": 0.88, "learning_rate": 0.00017042600173253645, "loss": 1.5728, "step": 5050 }, { "epoch": 0.88, "learning_rate": 0.00017029004167320926, "loss": 1.6298, "step": 5060 }, { "epoch": 0.88, "learning_rate": 0.00017015382430954413, "loss": 1.5792, "step": 5070 }, { "epoch": 0.88, "learning_rate": 0.00017001735014017955, "loss": 1.5615, "step": 5080 }, { "epoch": 0.88, "learning_rate": 0.000169880619664694, "loss": 1.6449, "step": 5090 }, { "epoch": 0.89, "learning_rate": 0.00016974363338360425, "loss": 1.5903, "step": 5100 }, { "epoch": 0.89, "learning_rate": 0.0001696063917983635, "loss": 1.5682, "step": 5110 }, { "epoch": 0.89, "learning_rate": 0.00016946889541135946, "loss": 1.5754, "step": 5120 }, { "epoch": 0.89, "learning_rate": 0.00016933114472591262, "loss": 1.6168, "step": 5130 }, { "epoch": 0.89, "learning_rate": 0.00016919314024627436, "loss": 1.5977, "step": 5140 }, { "epoch": 0.89, "learning_rate": 0.00016906871963807865, "loss": 1.6037, "step": 5150 }, { "epoch": 0.9, "learning_rate": 0.00016894409423469082, "loss": 1.582, "step": 5160 }, { "epoch": 0.9, "learning_rate": 0.00016880538182183466, "loss": 1.5863, "step": 5170 }, { "epoch": 0.9, "learning_rate": 0.00016866641753939926, "loss": 1.5792, "step": 5180 }, { "epoch": 0.9, "learning_rate": 0.00016852720189607857, "loss": 1.5481, "step": 5190 }, { "epoch": 0.9, "learning_rate": 0.00016838773540148655, "loss": 1.594, "step": 5200 }, { "epoch": 0.9, "learning_rate": 0.00016824801856615547, "loss": 1.5484, "step": 5210 }, { "epoch": 0.91, "learning_rate": 0.00016810805190153397, "loss": 1.551, "step": 5220 }, { "epoch": 0.91, "learning_rate": 0.0001679678359199853, "loss": 1.6115, "step": 5230 }, { "epoch": 0.91, "learning_rate": 0.0001678273711347852, "loss": 1.5713, "step": 5240 }, { "epoch": 0.91, "learning_rate": 0.00016770074052593968, "loss": 1.532, "step": 5250 }, { "epoch": 0.91, "learning_rate": 0.00016755980443113736, "loss": 1.6103, "step": 5260 }, { "epoch": 0.91, "learning_rate": 0.00016741862102632728, "loss": 1.5881, "step": 5270 }, { "epoch": 0.92, "learning_rate": 0.00016727719082832666, "loss": 1.5909, "step": 5280 }, { "epoch": 0.92, "learning_rate": 0.00016713551435485608, "loss": 1.5632, "step": 5290 }, { "epoch": 0.92, "learning_rate": 0.0001669935921245377, "loss": 1.5794, "step": 5300 }, { "epoch": 0.92, "learning_rate": 0.00016685142465689326, "loss": 1.5779, "step": 5310 }, { "epoch": 0.92, "learning_rate": 0.00016670901247234224, "loss": 1.615, "step": 5320 }, { "epoch": 0.93, "learning_rate": 0.0001665663560921999, "loss": 1.6188, "step": 5330 }, { "epoch": 0.93, "learning_rate": 0.00016642345603867545, "loss": 1.5351, "step": 5340 }, { "epoch": 0.93, "learning_rate": 0.00016628031283487006, "loss": 1.6056, "step": 5350 }, { "epoch": 0.93, "learning_rate": 0.00016613692700477494, "loss": 1.6137, "step": 5360 }, { "epoch": 0.93, "learning_rate": 0.0001659932990732696, "loss": 1.5894, "step": 5370 }, { "epoch": 0.93, "learning_rate": 0.00016584942956611963, "loss": 1.6144, "step": 5380 }, { "epoch": 0.94, "learning_rate": 0.00016570531900997497, "loss": 1.5606, "step": 5390 }, { "epoch": 0.94, "learning_rate": 0.00016556096793236805, "loss": 1.5671, "step": 5400 }, { "epoch": 0.94, "learning_rate": 0.00016541637686171167, "loss": 1.5839, "step": 5410 }, { "epoch": 0.94, "learning_rate": 0.00016527154632729713, "loss": 1.5991, "step": 5420 }, { "epoch": 0.94, "learning_rate": 0.00016512647685929235, "loss": 1.5912, "step": 5430 }, { "epoch": 0.94, "learning_rate": 0.0001649811689887399, "loss": 1.578, "step": 5440 }, { "epoch": 0.95, "learning_rate": 0.00016483562324755502, "loss": 1.5861, "step": 5450 }, { "epoch": 0.95, "learning_rate": 0.00016468984016852374, "loss": 1.5587, "step": 5460 }, { "epoch": 0.95, "learning_rate": 0.0001645438202853008, "loss": 1.5391, "step": 5470 }, { "epoch": 0.95, "learning_rate": 0.00016439756413240793, "loss": 1.5762, "step": 5480 }, { "epoch": 0.95, "learning_rate": 0.00016425107224523168, "loss": 1.6125, "step": 5490 }, { "epoch": 0.95, "learning_rate": 0.0001641190284371531, "loss": 1.5776, "step": 5500 }, { "epoch": 0.96, "learning_rate": 0.00016397209013291726, "loss": 1.5469, "step": 5510 }, { "epoch": 0.96, "learning_rate": 0.00016382491765189186, "loss": 1.5636, "step": 5520 }, { "epoch": 0.96, "learning_rate": 0.00016367751153281774, "loss": 1.5732, "step": 5530 }, { "epoch": 0.96, "learning_rate": 0.00016352987231529103, "loss": 1.547, "step": 5540 }, { "epoch": 0.96, "learning_rate": 0.00016338200053976108, "loss": 1.6132, "step": 5550 }, { "epoch": 0.96, "learning_rate": 0.00016323389674752868, "loss": 1.5797, "step": 5560 }, { "epoch": 0.97, "learning_rate": 0.00016308556148074378, "loss": 1.5448, "step": 5570 }, { "epoch": 0.97, "learning_rate": 0.00016293699528240386, "loss": 1.5573, "step": 5580 }, { "epoch": 0.97, "learning_rate": 0.0001627881986963515, "loss": 1.5472, "step": 5590 }, { "epoch": 0.97, "learning_rate": 0.00016263917226727286, "loss": 1.5518, "step": 5600 }, { "epoch": 0.97, "learning_rate": 0.0001624899165406954, "loss": 1.5813, "step": 5610 }, { "epoch": 0.98, "learning_rate": 0.00016234043206298586, "loss": 1.5408, "step": 5620 }, { "epoch": 0.98, "learning_rate": 0.00016219071938134845, "loss": 1.5346, "step": 5630 }, { "epoch": 0.98, "learning_rate": 0.0001620557833064539, "loss": 1.5452, "step": 5640 }, { "epoch": 0.98, "learning_rate": 0.00016192066323037722, "loss": 1.5825, "step": 5650 }, { "epoch": 0.98, "learning_rate": 0.00016177031449597098, "loss": 1.5871, "step": 5660 }, { "epoch": 0.98, "learning_rate": 0.0001616197396446142, "loss": 1.5518, "step": 5670 }, { "epoch": 0.99, "learning_rate": 0.0001614689392275025, "loss": 1.5645, "step": 5680 }, { "epoch": 0.99, "learning_rate": 0.00016131791379665717, "loss": 1.549, "step": 5690 }, { "epoch": 0.99, "learning_rate": 0.00016116666390492325, "loss": 1.5868, "step": 5700 }, { "epoch": 0.99, "learning_rate": 0.00016101519010596743, "loss": 1.5491, "step": 5710 }, { "epoch": 0.99, "learning_rate": 0.00016086349295427595, "loss": 1.5768, "step": 5720 }, { "epoch": 0.99, "learning_rate": 0.00016072677501010647, "loss": 1.5492, "step": 5730 }, { "epoch": 1.0, "learning_rate": 0.00016057465501875367, "loss": 1.5695, "step": 5740 }, { "epoch": 1.0, "learning_rate": 0.00016042231328729185, "loss": 1.5693, "step": 5750 }, { "epoch": 1.0, "learning_rate": 0.0001602697503733844, "loss": 1.5273, "step": 5760 }, { "epoch": 1.0, "learning_rate": 0.00016011696683550456, "loss": 1.4587, "step": 5770 }, { "epoch": 1.0, "learning_rate": 0.00015996396323293295, "loss": 1.4827, "step": 5780 }, { "epoch": 1.0, "learning_rate": 0.00015981074012575593, "loss": 1.4734, "step": 5790 }, { "epoch": 1.01, "learning_rate": 0.0001596572980748634, "loss": 1.4913, "step": 5800 }, { "epoch": 1.01, "learning_rate": 0.00015950363764194662, "loss": 1.4804, "step": 5810 }, { "epoch": 1.01, "learning_rate": 0.0001593497593894963, "loss": 1.4434, "step": 5820 }, { "epoch": 1.01, "learning_rate": 0.00015919566388080048, "loss": 1.4501, "step": 5830 }, { "epoch": 1.01, "learning_rate": 0.00015904135167994264, "loss": 1.4815, "step": 5840 }, { "epoch": 1.02, "learning_rate": 0.00015888682335179924, "loss": 1.4588, "step": 5850 }, { "epoch": 1.02, "learning_rate": 0.00015873207946203802, "loss": 1.4268, "step": 5860 }, { "epoch": 1.02, "learning_rate": 0.00015857712057711592, "loss": 1.4502, "step": 5870 }, { "epoch": 1.02, "learning_rate": 0.00015843747422863421, "loss": 1.4486, "step": 5880 }, { "epoch": 1.02, "learning_rate": 0.00015828210841631188, "loss": 1.4331, "step": 5890 }, { "epoch": 1.02, "learning_rate": 0.0001581265292559965, "loss": 1.4565, "step": 5900 }, { "epoch": 1.03, "learning_rate": 0.00015797073731720253, "loss": 1.453, "step": 5910 }, { "epoch": 1.03, "learning_rate": 0.00015781473317022333, "loss": 1.45, "step": 5920 }, { "epoch": 1.03, "learning_rate": 0.00015765851738612895, "loss": 1.4356, "step": 5930 }, { "epoch": 1.03, "learning_rate": 0.00015750209053676432, "loss": 1.4915, "step": 5940 }, { "epoch": 1.03, "learning_rate": 0.00015734545319474693, "loss": 1.4443, "step": 5950 }, { "epoch": 1.03, "learning_rate": 0.00015718860593346473, "loss": 1.4245, "step": 5960 }, { "epoch": 1.04, "learning_rate": 0.0001570315493270742, "loss": 1.4927, "step": 5970 }, { "epoch": 1.04, "learning_rate": 0.00015687428395049814, "loss": 1.4698, "step": 5980 }, { "epoch": 1.04, "learning_rate": 0.00015671681037942355, "loss": 1.4568, "step": 5990 }, { "epoch": 1.04, "learning_rate": 0.00015655912919029953, "loss": 1.4498, "step": 6000 }, { "epoch": 1.04, "eval_loss": 0.8124380707740784, "eval_runtime": 62.0134, "eval_samples_per_second": 8.45, "eval_steps_per_second": 0.532, "step": 6000 }, { "epoch": 1.04, "learning_rate": 0.00015640124096033526, "loss": 1.4347, "step": 6010 }, { "epoch": 1.04, "learning_rate": 0.0001562431462674977, "loss": 1.4754, "step": 6020 }, { "epoch": 1.05, "learning_rate": 0.00015608484569050975, "loss": 1.4716, "step": 6030 }, { "epoch": 1.05, "learning_rate": 0.00015592633980884778, "loss": 1.4523, "step": 6040 }, { "epoch": 1.05, "learning_rate": 0.00015578350945939874, "loss": 1.4578, "step": 6050 }, { "epoch": 1.05, "learning_rate": 0.00015562461509800382, "loss": 1.4311, "step": 6060 }, { "epoch": 1.05, "learning_rate": 0.0001554814360610988, "loss": 1.4419, "step": 6070 }, { "epoch": 1.06, "learning_rate": 0.00015532215531972608, "loss": 1.449, "step": 6080 }, { "epoch": 1.06, "learning_rate": 0.0001551786294874456, "loss": 1.4604, "step": 6090 }, { "epoch": 1.06, "learning_rate": 0.0001550189644709561, "loss": 1.4673, "step": 6100 }, { "epoch": 1.06, "learning_rate": 0.00015485909805156665, "loss": 1.4787, "step": 6110 }, { "epoch": 1.06, "learning_rate": 0.0001546990308144857, "loss": 1.4571, "step": 6120 }, { "epoch": 1.06, "learning_rate": 0.0001545387633456568, "loss": 1.4773, "step": 6130 }, { "epoch": 1.07, "learning_rate": 0.00015437829623175637, "loss": 1.4816, "step": 6140 }, { "epoch": 1.07, "learning_rate": 0.00015421763006019177, "loss": 1.5024, "step": 6150 }, { "epoch": 1.07, "learning_rate": 0.00015405676541909897, "loss": 1.4956, "step": 6160 }, { "epoch": 1.07, "learning_rate": 0.00015389570289734046, "loss": 1.4515, "step": 6170 }, { "epoch": 1.07, "learning_rate": 0.00015373444308450313, "loss": 1.4745, "step": 6180 }, { "epoch": 1.07, "learning_rate": 0.00015357298657089606, "loss": 1.4185, "step": 6190 }, { "epoch": 1.08, "learning_rate": 0.00015341133394754838, "loss": 1.4462, "step": 6200 }, { "epoch": 1.08, "learning_rate": 0.00015324948580620703, "loss": 1.4664, "step": 6210 }, { "epoch": 1.08, "learning_rate": 0.00015308744273933477, "loss": 1.4747, "step": 6220 }, { "epoch": 1.08, "learning_rate": 0.00015292520534010784, "loss": 1.4091, "step": 6230 }, { "epoch": 1.08, "learning_rate": 0.00015277902601747382, "loss": 1.4634, "step": 6240 }, { "epoch": 1.08, "learning_rate": 0.0001526164210235197, "loss": 1.4462, "step": 6250 }, { "epoch": 1.09, "learning_rate": 0.0001524536234214371, "loss": 1.4771, "step": 6260 }, { "epoch": 1.09, "learning_rate": 0.0001522906338071643, "loss": 1.4639, "step": 6270 }, { "epoch": 1.09, "learning_rate": 0.00015212745277734259, "loss": 1.4302, "step": 6280 }, { "epoch": 1.09, "learning_rate": 0.00015196408092931383, "loss": 1.4412, "step": 6290 }, { "epoch": 1.09, "learning_rate": 0.0001518005188611184, "loss": 1.4518, "step": 6300 }, { "epoch": 1.1, "learning_rate": 0.00015163676717149308, "loss": 1.4316, "step": 6310 }, { "epoch": 1.1, "learning_rate": 0.00015147282645986866, "loss": 1.4317, "step": 6320 }, { "epoch": 1.1, "learning_rate": 0.00015130869732636804, "loss": 1.4911, "step": 6330 }, { "epoch": 1.1, "learning_rate": 0.00015114438037180364, "loss": 1.4359, "step": 6340 }, { "epoch": 1.1, "learning_rate": 0.00015097987619767556, "loss": 1.4517, "step": 6350 }, { "epoch": 1.1, "learning_rate": 0.0001508316628659255, "loss": 1.4638, "step": 6360 }, { "epoch": 1.11, "learning_rate": 0.000150666804634212, "loss": 1.4206, "step": 6370 }, { "epoch": 1.11, "learning_rate": 0.0001505017609311527, "loss": 1.4665, "step": 6380 }, { "epoch": 1.11, "learning_rate": 0.00015033653236090806, "loss": 1.4544, "step": 6390 }, { "epoch": 1.11, "learning_rate": 0.00015018766908612838, "loss": 1.4356, "step": 6400 }, { "epoch": 1.11, "learning_rate": 0.00015002209093511546, "loss": 1.4497, "step": 6410 }, { "epoch": 1.11, "learning_rate": 0.00014985632967280134, "loss": 1.4469, "step": 6420 }, { "epoch": 1.12, "learning_rate": 0.00014969038590597315, "loss": 1.4334, "step": 6430 }, { "epoch": 1.12, "learning_rate": 0.0001495242602420861, "loss": 1.4714, "step": 6440 }, { "epoch": 1.12, "learning_rate": 0.00014935795328926125, "loss": 1.4577, "step": 6450 }, { "epoch": 1.12, "learning_rate": 0.00014919146565628327, "loss": 1.4247, "step": 6460 }, { "epoch": 1.12, "learning_rate": 0.00014902479795259822, "loss": 1.4811, "step": 6470 }, { "epoch": 1.12, "learning_rate": 0.00014885795078831132, "loss": 1.4309, "step": 6480 }, { "epoch": 1.13, "learning_rate": 0.00014869092477418482, "loss": 1.4853, "step": 6490 }, { "epoch": 1.13, "learning_rate": 0.00014852372052163553, "loss": 1.4507, "step": 6500 }, { "epoch": 1.13, "learning_rate": 0.00014835633864273287, "loss": 1.4455, "step": 6510 }, { "epoch": 1.13, "learning_rate": 0.0001482055435875876, "loss": 1.4421, "step": 6520 }, { "epoch": 1.13, "learning_rate": 0.0001480378259071914, "loss": 1.4758, "step": 6530 }, { "epoch": 1.14, "learning_rate": 0.00014786993237911187, "loss": 1.4564, "step": 6540 }, { "epoch": 1.14, "learning_rate": 0.00014771867836201847, "loss": 1.4497, "step": 6550 }, { "epoch": 1.14, "learning_rate": 0.00014755045241707308, "loss": 1.4975, "step": 6560 }, { "epoch": 1.14, "learning_rate": 0.00014738205240852806, "loss": 1.4897, "step": 6570 }, { "epoch": 1.14, "learning_rate": 0.00014721347895282978, "loss": 1.4567, "step": 6580 }, { "epoch": 1.14, "learning_rate": 0.0001470447326670598, "loss": 1.4813, "step": 6590 }, { "epoch": 1.15, "learning_rate": 0.00014687581416893218, "loss": 1.4861, "step": 6600 }, { "epoch": 1.15, "learning_rate": 0.0001467067240767915, "loss": 1.4759, "step": 6610 }, { "epoch": 1.15, "learning_rate": 0.00014653746300961038, "loss": 1.4753, "step": 6620 }, { "epoch": 1.15, "learning_rate": 0.00014636803158698738, "loss": 1.464, "step": 6630 }, { "epoch": 1.15, "learning_rate": 0.00014619843042914466, "loss": 1.4849, "step": 6640 }, { "epoch": 1.15, "learning_rate": 0.00014602866015692563, "loss": 1.4503, "step": 6650 }, { "epoch": 1.16, "learning_rate": 0.00014587572283276284, "loss": 1.4429, "step": 6660 }, { "epoch": 1.16, "learning_rate": 0.00014572264948280539, "loss": 1.481, "step": 6670 }, { "epoch": 1.16, "learning_rate": 0.00014555240899848083, "loss": 1.4358, "step": 6680 }, { "epoch": 1.16, "learning_rate": 0.00014538200176461162, "loss": 1.4236, "step": 6690 }, { "epoch": 1.16, "learning_rate": 0.00014521142840499203, "loss": 1.4508, "step": 6700 }, { "epoch": 1.16, "learning_rate": 0.0001450406895440244, "loss": 1.4858, "step": 6710 }, { "epoch": 1.17, "learning_rate": 0.0001448697858067168, "loss": 1.4396, "step": 6720 }, { "epoch": 1.17, "learning_rate": 0.00014469871781868098, "loss": 1.4119, "step": 6730 }, { "epoch": 1.17, "learning_rate": 0.00014452748620612992, "loss": 1.4862, "step": 6740 }, { "epoch": 1.17, "learning_rate": 0.00014435609159587555, "loss": 1.4585, "step": 6750 }, { "epoch": 1.17, "learning_rate": 0.0001442016976021512, "loss": 1.4725, "step": 6760 }, { "epoch": 1.17, "learning_rate": 0.00014402999502526254, "loss": 1.4497, "step": 6770 }, { "epoch": 1.18, "learning_rate": 0.00014385813127179106, "loss": 1.4455, "step": 6780 }, { "epoch": 1.18, "learning_rate": 0.00014368610697086277, "loss": 1.4252, "step": 6790 }, { "epoch": 1.18, "learning_rate": 0.00014351392275219134, "loss": 1.4629, "step": 6800 }, { "epoch": 1.18, "learning_rate": 0.00014334157924607578, "loss": 1.4628, "step": 6810 }, { "epoch": 1.18, "learning_rate": 0.00014316907708339822, "loss": 1.4321, "step": 6820 }, { "epoch": 1.19, "learning_rate": 0.00014299641689562156, "loss": 1.4558, "step": 6830 }, { "epoch": 1.19, "learning_rate": 0.0001428235993147873, "loss": 1.4762, "step": 6840 }, { "epoch": 1.19, "learning_rate": 0.00014265062497351285, "loss": 1.4601, "step": 6850 }, { "epoch": 1.19, "learning_rate": 0.00014247749450498962, "loss": 1.4782, "step": 6860 }, { "epoch": 1.19, "learning_rate": 0.00014230420854298054, "loss": 1.4407, "step": 6870 }, { "epoch": 1.19, "learning_rate": 0.00014213076772181767, "loss": 1.4164, "step": 6880 }, { "epoch": 1.2, "learning_rate": 0.00014195717267640004, "loss": 1.4223, "step": 6890 }, { "epoch": 1.2, "learning_rate": 0.00014178342404219118, "loss": 1.3949, "step": 6900 }, { "epoch": 1.2, "learning_rate": 0.00014160952245521682, "loss": 1.4718, "step": 6910 }, { "epoch": 1.2, "learning_rate": 0.00014145288077845185, "loss": 1.4722, "step": 6920 }, { "epoch": 1.2, "learning_rate": 0.00014127869033547745, "loss": 1.4446, "step": 6930 }, { "epoch": 1.2, "learning_rate": 0.00014112178972372757, "loss": 1.4475, "step": 6940 }, { "epoch": 1.21, "learning_rate": 0.00014094731272664267, "loss": 1.4202, "step": 6950 }, { "epoch": 1.21, "learning_rate": 0.00014077268583746858, "loss": 1.4854, "step": 6960 }, { "epoch": 1.21, "learning_rate": 0.0001405979096954461, "loss": 1.4541, "step": 6970 }, { "epoch": 1.21, "learning_rate": 0.00014042298494036228, "loss": 1.4756, "step": 6980 }, { "epoch": 1.21, "learning_rate": 0.00014024791221254815, "loss": 1.435, "step": 6990 }, { "epoch": 1.21, "learning_rate": 0.0001400726921528765, "loss": 1.3868, "step": 7000 }, { "epoch": 1.21, "eval_loss": 0.8074263334274292, "eval_runtime": 61.8502, "eval_samples_per_second": 8.472, "eval_steps_per_second": 0.534, "step": 7000 }, { "epoch": 1.22, "learning_rate": 0.0001398973254027594, "loss": 1.4526, "step": 7010 }, { "epoch": 1.22, "learning_rate": 0.00013972181260414585, "loss": 1.4434, "step": 7020 }, { "epoch": 1.22, "learning_rate": 0.0001395461543995196, "loss": 1.4733, "step": 7030 }, { "epoch": 1.22, "learning_rate": 0.00013937035143189657, "loss": 1.4456, "step": 7040 }, { "epoch": 1.22, "learning_rate": 0.00013919440434482266, "loss": 1.4451, "step": 7050 }, { "epoch": 1.23, "learning_rate": 0.00013901831378237124, "loss": 1.4572, "step": 7060 }, { "epoch": 1.23, "learning_rate": 0.000138842080389141, "loss": 1.4455, "step": 7070 }, { "epoch": 1.23, "learning_rate": 0.00013866570481025346, "loss": 1.438, "step": 7080 }, { "epoch": 1.23, "learning_rate": 0.00013848918769135055, "loss": 1.4261, "step": 7090 }, { "epoch": 1.23, "learning_rate": 0.00013831252967859238, "loss": 1.436, "step": 7100 }, { "epoch": 1.23, "learning_rate": 0.00013813573141865484, "loss": 1.4295, "step": 7110 }, { "epoch": 1.24, "learning_rate": 0.00013797649360826399, "loss": 1.4416, "step": 7120 }, { "epoch": 1.24, "learning_rate": 0.00013779943066211437, "loss": 1.4494, "step": 7130 }, { "epoch": 1.24, "learning_rate": 0.0001376222293470401, "loss": 1.4558, "step": 7140 }, { "epoch": 1.24, "learning_rate": 0.00013744489031170578, "loss": 1.4371, "step": 7150 }, { "epoch": 1.24, "learning_rate": 0.0001372851679656103, "loss": 1.4876, "step": 7160 }, { "epoch": 1.24, "learning_rate": 0.00013710756905065686, "loss": 1.4465, "step": 7170 }, { "epoch": 1.25, "learning_rate": 0.00013692983429941337, "loss": 1.5011, "step": 7180 }, { "epoch": 1.25, "learning_rate": 0.00013675196436249725, "loss": 1.4399, "step": 7190 }, { "epoch": 1.25, "learning_rate": 0.00013657395989102067, "loss": 1.4586, "step": 7200 }, { "epoch": 1.25, "learning_rate": 0.00013639582153658842, "loss": 1.431, "step": 7210 }, { "epoch": 1.25, "learning_rate": 0.00013621754995129522, "loss": 1.4681, "step": 7220 }, { "epoch": 1.25, "learning_rate": 0.0001360391457877237, "loss": 1.47, "step": 7230 }, { "epoch": 1.26, "learning_rate": 0.0001358606096989416, "loss": 1.4658, "step": 7240 }, { "epoch": 1.26, "learning_rate": 0.0001356819423384997, "loss": 1.4789, "step": 7250 }, { "epoch": 1.26, "learning_rate": 0.00013550314436042932, "loss": 1.4218, "step": 7260 }, { "epoch": 1.26, "learning_rate": 0.0001353242164192399, "loss": 1.4351, "step": 7270 }, { "epoch": 1.26, "learning_rate": 0.00013514515916991657, "loss": 1.4711, "step": 7280 }, { "epoch": 1.27, "learning_rate": 0.00013496597326791786, "loss": 1.4263, "step": 7290 }, { "epoch": 1.27, "learning_rate": 0.00013478665936917332, "loss": 1.4692, "step": 7300 }, { "epoch": 1.27, "learning_rate": 0.00013460721813008086, "loss": 1.457, "step": 7310 }, { "epoch": 1.27, "learning_rate": 0.0001344276502075047, "loss": 1.4114, "step": 7320 }, { "epoch": 1.27, "learning_rate": 0.00013424795625877276, "loss": 1.395, "step": 7330 }, { "epoch": 1.27, "learning_rate": 0.0001340681369416742, "loss": 1.4456, "step": 7340 }, { "epoch": 1.28, "learning_rate": 0.00013388819291445723, "loss": 1.4459, "step": 7350 }, { "epoch": 1.28, "learning_rate": 0.0001337081248358265, "loss": 1.4642, "step": 7360 }, { "epoch": 1.28, "learning_rate": 0.0001335279333649408, "loss": 1.4538, "step": 7370 }, { "epoch": 1.28, "learning_rate": 0.00013334761916141064, "loss": 1.4443, "step": 7380 }, { "epoch": 1.28, "learning_rate": 0.00013316718288529567, "loss": 1.4769, "step": 7390 }, { "epoch": 1.28, "learning_rate": 0.00013300468641063172, "loss": 1.4329, "step": 7400 }, { "epoch": 1.29, "learning_rate": 0.00013282402001666874, "loss": 1.4563, "step": 7410 }, { "epoch": 1.29, "learning_rate": 0.00013264323346681258, "loss": 1.487, "step": 7420 }, { "epoch": 1.29, "learning_rate": 0.00013246232742285206, "loss": 1.4135, "step": 7430 }, { "epoch": 1.29, "learning_rate": 0.00013228130254701342, "loss": 1.485, "step": 7440 }, { "epoch": 1.29, "learning_rate": 0.0001321182791051834, "loss": 1.4554, "step": 7450 }, { "epoch": 1.29, "learning_rate": 0.00013193703027476557, "loss": 1.4543, "step": 7460 }, { "epoch": 1.3, "learning_rate": 0.00013175566453537692, "loss": 1.4419, "step": 7470 }, { "epoch": 1.3, "learning_rate": 0.0001315741825509265, "loss": 1.4353, "step": 7480 }, { "epoch": 1.3, "learning_rate": 0.00013139258498574873, "loss": 1.4382, "step": 7490 }, { "epoch": 1.3, "learning_rate": 0.00013121087250460132, "loss": 1.4579, "step": 7500 }, { "epoch": 1.3, "learning_rate": 0.00013102904577266255, "loss": 1.4475, "step": 7510 }, { "epoch": 1.31, "learning_rate": 0.00013084710545552893, "loss": 1.442, "step": 7520 }, { "epoch": 1.31, "learning_rate": 0.00013066505221921273, "loss": 1.4578, "step": 7530 }, { "epoch": 1.31, "learning_rate": 0.00013048288673013966, "loss": 1.4778, "step": 7540 }, { "epoch": 1.31, "learning_rate": 0.00013030060965514632, "loss": 1.4279, "step": 7550 }, { "epoch": 1.31, "learning_rate": 0.00013011822166147767, "loss": 1.4175, "step": 7560 }, { "epoch": 1.31, "learning_rate": 0.00012993572341678483, "loss": 1.4537, "step": 7570 }, { "epoch": 1.32, "learning_rate": 0.00012975311558912248, "loss": 1.473, "step": 7580 }, { "epoch": 1.32, "learning_rate": 0.00012957039884694638, "loss": 1.4041, "step": 7590 }, { "epoch": 1.32, "learning_rate": 0.00012938757385911104, "loss": 1.4453, "step": 7600 }, { "epoch": 1.32, "learning_rate": 0.00012920464129486723, "loss": 1.4795, "step": 7610 }, { "epoch": 1.32, "learning_rate": 0.00012903991056267166, "loss": 1.4592, "step": 7620 }, { "epoch": 1.32, "learning_rate": 0.0001288567754484459, "loss": 1.4767, "step": 7630 }, { "epoch": 1.33, "learning_rate": 0.00012867353470085696, "loss": 1.4769, "step": 7640 }, { "epoch": 1.33, "learning_rate": 0.00012849018899067748, "loss": 1.4212, "step": 7650 }, { "epoch": 1.33, "learning_rate": 0.00012830673898906435, "loss": 1.4932, "step": 7660 }, { "epoch": 1.33, "learning_rate": 0.00012812318536755622, "loss": 1.4644, "step": 7670 }, { "epoch": 1.33, "learning_rate": 0.00012795789906852118, "loss": 1.4359, "step": 7680 }, { "epoch": 1.33, "learning_rate": 0.0001277741504206582, "loss": 1.4672, "step": 7690 }, { "epoch": 1.34, "learning_rate": 0.00012759030010249867, "loss": 1.4161, "step": 7700 }, { "epoch": 1.34, "learning_rate": 0.00012740634878704655, "loss": 1.4479, "step": 7710 }, { "epoch": 1.34, "learning_rate": 0.00012722229714767566, "loss": 1.5016, "step": 7720 }, { "epoch": 1.34, "learning_rate": 0.00012703814585812706, "loss": 1.4459, "step": 7730 }, { "epoch": 1.34, "learning_rate": 0.00012685389559250655, "loss": 1.4491, "step": 7740 }, { "epoch": 1.35, "learning_rate": 0.00012666954702528224, "loss": 1.4229, "step": 7750 }, { "epoch": 1.35, "learning_rate": 0.00012648510083128212, "loss": 1.4286, "step": 7760 }, { "epoch": 1.35, "learning_rate": 0.00012631901634382203, "loss": 1.4043, "step": 7770 }, { "epoch": 1.35, "learning_rate": 0.00012613438651937683, "loss": 1.44, "step": 7780 }, { "epoch": 1.35, "learning_rate": 0.00012594966102716905, "loss": 1.4456, "step": 7790 }, { "epoch": 1.35, "learning_rate": 0.00012576484054340636, "loss": 1.4206, "step": 7800 }, { "epoch": 1.36, "learning_rate": 0.00012557992574464428, "loss": 1.4273, "step": 7810 }, { "epoch": 1.36, "learning_rate": 0.00012539491730778355, "loss": 1.4658, "step": 7820 }, { "epoch": 1.36, "learning_rate": 0.0001252098159100676, "loss": 1.423, "step": 7830 }, { "epoch": 1.36, "learning_rate": 0.00012502462222908025, "loss": 1.4591, "step": 7840 }, { "epoch": 1.36, "learning_rate": 0.0001248393369427431, "loss": 1.3987, "step": 7850 }, { "epoch": 1.36, "learning_rate": 0.00012465396072931307, "loss": 1.4278, "step": 7860 }, { "epoch": 1.37, "learning_rate": 0.00012446849426737996, "loss": 1.4273, "step": 7870 }, { "epoch": 1.37, "learning_rate": 0.00012428293823586387, "loss": 1.4464, "step": 7880 }, { "epoch": 1.37, "learning_rate": 0.00012409729331401288, "loss": 1.4407, "step": 7890 }, { "epoch": 1.37, "learning_rate": 0.0001239115601814004, "loss": 1.4192, "step": 7900 }, { "epoch": 1.37, "learning_rate": 0.00012372573951792271, "loss": 1.4327, "step": 7910 }, { "epoch": 1.37, "learning_rate": 0.00012355842664409558, "loss": 1.4286, "step": 7920 }, { "epoch": 1.38, "learning_rate": 0.00012337244154623397, "loss": 1.4381, "step": 7930 }, { "epoch": 1.38, "learning_rate": 0.0001231863708910095, "loss": 1.4084, "step": 7940 }, { "epoch": 1.38, "learning_rate": 0.00012300021535955412, "loss": 1.4431, "step": 7950 }, { "epoch": 1.38, "learning_rate": 0.0001228139756333103, "loss": 1.4226, "step": 7960 }, { "epoch": 1.38, "learning_rate": 0.00012262765239402884, "loss": 1.3949, "step": 7970 }, { "epoch": 1.38, "learning_rate": 0.0001224412463237662, "loss": 1.4388, "step": 7980 }, { "epoch": 1.39, "learning_rate": 0.00012225475810488206, "loss": 1.4102, "step": 7990 }, { "epoch": 1.39, "learning_rate": 0.00012208684903502762, "loss": 1.4059, "step": 8000 }, { "epoch": 1.39, "eval_loss": 0.8009569048881531, "eval_runtime": 62.0166, "eval_samples_per_second": 8.449, "eval_steps_per_second": 0.532, "step": 8000 }, { "epoch": 1.39, "learning_rate": 0.00012190020661473858, "loss": 1.4513, "step": 8010 }, { "epoch": 1.39, "learning_rate": 0.00012171348402636268, "loss": 1.4368, "step": 8020 }, { "epoch": 1.39, "learning_rate": 0.00012152668195341832, "loss": 1.4503, "step": 8030 }, { "epoch": 1.4, "learning_rate": 0.00012133980107971474, "loss": 1.4039, "step": 8040 }, { "epoch": 1.4, "learning_rate": 0.00012115284208934969, "loss": 1.4467, "step": 8050 }, { "epoch": 1.4, "learning_rate": 0.00012096580566670692, "loss": 1.4028, "step": 8060 }, { "epoch": 1.4, "learning_rate": 0.00012077869249645357, "loss": 1.4299, "step": 8070 }, { "epoch": 1.4, "learning_rate": 0.00012059150326353772, "loss": 1.4264, "step": 8080 }, { "epoch": 1.4, "learning_rate": 0.00012040423865318591, "loss": 1.453, "step": 8090 }, { "epoch": 1.41, "learning_rate": 0.0001202168993509006, "loss": 1.4452, "step": 8100 }, { "epoch": 1.41, "learning_rate": 0.00012002948604245768, "loss": 1.4251, "step": 8110 }, { "epoch": 1.41, "learning_rate": 0.00011984199941390392, "loss": 1.4419, "step": 8120 }, { "epoch": 1.41, "learning_rate": 0.00011965444015155452, "loss": 1.4453, "step": 8130 }, { "epoch": 1.41, "learning_rate": 0.00011946680894199054, "loss": 1.4178, "step": 8140 }, { "epoch": 1.41, "learning_rate": 0.00011927910647205644, "loss": 1.4655, "step": 8150 }, { "epoch": 1.42, "learning_rate": 0.00011909133342885747, "loss": 1.4289, "step": 8160 }, { "epoch": 1.42, "learning_rate": 0.00011890349049975729, "loss": 1.4673, "step": 8170 }, { "epoch": 1.42, "learning_rate": 0.00011871557837237537, "loss": 1.4313, "step": 8180 }, { "epoch": 1.42, "learning_rate": 0.00011852759773458446, "loss": 1.4631, "step": 8190 }, { "epoch": 1.42, "learning_rate": 0.00011835835715290196, "loss": 1.4695, "step": 8200 }, { "epoch": 1.42, "learning_rate": 0.00011817024824131962, "loss": 1.4487, "step": 8210 }, { "epoch": 1.43, "learning_rate": 0.00011798207281556853, "loss": 1.4025, "step": 8220 }, { "epoch": 1.43, "learning_rate": 0.00011779383156448527, "loss": 1.4665, "step": 8230 }, { "epoch": 1.43, "learning_rate": 0.00011760552517714743, "loss": 1.4005, "step": 8240 }, { "epoch": 1.43, "learning_rate": 0.00011741715434287097, "loss": 1.4488, "step": 8250 }, { "epoch": 1.43, "learning_rate": 0.00011722871975120782, "loss": 1.4511, "step": 8260 }, { "epoch": 1.44, "learning_rate": 0.00011705907467624817, "loss": 1.4192, "step": 8270 }, { "epoch": 1.44, "learning_rate": 0.00011687052084609971, "loss": 1.4309, "step": 8280 }, { "epoch": 1.44, "learning_rate": 0.0001166819052595759, "loss": 1.4495, "step": 8290 }, { "epoch": 1.44, "learning_rate": 0.00011649322860712455, "loss": 1.4375, "step": 8300 }, { "epoch": 1.44, "learning_rate": 0.00011630449157941714, "loss": 1.4502, "step": 8310 }, { "epoch": 1.44, "learning_rate": 0.00011611569486734603, "loss": 1.4179, "step": 8320 }, { "epoch": 1.45, "learning_rate": 0.00011592683916202211, "loss": 1.4581, "step": 8330 }, { "epoch": 1.45, "learning_rate": 0.00011573792515477222, "loss": 1.4211, "step": 8340 }, { "epoch": 1.45, "learning_rate": 0.00011554895353713662, "loss": 1.4118, "step": 8350 }, { "epoch": 1.45, "learning_rate": 0.00011535992500086643, "loss": 1.4308, "step": 8360 }, { "epoch": 1.45, "learning_rate": 0.0001151708402379212, "loss": 1.429, "step": 8370 }, { "epoch": 1.45, "learning_rate": 0.00011498169994046621, "loss": 1.4262, "step": 8380 }, { "epoch": 1.46, "learning_rate": 0.00011479250480087011, "loss": 1.4375, "step": 8390 }, { "epoch": 1.46, "learning_rate": 0.00011462218285760746, "loss": 1.4508, "step": 8400 }, { "epoch": 1.46, "learning_rate": 0.00011443288542613578, "loss": 1.4318, "step": 8410 }, { "epoch": 1.46, "learning_rate": 0.00011424353516151814, "loss": 1.4416, "step": 8420 }, { "epoch": 1.46, "learning_rate": 0.00011405413275689179, "loss": 1.4296, "step": 8430 }, { "epoch": 1.46, "learning_rate": 0.0001138646789055848, "loss": 1.4192, "step": 8440 }, { "epoch": 1.47, "learning_rate": 0.00011367517430111365, "loss": 1.4411, "step": 8450 }, { "epoch": 1.47, "learning_rate": 0.0001134856196371805, "loss": 1.407, "step": 8460 }, { "epoch": 1.47, "learning_rate": 0.00011329601560767078, "loss": 1.447, "step": 8470 }, { "epoch": 1.47, "learning_rate": 0.0001131063629066507, "loss": 1.4344, "step": 8480 }, { "epoch": 1.47, "learning_rate": 0.00011291666222836454, "loss": 1.4774, "step": 8490 }, { "epoch": 1.48, "learning_rate": 0.00011274589117127904, "loss": 1.4209, "step": 8500 }, { "epoch": 1.48, "learning_rate": 0.00011255610124945745, "loss": 1.3907, "step": 8510 }, { "epoch": 1.48, "learning_rate": 0.00011236626536466241, "loss": 1.4373, "step": 8520 }, { "epoch": 1.48, "learning_rate": 0.00011217638421180883, "loss": 1.4305, "step": 8530 }, { "epoch": 1.48, "learning_rate": 0.00011198645848597729, "loss": 1.4338, "step": 8540 }, { "epoch": 1.48, "learning_rate": 0.00011179648888241155, "loss": 1.4363, "step": 8550 }, { "epoch": 1.49, "learning_rate": 0.00011160647609651597, "loss": 1.4053, "step": 8560 }, { "epoch": 1.49, "learning_rate": 0.00011141642082385304, "loss": 1.4481, "step": 8570 }, { "epoch": 1.49, "learning_rate": 0.00011122632376014078, "loss": 1.3928, "step": 8580 }, { "epoch": 1.49, "learning_rate": 0.00011103618560125007, "loss": 1.3817, "step": 8590 }, { "epoch": 1.49, "learning_rate": 0.00011084600704320238, "loss": 1.4496, "step": 8600 }, { "epoch": 1.49, "learning_rate": 0.00011065578878216696, "loss": 1.4582, "step": 8610 }, { "epoch": 1.5, "learning_rate": 0.00011046553151445844, "loss": 1.451, "step": 8620 }, { "epoch": 1.5, "learning_rate": 0.0001102752359365342, "loss": 1.469, "step": 8630 }, { "epoch": 1.5, "learning_rate": 0.00011008490274499193, "loss": 1.4299, "step": 8640 }, { "epoch": 1.5, "learning_rate": 0.00010989453263656697, "loss": 1.4298, "step": 8650 }, { "epoch": 1.5, "learning_rate": 0.00010972316855101048, "loss": 1.4192, "step": 8660 }, { "epoch": 1.5, "learning_rate": 0.00010953273022049615, "loss": 1.4405, "step": 8670 }, { "epoch": 1.51, "learning_rate": 0.00010934225699438665, "loss": 1.4636, "step": 8680 }, { "epoch": 1.51, "learning_rate": 0.00010917080183142705, "loss": 1.4231, "step": 8690 }, { "epoch": 1.51, "learning_rate": 0.00010898026422470837, "loss": 1.4357, "step": 8700 }, { "epoch": 1.51, "learning_rate": 0.00010878969374475633, "loss": 1.4173, "step": 8710 }, { "epoch": 1.51, "learning_rate": 0.00010859909108917496, "loss": 1.4286, "step": 8720 }, { "epoch": 1.52, "learning_rate": 0.00010840845695568593, "loss": 1.4365, "step": 8730 }, { "epoch": 1.52, "learning_rate": 0.00010821779204212623, "loss": 1.4292, "step": 8740 }, { "epoch": 1.52, "learning_rate": 0.00010804616787981517, "loss": 1.4429, "step": 8750 }, { "epoch": 1.52, "learning_rate": 0.00010785544640706349, "loss": 1.43, "step": 8760 }, { "epoch": 1.52, "learning_rate": 0.0001076646961785964, "loss": 1.4428, "step": 8770 }, { "epoch": 1.52, "learning_rate": 0.0001074739178926758, "loss": 1.4511, "step": 8780 }, { "epoch": 1.53, "learning_rate": 0.00010728311224766634, "loss": 1.4283, "step": 8790 }, { "epoch": 1.53, "learning_rate": 0.00010709227994203286, "loss": 1.4041, "step": 8800 }, { "epoch": 1.53, "learning_rate": 0.00010690142167433773, "loss": 1.4574, "step": 8810 }, { "epoch": 1.53, "learning_rate": 0.00010671053814323834, "loss": 1.4421, "step": 8820 }, { "epoch": 1.53, "learning_rate": 0.00010651963004748471, "loss": 1.453, "step": 8830 }, { "epoch": 1.53, "learning_rate": 0.00010632869808591662, "loss": 1.4239, "step": 8840 }, { "epoch": 1.54, "learning_rate": 0.00010613774295746124, "loss": 1.4069, "step": 8850 }, { "epoch": 1.54, "learning_rate": 0.0001059467653611306, "loss": 1.4335, "step": 8860 }, { "epoch": 1.54, "learning_rate": 0.00010575576599601895, "loss": 1.4341, "step": 8870 }, { "epoch": 1.54, "learning_rate": 0.00010556474556130025, "loss": 1.3864, "step": 8880 }, { "epoch": 1.54, "learning_rate": 0.00010537370475622554, "loss": 1.4389, "step": 8890 }, { "epoch": 1.54, "learning_rate": 0.00010518264428012043, "loss": 1.4895, "step": 8900 }, { "epoch": 1.55, "learning_rate": 0.00010499156483238262, "loss": 1.4252, "step": 8910 }, { "epoch": 1.55, "learning_rate": 0.00010480046711247918, "loss": 1.4495, "step": 8920 }, { "epoch": 1.55, "learning_rate": 0.00010460935181994404, "loss": 1.4292, "step": 8930 }, { "epoch": 1.55, "learning_rate": 0.00010441821965437556, "loss": 1.405, "step": 8940 }, { "epoch": 1.55, "learning_rate": 0.00010422707131543377, "loss": 1.4666, "step": 8950 }, { "epoch": 1.56, "learning_rate": 0.00010405502456046876, "loss": 1.4412, "step": 8960 }, { "epoch": 1.56, "learning_rate": 0.0001038638474198912, "loss": 1.3975, "step": 8970 }, { "epoch": 1.56, "learning_rate": 0.00010367265613528012, "loss": 1.4423, "step": 8980 }, { "epoch": 1.56, "learning_rate": 0.00010348145140651204, "loss": 1.4614, "step": 8990 }, { "epoch": 1.56, "learning_rate": 0.00010329023393351272, "loss": 1.4521, "step": 9000 }, { "epoch": 1.56, "eval_loss": 0.7902594208717346, "eval_runtime": 61.869, "eval_samples_per_second": 8.47, "eval_steps_per_second": 0.533, "step": 9000 }, { "epoch": 1.56, "learning_rate": 0.00010309900441625435, "loss": 1.4428, "step": 9010 }, { "epoch": 1.57, "learning_rate": 0.0001029077635547535, "loss": 1.4417, "step": 9020 }, { "epoch": 1.57, "learning_rate": 0.00010271651204906811, "loss": 1.4228, "step": 9030 }, { "epoch": 1.57, "learning_rate": 0.0001025252505992951, "loss": 1.4302, "step": 9040 }, { "epoch": 1.57, "learning_rate": 0.00010233397990556775, "loss": 1.4544, "step": 9050 }, { "epoch": 1.57, "learning_rate": 0.0001021618289563197, "loss": 1.4378, "step": 9060 }, { "epoch": 1.57, "learning_rate": 0.00010198967159704729, "loss": 1.4494, "step": 9070 }, { "epoch": 1.58, "learning_rate": 0.00010179837874523537, "loss": 1.4467, "step": 9080 }, { "epoch": 1.58, "learning_rate": 0.00010160707931026259, "loss": 1.4443, "step": 9090 }, { "epoch": 1.58, "learning_rate": 0.00010143490476895921, "loss": 1.4272, "step": 9100 }, { "epoch": 1.58, "learning_rate": 0.000101243594755249, "loss": 1.4492, "step": 9110 }, { "epoch": 1.58, "learning_rate": 0.00010105228018922502, "loss": 1.4289, "step": 9120 }, { "epoch": 1.58, "learning_rate": 0.00010086096177121504, "loss": 1.3822, "step": 9130 }, { "epoch": 1.59, "learning_rate": 0.00010066964020156091, "loss": 1.413, "step": 9140 }, { "epoch": 1.59, "learning_rate": 0.000100478316180616, "loss": 1.4419, "step": 9150 }, { "epoch": 1.59, "learning_rate": 0.00010028699040874277, "loss": 1.3911, "step": 9160 }, { "epoch": 1.59, "learning_rate": 0.00010009566358630991, "loss": 1.4321, "step": 9170 }, { "epoch": 1.59, "learning_rate": 9.990433641369012e-05, "loss": 1.4358, "step": 9180 }, { "epoch": 1.6, "learning_rate": 9.971300959125727e-05, "loss": 1.4307, "step": 9190 }, { "epoch": 1.6, "learning_rate": 9.952168381938401e-05, "loss": 1.4235, "step": 9200 }, { "epoch": 1.6, "learning_rate": 9.933035979843912e-05, "loss": 1.4123, "step": 9210 }, { "epoch": 1.6, "learning_rate": 9.913903822878499e-05, "loss": 1.4267, "step": 9220 }, { "epoch": 1.6, "learning_rate": 9.8947719810775e-05, "loss": 1.4151, "step": 9230 }, { "epoch": 1.6, "learning_rate": 9.875640524475103e-05, "loss": 1.4215, "step": 9240 }, { "epoch": 1.61, "learning_rate": 9.856509523104083e-05, "loss": 1.3905, "step": 9250 }, { "epoch": 1.61, "learning_rate": 9.83737904699555e-05, "loss": 1.4147, "step": 9260 }, { "epoch": 1.61, "learning_rate": 9.820162125476466e-05, "loss": 1.4225, "step": 9270 }, { "epoch": 1.61, "learning_rate": 9.802945737193441e-05, "loss": 1.4273, "step": 9280 }, { "epoch": 1.61, "learning_rate": 9.783817104368033e-05, "loss": 1.4644, "step": 9290 }, { "epoch": 1.61, "learning_rate": 9.764689262903611e-05, "loss": 1.4026, "step": 9300 }, { "epoch": 1.62, "learning_rate": 9.74556228281972e-05, "loss": 1.4154, "step": 9310 }, { "epoch": 1.62, "learning_rate": 9.726436234132755e-05, "loss": 1.438, "step": 9320 }, { "epoch": 1.62, "learning_rate": 9.707311186855684e-05, "loss": 1.4191, "step": 9330 }, { "epoch": 1.62, "learning_rate": 9.68818721099783e-05, "loss": 1.4027, "step": 9340 }, { "epoch": 1.62, "learning_rate": 9.669064376564584e-05, "loss": 1.4297, "step": 9350 }, { "epoch": 1.62, "learning_rate": 9.64994275355716e-05, "loss": 1.4164, "step": 9360 }, { "epoch": 1.63, "learning_rate": 9.630822411972336e-05, "loss": 1.3995, "step": 9370 }, { "epoch": 1.63, "learning_rate": 9.611703421802204e-05, "loss": 1.4077, "step": 9380 }, { "epoch": 1.63, "learning_rate": 9.592585853033905e-05, "loss": 1.4506, "step": 9390 }, { "epoch": 1.63, "learning_rate": 9.573469775649374e-05, "loss": 1.4236, "step": 9400 }, { "epoch": 1.63, "learning_rate": 9.554355259625092e-05, "loss": 1.3765, "step": 9410 }, { "epoch": 1.63, "learning_rate": 9.535242374931823e-05, "loss": 1.4404, "step": 9420 }, { "epoch": 1.64, "learning_rate": 9.516131191534359e-05, "loss": 1.4332, "step": 9430 }, { "epoch": 1.64, "learning_rate": 9.497021779391265e-05, "loss": 1.4082, "step": 9440 }, { "epoch": 1.64, "learning_rate": 9.477914208454618e-05, "loss": 1.4542, "step": 9450 }, { "epoch": 1.64, "learning_rate": 9.458808548669762e-05, "loss": 1.4153, "step": 9460 }, { "epoch": 1.64, "learning_rate": 9.439704869975043e-05, "loss": 1.4286, "step": 9470 }, { "epoch": 1.65, "learning_rate": 9.426333511085766e-05, "loss": 1.3663, "step": 9480 }, { "epoch": 1.65, "learning_rate": 9.407233360732119e-05, "loss": 1.389, "step": 9490 }, { "epoch": 1.65, "learning_rate": 9.388135380265187e-05, "loss": 1.4096, "step": 9500 }, { "epoch": 1.65, "learning_rate": 9.369039639595209e-05, "loss": 1.4111, "step": 9510 }, { "epoch": 1.65, "learning_rate": 9.349946208624212e-05, "loss": 1.3776, "step": 9520 }, { "epoch": 1.65, "learning_rate": 9.330855157245775e-05, "loss": 1.4375, "step": 9530 }, { "epoch": 1.66, "learning_rate": 9.31176655534477e-05, "loss": 1.4131, "step": 9540 }, { "epoch": 1.66, "learning_rate": 9.292680472797101e-05, "loss": 1.3917, "step": 9550 }, { "epoch": 1.66, "learning_rate": 9.273596979469446e-05, "loss": 1.4346, "step": 9560 }, { "epoch": 1.66, "learning_rate": 9.254516145219005e-05, "loss": 1.4056, "step": 9570 }, { "epoch": 1.66, "learning_rate": 9.235438039893248e-05, "loss": 1.4045, "step": 9580 }, { "epoch": 1.66, "learning_rate": 9.216362733329655e-05, "loss": 1.4248, "step": 9590 }, { "epoch": 1.67, "learning_rate": 9.197290295355454e-05, "loss": 1.4291, "step": 9600 }, { "epoch": 1.67, "learning_rate": 9.17822079578738e-05, "loss": 1.4162, "step": 9610 }, { "epoch": 1.67, "learning_rate": 9.159154304431409e-05, "loss": 1.4189, "step": 9620 }, { "epoch": 1.67, "learning_rate": 9.140090891082506e-05, "loss": 1.4231, "step": 9630 }, { "epoch": 1.67, "learning_rate": 9.121030625524365e-05, "loss": 1.4008, "step": 9640 }, { "epoch": 1.67, "learning_rate": 9.103879135550087e-05, "loss": 1.4321, "step": 9650 }, { "epoch": 1.68, "learning_rate": 9.084825043007008e-05, "loss": 1.4719, "step": 9660 }, { "epoch": 1.68, "learning_rate": 9.065774300561337e-05, "loss": 1.4252, "step": 9670 }, { "epoch": 1.68, "learning_rate": 9.04672697795039e-05, "loss": 1.4116, "step": 9680 }, { "epoch": 1.68, "learning_rate": 9.029587369187029e-05, "loss": 1.421, "step": 9690 }, { "epoch": 1.68, "learning_rate": 9.010546736343308e-05, "loss": 1.4574, "step": 9700 }, { "epoch": 1.69, "learning_rate": 8.991509725500809e-05, "loss": 1.3972, "step": 9710 }, { "epoch": 1.69, "learning_rate": 8.972476406346583e-05, "loss": 1.3886, "step": 9720 }, { "epoch": 1.69, "learning_rate": 8.953446848554158e-05, "loss": 1.4333, "step": 9730 }, { "epoch": 1.69, "learning_rate": 8.934421121783305e-05, "loss": 1.3839, "step": 9740 }, { "epoch": 1.69, "learning_rate": 8.915399295679763e-05, "loss": 1.4411, "step": 9750 }, { "epoch": 1.69, "learning_rate": 8.896381439874992e-05, "loss": 1.4303, "step": 9760 }, { "epoch": 1.7, "learning_rate": 8.877367623985927e-05, "loss": 1.453, "step": 9770 }, { "epoch": 1.7, "learning_rate": 8.858357917614699e-05, "loss": 1.4066, "step": 9780 }, { "epoch": 1.7, "learning_rate": 8.839352390348404e-05, "loss": 1.3924, "step": 9790 }, { "epoch": 1.7, "learning_rate": 8.820351111758849e-05, "loss": 1.3878, "step": 9800 }, { "epoch": 1.7, "learning_rate": 8.801354151402274e-05, "loss": 1.3849, "step": 9810 }, { "epoch": 1.7, "learning_rate": 8.782361578819118e-05, "loss": 1.4135, "step": 9820 }, { "epoch": 1.71, "learning_rate": 8.763373463533758e-05, "loss": 1.4187, "step": 9830 }, { "epoch": 1.71, "learning_rate": 8.748186227269857e-05, "loss": 1.4153, "step": 9840 }, { "epoch": 1.71, "learning_rate": 8.729206310269713e-05, "loss": 1.4083, "step": 9850 }, { "epoch": 1.71, "learning_rate": 8.710231045148006e-05, "loss": 1.4055, "step": 9860 }, { "epoch": 1.71, "learning_rate": 8.691260501365754e-05, "loss": 1.42, "step": 9870 }, { "epoch": 1.71, "learning_rate": 8.672294748366692e-05, "loss": 1.438, "step": 9880 }, { "epoch": 1.72, "learning_rate": 8.653333855577024e-05, "loss": 1.4077, "step": 9890 }, { "epoch": 1.72, "learning_rate": 8.634377892405157e-05, "loss": 1.4448, "step": 9900 }, { "epoch": 1.72, "learning_rate": 8.615426928241457e-05, "loss": 1.3921, "step": 9910 }, { "epoch": 1.72, "learning_rate": 8.596481032457986e-05, "loss": 1.4282, "step": 9920 }, { "epoch": 1.72, "learning_rate": 8.577540274408256e-05, "loss": 1.4084, "step": 9930 }, { "epoch": 1.73, "learning_rate": 8.558604723426972e-05, "loss": 1.4007, "step": 9940 }, { "epoch": 1.73, "learning_rate": 8.539674448829775e-05, "loss": 1.4371, "step": 9950 }, { "epoch": 1.73, "learning_rate": 8.520749519912991e-05, "loss": 1.4304, "step": 9960 }, { "epoch": 1.73, "learning_rate": 8.501830005953381e-05, "loss": 1.3866, "step": 9970 }, { "epoch": 1.73, "learning_rate": 8.482915976207883e-05, "loss": 1.4156, "step": 9980 }, { "epoch": 1.73, "learning_rate": 8.464007499913359e-05, "loss": 1.4735, "step": 9990 }, { "epoch": 1.74, "learning_rate": 8.445104646286339e-05, "loss": 1.3907, "step": 10000 }, { "epoch": 1.74, "eval_loss": 0.7795238494873047, "eval_runtime": 61.8635, "eval_samples_per_second": 8.47, "eval_steps_per_second": 0.533, "step": 10000 }, { "epoch": 1.74, "learning_rate": 8.428096942593624e-05, "loss": 1.3785, "step": 10010 }, { "epoch": 1.74, "learning_rate": 8.409204962652496e-05, "loss": 1.4134, "step": 10020 }, { "epoch": 1.74, "learning_rate": 8.392207157637791e-05, "loss": 1.3956, "step": 10030 }, { "epoch": 1.74, "learning_rate": 8.373326301036039e-05, "loss": 1.3801, "step": 10040 }, { "epoch": 1.74, "learning_rate": 8.354451399050185e-05, "loss": 1.3976, "step": 10050 }, { "epoch": 1.75, "learning_rate": 8.335582520773848e-05, "loss": 1.4065, "step": 10060 }, { "epoch": 1.75, "learning_rate": 8.316719735278616e-05, "loss": 1.4058, "step": 10070 }, { "epoch": 1.75, "learning_rate": 8.29786311161376e-05, "loss": 1.3986, "step": 10080 }, { "epoch": 1.75, "learning_rate": 8.279012718806004e-05, "loss": 1.3801, "step": 10090 }, { "epoch": 1.75, "learning_rate": 8.260168625859259e-05, "loss": 1.4051, "step": 10100 }, { "epoch": 1.75, "learning_rate": 8.241330901754376e-05, "loss": 1.4178, "step": 10110 }, { "epoch": 1.76, "learning_rate": 8.222499615448894e-05, "loss": 1.3994, "step": 10120 }, { "epoch": 1.76, "learning_rate": 8.203674835876778e-05, "loss": 1.3948, "step": 10130 }, { "epoch": 1.76, "learning_rate": 8.184856631948184e-05, "loss": 1.412, "step": 10140 }, { "epoch": 1.76, "learning_rate": 8.16604507254919e-05, "loss": 1.4018, "step": 10150 }, { "epoch": 1.76, "learning_rate": 8.147240226541555e-05, "loss": 1.395, "step": 10160 }, { "epoch": 1.77, "learning_rate": 8.128442162762465e-05, "loss": 1.4177, "step": 10170 }, { "epoch": 1.77, "learning_rate": 8.109650950024272e-05, "loss": 1.4684, "step": 10180 }, { "epoch": 1.77, "learning_rate": 8.090866657114254e-05, "loss": 1.3745, "step": 10190 }, { "epoch": 1.77, "learning_rate": 8.072089352794361e-05, "loss": 1.4243, "step": 10200 }, { "epoch": 1.77, "learning_rate": 8.055195810962145e-05, "loss": 1.3984, "step": 10210 }, { "epoch": 1.77, "learning_rate": 8.036431974310813e-05, "loss": 1.4003, "step": 10220 }, { "epoch": 1.78, "learning_rate": 8.017675325513676e-05, "loss": 1.4197, "step": 10230 }, { "epoch": 1.78, "learning_rate": 8.000800543960246e-05, "loss": 1.389, "step": 10240 }, { "epoch": 1.78, "learning_rate": 7.9820577412245e-05, "loss": 1.3928, "step": 10250 }, { "epoch": 1.78, "learning_rate": 7.96332232538574e-05, "loss": 1.4402, "step": 10260 }, { "epoch": 1.78, "learning_rate": 7.94459436502699e-05, "loss": 1.4059, "step": 10270 }, { "epoch": 1.78, "learning_rate": 7.925873928703986e-05, "loss": 1.4186, "step": 10280 }, { "epoch": 1.79, "learning_rate": 7.90716108494492e-05, "loss": 1.4009, "step": 10290 }, { "epoch": 1.79, "learning_rate": 7.888455902250194e-05, "loss": 1.4537, "step": 10300 }, { "epoch": 1.79, "learning_rate": 7.869758449092155e-05, "loss": 1.4237, "step": 10310 }, { "epoch": 1.79, "learning_rate": 7.851068793914867e-05, "loss": 1.4158, "step": 10320 }, { "epoch": 1.79, "learning_rate": 7.832387005133845e-05, "loss": 1.3977, "step": 10330 }, { "epoch": 1.79, "learning_rate": 7.813713151135805e-05, "loss": 1.4253, "step": 10340 }, { "epoch": 1.8, "learning_rate": 7.795047300278422e-05, "loss": 1.3966, "step": 10350 }, { "epoch": 1.8, "learning_rate": 7.776389520890071e-05, "loss": 1.4211, "step": 10360 }, { "epoch": 1.8, "learning_rate": 7.757739881269582e-05, "loss": 1.3777, "step": 10370 }, { "epoch": 1.8, "learning_rate": 7.739098449685987e-05, "loss": 1.3922, "step": 10380 }, { "epoch": 1.8, "learning_rate": 7.720465294378272e-05, "loss": 1.3884, "step": 10390 }, { "epoch": 1.81, "learning_rate": 7.70184048355513e-05, "loss": 1.4122, "step": 10400 }, { "epoch": 1.81, "learning_rate": 7.683224085394702e-05, "loss": 1.403, "step": 10410 }, { "epoch": 1.81, "learning_rate": 7.664616168044339e-05, "loss": 1.433, "step": 10420 }, { "epoch": 1.81, "learning_rate": 7.646016799620345e-05, "loss": 1.3929, "step": 10430 }, { "epoch": 1.81, "learning_rate": 7.631143505862324e-05, "loss": 1.4136, "step": 10440 }, { "epoch": 1.81, "learning_rate": 7.614417685784577e-05, "loss": 1.3835, "step": 10450 }, { "epoch": 1.82, "learning_rate": 7.595841739456996e-05, "loss": 1.3911, "step": 10460 }, { "epoch": 1.82, "learning_rate": 7.577274593812058e-05, "loss": 1.3981, "step": 10470 }, { "epoch": 1.82, "learning_rate": 7.558716316816814e-05, "loss": 1.4022, "step": 10480 }, { "epoch": 1.82, "learning_rate": 7.54016697640586e-05, "loss": 1.4368, "step": 10490 }, { "epoch": 1.82, "learning_rate": 7.521626640481061e-05, "loss": 1.4221, "step": 10500 }, { "epoch": 1.82, "learning_rate": 7.503095376911342e-05, "loss": 1.4189, "step": 10510 }, { "epoch": 1.83, "learning_rate": 7.484573253532406e-05, "loss": 1.4389, "step": 10520 }, { "epoch": 1.83, "learning_rate": 7.466060338146498e-05, "loss": 1.3616, "step": 10530 }, { "epoch": 1.83, "learning_rate": 7.447556698522156e-05, "loss": 1.4295, "step": 10540 }, { "epoch": 1.83, "learning_rate": 7.429062402393965e-05, "loss": 1.3832, "step": 10550 }, { "epoch": 1.83, "learning_rate": 7.410577517462307e-05, "loss": 1.3762, "step": 10560 }, { "epoch": 1.83, "learning_rate": 7.392102111393116e-05, "loss": 1.4565, "step": 10570 }, { "epoch": 1.84, "learning_rate": 7.373636251817615e-05, "loss": 1.4553, "step": 10580 }, { "epoch": 1.84, "learning_rate": 7.355180006332097e-05, "loss": 1.4171, "step": 10590 }, { "epoch": 1.84, "learning_rate": 7.336733442497654e-05, "loss": 1.3761, "step": 10600 }, { "epoch": 1.84, "learning_rate": 7.318296627839935e-05, "loss": 1.3928, "step": 10610 }, { "epoch": 1.84, "learning_rate": 7.299869629848908e-05, "loss": 1.3833, "step": 10620 }, { "epoch": 1.84, "learning_rate": 7.281452515978599e-05, "loss": 1.3991, "step": 10630 }, { "epoch": 1.85, "learning_rate": 7.263045353646861e-05, "loss": 1.4247, "step": 10640 }, { "epoch": 1.85, "learning_rate": 7.248326834170777e-05, "loss": 1.473, "step": 10650 }, { "epoch": 1.85, "learning_rate": 7.229937754384992e-05, "loss": 1.3753, "step": 10660 }, { "epoch": 1.85, "learning_rate": 7.211558814713165e-05, "loss": 1.41, "step": 10670 }, { "epoch": 1.85, "learning_rate": 7.195026494412065e-05, "loss": 1.4259, "step": 10680 }, { "epoch": 1.86, "learning_rate": 7.176667006277049e-05, "loss": 1.4327, "step": 10690 }, { "epoch": 1.86, "learning_rate": 7.158317853259342e-05, "loss": 1.397, "step": 10700 }, { "epoch": 1.86, "learning_rate": 7.13997910252802e-05, "loss": 1.3827, "step": 10710 }, { "epoch": 1.86, "learning_rate": 7.121650821214074e-05, "loss": 1.417, "step": 10720 }, { "epoch": 1.86, "learning_rate": 7.103333076410166e-05, "loss": 1.4142, "step": 10730 }, { "epoch": 1.86, "learning_rate": 7.085025935170397e-05, "loss": 1.4047, "step": 10740 }, { "epoch": 1.87, "learning_rate": 7.066729464510045e-05, "loss": 1.4153, "step": 10750 }, { "epoch": 1.87, "learning_rate": 7.04844373140533e-05, "loss": 1.3896, "step": 10760 }, { "epoch": 1.87, "learning_rate": 7.030168802793164e-05, "loss": 1.4255, "step": 10770 }, { "epoch": 1.87, "learning_rate": 7.011904745570912e-05, "loss": 1.4286, "step": 10780 }, { "epoch": 1.87, "learning_rate": 6.993651626596138e-05, "loss": 1.4337, "step": 10790 }, { "epoch": 1.87, "learning_rate": 6.97540951268637e-05, "loss": 1.3943, "step": 10800 }, { "epoch": 1.88, "learning_rate": 6.95717847061885e-05, "loss": 1.4138, "step": 10810 }, { "epoch": 1.88, "learning_rate": 6.938958567130285e-05, "loss": 1.3604, "step": 10820 }, { "epoch": 1.88, "learning_rate": 6.920749868916618e-05, "loss": 1.4096, "step": 10830 }, { "epoch": 1.88, "learning_rate": 6.902552442632765e-05, "loss": 1.3915, "step": 10840 }, { "epoch": 1.88, "learning_rate": 6.88436635489238e-05, "loss": 1.3946, "step": 10850 }, { "epoch": 1.88, "learning_rate": 6.868008625403449e-05, "loss": 1.3984, "step": 10860 }, { "epoch": 1.89, "learning_rate": 6.851660182560898e-05, "loss": 1.4222, "step": 10870 }, { "epoch": 1.89, "learning_rate": 6.833506196772657e-05, "loss": 1.43, "step": 10880 }, { "epoch": 1.89, "learning_rate": 6.815363802279173e-05, "loss": 1.3911, "step": 10890 }, { "epoch": 1.89, "learning_rate": 6.797233065492654e-05, "loss": 1.4225, "step": 10900 }, { "epoch": 1.89, "learning_rate": 6.779114052782636e-05, "loss": 1.4097, "step": 10910 }, { "epoch": 1.9, "learning_rate": 6.761006830475733e-05, "loss": 1.3827, "step": 10920 }, { "epoch": 1.9, "learning_rate": 6.742911464855399e-05, "loss": 1.4585, "step": 10930 }, { "epoch": 1.9, "learning_rate": 6.724828022161692e-05, "loss": 1.4062, "step": 10940 }, { "epoch": 1.9, "learning_rate": 6.706756568591013e-05, "loss": 1.4324, "step": 10950 }, { "epoch": 1.9, "learning_rate": 6.68869717029588e-05, "loss": 1.3791, "step": 10960 }, { "epoch": 1.9, "learning_rate": 6.670649893384692e-05, "loss": 1.4147, "step": 10970 }, { "epoch": 1.91, "learning_rate": 6.652614803921461e-05, "loss": 1.4273, "step": 10980 }, { "epoch": 1.91, "learning_rate": 6.634591967925598e-05, "loss": 1.3976, "step": 10990 }, { "epoch": 1.91, "learning_rate": 6.616581451371651e-05, "loss": 1.3826, "step": 11000 }, { "epoch": 1.91, "eval_loss": 0.7706289887428284, "eval_runtime": 63.1592, "eval_samples_per_second": 8.296, "eval_steps_per_second": 0.522, "step": 11000 }, { "epoch": 1.91, "learning_rate": 6.598583320189075e-05, "loss": 1.4273, "step": 11010 }, { "epoch": 1.91, "learning_rate": 6.580597640261978e-05, "loss": 1.3904, "step": 11020 }, { "epoch": 1.91, "learning_rate": 6.562624477428905e-05, "loss": 1.3824, "step": 11030 }, { "epoch": 1.92, "learning_rate": 6.544663897482568e-05, "loss": 1.3709, "step": 11040 }, { "epoch": 1.92, "learning_rate": 6.528510188239592e-05, "loss": 1.4232, "step": 11050 }, { "epoch": 1.92, "learning_rate": 6.510573696871829e-05, "loss": 1.3842, "step": 11060 }, { "epoch": 1.92, "learning_rate": 6.492649978928341e-05, "loss": 1.3655, "step": 11070 }, { "epoch": 1.92, "learning_rate": 6.47473910002085e-05, "loss": 1.3651, "step": 11080 }, { "epoch": 1.92, "learning_rate": 6.456841125714071e-05, "loss": 1.3925, "step": 11090 }, { "epoch": 1.93, "learning_rate": 6.440744036422758e-05, "loss": 1.3946, "step": 11100 }, { "epoch": 1.93, "learning_rate": 6.422870761318759e-05, "loss": 1.4216, "step": 11110 }, { "epoch": 1.93, "learning_rate": 6.405010580685171e-05, "loss": 1.3654, "step": 11120 }, { "epoch": 1.93, "learning_rate": 6.387163559901117e-05, "loss": 1.3438, "step": 11130 }, { "epoch": 1.93, "learning_rate": 6.36932976429756e-05, "loss": 1.4255, "step": 11140 }, { "epoch": 1.94, "learning_rate": 6.35150925915705e-05, "loss": 1.4268, "step": 11150 }, { "epoch": 1.94, "learning_rate": 6.333702109713477e-05, "loss": 1.3947, "step": 11160 }, { "epoch": 1.94, "learning_rate": 6.315908381151857e-05, "loss": 1.4452, "step": 11170 }, { "epoch": 1.94, "learning_rate": 6.298128138608059e-05, "loss": 1.4187, "step": 11180 }, { "epoch": 1.94, "learning_rate": 6.280361447168603e-05, "loss": 1.3878, "step": 11190 }, { "epoch": 1.94, "learning_rate": 6.264383064821323e-05, "loss": 1.3753, "step": 11200 }, { "epoch": 1.95, "learning_rate": 6.246642299615586e-05, "loss": 1.372, "step": 11210 }, { "epoch": 1.95, "learning_rate": 6.230687356416249e-05, "loss": 1.361, "step": 11220 }, { "epoch": 1.95, "learning_rate": 6.212972751884663e-05, "loss": 1.4421, "step": 11230 }, { "epoch": 1.95, "learning_rate": 6.195272010177959e-05, "loss": 1.4402, "step": 11240 }, { "epoch": 1.95, "learning_rate": 6.177585196091631e-05, "loss": 1.3958, "step": 11250 }, { "epoch": 1.95, "learning_rate": 6.159912374370183e-05, "loss": 1.4365, "step": 11260 }, { "epoch": 1.96, "learning_rate": 6.142253609706898e-05, "loss": 1.4041, "step": 11270 }, { "epoch": 1.96, "learning_rate": 6.124608966743606e-05, "loss": 1.4321, "step": 11280 }, { "epoch": 1.96, "learning_rate": 6.106978510070443e-05, "loss": 1.4154, "step": 11290 }, { "epoch": 1.96, "learning_rate": 6.089362304225603e-05, "loss": 1.4208, "step": 11300 }, { "epoch": 1.96, "learning_rate": 6.071760413695131e-05, "loss": 1.3757, "step": 11310 }, { "epoch": 1.96, "learning_rate": 6.054172902912656e-05, "loss": 1.4266, "step": 11320 }, { "epoch": 1.97, "learning_rate": 6.0365998362591744e-05, "loss": 1.4081, "step": 11330 }, { "epoch": 1.97, "learning_rate": 6.019041278062807e-05, "loss": 1.4108, "step": 11340 }, { "epoch": 1.97, "learning_rate": 6.0032510335413086e-05, "loss": 1.3667, "step": 11350 }, { "epoch": 1.97, "learning_rate": 5.985720218447026e-05, "loss": 1.3987, "step": 11360 }, { "epoch": 1.97, "learning_rate": 5.9682040980602316e-05, "loss": 1.3553, "step": 11370 }, { "epoch": 1.98, "learning_rate": 5.9524522066830346e-05, "loss": 1.3722, "step": 11380 }, { "epoch": 1.98, "learning_rate": 5.934964182845485e-05, "loss": 1.4445, "step": 11390 }, { "epoch": 1.98, "learning_rate": 5.917491039513411e-05, "loss": 1.3968, "step": 11400 }, { "epoch": 1.98, "learning_rate": 5.9000328406491425e-05, "loss": 1.3855, "step": 11410 }, { "epoch": 1.98, "learning_rate": 5.882589650160322e-05, "loss": 1.3988, "step": 11420 }, { "epoch": 1.98, "learning_rate": 5.865161531899642e-05, "loss": 1.3642, "step": 11430 }, { "epoch": 1.99, "learning_rate": 5.8477485496646245e-05, "loss": 1.4189, "step": 11440 }, { "epoch": 1.99, "learning_rate": 5.8303507671973864e-05, "loss": 1.4004, "step": 11450 }, { "epoch": 1.99, "learning_rate": 5.812968248184392e-05, "loss": 1.3928, "step": 11460 }, { "epoch": 1.99, "learning_rate": 5.795601056256257e-05, "loss": 1.4273, "step": 11470 }, { "epoch": 1.99, "learning_rate": 5.778249254987461e-05, "loss": 1.3824, "step": 11480 }, { "epoch": 1.99, "learning_rate": 5.7609129078961655e-05, "loss": 1.4004, "step": 11490 }, { "epoch": 2.0, "learning_rate": 5.7435920784439514e-05, "loss": 1.3801, "step": 11500 }, { "epoch": 2.0, "learning_rate": 5.7262868300355975e-05, "loss": 1.373, "step": 11510 }, { "epoch": 2.0, "learning_rate": 5.7089972260188485e-05, "loss": 1.414, "step": 11520 }, { "epoch": 2.0, "learning_rate": 5.6917233296841776e-05, "loss": 1.3001, "step": 11530 }, { "epoch": 2.0, "learning_rate": 5.6744652042645616e-05, "loss": 1.3162, "step": 11540 }, { "epoch": 2.0, "learning_rate": 5.6572229129352474e-05, "loss": 1.2765, "step": 11550 }, { "epoch": 2.01, "learning_rate": 5.6399965188135084e-05, "loss": 1.2453, "step": 11560 }, { "epoch": 2.01, "learning_rate": 5.622786084958437e-05, "loss": 1.2787, "step": 11570 }, { "epoch": 2.01, "learning_rate": 5.6090292716047934e-05, "loss": 1.2726, "step": 11580 }, { "epoch": 2.01, "learning_rate": 5.591847724951989e-05, "loss": 1.2786, "step": 11590 }, { "epoch": 2.01, "learning_rate": 5.574682314819745e-05, "loss": 1.2788, "step": 11600 }, { "epoch": 2.02, "learning_rate": 5.557533104043913e-05, "loss": 1.2654, "step": 11610 }, { "epoch": 2.02, "learning_rate": 5.54040015540104e-05, "loss": 1.2507, "step": 11620 }, { "epoch": 2.02, "learning_rate": 5.5249944575829906e-05, "loss": 1.3315, "step": 11630 }, { "epoch": 2.02, "learning_rate": 5.507892579728751e-05, "loss": 1.2968, "step": 11640 }, { "epoch": 2.02, "learning_rate": 5.490807145722008e-05, "loss": 1.3051, "step": 11650 }, { "epoch": 2.02, "learning_rate": 5.47373821810585e-05, "loss": 1.306, "step": 11660 }, { "epoch": 2.03, "learning_rate": 5.4566858593629454e-05, "loss": 1.3139, "step": 11670 }, { "epoch": 2.03, "learning_rate": 5.439650131915299e-05, "loss": 1.277, "step": 11680 }, { "epoch": 2.03, "learning_rate": 5.4226310981240466e-05, "loss": 1.2737, "step": 11690 }, { "epoch": 2.03, "learning_rate": 5.4056288202892126e-05, "loss": 1.311, "step": 11700 }, { "epoch": 2.03, "learning_rate": 5.3886433606494804e-05, "loss": 1.2775, "step": 11710 }, { "epoch": 2.03, "learning_rate": 5.37167478138197e-05, "loss": 1.2633, "step": 11720 }, { "epoch": 2.04, "learning_rate": 5.354723144602016e-05, "loss": 1.3022, "step": 11730 }, { "epoch": 2.04, "learning_rate": 5.337788512362931e-05, "loss": 1.2979, "step": 11740 }, { "epoch": 2.04, "learning_rate": 5.320870946655765e-05, "loss": 1.2726, "step": 11750 }, { "epoch": 2.04, "learning_rate": 5.303970509409113e-05, "loss": 1.2303, "step": 11760 }, { "epoch": 2.04, "learning_rate": 5.2870872624888615e-05, "loss": 1.2648, "step": 11770 }, { "epoch": 2.04, "learning_rate": 5.2702212676979704e-05, "loss": 1.2865, "step": 11780 }, { "epoch": 2.05, "learning_rate": 5.253372586776248e-05, "loss": 1.2944, "step": 11790 }, { "epoch": 2.05, "learning_rate": 5.236541281400122e-05, "loss": 1.3188, "step": 11800 }, { "epoch": 2.05, "learning_rate": 5.219727413182419e-05, "loss": 1.3009, "step": 11810 }, { "epoch": 2.05, "learning_rate": 5.202931043672124e-05, "loss": 1.2727, "step": 11820 }, { "epoch": 2.05, "learning_rate": 5.186152234354172e-05, "loss": 1.2515, "step": 11830 }, { "epoch": 2.05, "learning_rate": 5.172741871515152e-05, "loss": 1.3243, "step": 11840 }, { "epoch": 2.06, "learning_rate": 5.1559948252801414e-05, "loss": 1.3009, "step": 11850 }, { "epoch": 2.06, "learning_rate": 5.139265511052607e-05, "loss": 1.3033, "step": 11860 }, { "epoch": 2.06, "learning_rate": 5.122553990072023e-05, "loss": 1.2961, "step": 11870 }, { "epoch": 2.06, "learning_rate": 5.10586032351273e-05, "loss": 1.2361, "step": 11880 }, { "epoch": 2.06, "learning_rate": 5.090851339647496e-05, "loss": 1.2856, "step": 11890 }, { "epoch": 2.07, "learning_rate": 5.074191764789694e-05, "loss": 1.2688, "step": 11900 }, { "epoch": 2.07, "learning_rate": 5.0575502213883655e-05, "loss": 1.2338, "step": 11910 }, { "epoch": 2.07, "learning_rate": 5.040926770361687e-05, "loss": 1.3065, "step": 11920 }, { "epoch": 2.07, "learning_rate": 5.0243214725616126e-05, "loss": 1.2683, "step": 11930 }, { "epoch": 2.07, "learning_rate": 5.00773438877363e-05, "loss": 1.3036, "step": 11940 }, { "epoch": 2.07, "learning_rate": 4.99116557971657e-05, "loss": 1.25, "step": 11950 }, { "epoch": 2.08, "learning_rate": 4.9746151060423564e-05, "loss": 1.2719, "step": 11960 }, { "epoch": 2.08, "learning_rate": 4.958083028335794e-05, "loss": 1.2411, "step": 11970 }, { "epoch": 2.08, "learning_rate": 4.9415694071143584e-05, "loss": 1.286, "step": 11980 }, { "epoch": 2.08, "learning_rate": 4.9250743028279486e-05, "loss": 1.2786, "step": 11990 }, { "epoch": 2.08, "learning_rate": 4.9085977758586906e-05, "loss": 1.2634, "step": 12000 }, { "epoch": 2.08, "eval_loss": 0.7820777893066406, "eval_runtime": 62.0421, "eval_samples_per_second": 8.446, "eval_steps_per_second": 0.532, "step": 12000 }, { "epoch": 2.08, "learning_rate": 4.8921398865207045e-05, "loss": 1.3052, "step": 12010 }, { "epoch": 2.09, "learning_rate": 4.875700695059875e-05, "loss": 1.3028, "step": 12020 }, { "epoch": 2.09, "learning_rate": 4.859280261653654e-05, "loss": 1.3132, "step": 12030 }, { "epoch": 2.09, "learning_rate": 4.8428786464108225e-05, "loss": 1.3153, "step": 12040 }, { "epoch": 2.09, "learning_rate": 4.826495909371276e-05, "loss": 1.3391, "step": 12050 }, { "epoch": 2.09, "learning_rate": 4.810132110505804e-05, "loss": 1.2821, "step": 12060 }, { "epoch": 2.09, "learning_rate": 4.793787309715871e-05, "loss": 1.2542, "step": 12070 }, { "epoch": 2.1, "learning_rate": 4.779093281812042e-05, "loss": 1.2344, "step": 12080 }, { "epoch": 2.1, "learning_rate": 4.7627847421449165e-05, "loss": 1.2916, "step": 12090 }, { "epoch": 2.1, "learning_rate": 4.746495373873521e-05, "loss": 1.2703, "step": 12100 }, { "epoch": 2.1, "learning_rate": 4.730225236626855e-05, "loss": 1.3033, "step": 12110 }, { "epoch": 2.1, "learning_rate": 4.713974389963527e-05, "loss": 1.2804, "step": 12120 }, { "epoch": 2.11, "learning_rate": 4.697742893371525e-05, "loss": 1.317, "step": 12130 }, { "epoch": 2.11, "learning_rate": 4.6815308062680086e-05, "loss": 1.3142, "step": 12140 }, { "epoch": 2.11, "learning_rate": 4.665338187999084e-05, "loss": 1.2592, "step": 12150 }, { "epoch": 2.11, "learning_rate": 4.649165097839591e-05, "loss": 1.2689, "step": 12160 }, { "epoch": 2.11, "learning_rate": 4.6330115949928876e-05, "loss": 1.2734, "step": 12170 }, { "epoch": 2.11, "learning_rate": 4.618490238457079e-05, "loss": 1.2713, "step": 12180 }, { "epoch": 2.12, "learning_rate": 4.602374114352934e-05, "loss": 1.3216, "step": 12190 }, { "epoch": 2.12, "learning_rate": 4.586277748845055e-05, "loss": 1.2775, "step": 12200 }, { "epoch": 2.12, "learning_rate": 4.570201200855939e-05, "loss": 1.2749, "step": 12210 }, { "epoch": 2.12, "learning_rate": 4.554144529235537e-05, "loss": 1.2809, "step": 12220 }, { "epoch": 2.12, "learning_rate": 4.538107792761041e-05, "loss": 1.2817, "step": 12230 }, { "epoch": 2.12, "learning_rate": 4.522091050136663e-05, "loss": 1.2324, "step": 12240 }, { "epoch": 2.13, "learning_rate": 4.50609435999344e-05, "loss": 1.2649, "step": 12250 }, { "epoch": 2.13, "learning_rate": 4.4901177808889936e-05, "loss": 1.2493, "step": 12260 }, { "epoch": 2.13, "learning_rate": 4.474161371307322e-05, "loss": 1.2946, "step": 12270 }, { "epoch": 2.13, "learning_rate": 4.458225189658598e-05, "loss": 1.2523, "step": 12280 }, { "epoch": 2.13, "learning_rate": 4.44230929427895e-05, "loss": 1.2769, "step": 12290 }, { "epoch": 2.13, "learning_rate": 4.426413743430241e-05, "loss": 1.2823, "step": 12300 }, { "epoch": 2.14, "learning_rate": 4.410538595299864e-05, "loss": 1.2536, "step": 12310 }, { "epoch": 2.14, "learning_rate": 4.3962684543383956e-05, "loss": 1.2686, "step": 12320 }, { "epoch": 2.14, "learning_rate": 4.380432231411452e-05, "loss": 1.2826, "step": 12330 }, { "epoch": 2.14, "learning_rate": 4.364616579523162e-05, "loss": 1.2906, "step": 12340 }, { "epoch": 2.14, "learning_rate": 4.348821556568439e-05, "loss": 1.2777, "step": 12350 }, { "epoch": 2.15, "learning_rate": 4.3346237214366844e-05, "loss": 1.2865, "step": 12360 }, { "epoch": 2.15, "learning_rate": 4.3188680526855985e-05, "loss": 1.2784, "step": 12370 }, { "epoch": 2.15, "learning_rate": 4.303133180335535e-05, "loss": 1.2866, "step": 12380 }, { "epoch": 2.15, "learning_rate": 4.287419161985704e-05, "loss": 1.2954, "step": 12390 }, { "epoch": 2.15, "learning_rate": 4.2717260551589775e-05, "loss": 1.2677, "step": 12400 }, { "epoch": 2.15, "learning_rate": 4.2560539173016813e-05, "loss": 1.2825, "step": 12410 }, { "epoch": 2.16, "learning_rate": 4.240402805783377e-05, "loss": 1.2749, "step": 12420 }, { "epoch": 2.16, "learning_rate": 4.224772777896659e-05, "loss": 1.2797, "step": 12430 }, { "epoch": 2.16, "learning_rate": 4.209163890856951e-05, "loss": 1.2819, "step": 12440 }, { "epoch": 2.16, "learning_rate": 4.193576201802268e-05, "loss": 1.2607, "step": 12450 }, { "epoch": 2.16, "learning_rate": 4.1780097677930485e-05, "loss": 1.2767, "step": 12460 }, { "epoch": 2.16, "learning_rate": 4.162464645811913e-05, "loss": 1.2333, "step": 12470 }, { "epoch": 2.17, "learning_rate": 4.146940892763472e-05, "loss": 1.2667, "step": 12480 }, { "epoch": 2.17, "learning_rate": 4.131438565474112e-05, "loss": 1.3182, "step": 12490 }, { "epoch": 2.17, "learning_rate": 4.11595772069178e-05, "loss": 1.2889, "step": 12500 }, { "epoch": 2.17, "learning_rate": 4.100498415085804e-05, "loss": 1.3046, "step": 12510 }, { "epoch": 2.17, "learning_rate": 4.085060705246642e-05, "loss": 1.2576, "step": 12520 }, { "epoch": 2.17, "learning_rate": 4.069644647685712e-05, "loss": 1.2588, "step": 12530 }, { "epoch": 2.18, "learning_rate": 4.0542502988351686e-05, "loss": 1.2901, "step": 12540 }, { "epoch": 2.18, "learning_rate": 4.038877715047699e-05, "loss": 1.3228, "step": 12550 }, { "epoch": 2.18, "learning_rate": 4.0250610452792004e-05, "loss": 1.2813, "step": 12560 }, { "epoch": 2.18, "learning_rate": 4.011262091761672e-05, "loss": 1.3074, "step": 12570 }, { "epoch": 2.18, "learning_rate": 3.997480895410295e-05, "loss": 1.2753, "step": 12580 }, { "epoch": 2.19, "learning_rate": 3.9821893310242744e-05, "loss": 1.2519, "step": 12590 }, { "epoch": 2.19, "learning_rate": 3.966919795488333e-05, "loss": 1.331, "step": 12600 }, { "epoch": 2.19, "learning_rate": 3.9516723446982664e-05, "loss": 1.3126, "step": 12610 }, { "epoch": 2.19, "learning_rate": 3.936447034469024e-05, "loss": 1.2616, "step": 12620 }, { "epoch": 2.19, "learning_rate": 3.92124392053451e-05, "loss": 1.2957, "step": 12630 }, { "epoch": 2.19, "learning_rate": 3.9060630585473746e-05, "loss": 1.309, "step": 12640 }, { "epoch": 2.2, "learning_rate": 3.890904504078814e-05, "loss": 1.2873, "step": 12650 }, { "epoch": 2.2, "learning_rate": 3.8757683126183654e-05, "loss": 1.283, "step": 12660 }, { "epoch": 2.2, "learning_rate": 3.8606545395737005e-05, "loss": 1.3069, "step": 12670 }, { "epoch": 2.2, "learning_rate": 3.84556324027043e-05, "loss": 1.2527, "step": 12680 }, { "epoch": 2.2, "learning_rate": 3.8304944699518954e-05, "loss": 1.2922, "step": 12690 }, { "epoch": 2.2, "learning_rate": 3.816951884539331e-05, "loss": 1.2795, "step": 12700 }, { "epoch": 2.21, "learning_rate": 3.801926071191671e-05, "loss": 1.285, "step": 12710 }, { "epoch": 2.21, "learning_rate": 3.786922946567352e-05, "loss": 1.2804, "step": 12720 }, { "epoch": 2.21, "learning_rate": 3.771942565586933e-05, "loss": 1.318, "step": 12730 }, { "epoch": 2.21, "learning_rate": 3.7569849830877333e-05, "loss": 1.3102, "step": 12740 }, { "epoch": 2.21, "learning_rate": 3.742050253823604e-05, "loss": 1.3083, "step": 12750 }, { "epoch": 2.21, "learning_rate": 3.7286285821885306e-05, "loss": 1.2854, "step": 12760 }, { "epoch": 2.22, "learning_rate": 3.713737424618142e-05, "loss": 1.305, "step": 12770 }, { "epoch": 2.22, "learning_rate": 3.6988692785952173e-05, "loss": 1.2948, "step": 12780 }, { "epoch": 2.22, "learning_rate": 3.68402419854622e-05, "loss": 1.2779, "step": 12790 }, { "epoch": 2.22, "learning_rate": 3.6692022388131795e-05, "loss": 1.3145, "step": 12800 }, { "epoch": 2.22, "learning_rate": 3.654403453653494e-05, "loss": 1.2673, "step": 12810 }, { "epoch": 2.23, "learning_rate": 3.639627897239718e-05, "loss": 1.2883, "step": 12820 }, { "epoch": 2.23, "learning_rate": 3.6248756236593863e-05, "loss": 1.2678, "step": 12830 }, { "epoch": 2.23, "learning_rate": 3.6101466869147995e-05, "loss": 1.2771, "step": 12840 }, { "epoch": 2.23, "learning_rate": 3.5954411409228294e-05, "loss": 1.3139, "step": 12850 }, { "epoch": 2.23, "learning_rate": 3.580759039514729e-05, "loss": 1.2914, "step": 12860 }, { "epoch": 2.23, "learning_rate": 3.566100436435924e-05, "loss": 1.2685, "step": 12870 }, { "epoch": 2.24, "learning_rate": 3.551465385345826e-05, "loss": 1.2932, "step": 12880 }, { "epoch": 2.24, "learning_rate": 3.5383140205951094e-05, "loss": 1.2751, "step": 12890 }, { "epoch": 2.24, "learning_rate": 3.5237238658062945e-05, "loss": 1.2775, "step": 12900 }, { "epoch": 2.24, "learning_rate": 3.5091574181302256e-05, "loss": 1.2826, "step": 12910 }, { "epoch": 2.24, "learning_rate": 3.494614730888971e-05, "loss": 1.2661, "step": 12920 }, { "epoch": 2.24, "learning_rate": 3.480095857317618e-05, "loss": 1.27, "step": 12930 }, { "epoch": 2.25, "learning_rate": 3.4656008505640814e-05, "loss": 1.3088, "step": 12940 }, { "epoch": 2.25, "learning_rate": 3.4511297636889095e-05, "loss": 1.263, "step": 12950 }, { "epoch": 2.25, "learning_rate": 3.4366826496650886e-05, "loss": 1.2896, "step": 12960 }, { "epoch": 2.25, "learning_rate": 3.422259561377853e-05, "loss": 1.2919, "step": 12970 }, { "epoch": 2.25, "learning_rate": 3.4078605516244785e-05, "loss": 1.2451, "step": 12980 }, { "epoch": 2.25, "learning_rate": 3.396358715789669e-05, "loss": 1.3136, "step": 12990 }, { "epoch": 2.26, "learning_rate": 3.383437644428432e-05, "loss": 1.2834, "step": 13000 }, { "epoch": 2.26, "eval_loss": 0.78007972240448, "eval_runtime": 61.984, "eval_samples_per_second": 8.454, "eval_steps_per_second": 0.532, "step": 13000 }, { "epoch": 2.26, "learning_rate": 3.369103909748521e-05, "loss": 1.2585, "step": 13010 }, { "epoch": 2.26, "learning_rate": 3.354794448184514e-05, "loss": 1.27, "step": 13020 }, { "epoch": 2.26, "learning_rate": 3.340509312117752e-05, "loss": 1.2923, "step": 13030 }, { "epoch": 2.26, "learning_rate": 3.32624855384053e-05, "loss": 1.2711, "step": 13040 }, { "epoch": 2.26, "learning_rate": 3.3120122255559e-05, "loss": 1.2891, "step": 13050 }, { "epoch": 2.27, "learning_rate": 3.2978003793774914e-05, "loss": 1.2635, "step": 13060 }, { "epoch": 2.27, "learning_rate": 3.283613067329311e-05, "loss": 1.2926, "step": 13070 }, { "epoch": 2.27, "learning_rate": 3.269450341345558e-05, "loss": 1.2621, "step": 13080 }, { "epoch": 2.27, "learning_rate": 3.2553122532704325e-05, "loss": 1.291, "step": 13090 }, { "epoch": 2.27, "learning_rate": 3.241198854857938e-05, "loss": 1.2658, "step": 13100 }, { "epoch": 2.28, "learning_rate": 3.227110197771703e-05, "loss": 1.2671, "step": 13110 }, { "epoch": 2.28, "learning_rate": 3.213046333584792e-05, "loss": 1.2686, "step": 13120 }, { "epoch": 2.28, "learning_rate": 3.1990073137795066e-05, "loss": 1.2723, "step": 13130 }, { "epoch": 2.28, "learning_rate": 3.186393480377876e-05, "loss": 1.3037, "step": 13140 }, { "epoch": 2.28, "learning_rate": 3.172401806405554e-05, "loss": 1.284, "step": 13150 }, { "epoch": 2.28, "learning_rate": 3.1584351255985664e-05, "loss": 1.2582, "step": 13160 }, { "epoch": 2.29, "learning_rate": 3.144493489083469e-05, "loss": 1.3076, "step": 13170 }, { "epoch": 2.29, "learning_rate": 3.130576947895139e-05, "loss": 1.3193, "step": 13180 }, { "epoch": 2.29, "learning_rate": 3.1166855529765825e-05, "loss": 1.2829, "step": 13190 }, { "epoch": 2.29, "learning_rate": 3.102819355178763e-05, "loss": 1.3018, "step": 13200 }, { "epoch": 2.29, "learning_rate": 3.0889784052604066e-05, "loss": 1.2842, "step": 13210 }, { "epoch": 2.29, "learning_rate": 3.075162753887814e-05, "loss": 1.2816, "step": 13220 }, { "epoch": 2.3, "learning_rate": 3.061372451634678e-05, "loss": 1.2915, "step": 13230 }, { "epoch": 2.3, "learning_rate": 3.0503584951244668e-05, "loss": 1.2235, "step": 13240 }, { "epoch": 2.3, "learning_rate": 3.0366139484357482e-05, "loss": 1.2766, "step": 13250 }, { "epoch": 2.3, "learning_rate": 3.0228948919785782e-05, "loss": 1.2729, "step": 13260 }, { "epoch": 2.3, "learning_rate": 3.0092013759730564e-05, "loss": 1.2784, "step": 13270 }, { "epoch": 2.3, "learning_rate": 2.9955334505457845e-05, "loss": 1.2827, "step": 13280 }, { "epoch": 2.31, "learning_rate": 2.981891165729691e-05, "loss": 1.279, "step": 13290 }, { "epoch": 2.31, "learning_rate": 2.9682745714638417e-05, "loss": 1.2917, "step": 13300 }, { "epoch": 2.31, "learning_rate": 2.9546837175932596e-05, "loss": 1.2764, "step": 13310 }, { "epoch": 2.31, "learning_rate": 2.941118653868744e-05, "loss": 1.3066, "step": 13320 }, { "epoch": 2.31, "learning_rate": 2.9289321881345254e-05, "loss": 1.2945, "step": 13330 }, { "epoch": 2.32, "learning_rate": 2.9154162624127146e-05, "loss": 1.2654, "step": 13340 }, { "epoch": 2.32, "learning_rate": 2.9019262705797567e-05, "loss": 1.2715, "step": 13350 }, { "epoch": 2.32, "learning_rate": 2.888462262017233e-05, "loss": 1.3311, "step": 13360 }, { "epoch": 2.32, "learning_rate": 2.875024286011615e-05, "loss": 1.306, "step": 13370 }, { "epoch": 2.32, "learning_rate": 2.8616123917540673e-05, "loss": 1.2865, "step": 13380 }, { "epoch": 2.32, "learning_rate": 2.848226628340287e-05, "loss": 1.2676, "step": 13390 }, { "epoch": 2.33, "learning_rate": 2.8348670447703218e-05, "loss": 1.2997, "step": 13400 }, { "epoch": 2.33, "learning_rate": 2.8215336899483768e-05, "loss": 1.2863, "step": 13410 }, { "epoch": 2.33, "learning_rate": 2.808226612682646e-05, "loss": 1.2532, "step": 13420 }, { "epoch": 2.33, "learning_rate": 2.7949458616851343e-05, "loss": 1.2524, "step": 13430 }, { "epoch": 2.33, "learning_rate": 2.781691485571475e-05, "loss": 1.263, "step": 13440 }, { "epoch": 2.33, "learning_rate": 2.7684635328607477e-05, "loss": 1.2607, "step": 13450 }, { "epoch": 2.34, "learning_rate": 2.7552620519753137e-05, "loss": 1.2505, "step": 13460 }, { "epoch": 2.34, "learning_rate": 2.742087091240628e-05, "loss": 1.2687, "step": 13470 }, { "epoch": 2.34, "learning_rate": 2.7302523411710645e-05, "loss": 1.2855, "step": 13480 }, { "epoch": 2.34, "learning_rate": 2.7171279015116002e-05, "loss": 1.2412, "step": 13490 }, { "epoch": 2.34, "learning_rate": 2.7040301215970876e-05, "loss": 1.2575, "step": 13500 }, { "epoch": 2.34, "learning_rate": 2.6922649533852228e-05, "loss": 1.3001, "step": 13510 }, { "epoch": 2.35, "learning_rate": 2.6792179589961273e-05, "loss": 1.2726, "step": 13520 }, { "epoch": 2.35, "learning_rate": 2.66619776312545e-05, "loss": 1.2727, "step": 13530 }, { "epoch": 2.35, "learning_rate": 2.6532044134350288e-05, "loss": 1.2466, "step": 13540 }, { "epoch": 2.35, "learning_rate": 2.6402379574884418e-05, "loss": 1.2975, "step": 13550 }, { "epoch": 2.35, "learning_rate": 2.627298442750803e-05, "loss": 1.2745, "step": 13560 }, { "epoch": 2.36, "learning_rate": 2.614385916588613e-05, "loss": 1.2988, "step": 13570 }, { "epoch": 2.36, "learning_rate": 2.6015004262695798e-05, "loss": 1.2541, "step": 13580 }, { "epoch": 2.36, "learning_rate": 2.5886420189624407e-05, "loss": 1.2596, "step": 13590 }, { "epoch": 2.36, "learning_rate": 2.5758107417367915e-05, "loss": 1.267, "step": 13600 }, { "epoch": 2.36, "learning_rate": 2.5630066415629195e-05, "loss": 1.2716, "step": 13610 }, { "epoch": 2.36, "learning_rate": 2.550229765311628e-05, "loss": 1.2824, "step": 13620 }, { "epoch": 2.37, "learning_rate": 2.5400278969684065e-05, "loss": 1.3018, "step": 13630 }, { "epoch": 2.37, "learning_rate": 2.5273001415739562e-05, "loss": 1.2786, "step": 13640 }, { "epoch": 2.37, "learning_rate": 2.5145997408096057e-05, "loss": 1.2634, "step": 13650 }, { "epoch": 2.37, "learning_rate": 2.503192806757474e-05, "loss": 1.3144, "step": 13660 }, { "epoch": 2.37, "learning_rate": 2.4905445077906675e-05, "loss": 1.2597, "step": 13670 }, { "epoch": 2.37, "learning_rate": 2.477923698001955e-05, "loss": 1.2646, "step": 13680 }, { "epoch": 2.38, "learning_rate": 2.4653304235911823e-05, "loss": 1.2471, "step": 13690 }, { "epoch": 2.38, "learning_rate": 2.4527647306573998e-05, "loss": 1.2835, "step": 13700 }, { "epoch": 2.38, "learning_rate": 2.4402266651986927e-05, "loss": 1.2674, "step": 13710 }, { "epoch": 2.38, "learning_rate": 2.4277162731120108e-05, "loss": 1.2984, "step": 13720 }, { "epoch": 2.38, "learning_rate": 2.4152336001930054e-05, "loss": 1.2879, "step": 13730 }, { "epoch": 2.38, "learning_rate": 2.4027786921358607e-05, "loss": 1.2361, "step": 13740 }, { "epoch": 2.39, "learning_rate": 2.3903515945331155e-05, "loss": 1.3072, "step": 13750 }, { "epoch": 2.39, "learning_rate": 2.3779523528755145e-05, "loss": 1.2665, "step": 13760 }, { "epoch": 2.39, "learning_rate": 2.3655810125518284e-05, "loss": 1.3312, "step": 13770 }, { "epoch": 2.39, "learning_rate": 2.3532376188486948e-05, "loss": 1.3107, "step": 13780 }, { "epoch": 2.39, "learning_rate": 2.340922216950443e-05, "loss": 1.2497, "step": 13790 }, { "epoch": 2.4, "learning_rate": 2.328634851938949e-05, "loss": 1.3204, "step": 13800 }, { "epoch": 2.4, "learning_rate": 2.316375568793443e-05, "loss": 1.2692, "step": 13810 }, { "epoch": 2.4, "learning_rate": 2.3041444123903668e-05, "loss": 1.26, "step": 13820 }, { "epoch": 2.4, "learning_rate": 2.2919414275031914e-05, "loss": 1.2608, "step": 13830 }, { "epoch": 2.4, "learning_rate": 2.2797666588022748e-05, "loss": 1.2862, "step": 13840 }, { "epoch": 2.4, "learning_rate": 2.2676201508546792e-05, "loss": 1.2762, "step": 13850 }, { "epoch": 2.41, "learning_rate": 2.2567124933972495e-05, "loss": 1.3093, "step": 13860 }, { "epoch": 2.41, "learning_rate": 2.2470360715755768e-05, "loss": 1.2785, "step": 13870 }, { "epoch": 2.41, "learning_rate": 2.2349660894643332e-05, "loss": 1.2841, "step": 13880 }, { "epoch": 2.41, "learning_rate": 2.222924532103765e-05, "loss": 1.2676, "step": 13890 }, { "epoch": 2.41, "learning_rate": 2.2109114435733026e-05, "loss": 1.2522, "step": 13900 }, { "epoch": 2.41, "learning_rate": 2.19892686784816e-05, "loss": 1.2573, "step": 13910 }, { "epoch": 2.42, "learning_rate": 2.1869708487991812e-05, "loss": 1.2572, "step": 13920 }, { "epoch": 2.42, "learning_rate": 2.1750434301926704e-05, "loss": 1.23, "step": 13930 }, { "epoch": 2.42, "learning_rate": 2.163144655690249e-05, "loss": 1.2547, "step": 13940 }, { "epoch": 2.42, "learning_rate": 2.1512745688486646e-05, "loss": 1.29, "step": 13950 }, { "epoch": 2.42, "learning_rate": 2.139433213119664e-05, "loss": 1.2863, "step": 13960 }, { "epoch": 2.42, "learning_rate": 2.127620631849816e-05, "loss": 1.2471, "step": 13970 }, { "epoch": 2.43, "learning_rate": 2.11583686828036e-05, "loss": 1.2756, "step": 13980 }, { "epoch": 2.43, "learning_rate": 2.104081965547041e-05, "loss": 1.272, "step": 13990 }, { "epoch": 2.43, "learning_rate": 2.092355966679961e-05, "loss": 1.2714, "step": 14000 }, { "epoch": 2.43, "eval_loss": 0.7770761251449585, "eval_runtime": 62.0435, "eval_samples_per_second": 8.446, "eval_steps_per_second": 0.532, "step": 14000 }, { "epoch": 2.43, "learning_rate": 2.080658914603415e-05, "loss": 1.3074, "step": 14010 }, { "epoch": 2.43, "learning_rate": 2.068990852135728e-05, "loss": 1.2862, "step": 14020 }, { "epoch": 2.44, "learning_rate": 2.057351821989113e-05, "loss": 1.285, "step": 14030 }, { "epoch": 2.44, "learning_rate": 2.045741866769507e-05, "loss": 1.2885, "step": 14040 }, { "epoch": 2.44, "learning_rate": 2.034161028976408e-05, "loss": 1.2729, "step": 14050 }, { "epoch": 2.44, "learning_rate": 2.0226093510027388e-05, "loss": 1.2783, "step": 14060 }, { "epoch": 2.44, "learning_rate": 2.0110868751346678e-05, "loss": 1.2502, "step": 14070 }, { "epoch": 2.44, "learning_rate": 1.999593643551475e-05, "loss": 1.2787, "step": 14080 }, { "epoch": 2.45, "learning_rate": 1.9881296983253773e-05, "loss": 1.2514, "step": 14090 }, { "epoch": 2.45, "learning_rate": 1.9766950814213946e-05, "loss": 1.3199, "step": 14100 }, { "epoch": 2.45, "learning_rate": 1.966429036520796e-05, "loss": 1.321, "step": 14110 }, { "epoch": 2.45, "learning_rate": 1.9550502586578255e-05, "loss": 1.2912, "step": 14120 }, { "epoch": 2.45, "learning_rate": 1.9437009302078558e-05, "loss": 1.2402, "step": 14130 }, { "epoch": 2.45, "learning_rate": 1.9323810927163365e-05, "loss": 1.2623, "step": 14140 }, { "epoch": 2.46, "learning_rate": 1.921090787620764e-05, "loss": 1.2941, "step": 14150 }, { "epoch": 2.46, "learning_rate": 1.9098300562505266e-05, "loss": 1.2472, "step": 14160 }, { "epoch": 2.46, "learning_rate": 1.8985989398267557e-05, "loss": 1.2568, "step": 14170 }, { "epoch": 2.46, "learning_rate": 1.887397479462174e-05, "loss": 1.2569, "step": 14180 }, { "epoch": 2.46, "learning_rate": 1.8762257161609442e-05, "loss": 1.2837, "step": 14190 }, { "epoch": 2.46, "learning_rate": 1.865083690818521e-05, "loss": 1.255, "step": 14200 }, { "epoch": 2.47, "learning_rate": 1.8550813276774915e-05, "loss": 1.264, "step": 14210 }, { "epoch": 2.47, "learning_rate": 1.845103114979575e-05, "loss": 1.2722, "step": 14220 }, { "epoch": 2.47, "learning_rate": 1.8340445725584443e-05, "loss": 1.2828, "step": 14230 }, { "epoch": 2.47, "learning_rate": 1.8230159225047806e-05, "loss": 1.2776, "step": 14240 }, { "epoch": 2.47, "learning_rate": 1.8120172051901564e-05, "loss": 1.2505, "step": 14250 }, { "epoch": 2.47, "learning_rate": 1.801048460876572e-05, "loss": 1.2663, "step": 14260 }, { "epoch": 2.48, "learning_rate": 1.7901097297163094e-05, "loss": 1.2305, "step": 14270 }, { "epoch": 2.48, "learning_rate": 1.779201051751783e-05, "loss": 1.2955, "step": 14280 }, { "epoch": 2.48, "learning_rate": 1.768322466915392e-05, "loss": 1.2682, "step": 14290 }, { "epoch": 2.48, "learning_rate": 1.7574740150293778e-05, "loss": 1.2796, "step": 14300 }, { "epoch": 2.48, "learning_rate": 1.746655735805681e-05, "loss": 1.2872, "step": 14310 }, { "epoch": 2.49, "learning_rate": 1.7380228633595075e-05, "loss": 1.2768, "step": 14320 }, { "epoch": 2.49, "learning_rate": 1.7272589946494132e-05, "loss": 1.2804, "step": 14330 }, { "epoch": 2.49, "learning_rate": 1.7165254092070015e-05, "loss": 1.2582, "step": 14340 }, { "epoch": 2.49, "learning_rate": 1.7058221463237277e-05, "loss": 1.3296, "step": 14350 }, { "epoch": 2.49, "learning_rate": 1.695149245180051e-05, "loss": 1.2726, "step": 14360 }, { "epoch": 2.49, "learning_rate": 1.685569625731185e-05, "loss": 1.273, "step": 14370 }, { "epoch": 2.5, "learning_rate": 1.6749545194367288e-05, "loss": 1.3058, "step": 14380 }, { "epoch": 2.5, "learning_rate": 1.6643698878761716e-05, "loss": 1.295, "step": 14390 }, { "epoch": 2.5, "learning_rate": 1.6538157697957113e-05, "loss": 1.3008, "step": 14400 }, { "epoch": 2.5, "learning_rate": 1.643292203829839e-05, "loss": 1.2531, "step": 14410 }, { "epoch": 2.5, "learning_rate": 1.632799228501215e-05, "loss": 1.2844, "step": 14420 }, { "epoch": 2.5, "learning_rate": 1.622336882220514e-05, "loss": 1.2638, "step": 14430 }, { "epoch": 2.51, "learning_rate": 1.6119052032862915e-05, "loss": 1.2261, "step": 14440 }, { "epoch": 2.51, "learning_rate": 1.601504229884846e-05, "loss": 1.2561, "step": 14450 }, { "epoch": 2.51, "learning_rate": 1.5911340000900688e-05, "loss": 1.2693, "step": 14460 }, { "epoch": 2.51, "learning_rate": 1.580794551863316e-05, "loss": 1.267, "step": 14470 }, { "epoch": 2.51, "learning_rate": 1.5704859230532563e-05, "loss": 1.3048, "step": 14480 }, { "epoch": 2.51, "learning_rate": 1.560208151395749e-05, "loss": 1.2803, "step": 14490 }, { "epoch": 2.52, "learning_rate": 1.549961274513695e-05, "loss": 1.2607, "step": 14500 }, { "epoch": 2.52, "learning_rate": 1.5407655313570525e-05, "loss": 1.275, "step": 14510 }, { "epoch": 2.52, "learning_rate": 1.5315948706191573e-05, "loss": 1.2627, "step": 14520 }, { "epoch": 2.52, "learning_rate": 1.5214346982990213e-05, "loss": 1.2514, "step": 14530 }, { "epoch": 2.52, "learning_rate": 1.5113055626887762e-05, "loss": 1.2496, "step": 14540 }, { "epoch": 2.53, "learning_rate": 1.5012075008672267e-05, "loss": 1.3028, "step": 14550 }, { "epoch": 2.53, "learning_rate": 1.4911405497994235e-05, "loss": 1.2599, "step": 14560 }, { "epoch": 2.53, "learning_rate": 1.4811047463365357e-05, "loss": 1.2633, "step": 14570 }, { "epoch": 2.53, "learning_rate": 1.4711001272157132e-05, "loss": 1.2443, "step": 14580 }, { "epoch": 2.53, "learning_rate": 1.4611267290599528e-05, "loss": 1.3036, "step": 14590 }, { "epoch": 2.53, "learning_rate": 1.4511845883779607e-05, "loss": 1.2934, "step": 14600 }, { "epoch": 2.54, "learning_rate": 1.4412737415640232e-05, "loss": 1.2485, "step": 14610 }, { "epoch": 2.54, "learning_rate": 1.4313942248978752e-05, "loss": 1.2625, "step": 14620 }, { "epoch": 2.54, "learning_rate": 1.4235131935781309e-05, "loss": 1.272, "step": 14630 }, { "epoch": 2.54, "learning_rate": 1.4136901622367581e-05, "loss": 1.2825, "step": 14640 }, { "epoch": 2.54, "learning_rate": 1.403898562015863e-05, "loss": 1.2842, "step": 14650 }, { "epoch": 2.54, "learning_rate": 1.3941384287586633e-05, "loss": 1.2833, "step": 14660 }, { "epoch": 2.55, "learning_rate": 1.384409798193188e-05, "loss": 1.2957, "step": 14670 }, { "epoch": 2.55, "learning_rate": 1.3747127059321474e-05, "loss": 1.2412, "step": 14680 }, { "epoch": 2.55, "learning_rate": 1.3650471874727967e-05, "loss": 1.2911, "step": 14690 }, { "epoch": 2.55, "learning_rate": 1.3554132781968232e-05, "loss": 1.3062, "step": 14700 }, { "epoch": 2.55, "learning_rate": 1.3458110133701962e-05, "loss": 1.2822, "step": 14710 }, { "epoch": 2.55, "learning_rate": 1.3362404281430497e-05, "loss": 1.2376, "step": 14720 }, { "epoch": 2.56, "learning_rate": 1.3267015575495512e-05, "loss": 1.2577, "step": 14730 }, { "epoch": 2.56, "learning_rate": 1.3171944365077748e-05, "loss": 1.2595, "step": 14740 }, { "epoch": 2.56, "learning_rate": 1.307719099819571e-05, "loss": 1.2946, "step": 14750 }, { "epoch": 2.56, "learning_rate": 1.2982755821704372e-05, "loss": 1.2915, "step": 14760 }, { "epoch": 2.56, "learning_rate": 1.288863918129396e-05, "loss": 1.2599, "step": 14770 }, { "epoch": 2.57, "learning_rate": 1.2794841421488679e-05, "loss": 1.2552, "step": 14780 }, { "epoch": 2.57, "learning_rate": 1.2710696364389941e-05, "loss": 1.2647, "step": 14790 }, { "epoch": 2.57, "learning_rate": 1.2626810128213363e-05, "loss": 1.2425, "step": 14800 }, { "epoch": 2.57, "learning_rate": 1.2533907057030315e-05, "loss": 1.2571, "step": 14810 }, { "epoch": 2.57, "learning_rate": 1.244132416498789e-05, "loss": 1.297, "step": 14820 }, { "epoch": 2.57, "learning_rate": 1.2349061790995841e-05, "loss": 1.2686, "step": 14830 }, { "epoch": 2.58, "learning_rate": 1.225712027279059e-05, "loss": 1.2944, "step": 14840 }, { "epoch": 2.58, "learning_rate": 1.21654999469341e-05, "loss": 1.2394, "step": 14850 }, { "epoch": 2.58, "learning_rate": 1.2074201148812537e-05, "loss": 1.2908, "step": 14860 }, { "epoch": 2.58, "learning_rate": 1.1983224212635024e-05, "loss": 1.2721, "step": 14870 }, { "epoch": 2.58, "learning_rate": 1.1892569471432557e-05, "loss": 1.2818, "step": 14880 }, { "epoch": 2.58, "learning_rate": 1.1802237257056659e-05, "loss": 1.2811, "step": 14890 }, { "epoch": 2.59, "learning_rate": 1.171222790017823e-05, "loss": 1.2835, "step": 14900 }, { "epoch": 2.59, "learning_rate": 1.1622541730286296e-05, "loss": 1.2731, "step": 14910 }, { "epoch": 2.59, "learning_rate": 1.153317907568684e-05, "loss": 1.2946, "step": 14920 }, { "epoch": 2.59, "learning_rate": 1.1444140263501591e-05, "loss": 1.2726, "step": 14930 }, { "epoch": 2.59, "learning_rate": 1.135542561966675e-05, "loss": 1.2807, "step": 14940 }, { "epoch": 2.59, "learning_rate": 1.1275859872585081e-05, "loss": 1.2817, "step": 14950 }, { "epoch": 2.6, "learning_rate": 1.1187762042319471e-05, "loss": 1.2802, "step": 14960 }, { "epoch": 2.6, "learning_rate": 1.1108751952271423e-05, "loss": 1.2721, "step": 14970 }, { "epoch": 2.6, "learning_rate": 1.1021272099769108e-05, "loss": 1.2398, "step": 14980 }, { "epoch": 2.6, "learning_rate": 1.093411796357211e-05, "loss": 1.2574, "step": 14990 }, { "epoch": 2.6, "learning_rate": 1.0847289862717614e-05, "loss": 1.2228, "step": 15000 }, { "epoch": 2.6, "eval_loss": 0.7751675248146057, "eval_runtime": 62.0277, "eval_samples_per_second": 8.448, "eval_steps_per_second": 0.532, "step": 15000 }, { "epoch": 2.61, "learning_rate": 1.0760788115049313e-05, "loss": 1.3108, "step": 15010 }, { "epoch": 2.61, "learning_rate": 1.0674613037216263e-05, "loss": 1.2385, "step": 15020 }, { "epoch": 2.61, "learning_rate": 1.0588764944671713e-05, "loss": 1.2627, "step": 15030 }, { "epoch": 2.61, "learning_rate": 1.0503244151671942e-05, "loss": 1.2532, "step": 15040 }, { "epoch": 2.61, "learning_rate": 1.0426555537850258e-05, "loss": 1.2731, "step": 15050 }, { "epoch": 2.61, "learning_rate": 1.034165747546959e-05, "loss": 1.2618, "step": 15060 }, { "epoch": 2.62, "learning_rate": 1.0257087617197447e-05, "loss": 1.2941, "step": 15070 }, { "epoch": 2.62, "learning_rate": 1.017284627261097e-05, "loss": 1.229, "step": 15080 }, { "epoch": 2.62, "learning_rate": 1.008893375008475e-05, "loss": 1.3288, "step": 15090 }, { "epoch": 2.62, "learning_rate": 1.0005350356789733e-05, "loss": 1.2818, "step": 15100 }, { "epoch": 2.62, "learning_rate": 9.922096398692005e-06, "loss": 1.2817, "step": 15110 }, { "epoch": 2.62, "learning_rate": 9.839172180551736e-06, "loss": 1.2831, "step": 15120 }, { "epoch": 2.63, "learning_rate": 9.756578005922001e-06, "loss": 1.2657, "step": 15130 }, { "epoch": 2.63, "learning_rate": 9.674314177147791e-06, "loss": 1.2788, "step": 15140 }, { "epoch": 2.63, "learning_rate": 9.592380995364781e-06, "loss": 1.2736, "step": 15150 }, { "epoch": 2.63, "learning_rate": 9.510778760498273e-06, "loss": 1.262, "step": 15160 }, { "epoch": 2.63, "learning_rate": 9.429507771262148e-06, "loss": 1.2497, "step": 15170 }, { "epoch": 2.63, "learning_rate": 9.348568325157681e-06, "loss": 1.2698, "step": 15180 }, { "epoch": 2.64, "learning_rate": 9.267960718472513e-06, "loss": 1.2894, "step": 15190 }, { "epoch": 2.64, "learning_rate": 9.187685246279565e-06, "loss": 1.277, "step": 15200 }, { "epoch": 2.64, "learning_rate": 9.107742202435876e-06, "loss": 1.2803, "step": 15210 }, { "epoch": 2.64, "learning_rate": 9.028131879581714e-06, "loss": 1.2451, "step": 15220 }, { "epoch": 2.64, "learning_rate": 8.948854569139287e-06, "loss": 1.241, "step": 15230 }, { "epoch": 2.65, "learning_rate": 8.8699105613118e-06, "loss": 1.2558, "step": 15240 }, { "epoch": 2.65, "learning_rate": 8.79914616687264e-06, "loss": 1.2357, "step": 15250 }, { "epoch": 2.65, "learning_rate": 8.720836229152817e-06, "loss": 1.2819, "step": 15260 }, { "epoch": 2.65, "learning_rate": 8.642860428733857e-06, "loss": 1.288, "step": 15270 }, { "epoch": 2.65, "learning_rate": 8.565219051054663e-06, "loss": 1.283, "step": 15280 }, { "epoch": 2.65, "learning_rate": 8.495627977514654e-06, "loss": 1.2858, "step": 15290 }, { "epoch": 2.66, "learning_rate": 8.41862278503991e-06, "loss": 1.2931, "step": 15300 }, { "epoch": 2.66, "learning_rate": 8.341952836151169e-06, "loss": 1.2803, "step": 15310 }, { "epoch": 2.66, "learning_rate": 8.265618411507148e-06, "loss": 1.2773, "step": 15320 }, { "epoch": 2.66, "learning_rate": 8.189619790538295e-06, "loss": 1.2717, "step": 15330 }, { "epoch": 2.66, "learning_rate": 8.113957251445836e-06, "loss": 1.2474, "step": 15340 }, { "epoch": 2.66, "learning_rate": 8.038631071200698e-06, "loss": 1.2828, "step": 15350 }, { "epoch": 2.67, "learning_rate": 7.963641525542564e-06, "loss": 1.2829, "step": 15360 }, { "epoch": 2.67, "learning_rate": 7.888988888978833e-06, "loss": 1.2845, "step": 15370 }, { "epoch": 2.67, "learning_rate": 7.814673434783604e-06, "loss": 1.2726, "step": 15380 }, { "epoch": 2.67, "learning_rate": 7.740695434996626e-06, "loss": 1.2498, "step": 15390 }, { "epoch": 2.67, "learning_rate": 7.667055160422431e-06, "loss": 1.2746, "step": 15400 }, { "epoch": 2.67, "learning_rate": 7.593752880629257e-06, "loss": 1.271, "step": 15410 }, { "epoch": 2.68, "learning_rate": 7.52078886394807e-06, "loss": 1.256, "step": 15420 }, { "epoch": 2.68, "learning_rate": 7.448163377471562e-06, "loss": 1.2778, "step": 15430 }, { "epoch": 2.68, "learning_rate": 7.375876687053251e-06, "loss": 1.2898, "step": 15440 }, { "epoch": 2.68, "learning_rate": 7.303929057306414e-06, "loss": 1.2512, "step": 15450 }, { "epoch": 2.68, "learning_rate": 7.23232075160315e-06, "loss": 1.2936, "step": 15460 }, { "epoch": 2.68, "learning_rate": 7.161052032073445e-06, "loss": 1.2946, "step": 15470 }, { "epoch": 2.69, "learning_rate": 7.097200746323862e-06, "loss": 1.2764, "step": 15480 }, { "epoch": 2.69, "learning_rate": 7.026577958239167e-06, "loss": 1.2406, "step": 15490 }, { "epoch": 2.69, "learning_rate": 6.956295509471921e-06, "loss": 1.2662, "step": 15500 }, { "epoch": 2.69, "learning_rate": 6.88635365729865e-06, "loss": 1.244, "step": 15510 }, { "epoch": 2.69, "learning_rate": 6.8167526577491034e-06, "loss": 1.2808, "step": 15520 }, { "epoch": 2.7, "learning_rate": 6.747492765605312e-06, "loss": 1.3011, "step": 15530 }, { "epoch": 2.7, "learning_rate": 6.678574234400659e-06, "loss": 1.2447, "step": 15540 }, { "epoch": 2.7, "learning_rate": 6.60999731641887e-06, "loss": 1.2038, "step": 15550 }, { "epoch": 2.7, "learning_rate": 6.548570377045693e-06, "loss": 1.3007, "step": 15560 }, { "epoch": 2.7, "learning_rate": 6.480643214749759e-06, "loss": 1.2823, "step": 15570 }, { "epoch": 2.7, "learning_rate": 6.413058390224724e-06, "loss": 1.2388, "step": 15580 }, { "epoch": 2.71, "learning_rate": 6.345816150872197e-06, "loss": 1.2874, "step": 15590 }, { "epoch": 2.71, "learning_rate": 6.278916742839691e-06, "loss": 1.2493, "step": 15600 }, { "epoch": 2.71, "learning_rate": 6.2123604110197686e-06, "loss": 1.282, "step": 15610 }, { "epoch": 2.71, "learning_rate": 6.146147399049107e-06, "loss": 1.2575, "step": 15620 }, { "epoch": 2.71, "learning_rate": 6.0802779493076665e-06, "loss": 1.286, "step": 15630 }, { "epoch": 2.71, "learning_rate": 6.014752302917681e-06, "loss": 1.281, "step": 15640 }, { "epoch": 2.72, "learning_rate": 5.949570699742935e-06, "loss": 1.2855, "step": 15650 }, { "epoch": 2.72, "learning_rate": 5.8847333783877635e-06, "loss": 1.2316, "step": 15660 }, { "epoch": 2.72, "learning_rate": 5.820240576196223e-06, "loss": 1.2645, "step": 15670 }, { "epoch": 2.72, "learning_rate": 5.7560925292512335e-06, "loss": 1.2897, "step": 15680 }, { "epoch": 2.72, "learning_rate": 5.69228947237368e-06, "loss": 1.2823, "step": 15690 }, { "epoch": 2.72, "learning_rate": 5.635161880753381e-06, "loss": 1.2788, "step": 15700 }, { "epoch": 2.73, "learning_rate": 5.572014947411885e-06, "loss": 1.2814, "step": 15710 }, { "epoch": 2.73, "learning_rate": 5.515478243480177e-06, "loss": 1.2719, "step": 15720 }, { "epoch": 2.73, "learning_rate": 5.452988268147996e-06, "loss": 1.2618, "step": 15730 }, { "epoch": 2.73, "learning_rate": 5.390844392429362e-06, "loss": 1.3436, "step": 15740 }, { "epoch": 2.73, "learning_rate": 5.329046843808683e-06, "loss": 1.2658, "step": 15750 }, { "epoch": 2.74, "learning_rate": 5.267595848502604e-06, "loss": 1.2742, "step": 15760 }, { "epoch": 2.74, "learning_rate": 5.2064916314591646e-06, "loss": 1.2553, "step": 15770 }, { "epoch": 2.74, "learning_rate": 5.145734416356996e-06, "loss": 1.2679, "step": 15780 }, { "epoch": 2.74, "learning_rate": 5.085324425604499e-06, "loss": 1.2254, "step": 15790 }, { "epoch": 2.74, "learning_rate": 5.025261880338994e-06, "loss": 1.2656, "step": 15800 }, { "epoch": 2.74, "learning_rate": 4.965547000425985e-06, "loss": 1.2524, "step": 15810 }, { "epoch": 2.75, "learning_rate": 4.9061800044582385e-06, "loss": 1.2899, "step": 15820 }, { "epoch": 2.75, "learning_rate": 4.853047328501259e-06, "loss": 1.2837, "step": 15830 }, { "epoch": 2.75, "learning_rate": 4.794341909691191e-06, "loss": 1.2689, "step": 15840 }, { "epoch": 2.75, "learning_rate": 4.735985001541243e-06, "loss": 1.2794, "step": 15850 }, { "epoch": 2.75, "learning_rate": 4.677976817673235e-06, "loss": 1.2599, "step": 15860 }, { "epoch": 2.75, "learning_rate": 4.62031757043242e-06, "loss": 1.2905, "step": 15870 }, { "epoch": 2.76, "learning_rate": 4.563007470886749e-06, "loss": 1.2726, "step": 15880 }, { "epoch": 2.76, "learning_rate": 4.506046728826075e-06, "loss": 1.2318, "step": 15890 }, { "epoch": 2.76, "learning_rate": 4.449435552761372e-06, "loss": 1.2712, "step": 15900 }, { "epoch": 2.76, "learning_rate": 4.398784544532874e-06, "loss": 1.3048, "step": 15910 }, { "epoch": 2.76, "learning_rate": 4.342838113724712e-06, "loss": 1.2803, "step": 15920 }, { "epoch": 2.76, "learning_rate": 4.2872418463554055e-06, "loss": 1.3073, "step": 15930 }, { "epoch": 2.77, "learning_rate": 4.231995945941125e-06, "loss": 1.2495, "step": 15940 }, { "epoch": 2.77, "learning_rate": 4.1771006147155015e-06, "loss": 1.2985, "step": 15950 }, { "epoch": 2.77, "learning_rate": 4.122556053628868e-06, "loss": 1.2603, "step": 15960 }, { "epoch": 2.77, "learning_rate": 4.068362462347508e-06, "loss": 1.2751, "step": 15970 }, { "epoch": 2.77, "learning_rate": 4.014520039252956e-06, "loss": 1.2502, "step": 15980 }, { "epoch": 2.78, "learning_rate": 3.961028981441251e-06, "loss": 1.2988, "step": 15990 }, { "epoch": 2.78, "learning_rate": 3.907889484722238e-06, "loss": 1.2901, "step": 16000 }, { "epoch": 2.78, "eval_loss": 0.7744143605232239, "eval_runtime": 61.9699, "eval_samples_per_second": 8.456, "eval_steps_per_second": 0.533, "step": 16000 }, { "epoch": 2.78, "learning_rate": 3.855101743618806e-06, "loss": 1.278, "step": 16010 }, { "epoch": 2.78, "learning_rate": 3.8026659513662353e-06, "loss": 1.2782, "step": 16020 }, { "epoch": 2.78, "learning_rate": 3.7505822999114206e-06, "loss": 1.266, "step": 16030 }, { "epoch": 2.78, "learning_rate": 3.6988509799122494e-06, "loss": 1.2606, "step": 16040 }, { "epoch": 2.79, "learning_rate": 3.647472180736833e-06, "loss": 1.2544, "step": 16050 }, { "epoch": 2.79, "learning_rate": 3.5964460904628685e-06, "loss": 1.2632, "step": 16060 }, { "epoch": 2.79, "learning_rate": 3.5457728958768642e-06, "loss": 1.2793, "step": 16070 }, { "epoch": 2.79, "learning_rate": 3.495452782473596e-06, "loss": 1.2691, "step": 16080 }, { "epoch": 2.79, "learning_rate": 3.4454859344552835e-06, "loss": 1.2889, "step": 16090 }, { "epoch": 2.79, "learning_rate": 3.4008179643440496e-06, "loss": 1.2798, "step": 16100 }, { "epoch": 2.8, "learning_rate": 3.3515228234023422e-06, "loss": 1.2751, "step": 16110 }, { "epoch": 2.8, "learning_rate": 3.307459683817815e-06, "loss": 1.2461, "step": 16120 }, { "epoch": 2.8, "learning_rate": 3.2588369013774933e-06, "loss": 1.2488, "step": 16130 }, { "epoch": 2.8, "learning_rate": 3.210568250480306e-06, "loss": 1.2522, "step": 16140 }, { "epoch": 2.8, "learning_rate": 3.1626539078188687e-06, "loss": 1.2958, "step": 16150 }, { "epoch": 2.8, "learning_rate": 3.1150940487888804e-06, "loss": 1.2353, "step": 16160 }, { "epoch": 2.81, "learning_rate": 3.0678888474883316e-06, "loss": 1.2498, "step": 16170 }, { "epoch": 2.81, "learning_rate": 3.0210384767169975e-06, "loss": 1.2708, "step": 16180 }, { "epoch": 2.81, "learning_rate": 2.97454310797578e-06, "loss": 1.2368, "step": 16190 }, { "epoch": 2.81, "learning_rate": 2.9284029114660107e-06, "loss": 1.2822, "step": 16200 }, { "epoch": 2.81, "learning_rate": 2.8826180560888927e-06, "loss": 1.2863, "step": 16210 }, { "epoch": 2.82, "learning_rate": 2.837188709444882e-06, "loss": 1.2477, "step": 16220 }, { "epoch": 2.82, "learning_rate": 2.792115037833032e-06, "loss": 1.2577, "step": 16230 }, { "epoch": 2.82, "learning_rate": 2.7473972062503905e-06, "loss": 1.2445, "step": 16240 }, { "epoch": 2.82, "learning_rate": 2.707455536371439e-06, "loss": 1.2237, "step": 16250 }, { "epoch": 2.82, "learning_rate": 2.6634142507455885e-06, "loss": 1.2587, "step": 16260 }, { "epoch": 2.82, "learning_rate": 2.624081735149897e-06, "loss": 1.2759, "step": 16270 }, { "epoch": 2.83, "learning_rate": 2.580717577477021e-06, "loss": 1.2786, "step": 16280 }, { "epoch": 2.83, "learning_rate": 2.5377100336767545e-06, "loss": 1.272, "step": 16290 }, { "epoch": 2.83, "learning_rate": 2.495059261182886e-06, "loss": 1.2404, "step": 16300 }, { "epoch": 2.83, "learning_rate": 2.452765416123215e-06, "loss": 1.2751, "step": 16310 }, { "epoch": 2.83, "learning_rate": 2.4108286533189527e-06, "loss": 1.2624, "step": 16320 }, { "epoch": 2.83, "learning_rate": 2.3692491262841785e-06, "loss": 1.2965, "step": 16330 }, { "epoch": 2.84, "learning_rate": 2.3280269872252847e-06, "loss": 1.2947, "step": 16340 }, { "epoch": 2.84, "learning_rate": 2.287162387040365e-06, "loss": 1.2839, "step": 16350 }, { "epoch": 2.84, "learning_rate": 2.2506900662738086e-06, "loss": 1.2637, "step": 16360 }, { "epoch": 2.84, "learning_rate": 2.210505200985846e-06, "loss": 1.2615, "step": 16370 }, { "epoch": 2.84, "learning_rate": 2.1706783047731326e-06, "loss": 1.2743, "step": 16380 }, { "epoch": 2.84, "learning_rate": 2.1312095234263807e-06, "loss": 1.2368, "step": 16390 }, { "epoch": 2.85, "learning_rate": 2.0920990014253185e-06, "loss": 1.2542, "step": 16400 }, { "epoch": 2.85, "learning_rate": 2.0533468819382893e-06, "loss": 1.2367, "step": 16410 }, { "epoch": 2.85, "learning_rate": 2.014953306821632e-06, "loss": 1.2418, "step": 16420 }, { "epoch": 2.85, "learning_rate": 1.976918416619211e-06, "loss": 1.2555, "step": 16430 }, { "epoch": 2.85, "learning_rate": 1.939242350561854e-06, "loss": 1.2583, "step": 16440 }, { "epoch": 2.86, "learning_rate": 1.9019252465669046e-06, "loss": 1.2734, "step": 16450 }, { "epoch": 2.86, "learning_rate": 1.8649672412376916e-06, "loss": 1.2125, "step": 16460 }, { "epoch": 2.86, "learning_rate": 1.8283684698629843e-06, "loss": 1.2853, "step": 16470 }, { "epoch": 2.86, "learning_rate": 1.7921290664165923e-06, "loss": 1.2621, "step": 16480 }, { "epoch": 2.86, "learning_rate": 1.756249163556778e-06, "loss": 1.2599, "step": 16490 }, { "epoch": 2.86, "learning_rate": 1.7207288926258225e-06, "loss": 1.2865, "step": 16500 }, { "epoch": 2.87, "learning_rate": 1.6855683836495383e-06, "loss": 1.238, "step": 16510 }, { "epoch": 2.87, "learning_rate": 1.6507677653367915e-06, "loss": 1.2989, "step": 16520 }, { "epoch": 2.87, "learning_rate": 1.6163271650790456e-06, "loss": 1.2784, "step": 16530 }, { "epoch": 2.87, "learning_rate": 1.5822467089498304e-06, "loss": 1.2912, "step": 16540 }, { "epoch": 2.87, "learning_rate": 1.5485265217043854e-06, "loss": 1.2584, "step": 16550 }, { "epoch": 2.87, "learning_rate": 1.5184864851265469e-06, "loss": 1.2535, "step": 16560 }, { "epoch": 2.88, "learning_rate": 1.4854511477372047e-06, "loss": 1.3007, "step": 16570 }, { "epoch": 2.88, "learning_rate": 1.456027673515925e-06, "loss": 1.2791, "step": 16580 }, { "epoch": 2.88, "learning_rate": 1.4236776225376336e-06, "loss": 1.2686, "step": 16590 }, { "epoch": 2.88, "learning_rate": 1.3916884209024705e-06, "loss": 1.2315, "step": 16600 }, { "epoch": 2.88, "learning_rate": 1.3600601857104101e-06, "loss": 1.2747, "step": 16610 }, { "epoch": 2.88, "learning_rate": 1.3287930327400167e-06, "loss": 1.2595, "step": 16620 }, { "epoch": 2.89, "learning_rate": 1.2978870764481232e-06, "loss": 1.2505, "step": 16630 }, { "epoch": 2.89, "learning_rate": 1.2673424299693204e-06, "loss": 1.2814, "step": 16640 }, { "epoch": 2.89, "learning_rate": 1.2371592051156345e-06, "loss": 1.2427, "step": 16650 }, { "epoch": 2.89, "learning_rate": 1.2073375123760168e-06, "loss": 1.2477, "step": 16660 }, { "epoch": 2.89, "learning_rate": 1.1778774609160436e-06, "loss": 1.2516, "step": 16670 }, { "epoch": 2.89, "learning_rate": 1.1487791585774176e-06, "loss": 1.2804, "step": 16680 }, { "epoch": 2.9, "learning_rate": 1.1200427118776224e-06, "loss": 1.2826, "step": 16690 }, { "epoch": 2.9, "learning_rate": 1.0916682260095789e-06, "loss": 1.2703, "step": 16700 }, { "epoch": 2.9, "learning_rate": 1.063655804841146e-06, "loss": 1.2501, "step": 16710 }, { "epoch": 2.9, "learning_rate": 1.0360055509148535e-06, "loss": 1.2323, "step": 16720 }, { "epoch": 2.9, "learning_rate": 1.008717565447448e-06, "loss": 1.2773, "step": 16730 }, { "epoch": 2.91, "learning_rate": 9.871480775350161e-07, "loss": 1.3079, "step": 16740 }, { "epoch": 2.91, "learning_rate": 9.605124261266474e-07, "loss": 1.2767, "step": 16750 }, { "epoch": 2.91, "learning_rate": 9.34239319527963e-07, "loss": 1.2475, "step": 16760 }, { "epoch": 2.91, "learning_rate": 9.083288539145196e-07, "loss": 1.2586, "step": 16770 }, { "epoch": 2.91, "learning_rate": 8.827811241344131e-07, "loss": 1.2465, "step": 16780 }, { "epoch": 2.91, "learning_rate": 8.575962237078572e-07, "loss": 1.2841, "step": 16790 }, { "epoch": 2.92, "learning_rate": 8.327742448269394e-07, "loss": 1.2984, "step": 16800 }, { "epoch": 2.92, "learning_rate": 8.083152783552095e-07, "loss": 1.2587, "step": 16810 }, { "epoch": 2.92, "learning_rate": 7.842194138273584e-07, "loss": 1.2659, "step": 16820 }, { "epoch": 2.92, "learning_rate": 7.628436608436595e-07, "loss": 1.2654, "step": 16830 }, { "epoch": 2.92, "learning_rate": 7.3943793191662e-07, "loss": 1.2415, "step": 16840 }, { "epoch": 2.92, "learning_rate": 7.163955570664738e-07, "loss": 1.2256, "step": 16850 }, { "epoch": 2.93, "learning_rate": 6.937166206423485e-07, "loss": 1.2541, "step": 16860 }, { "epoch": 2.93, "learning_rate": 6.714012056629693e-07, "loss": 1.2869, "step": 16870 }, { "epoch": 2.93, "learning_rate": 6.494493938163038e-07, "loss": 1.2867, "step": 16880 }, { "epoch": 2.93, "learning_rate": 6.278612654593729e-07, "loss": 1.242, "step": 16890 }, { "epoch": 2.93, "learning_rate": 6.066368996178517e-07, "loss": 1.2183, "step": 16900 }, { "epoch": 2.93, "learning_rate": 5.85776373985858e-07, "loss": 1.2517, "step": 16910 }, { "epoch": 2.94, "learning_rate": 5.652797649255969e-07, "loss": 1.2363, "step": 16920 }, { "epoch": 2.94, "learning_rate": 5.4514714746714e-07, "loss": 1.2635, "step": 16930 }, { "epoch": 2.94, "learning_rate": 5.253785953081125e-07, "loss": 1.2782, "step": 16940 }, { "epoch": 2.94, "learning_rate": 5.059741808134621e-07, "loss": 1.3006, "step": 16950 }, { "epoch": 2.94, "learning_rate": 4.869339750151469e-07, "loss": 1.2425, "step": 16960 }, { "epoch": 2.95, "learning_rate": 4.682580476119247e-07, "loss": 1.276, "step": 16970 }, { "epoch": 2.95, "learning_rate": 4.499464669690423e-07, "loss": 1.2827, "step": 16980 }, { "epoch": 2.95, "learning_rate": 4.3199930011802446e-07, "loss": 1.3223, "step": 16990 }, { "epoch": 2.95, "learning_rate": 4.1441661275645195e-07, "loss": 1.2453, "step": 17000 }, { "epoch": 2.95, "eval_loss": 0.7738975882530212, "eval_runtime": 61.8545, "eval_samples_per_second": 8.471, "eval_steps_per_second": 0.534, "step": 17000 }, { "epoch": 2.95, "learning_rate": 3.971984692476394e-07, "loss": 1.2683, "step": 17010 }, { "epoch": 2.95, "learning_rate": 3.820138772047788e-07, "loss": 1.2824, "step": 17020 }, { "epoch": 2.96, "learning_rate": 3.6548853955771235e-07, "loss": 1.271, "step": 17030 }, { "epoch": 2.96, "learning_rate": 3.493279248699355e-07, "loss": 1.298, "step": 17040 }, { "epoch": 2.96, "learning_rate": 3.3353209229913806e-07, "loss": 1.2695, "step": 17050 }, { "epoch": 2.96, "learning_rate": 3.181010996677003e-07, "loss": 1.2698, "step": 17060 }, { "epoch": 2.96, "learning_rate": 3.030350034624374e-07, "loss": 1.266, "step": 17070 }, { "epoch": 2.96, "learning_rate": 2.88333858834422e-07, "loss": 1.3003, "step": 17080 }, { "epoch": 2.97, "learning_rate": 2.7399771959880637e-07, "loss": 1.2441, "step": 17090 }, { "epoch": 2.97, "learning_rate": 2.600266382345895e-07, "loss": 1.2973, "step": 17100 }, { "epoch": 2.97, "learning_rate": 2.4642066588441705e-07, "loss": 1.2442, "step": 17110 }, { "epoch": 2.97, "learning_rate": 2.3317985235443707e-07, "loss": 1.2901, "step": 17120 }, { "epoch": 2.97, "learning_rate": 2.215753710563373e-07, "loss": 1.2668, "step": 17130 }, { "epoch": 2.97, "learning_rate": 2.0902849171310356e-07, "loss": 1.2799, "step": 17140 }, { "epoch": 2.98, "learning_rate": 1.968469080681823e-07, "loss": 1.2652, "step": 17150 }, { "epoch": 2.98, "learning_rate": 1.8619584749273167e-07, "loss": 1.2909, "step": 17160 }, { "epoch": 2.98, "learning_rate": 1.747084474202576e-07, "loss": 1.2791, "step": 17170 }, { "epoch": 2.98, "learning_rate": 1.6358646867835615e-07, "loss": 1.269, "step": 17180 }, { "epoch": 2.98, "learning_rate": 1.5282995198021565e-07, "loss": 1.2189, "step": 17190 }, { "epoch": 2.99, "learning_rate": 1.424389367012613e-07, "loss": 1.2991, "step": 17200 }, { "epoch": 2.99, "learning_rate": 1.3241346087892182e-07, "loss": 1.2893, "step": 17210 }, { "epoch": 2.99, "learning_rate": 1.2275356121254077e-07, "loss": 1.2596, "step": 17220 }, { "epoch": 2.99, "learning_rate": 1.1345927306323224e-07, "loss": 1.2393, "step": 17230 }, { "epoch": 2.99, "learning_rate": 1.0453063045375855e-07, "loss": 1.2814, "step": 17240 }, { "epoch": 2.99, "learning_rate": 9.596766606836393e-08, "loss": 1.2632, "step": 17250 }, { "epoch": 3.0, "learning_rate": 8.777041125273e-08, "loss": 1.2705, "step": 17260 }, { "epoch": 3.0, "learning_rate": 7.993889601378701e-08, "loss": 1.2207, "step": 17270 }, { "epoch": 3.0, "learning_rate": 7.24731490196584e-08, "loss": 1.2858, "step": 17280 }, { "epoch": 3.0, "step": 17285, "total_flos": 9.82692376847319e+16, "train_loss": 1.5514631569575188, "train_runtime": 557290.3071, "train_samples_per_second": 3.97, "train_steps_per_second": 0.031 } ], "max_steps": 17285, "num_train_epochs": 4, "total_flos": 9.82692376847319e+16, "trial_name": null, "trial_params": null }