{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 24000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1e-06, "loss": 1.3946, "step": 20 }, { "epoch": 0.01, "eval_accuracy": 0.24016666666666667, "eval_loss": 1.3855726718902588, "eval_runtime": 15.834, "eval_samples_per_second": 1515.727, "eval_steps_per_second": 4.737, "step": 20 }, { "epoch": 0.02, "learning_rate": 1e-06, "loss": 1.3801, "step": 40 }, { "epoch": 0.02, "eval_accuracy": 0.2665416666666667, "eval_loss": 1.3742554187774658, "eval_runtime": 15.8222, "eval_samples_per_second": 1516.855, "eval_steps_per_second": 4.74, "step": 40 }, { "epoch": 0.03, "learning_rate": 1e-06, "loss": 1.37, "step": 60 }, { "epoch": 0.03, "eval_accuracy": 0.299875, "eval_loss": 1.362697958946228, "eval_runtime": 15.4839, "eval_samples_per_second": 1549.994, "eval_steps_per_second": 4.844, "step": 60 }, { "epoch": 0.03, "learning_rate": 1e-06, "loss": 1.3559, "step": 80 }, { "epoch": 0.03, "eval_accuracy": 0.3585, "eval_loss": 1.3499468564987183, "eval_runtime": 15.9453, "eval_samples_per_second": 1505.143, "eval_steps_per_second": 4.704, "step": 80 }, { "epoch": 0.04, "learning_rate": 1e-06, "loss": 1.3425, "step": 100 }, { "epoch": 0.04, "eval_accuracy": 0.43916666666666665, "eval_loss": 1.3347731828689575, "eval_runtime": 15.6185, "eval_samples_per_second": 1536.635, "eval_steps_per_second": 4.802, "step": 100 }, { "epoch": 0.05, "learning_rate": 1e-06, "loss": 1.326, "step": 120 }, { "epoch": 0.05, "eval_accuracy": 0.5287083333333333, "eval_loss": 1.3159666061401367, "eval_runtime": 16.2205, "eval_samples_per_second": 1479.607, "eval_steps_per_second": 4.624, "step": 120 }, { "epoch": 0.06, "learning_rate": 1e-06, "loss": 1.3043, "step": 140 }, { "epoch": 0.06, "eval_accuracy": 0.58625, "eval_loss": 1.2926729917526245, "eval_runtime": 15.8692, "eval_samples_per_second": 1512.368, "eval_steps_per_second": 4.726, "step": 140 }, { "epoch": 0.07, "learning_rate": 1e-06, "loss": 1.283, "step": 160 }, { "epoch": 0.07, "eval_accuracy": 0.6705833333333333, "eval_loss": 1.2648732662200928, "eval_runtime": 15.5702, "eval_samples_per_second": 1541.406, "eval_steps_per_second": 4.817, "step": 160 }, { "epoch": 0.07, "learning_rate": 1e-06, "loss": 1.2501, "step": 180 }, { "epoch": 0.07, "eval_accuracy": 0.7289583333333334, "eval_loss": 1.2298475503921509, "eval_runtime": 15.3558, "eval_samples_per_second": 1562.932, "eval_steps_per_second": 4.884, "step": 180 }, { "epoch": 0.08, "learning_rate": 1e-06, "loss": 1.2218, "step": 200 }, { "epoch": 0.08, "eval_accuracy": 0.7775, "eval_loss": 1.1907247304916382, "eval_runtime": 15.6618, "eval_samples_per_second": 1532.387, "eval_steps_per_second": 4.789, "step": 200 }, { "epoch": 0.09, "learning_rate": 1e-06, "loss": 1.1819, "step": 220 }, { "epoch": 0.09, "eval_accuracy": 0.8095833333333333, "eval_loss": 1.1489461660385132, "eval_runtime": 16.7595, "eval_samples_per_second": 1432.027, "eval_steps_per_second": 4.475, "step": 220 }, { "epoch": 0.1, "learning_rate": 1e-06, "loss": 1.1375, "step": 240 }, { "epoch": 0.1, "eval_accuracy": 0.8254583333333333, "eval_loss": 1.1064685583114624, "eval_runtime": 15.9246, "eval_samples_per_second": 1507.098, "eval_steps_per_second": 4.71, "step": 240 }, { "epoch": 0.11, "learning_rate": 1e-06, "loss": 1.0918, "step": 260 }, { "epoch": 0.11, "eval_accuracy": 0.83575, "eval_loss": 1.0633411407470703, "eval_runtime": 15.4403, "eval_samples_per_second": 1554.376, "eval_steps_per_second": 4.857, "step": 260 }, { "epoch": 0.12, "learning_rate": 1e-06, "loss": 1.0493, "step": 280 }, { "epoch": 0.12, "eval_accuracy": 0.8455833333333334, "eval_loss": 1.0193486213684082, "eval_runtime": 15.5383, "eval_samples_per_second": 1544.57, "eval_steps_per_second": 4.827, "step": 280 }, { "epoch": 0.12, "learning_rate": 1e-06, "loss": 1.0137, "step": 300 }, { "epoch": 0.12, "eval_accuracy": 0.8547916666666666, "eval_loss": 0.9759964942932129, "eval_runtime": 15.7415, "eval_samples_per_second": 1524.631, "eval_steps_per_second": 4.764, "step": 300 }, { "epoch": 0.13, "learning_rate": 1e-06, "loss": 0.9799, "step": 320 }, { "epoch": 0.13, "eval_accuracy": 0.856875, "eval_loss": 0.9349467754364014, "eval_runtime": 15.9509, "eval_samples_per_second": 1504.613, "eval_steps_per_second": 4.702, "step": 320 }, { "epoch": 0.14, "learning_rate": 1e-06, "loss": 0.9269, "step": 340 }, { "epoch": 0.14, "eval_accuracy": 0.862125, "eval_loss": 0.8932501077651978, "eval_runtime": 15.5524, "eval_samples_per_second": 1543.167, "eval_steps_per_second": 4.822, "step": 340 }, { "epoch": 0.15, "learning_rate": 1e-06, "loss": 0.8982, "step": 360 }, { "epoch": 0.15, "eval_accuracy": 0.866125, "eval_loss": 0.8538751006126404, "eval_runtime": 16.1674, "eval_samples_per_second": 1484.473, "eval_steps_per_second": 4.639, "step": 360 }, { "epoch": 0.16, "learning_rate": 1e-06, "loss": 0.854, "step": 380 }, { "epoch": 0.16, "eval_accuracy": 0.8698333333333333, "eval_loss": 0.8160046935081482, "eval_runtime": 15.675, "eval_samples_per_second": 1531.105, "eval_steps_per_second": 4.785, "step": 380 }, { "epoch": 0.17, "learning_rate": 1e-06, "loss": 0.8087, "step": 400 }, { "epoch": 0.17, "eval_accuracy": 0.8705416666666667, "eval_loss": 0.7807948589324951, "eval_runtime": 15.6603, "eval_samples_per_second": 1532.542, "eval_steps_per_second": 4.789, "step": 400 }, { "epoch": 0.17, "learning_rate": 1e-06, "loss": 0.7941, "step": 420 }, { "epoch": 0.17, "eval_accuracy": 0.8717916666666666, "eval_loss": 0.7479580044746399, "eval_runtime": 15.5761, "eval_samples_per_second": 1540.822, "eval_steps_per_second": 4.815, "step": 420 }, { "epoch": 0.18, "learning_rate": 1e-06, "loss": 0.7443, "step": 440 }, { "epoch": 0.18, "eval_accuracy": 0.872, "eval_loss": 0.7175572514533997, "eval_runtime": 15.9105, "eval_samples_per_second": 1508.436, "eval_steps_per_second": 4.714, "step": 440 }, { "epoch": 0.19, "learning_rate": 1e-06, "loss": 0.7233, "step": 460 }, { "epoch": 0.19, "eval_accuracy": 0.87075, "eval_loss": 0.6889370679855347, "eval_runtime": 16.1633, "eval_samples_per_second": 1484.847, "eval_steps_per_second": 4.64, "step": 460 }, { "epoch": 0.2, "learning_rate": 1e-06, "loss": 0.6985, "step": 480 }, { "epoch": 0.2, "eval_accuracy": 0.8758333333333334, "eval_loss": 0.6611708402633667, "eval_runtime": 15.5607, "eval_samples_per_second": 1542.351, "eval_steps_per_second": 4.82, "step": 480 }, { "epoch": 0.21, "learning_rate": 1e-06, "loss": 0.6754, "step": 500 }, { "epoch": 0.21, "eval_accuracy": 0.8768333333333334, "eval_loss": 0.635998547077179, "eval_runtime": 15.8738, "eval_samples_per_second": 1511.926, "eval_steps_per_second": 4.725, "step": 500 }, { "epoch": 0.22, "learning_rate": 1e-06, "loss": 0.6536, "step": 520 }, { "epoch": 0.22, "eval_accuracy": 0.8788333333333334, "eval_loss": 0.6131625175476074, "eval_runtime": 15.5202, "eval_samples_per_second": 1546.371, "eval_steps_per_second": 4.832, "step": 520 }, { "epoch": 0.23, "learning_rate": 1e-06, "loss": 0.614, "step": 540 }, { "epoch": 0.23, "eval_accuracy": 0.8811666666666667, "eval_loss": 0.5912534594535828, "eval_runtime": 16.1599, "eval_samples_per_second": 1485.154, "eval_steps_per_second": 4.641, "step": 540 }, { "epoch": 0.23, "learning_rate": 1e-06, "loss": 0.5963, "step": 560 }, { "epoch": 0.23, "eval_accuracy": 0.8809583333333333, "eval_loss": 0.5706831216812134, "eval_runtime": 15.5035, "eval_samples_per_second": 1548.034, "eval_steps_per_second": 4.838, "step": 560 }, { "epoch": 0.24, "learning_rate": 1e-06, "loss": 0.5894, "step": 580 }, { "epoch": 0.24, "eval_accuracy": 0.8815416666666667, "eval_loss": 0.5526705980300903, "eval_runtime": 15.515, "eval_samples_per_second": 1546.894, "eval_steps_per_second": 4.834, "step": 580 }, { "epoch": 0.25, "learning_rate": 1e-06, "loss": 0.5504, "step": 600 }, { "epoch": 0.25, "eval_accuracy": 0.8835416666666667, "eval_loss": 0.5349943041801453, "eval_runtime": 15.5986, "eval_samples_per_second": 1538.598, "eval_steps_per_second": 4.808, "step": 600 }, { "epoch": 0.26, "learning_rate": 1e-06, "loss": 0.5434, "step": 620 }, { "epoch": 0.26, "eval_accuracy": 0.8850833333333333, "eval_loss": 0.5199493169784546, "eval_runtime": 15.5783, "eval_samples_per_second": 1540.605, "eval_steps_per_second": 4.814, "step": 620 }, { "epoch": 0.27, "learning_rate": 1e-06, "loss": 0.5325, "step": 640 }, { "epoch": 0.27, "eval_accuracy": 0.8842083333333334, "eval_loss": 0.5056445598602295, "eval_runtime": 16.1601, "eval_samples_per_second": 1485.138, "eval_steps_per_second": 4.641, "step": 640 }, { "epoch": 0.28, "learning_rate": 1e-06, "loss": 0.4894, "step": 660 }, { "epoch": 0.28, "eval_accuracy": 0.8850416666666666, "eval_loss": 0.4912012815475464, "eval_runtime": 15.5535, "eval_samples_per_second": 1543.06, "eval_steps_per_second": 4.822, "step": 660 }, { "epoch": 0.28, "learning_rate": 1e-06, "loss": 0.4955, "step": 680 }, { "epoch": 0.28, "eval_accuracy": 0.8854166666666666, "eval_loss": 0.4787778854370117, "eval_runtime": 15.9742, "eval_samples_per_second": 1502.42, "eval_steps_per_second": 4.695, "step": 680 }, { "epoch": 0.29, "learning_rate": 1e-06, "loss": 0.5002, "step": 700 }, { "epoch": 0.29, "eval_accuracy": 0.8854166666666666, "eval_loss": 0.4684358835220337, "eval_runtime": 15.4896, "eval_samples_per_second": 1549.429, "eval_steps_per_second": 4.842, "step": 700 }, { "epoch": 0.3, "learning_rate": 1e-06, "loss": 0.4621, "step": 720 }, { "epoch": 0.3, "eval_accuracy": 0.8866666666666667, "eval_loss": 0.4581121802330017, "eval_runtime": 15.7197, "eval_samples_per_second": 1526.742, "eval_steps_per_second": 4.771, "step": 720 }, { "epoch": 0.31, "learning_rate": 1e-06, "loss": 0.4873, "step": 740 }, { "epoch": 0.31, "eval_accuracy": 0.8860833333333333, "eval_loss": 0.4509838819503784, "eval_runtime": 15.7293, "eval_samples_per_second": 1525.818, "eval_steps_per_second": 4.768, "step": 740 }, { "epoch": 0.32, "learning_rate": 1e-06, "loss": 0.4575, "step": 760 }, { "epoch": 0.32, "eval_accuracy": 0.88875, "eval_loss": 0.4410632252693176, "eval_runtime": 15.4276, "eval_samples_per_second": 1555.649, "eval_steps_per_second": 4.861, "step": 760 }, { "epoch": 0.33, "learning_rate": 1e-06, "loss": 0.4365, "step": 780 }, { "epoch": 0.33, "eval_accuracy": 0.8889583333333333, "eval_loss": 0.4337460398674011, "eval_runtime": 15.9845, "eval_samples_per_second": 1501.455, "eval_steps_per_second": 4.692, "step": 780 }, { "epoch": 0.33, "learning_rate": 1e-06, "loss": 0.4306, "step": 800 }, { "epoch": 0.33, "eval_accuracy": 0.8873333333333333, "eval_loss": 0.42743387818336487, "eval_runtime": 15.705, "eval_samples_per_second": 1528.179, "eval_steps_per_second": 4.776, "step": 800 }, { "epoch": 0.34, "learning_rate": 1e-06, "loss": 0.4367, "step": 820 }, { "epoch": 0.34, "eval_accuracy": 0.889625, "eval_loss": 0.41997990012168884, "eval_runtime": 15.2578, "eval_samples_per_second": 1572.97, "eval_steps_per_second": 4.916, "step": 820 }, { "epoch": 0.35, "learning_rate": 1e-06, "loss": 0.4643, "step": 840 }, { "epoch": 0.35, "eval_accuracy": 0.8909583333333333, "eval_loss": 0.41470062732696533, "eval_runtime": 15.4309, "eval_samples_per_second": 1555.323, "eval_steps_per_second": 4.86, "step": 840 }, { "epoch": 0.36, "learning_rate": 1e-06, "loss": 0.4483, "step": 860 }, { "epoch": 0.36, "eval_accuracy": 0.8922916666666667, "eval_loss": 0.40827029943466187, "eval_runtime": 15.9503, "eval_samples_per_second": 1504.673, "eval_steps_per_second": 4.702, "step": 860 }, { "epoch": 0.37, "learning_rate": 1e-06, "loss": 0.3937, "step": 880 }, { "epoch": 0.37, "eval_accuracy": 0.8928333333333334, "eval_loss": 0.40155351161956787, "eval_runtime": 15.6448, "eval_samples_per_second": 1534.053, "eval_steps_per_second": 4.794, "step": 880 }, { "epoch": 0.38, "learning_rate": 1e-06, "loss": 0.4074, "step": 900 }, { "epoch": 0.38, "eval_accuracy": 0.89125, "eval_loss": 0.39755621552467346, "eval_runtime": 16.1245, "eval_samples_per_second": 1488.416, "eval_steps_per_second": 4.651, "step": 900 }, { "epoch": 0.38, "learning_rate": 1e-06, "loss": 0.4153, "step": 920 }, { "epoch": 0.38, "eval_accuracy": 0.891125, "eval_loss": 0.39380690455436707, "eval_runtime": 15.6816, "eval_samples_per_second": 1530.453, "eval_steps_per_second": 4.783, "step": 920 }, { "epoch": 0.39, "learning_rate": 1e-06, "loss": 0.3934, "step": 940 }, { "epoch": 0.39, "eval_accuracy": 0.8914166666666666, "eval_loss": 0.3885686695575714, "eval_runtime": 15.0444, "eval_samples_per_second": 1595.283, "eval_steps_per_second": 4.985, "step": 940 }, { "epoch": 0.4, "learning_rate": 1e-06, "loss": 0.4486, "step": 960 }, { "epoch": 0.4, "eval_accuracy": 0.8915, "eval_loss": 0.3852112293243408, "eval_runtime": 15.3141, "eval_samples_per_second": 1567.186, "eval_steps_per_second": 4.897, "step": 960 }, { "epoch": 0.41, "learning_rate": 1e-06, "loss": 0.3908, "step": 980 }, { "epoch": 0.41, "eval_accuracy": 0.8928333333333334, "eval_loss": 0.3800646662712097, "eval_runtime": 15.4314, "eval_samples_per_second": 1555.271, "eval_steps_per_second": 4.86, "step": 980 }, { "epoch": 0.42, "learning_rate": 1e-06, "loss": 0.3888, "step": 1000 }, { "epoch": 0.42, "eval_accuracy": 0.894375, "eval_loss": 0.3761395514011383, "eval_runtime": 15.8671, "eval_samples_per_second": 1512.567, "eval_steps_per_second": 4.727, "step": 1000 }, { "epoch": 0.42, "learning_rate": 1e-06, "loss": 0.3801, "step": 1020 }, { "epoch": 0.42, "eval_accuracy": 0.8949583333333333, "eval_loss": 0.3725646734237671, "eval_runtime": 15.3688, "eval_samples_per_second": 1561.603, "eval_steps_per_second": 4.88, "step": 1020 }, { "epoch": 0.43, "learning_rate": 1e-06, "loss": 0.387, "step": 1040 }, { "epoch": 0.43, "eval_accuracy": 0.894125, "eval_loss": 0.3705978989601135, "eval_runtime": 15.7377, "eval_samples_per_second": 1525.005, "eval_steps_per_second": 4.766, "step": 1040 }, { "epoch": 0.44, "learning_rate": 1e-06, "loss": 0.4346, "step": 1060 }, { "epoch": 0.44, "eval_accuracy": 0.8947916666666667, "eval_loss": 0.367171049118042, "eval_runtime": 14.9848, "eval_samples_per_second": 1601.62, "eval_steps_per_second": 5.005, "step": 1060 }, { "epoch": 0.45, "learning_rate": 1e-06, "loss": 0.3806, "step": 1080 }, { "epoch": 0.45, "eval_accuracy": 0.8936666666666667, "eval_loss": 0.3656676709651947, "eval_runtime": 15.8929, "eval_samples_per_second": 1510.112, "eval_steps_per_second": 4.719, "step": 1080 }, { "epoch": 0.46, "learning_rate": 1e-06, "loss": 0.3633, "step": 1100 }, { "epoch": 0.46, "eval_accuracy": 0.8948333333333334, "eval_loss": 0.3619757294654846, "eval_runtime": 15.254, "eval_samples_per_second": 1573.363, "eval_steps_per_second": 4.917, "step": 1100 }, { "epoch": 0.47, "learning_rate": 1e-06, "loss": 0.3429, "step": 1120 }, { "epoch": 0.47, "eval_accuracy": 0.8954583333333334, "eval_loss": 0.35918018221855164, "eval_runtime": 15.6719, "eval_samples_per_second": 1531.4, "eval_steps_per_second": 4.786, "step": 1120 }, { "epoch": 0.47, "learning_rate": 1e-06, "loss": 0.3681, "step": 1140 }, { "epoch": 0.47, "eval_accuracy": 0.896625, "eval_loss": 0.3563116788864136, "eval_runtime": 16.068, "eval_samples_per_second": 1493.649, "eval_steps_per_second": 4.668, "step": 1140 }, { "epoch": 0.48, "learning_rate": 1e-06, "loss": 0.3624, "step": 1160 }, { "epoch": 0.48, "eval_accuracy": 0.8964583333333334, "eval_loss": 0.35381361842155457, "eval_runtime": 15.992, "eval_samples_per_second": 1500.748, "eval_steps_per_second": 4.69, "step": 1160 }, { "epoch": 0.49, "learning_rate": 1e-06, "loss": 0.3808, "step": 1180 }, { "epoch": 0.49, "eval_accuracy": 0.895625, "eval_loss": 0.35254761576652527, "eval_runtime": 16.151, "eval_samples_per_second": 1485.976, "eval_steps_per_second": 4.644, "step": 1180 }, { "epoch": 0.5, "learning_rate": 1e-06, "loss": 0.3626, "step": 1200 }, { "epoch": 0.5, "eval_accuracy": 0.897, "eval_loss": 0.34913721680641174, "eval_runtime": 15.769, "eval_samples_per_second": 1521.97, "eval_steps_per_second": 4.756, "step": 1200 }, { "epoch": 0.51, "learning_rate": 1e-06, "loss": 0.3278, "step": 1220 }, { "epoch": 0.51, "eval_accuracy": 0.897375, "eval_loss": 0.3466154932975769, "eval_runtime": 16.1468, "eval_samples_per_second": 1486.363, "eval_steps_per_second": 4.645, "step": 1220 }, { "epoch": 0.52, "learning_rate": 1e-06, "loss": 0.3463, "step": 1240 }, { "epoch": 0.52, "eval_accuracy": 0.8966666666666666, "eval_loss": 0.34570518136024475, "eval_runtime": 15.3513, "eval_samples_per_second": 1563.389, "eval_steps_per_second": 4.886, "step": 1240 }, { "epoch": 0.53, "learning_rate": 1e-06, "loss": 0.371, "step": 1260 }, { "epoch": 0.53, "eval_accuracy": 0.8975416666666667, "eval_loss": 0.3436819016933441, "eval_runtime": 15.6461, "eval_samples_per_second": 1533.926, "eval_steps_per_second": 4.794, "step": 1260 }, { "epoch": 0.53, "learning_rate": 1e-06, "loss": 0.326, "step": 1280 }, { "epoch": 0.53, "eval_accuracy": 0.8983333333333333, "eval_loss": 0.3413917124271393, "eval_runtime": 15.3084, "eval_samples_per_second": 1567.764, "eval_steps_per_second": 4.899, "step": 1280 }, { "epoch": 0.54, "learning_rate": 1e-06, "loss": 0.3639, "step": 1300 }, { "epoch": 0.54, "eval_accuracy": 0.898, "eval_loss": 0.33931395411491394, "eval_runtime": 15.4706, "eval_samples_per_second": 1551.329, "eval_steps_per_second": 4.848, "step": 1300 }, { "epoch": 0.55, "learning_rate": 1e-06, "loss": 0.3537, "step": 1320 }, { "epoch": 0.55, "eval_accuracy": 0.8990416666666666, "eval_loss": 0.33674728870391846, "eval_runtime": 15.8671, "eval_samples_per_second": 1512.563, "eval_steps_per_second": 4.727, "step": 1320 }, { "epoch": 0.56, "learning_rate": 1e-06, "loss": 0.3831, "step": 1340 }, { "epoch": 0.56, "eval_accuracy": 0.898875, "eval_loss": 0.3362838625907898, "eval_runtime": 15.4851, "eval_samples_per_second": 1549.872, "eval_steps_per_second": 4.843, "step": 1340 }, { "epoch": 0.57, "learning_rate": 1e-06, "loss": 0.3559, "step": 1360 }, { "epoch": 0.57, "eval_accuracy": 0.8994166666666666, "eval_loss": 0.3338731527328491, "eval_runtime": 15.8107, "eval_samples_per_second": 1517.958, "eval_steps_per_second": 4.744, "step": 1360 }, { "epoch": 0.57, "learning_rate": 1e-06, "loss": 0.3706, "step": 1380 }, { "epoch": 0.57, "eval_accuracy": 0.899375, "eval_loss": 0.3323739767074585, "eval_runtime": 15.3746, "eval_samples_per_second": 1561.014, "eval_steps_per_second": 4.878, "step": 1380 }, { "epoch": 0.58, "learning_rate": 1e-06, "loss": 0.374, "step": 1400 }, { "epoch": 0.58, "eval_accuracy": 0.8998333333333334, "eval_loss": 0.33071157336235046, "eval_runtime": 16.2242, "eval_samples_per_second": 1479.273, "eval_steps_per_second": 4.623, "step": 1400 }, { "epoch": 0.59, "learning_rate": 1e-06, "loss": 0.3515, "step": 1420 }, { "epoch": 0.59, "eval_accuracy": 0.8999583333333333, "eval_loss": 0.3292597532272339, "eval_runtime": 15.6689, "eval_samples_per_second": 1531.696, "eval_steps_per_second": 4.787, "step": 1420 }, { "epoch": 0.6, "learning_rate": 1e-06, "loss": 0.3854, "step": 1440 }, { "epoch": 0.6, "eval_accuracy": 0.8996666666666666, "eval_loss": 0.32925406098365784, "eval_runtime": 15.8799, "eval_samples_per_second": 1511.341, "eval_steps_per_second": 4.723, "step": 1440 }, { "epoch": 0.61, "learning_rate": 1e-06, "loss": 0.3363, "step": 1460 }, { "epoch": 0.61, "eval_accuracy": 0.8998333333333334, "eval_loss": 0.327946275472641, "eval_runtime": 15.54, "eval_samples_per_second": 1544.404, "eval_steps_per_second": 4.826, "step": 1460 }, { "epoch": 0.62, "learning_rate": 1e-06, "loss": 0.3199, "step": 1480 }, { "epoch": 0.62, "eval_accuracy": 0.900625, "eval_loss": 0.32561612129211426, "eval_runtime": 15.692, "eval_samples_per_second": 1529.44, "eval_steps_per_second": 4.779, "step": 1480 }, { "epoch": 0.62, "learning_rate": 1e-06, "loss": 0.356, "step": 1500 }, { "epoch": 0.62, "eval_accuracy": 0.9007916666666667, "eval_loss": 0.324897825717926, "eval_runtime": 15.6415, "eval_samples_per_second": 1534.381, "eval_steps_per_second": 4.795, "step": 1500 }, { "epoch": 0.63, "learning_rate": 1e-06, "loss": 0.3566, "step": 1520 }, { "epoch": 0.63, "eval_accuracy": 0.9013333333333333, "eval_loss": 0.32291579246520996, "eval_runtime": 16.0404, "eval_samples_per_second": 1496.219, "eval_steps_per_second": 4.676, "step": 1520 }, { "epoch": 0.64, "learning_rate": 1e-06, "loss": 0.3419, "step": 1540 }, { "epoch": 0.64, "eval_accuracy": 0.9010833333333333, "eval_loss": 0.3221406042575836, "eval_runtime": 15.5217, "eval_samples_per_second": 1546.222, "eval_steps_per_second": 4.832, "step": 1540 }, { "epoch": 0.65, "learning_rate": 1e-06, "loss": 0.3453, "step": 1560 }, { "epoch": 0.65, "eval_accuracy": 0.9005833333333333, "eval_loss": 0.32188284397125244, "eval_runtime": 15.8978, "eval_samples_per_second": 1509.639, "eval_steps_per_second": 4.718, "step": 1560 }, { "epoch": 0.66, "learning_rate": 1e-06, "loss": 0.3132, "step": 1580 }, { "epoch": 0.66, "eval_accuracy": 0.901875, "eval_loss": 0.31994542479515076, "eval_runtime": 15.5539, "eval_samples_per_second": 1543.025, "eval_steps_per_second": 4.822, "step": 1580 }, { "epoch": 0.67, "learning_rate": 1e-06, "loss": 0.3457, "step": 1600 }, { "epoch": 0.67, "eval_accuracy": 0.9012083333333333, "eval_loss": 0.32011812925338745, "eval_runtime": 15.7109, "eval_samples_per_second": 1527.598, "eval_steps_per_second": 4.774, "step": 1600 }, { "epoch": 0.68, "learning_rate": 1e-06, "loss": 0.3036, "step": 1620 }, { "epoch": 0.68, "eval_accuracy": 0.9016666666666666, "eval_loss": 0.31820276379585266, "eval_runtime": 15.5575, "eval_samples_per_second": 1542.661, "eval_steps_per_second": 4.821, "step": 1620 }, { "epoch": 0.68, "learning_rate": 1e-06, "loss": 0.3359, "step": 1640 }, { "epoch": 0.68, "eval_accuracy": 0.902125, "eval_loss": 0.316621333360672, "eval_runtime": 16.0532, "eval_samples_per_second": 1495.031, "eval_steps_per_second": 4.672, "step": 1640 }, { "epoch": 0.69, "learning_rate": 1e-06, "loss": 0.3291, "step": 1660 }, { "epoch": 0.69, "eval_accuracy": 0.9020833333333333, "eval_loss": 0.31562095880508423, "eval_runtime": 15.7321, "eval_samples_per_second": 1525.545, "eval_steps_per_second": 4.767, "step": 1660 }, { "epoch": 0.7, "learning_rate": 1e-06, "loss": 0.3586, "step": 1680 }, { "epoch": 0.7, "eval_accuracy": 0.9005833333333333, "eval_loss": 0.3162485659122467, "eval_runtime": 15.5682, "eval_samples_per_second": 1541.608, "eval_steps_per_second": 4.818, "step": 1680 }, { "epoch": 0.71, "learning_rate": 1e-06, "loss": 0.3002, "step": 1700 }, { "epoch": 0.71, "eval_accuracy": 0.9013333333333333, "eval_loss": 0.31555384397506714, "eval_runtime": 15.6458, "eval_samples_per_second": 1533.959, "eval_steps_per_second": 4.794, "step": 1700 }, { "epoch": 0.72, "learning_rate": 1e-06, "loss": 0.3743, "step": 1720 }, { "epoch": 0.72, "eval_accuracy": 0.9024583333333334, "eval_loss": 0.3135637044906616, "eval_runtime": 15.9237, "eval_samples_per_second": 1507.184, "eval_steps_per_second": 4.71, "step": 1720 }, { "epoch": 0.72, "learning_rate": 1e-06, "loss": 0.3506, "step": 1740 }, { "epoch": 0.72, "eval_accuracy": 0.9029166666666667, "eval_loss": 0.3118599057197571, "eval_runtime": 15.6284, "eval_samples_per_second": 1535.667, "eval_steps_per_second": 4.799, "step": 1740 }, { "epoch": 0.73, "learning_rate": 1e-06, "loss": 0.3328, "step": 1760 }, { "epoch": 0.73, "eval_accuracy": 0.9022083333333333, "eval_loss": 0.31298351287841797, "eval_runtime": 15.7141, "eval_samples_per_second": 1527.295, "eval_steps_per_second": 4.773, "step": 1760 }, { "epoch": 0.74, "learning_rate": 1e-06, "loss": 0.328, "step": 1780 }, { "epoch": 0.74, "eval_accuracy": 0.9025416666666667, "eval_loss": 0.31090086698532104, "eval_runtime": 16.0505, "eval_samples_per_second": 1495.277, "eval_steps_per_second": 4.673, "step": 1780 }, { "epoch": 0.75, "learning_rate": 1e-06, "loss": 0.3312, "step": 1800 }, { "epoch": 0.75, "eval_accuracy": 0.9022083333333333, "eval_loss": 0.31056439876556396, "eval_runtime": 15.8194, "eval_samples_per_second": 1517.124, "eval_steps_per_second": 4.741, "step": 1800 }, { "epoch": 0.76, "learning_rate": 1e-06, "loss": 0.3176, "step": 1820 }, { "epoch": 0.76, "eval_accuracy": 0.9030833333333333, "eval_loss": 0.3086493909358978, "eval_runtime": 15.8105, "eval_samples_per_second": 1517.979, "eval_steps_per_second": 4.744, "step": 1820 }, { "epoch": 0.77, "learning_rate": 1e-06, "loss": 0.326, "step": 1840 }, { "epoch": 0.77, "eval_accuracy": 0.9035416666666667, "eval_loss": 0.30771201848983765, "eval_runtime": 15.6087, "eval_samples_per_second": 1537.601, "eval_steps_per_second": 4.805, "step": 1840 }, { "epoch": 0.78, "learning_rate": 1e-06, "loss": 0.3308, "step": 1860 }, { "epoch": 0.78, "eval_accuracy": 0.9033333333333333, "eval_loss": 0.30720219016075134, "eval_runtime": 15.7737, "eval_samples_per_second": 1521.525, "eval_steps_per_second": 4.755, "step": 1860 }, { "epoch": 0.78, "learning_rate": 1e-06, "loss": 0.2876, "step": 1880 }, { "epoch": 0.78, "eval_accuracy": 0.90275, "eval_loss": 0.30769696831703186, "eval_runtime": 16.0101, "eval_samples_per_second": 1499.051, "eval_steps_per_second": 4.685, "step": 1880 }, { "epoch": 0.79, "learning_rate": 1e-06, "loss": 0.3326, "step": 1900 }, { "epoch": 0.79, "eval_accuracy": 0.9028333333333334, "eval_loss": 0.3074987530708313, "eval_runtime": 15.7279, "eval_samples_per_second": 1525.953, "eval_steps_per_second": 4.769, "step": 1900 }, { "epoch": 0.8, "learning_rate": 1e-06, "loss": 0.2999, "step": 1920 }, { "epoch": 0.8, "eval_accuracy": 0.9040416666666666, "eval_loss": 0.3049142062664032, "eval_runtime": 15.7379, "eval_samples_per_second": 1524.978, "eval_steps_per_second": 4.766, "step": 1920 }, { "epoch": 0.81, "learning_rate": 1e-06, "loss": 0.3207, "step": 1940 }, { "epoch": 0.81, "eval_accuracy": 0.9042083333333333, "eval_loss": 0.30403196811676025, "eval_runtime": 16.0526, "eval_samples_per_second": 1495.086, "eval_steps_per_second": 4.672, "step": 1940 }, { "epoch": 0.82, "learning_rate": 1e-06, "loss": 0.3126, "step": 1960 }, { "epoch": 0.82, "eval_accuracy": 0.903625, "eval_loss": 0.3041483461856842, "eval_runtime": 15.4127, "eval_samples_per_second": 1557.161, "eval_steps_per_second": 4.866, "step": 1960 }, { "epoch": 0.82, "learning_rate": 1e-06, "loss": 0.3785, "step": 1980 }, { "epoch": 0.82, "eval_accuracy": 0.9036666666666666, "eval_loss": 0.30376511812210083, "eval_runtime": 15.6455, "eval_samples_per_second": 1533.984, "eval_steps_per_second": 4.794, "step": 1980 }, { "epoch": 0.83, "learning_rate": 1e-06, "loss": 0.3015, "step": 2000 }, { "epoch": 0.83, "eval_accuracy": 0.9044166666666666, "eval_loss": 0.30233460664749146, "eval_runtime": 15.6366, "eval_samples_per_second": 1534.859, "eval_steps_per_second": 4.796, "step": 2000 }, { "epoch": 0.84, "learning_rate": 1e-06, "loss": 0.348, "step": 2020 }, { "epoch": 0.84, "eval_accuracy": 0.9045833333333333, "eval_loss": 0.30240559577941895, "eval_runtime": 15.8212, "eval_samples_per_second": 1516.949, "eval_steps_per_second": 4.74, "step": 2020 }, { "epoch": 0.85, "learning_rate": 1e-06, "loss": 0.2947, "step": 2040 }, { "epoch": 0.85, "eval_accuracy": 0.904875, "eval_loss": 0.30133891105651855, "eval_runtime": 15.8234, "eval_samples_per_second": 1516.737, "eval_steps_per_second": 4.74, "step": 2040 }, { "epoch": 0.86, "learning_rate": 1e-06, "loss": 0.3344, "step": 2060 }, { "epoch": 0.86, "eval_accuracy": 0.9049583333333333, "eval_loss": 0.30154794454574585, "eval_runtime": 15.2879, "eval_samples_per_second": 1569.873, "eval_steps_per_second": 4.906, "step": 2060 }, { "epoch": 0.87, "learning_rate": 1e-06, "loss": 0.3436, "step": 2080 }, { "epoch": 0.87, "eval_accuracy": 0.9045833333333333, "eval_loss": 0.29952293634414673, "eval_runtime": 16.2949, "eval_samples_per_second": 1472.856, "eval_steps_per_second": 4.603, "step": 2080 }, { "epoch": 0.88, "learning_rate": 1e-06, "loss": 0.3179, "step": 2100 }, { "epoch": 0.88, "eval_accuracy": 0.9036666666666666, "eval_loss": 0.30100810527801514, "eval_runtime": 15.803, "eval_samples_per_second": 1518.698, "eval_steps_per_second": 4.746, "step": 2100 }, { "epoch": 0.88, "learning_rate": 1e-06, "loss": 0.3045, "step": 2120 }, { "epoch": 0.88, "eval_accuracy": 0.9050833333333334, "eval_loss": 0.29866886138916016, "eval_runtime": 15.5575, "eval_samples_per_second": 1542.663, "eval_steps_per_second": 4.821, "step": 2120 }, { "epoch": 0.89, "learning_rate": 1e-06, "loss": 0.2797, "step": 2140 }, { "epoch": 0.89, "eval_accuracy": 0.905125, "eval_loss": 0.2977401614189148, "eval_runtime": 15.9997, "eval_samples_per_second": 1500.024, "eval_steps_per_second": 4.688, "step": 2140 }, { "epoch": 0.9, "learning_rate": 1e-06, "loss": 0.3044, "step": 2160 }, { "epoch": 0.9, "eval_accuracy": 0.9046666666666666, "eval_loss": 0.29765772819519043, "eval_runtime": 15.813, "eval_samples_per_second": 1517.737, "eval_steps_per_second": 4.743, "step": 2160 }, { "epoch": 0.91, "learning_rate": 1e-06, "loss": 0.2948, "step": 2180 }, { "epoch": 0.91, "eval_accuracy": 0.9055, "eval_loss": 0.2965656518936157, "eval_runtime": 15.4407, "eval_samples_per_second": 1554.336, "eval_steps_per_second": 4.857, "step": 2180 }, { "epoch": 0.92, "learning_rate": 1e-06, "loss": 0.2803, "step": 2200 }, { "epoch": 0.92, "eval_accuracy": 0.9052083333333333, "eval_loss": 0.2959022521972656, "eval_runtime": 15.9407, "eval_samples_per_second": 1505.578, "eval_steps_per_second": 4.705, "step": 2200 }, { "epoch": 0.93, "learning_rate": 1e-06, "loss": 0.3008, "step": 2220 }, { "epoch": 0.93, "eval_accuracy": 0.9055833333333333, "eval_loss": 0.2946693003177643, "eval_runtime": 15.495, "eval_samples_per_second": 1548.887, "eval_steps_per_second": 4.84, "step": 2220 }, { "epoch": 0.93, "learning_rate": 1e-06, "loss": 0.3163, "step": 2240 }, { "epoch": 0.93, "eval_accuracy": 0.905625, "eval_loss": 0.2954801321029663, "eval_runtime": 15.9747, "eval_samples_per_second": 1502.377, "eval_steps_per_second": 4.695, "step": 2240 }, { "epoch": 0.94, "learning_rate": 1e-06, "loss": 0.3094, "step": 2260 }, { "epoch": 0.94, "eval_accuracy": 0.90525, "eval_loss": 0.2948620617389679, "eval_runtime": 15.5314, "eval_samples_per_second": 1545.257, "eval_steps_per_second": 4.829, "step": 2260 }, { "epoch": 0.95, "learning_rate": 1e-06, "loss": 0.2932, "step": 2280 }, { "epoch": 0.95, "eval_accuracy": 0.9058333333333334, "eval_loss": 0.29374802112579346, "eval_runtime": 15.9691, "eval_samples_per_second": 1502.905, "eval_steps_per_second": 4.697, "step": 2280 }, { "epoch": 0.96, "learning_rate": 1e-06, "loss": 0.2963, "step": 2300 }, { "epoch": 0.96, "eval_accuracy": 0.9058333333333334, "eval_loss": 0.29317864775657654, "eval_runtime": 15.765, "eval_samples_per_second": 1522.362, "eval_steps_per_second": 4.757, "step": 2300 }, { "epoch": 0.97, "learning_rate": 1e-06, "loss": 0.3027, "step": 2320 }, { "epoch": 0.97, "eval_accuracy": 0.905375, "eval_loss": 0.2939450442790985, "eval_runtime": 15.8745, "eval_samples_per_second": 1511.86, "eval_steps_per_second": 4.725, "step": 2320 }, { "epoch": 0.97, "learning_rate": 1e-06, "loss": 0.3017, "step": 2340 }, { "epoch": 0.97, "eval_accuracy": 0.9062083333333333, "eval_loss": 0.29151102900505066, "eval_runtime": 15.7805, "eval_samples_per_second": 1520.865, "eval_steps_per_second": 4.753, "step": 2340 }, { "epoch": 0.98, "learning_rate": 1e-06, "loss": 0.347, "step": 2360 }, { "epoch": 0.98, "eval_accuracy": 0.9069583333333333, "eval_loss": 0.2903897166252136, "eval_runtime": 15.6602, "eval_samples_per_second": 1532.552, "eval_steps_per_second": 4.789, "step": 2360 }, { "epoch": 0.99, "learning_rate": 1e-06, "loss": 0.311, "step": 2380 }, { "epoch": 0.99, "eval_accuracy": 0.906375, "eval_loss": 0.29053354263305664, "eval_runtime": 15.4212, "eval_samples_per_second": 1556.296, "eval_steps_per_second": 4.863, "step": 2380 }, { "epoch": 1.0, "learning_rate": 1e-06, "loss": 0.3007, "step": 2400 }, { "epoch": 1.0, "eval_accuracy": 0.9072083333333333, "eval_loss": 0.290608286857605, "eval_runtime": 15.7304, "eval_samples_per_second": 1525.709, "eval_steps_per_second": 4.768, "step": 2400 }, { "epoch": 1.01, "learning_rate": 1e-06, "loss": 0.3577, "step": 2420 }, { "epoch": 1.01, "eval_accuracy": 0.9072083333333333, "eval_loss": 0.289432168006897, "eval_runtime": 16.5863, "eval_samples_per_second": 1446.973, "eval_steps_per_second": 4.522, "step": 2420 }, { "epoch": 1.02, "learning_rate": 1e-06, "loss": 0.2775, "step": 2440 }, { "epoch": 1.02, "eval_accuracy": 0.906625, "eval_loss": 0.2914583384990692, "eval_runtime": 15.8866, "eval_samples_per_second": 1510.711, "eval_steps_per_second": 4.721, "step": 2440 }, { "epoch": 1.02, "learning_rate": 1e-06, "loss": 0.326, "step": 2460 }, { "epoch": 1.02, "eval_accuracy": 0.907, "eval_loss": 0.28882813453674316, "eval_runtime": 15.4052, "eval_samples_per_second": 1557.919, "eval_steps_per_second": 4.868, "step": 2460 }, { "epoch": 1.03, "learning_rate": 1e-06, "loss": 0.2958, "step": 2480 }, { "epoch": 1.03, "eval_accuracy": 0.9076666666666666, "eval_loss": 0.2885444760322571, "eval_runtime": 15.4629, "eval_samples_per_second": 1552.103, "eval_steps_per_second": 4.85, "step": 2480 }, { "epoch": 1.04, "learning_rate": 1e-06, "loss": 0.2543, "step": 2500 }, { "epoch": 1.04, "eval_accuracy": 0.90825, "eval_loss": 0.28599992394447327, "eval_runtime": 15.7097, "eval_samples_per_second": 1527.717, "eval_steps_per_second": 4.774, "step": 2500 }, { "epoch": 1.05, "learning_rate": 1e-06, "loss": 0.287, "step": 2520 }, { "epoch": 1.05, "eval_accuracy": 0.908875, "eval_loss": 0.285250186920166, "eval_runtime": 15.7948, "eval_samples_per_second": 1519.484, "eval_steps_per_second": 4.748, "step": 2520 }, { "epoch": 1.06, "learning_rate": 1e-06, "loss": 0.3226, "step": 2540 }, { "epoch": 1.06, "eval_accuracy": 0.908375, "eval_loss": 0.28508585691452026, "eval_runtime": 15.6253, "eval_samples_per_second": 1535.966, "eval_steps_per_second": 4.8, "step": 2540 }, { "epoch": 1.07, "learning_rate": 1e-06, "loss": 0.2912, "step": 2560 }, { "epoch": 1.07, "eval_accuracy": 0.9077083333333333, "eval_loss": 0.28835633397102356, "eval_runtime": 15.302, "eval_samples_per_second": 1568.422, "eval_steps_per_second": 4.901, "step": 2560 }, { "epoch": 1.07, "learning_rate": 1e-06, "loss": 0.3344, "step": 2580 }, { "epoch": 1.07, "eval_accuracy": 0.9082916666666667, "eval_loss": 0.2841767966747284, "eval_runtime": 15.7333, "eval_samples_per_second": 1525.424, "eval_steps_per_second": 4.767, "step": 2580 }, { "epoch": 1.08, "learning_rate": 1e-06, "loss": 0.2883, "step": 2600 }, { "epoch": 1.08, "eval_accuracy": 0.9075, "eval_loss": 0.28568732738494873, "eval_runtime": 15.9862, "eval_samples_per_second": 1501.294, "eval_steps_per_second": 4.692, "step": 2600 }, { "epoch": 1.09, "learning_rate": 1e-06, "loss": 0.2958, "step": 2620 }, { "epoch": 1.09, "eval_accuracy": 0.90875, "eval_loss": 0.2833400368690491, "eval_runtime": 15.9204, "eval_samples_per_second": 1507.497, "eval_steps_per_second": 4.711, "step": 2620 }, { "epoch": 1.1, "learning_rate": 1e-06, "loss": 0.2913, "step": 2640 }, { "epoch": 1.1, "eval_accuracy": 0.9085, "eval_loss": 0.2836286723613739, "eval_runtime": 16.0314, "eval_samples_per_second": 1497.058, "eval_steps_per_second": 4.678, "step": 2640 }, { "epoch": 1.11, "learning_rate": 1e-06, "loss": 0.3007, "step": 2660 }, { "epoch": 1.11, "eval_accuracy": 0.908625, "eval_loss": 0.2823469638824463, "eval_runtime": 15.5726, "eval_samples_per_second": 1541.172, "eval_steps_per_second": 4.816, "step": 2660 }, { "epoch": 1.12, "learning_rate": 1e-06, "loss": 0.3152, "step": 2680 }, { "epoch": 1.12, "eval_accuracy": 0.9082083333333333, "eval_loss": 0.2841881811618805, "eval_runtime": 15.7571, "eval_samples_per_second": 1523.126, "eval_steps_per_second": 4.76, "step": 2680 }, { "epoch": 1.12, "learning_rate": 1e-06, "loss": 0.2857, "step": 2700 }, { "epoch": 1.12, "eval_accuracy": 0.9079583333333333, "eval_loss": 0.28267306089401245, "eval_runtime": 15.9307, "eval_samples_per_second": 1506.523, "eval_steps_per_second": 4.708, "step": 2700 }, { "epoch": 1.13, "learning_rate": 1e-06, "loss": 0.2821, "step": 2720 }, { "epoch": 1.13, "eval_accuracy": 0.9085416666666667, "eval_loss": 0.28154897689819336, "eval_runtime": 15.5765, "eval_samples_per_second": 1540.779, "eval_steps_per_second": 4.815, "step": 2720 }, { "epoch": 1.14, "learning_rate": 1e-06, "loss": 0.2775, "step": 2740 }, { "epoch": 1.14, "eval_accuracy": 0.9074166666666666, "eval_loss": 0.2842314839363098, "eval_runtime": 16.5328, "eval_samples_per_second": 1451.656, "eval_steps_per_second": 4.536, "step": 2740 }, { "epoch": 1.15, "learning_rate": 1e-06, "loss": 0.2964, "step": 2760 }, { "epoch": 1.15, "eval_accuracy": 0.9084166666666667, "eval_loss": 0.2818900942802429, "eval_runtime": 15.7457, "eval_samples_per_second": 1524.229, "eval_steps_per_second": 4.763, "step": 2760 }, { "epoch": 1.16, "learning_rate": 1e-06, "loss": 0.3245, "step": 2780 }, { "epoch": 1.16, "eval_accuracy": 0.9088333333333334, "eval_loss": 0.2803751826286316, "eval_runtime": 15.6563, "eval_samples_per_second": 1532.925, "eval_steps_per_second": 4.79, "step": 2780 }, { "epoch": 1.17, "learning_rate": 1e-06, "loss": 0.2706, "step": 2800 }, { "epoch": 1.17, "eval_accuracy": 0.909125, "eval_loss": 0.2802983820438385, "eval_runtime": 15.9707, "eval_samples_per_second": 1502.752, "eval_steps_per_second": 4.696, "step": 2800 }, { "epoch": 1.18, "learning_rate": 1e-06, "loss": 0.2674, "step": 2820 }, { "epoch": 1.18, "eval_accuracy": 0.908875, "eval_loss": 0.2793467342853546, "eval_runtime": 15.9613, "eval_samples_per_second": 1503.634, "eval_steps_per_second": 4.699, "step": 2820 }, { "epoch": 1.18, "learning_rate": 1e-06, "loss": 0.3296, "step": 2840 }, { "epoch": 1.18, "eval_accuracy": 0.9093333333333333, "eval_loss": 0.2792114019393921, "eval_runtime": 15.5813, "eval_samples_per_second": 1540.308, "eval_steps_per_second": 4.813, "step": 2840 }, { "epoch": 1.19, "learning_rate": 1e-06, "loss": 0.2993, "step": 2860 }, { "epoch": 1.19, "eval_accuracy": 0.909125, "eval_loss": 0.27838194370269775, "eval_runtime": 15.9048, "eval_samples_per_second": 1508.979, "eval_steps_per_second": 4.716, "step": 2860 }, { "epoch": 1.2, "learning_rate": 1e-06, "loss": 0.3305, "step": 2880 }, { "epoch": 1.2, "eval_accuracy": 0.9084583333333334, "eval_loss": 0.2802034914493561, "eval_runtime": 15.8179, "eval_samples_per_second": 1517.271, "eval_steps_per_second": 4.741, "step": 2880 }, { "epoch": 1.21, "learning_rate": 1e-06, "loss": 0.2984, "step": 2900 }, { "epoch": 1.21, "eval_accuracy": 0.9088333333333334, "eval_loss": 0.2789742052555084, "eval_runtime": 15.8715, "eval_samples_per_second": 1512.14, "eval_steps_per_second": 4.725, "step": 2900 }, { "epoch": 1.22, "learning_rate": 1e-06, "loss": 0.2748, "step": 2920 }, { "epoch": 1.22, "eval_accuracy": 0.9088333333333334, "eval_loss": 0.2786828875541687, "eval_runtime": 15.744, "eval_samples_per_second": 1524.387, "eval_steps_per_second": 4.764, "step": 2920 }, { "epoch": 1.23, "learning_rate": 1e-06, "loss": 0.2708, "step": 2940 }, { "epoch": 1.23, "eval_accuracy": 0.909125, "eval_loss": 0.2787318527698517, "eval_runtime": 15.8587, "eval_samples_per_second": 1513.363, "eval_steps_per_second": 4.729, "step": 2940 }, { "epoch": 1.23, "learning_rate": 1e-06, "loss": 0.3062, "step": 2960 }, { "epoch": 1.23, "eval_accuracy": 0.910125, "eval_loss": 0.2767701745033264, "eval_runtime": 15.5088, "eval_samples_per_second": 1547.508, "eval_steps_per_second": 4.836, "step": 2960 }, { "epoch": 1.24, "learning_rate": 1e-06, "loss": 0.3039, "step": 2980 }, { "epoch": 1.24, "eval_accuracy": 0.910375, "eval_loss": 0.2769327163696289, "eval_runtime": 15.7715, "eval_samples_per_second": 1521.733, "eval_steps_per_second": 4.755, "step": 2980 }, { "epoch": 1.25, "learning_rate": 1e-06, "loss": 0.2889, "step": 3000 }, { "epoch": 1.25, "eval_accuracy": 0.9106666666666666, "eval_loss": 0.2753864526748657, "eval_runtime": 16.0825, "eval_samples_per_second": 1492.309, "eval_steps_per_second": 4.663, "step": 3000 }, { "epoch": 1.26, "learning_rate": 1e-06, "loss": 0.2964, "step": 3020 }, { "epoch": 1.26, "eval_accuracy": 0.9102916666666667, "eval_loss": 0.2752026319503784, "eval_runtime": 15.8059, "eval_samples_per_second": 1518.419, "eval_steps_per_second": 4.745, "step": 3020 }, { "epoch": 1.27, "learning_rate": 1e-06, "loss": 0.2951, "step": 3040 }, { "epoch": 1.27, "eval_accuracy": 0.9099583333333333, "eval_loss": 0.27485111355781555, "eval_runtime": 15.7664, "eval_samples_per_second": 1522.226, "eval_steps_per_second": 4.757, "step": 3040 }, { "epoch": 1.27, "learning_rate": 1e-06, "loss": 0.2879, "step": 3060 }, { "epoch": 1.27, "eval_accuracy": 0.91075, "eval_loss": 0.27414020895957947, "eval_runtime": 15.6142, "eval_samples_per_second": 1537.064, "eval_steps_per_second": 4.803, "step": 3060 }, { "epoch": 1.28, "learning_rate": 1e-06, "loss": 0.2795, "step": 3080 }, { "epoch": 1.28, "eval_accuracy": 0.9106666666666666, "eval_loss": 0.2753457725048065, "eval_runtime": 15.8605, "eval_samples_per_second": 1513.198, "eval_steps_per_second": 4.729, "step": 3080 }, { "epoch": 1.29, "learning_rate": 1e-06, "loss": 0.2794, "step": 3100 }, { "epoch": 1.29, "eval_accuracy": 0.9100416666666666, "eval_loss": 0.27380895614624023, "eval_runtime": 16.3184, "eval_samples_per_second": 1470.733, "eval_steps_per_second": 4.596, "step": 3100 }, { "epoch": 1.3, "learning_rate": 1e-06, "loss": 0.258, "step": 3120 }, { "epoch": 1.3, "eval_accuracy": 0.910625, "eval_loss": 0.27472469210624695, "eval_runtime": 15.8282, "eval_samples_per_second": 1516.281, "eval_steps_per_second": 4.738, "step": 3120 }, { "epoch": 1.31, "learning_rate": 1e-06, "loss": 0.2551, "step": 3140 }, { "epoch": 1.31, "eval_accuracy": 0.91, "eval_loss": 0.274568110704422, "eval_runtime": 15.7686, "eval_samples_per_second": 1522.008, "eval_steps_per_second": 4.756, "step": 3140 }, { "epoch": 1.32, "learning_rate": 1e-06, "loss": 0.3086, "step": 3160 }, { "epoch": 1.32, "eval_accuracy": 0.9097083333333333, "eval_loss": 0.27524352073669434, "eval_runtime": 16.163, "eval_samples_per_second": 1484.869, "eval_steps_per_second": 4.64, "step": 3160 }, { "epoch": 1.32, "learning_rate": 1e-06, "loss": 0.2812, "step": 3180 }, { "epoch": 1.32, "eval_accuracy": 0.9117083333333333, "eval_loss": 0.27365365624427795, "eval_runtime": 15.5225, "eval_samples_per_second": 1546.138, "eval_steps_per_second": 4.832, "step": 3180 }, { "epoch": 1.33, "learning_rate": 1e-06, "loss": 0.3016, "step": 3200 }, { "epoch": 1.33, "eval_accuracy": 0.911, "eval_loss": 0.271597683429718, "eval_runtime": 15.8761, "eval_samples_per_second": 1511.709, "eval_steps_per_second": 4.724, "step": 3200 }, { "epoch": 1.34, "learning_rate": 1e-06, "loss": 0.2913, "step": 3220 }, { "epoch": 1.34, "eval_accuracy": 0.9109583333333333, "eval_loss": 0.27143821120262146, "eval_runtime": 16.2761, "eval_samples_per_second": 1474.556, "eval_steps_per_second": 4.608, "step": 3220 }, { "epoch": 1.35, "learning_rate": 1e-06, "loss": 0.2634, "step": 3240 }, { "epoch": 1.35, "eval_accuracy": 0.909, "eval_loss": 0.2747085988521576, "eval_runtime": 16.3191, "eval_samples_per_second": 1470.669, "eval_steps_per_second": 4.596, "step": 3240 }, { "epoch": 1.36, "learning_rate": 1e-06, "loss": 0.2845, "step": 3260 }, { "epoch": 1.36, "eval_accuracy": 0.9109166666666667, "eval_loss": 0.2722584903240204, "eval_runtime": 15.7837, "eval_samples_per_second": 1520.554, "eval_steps_per_second": 4.752, "step": 3260 }, { "epoch": 1.37, "learning_rate": 1e-06, "loss": 0.2864, "step": 3280 }, { "epoch": 1.37, "eval_accuracy": 0.9112083333333333, "eval_loss": 0.2707350552082062, "eval_runtime": 15.9926, "eval_samples_per_second": 1500.697, "eval_steps_per_second": 4.69, "step": 3280 }, { "epoch": 1.38, "learning_rate": 1e-06, "loss": 0.2794, "step": 3300 }, { "epoch": 1.38, "eval_accuracy": 0.9111666666666667, "eval_loss": 0.2700752317905426, "eval_runtime": 15.6416, "eval_samples_per_second": 1534.367, "eval_steps_per_second": 4.795, "step": 3300 }, { "epoch": 1.38, "learning_rate": 1e-06, "loss": 0.3216, "step": 3320 }, { "epoch": 1.38, "eval_accuracy": 0.9105, "eval_loss": 0.27095627784729004, "eval_runtime": 16.2168, "eval_samples_per_second": 1479.95, "eval_steps_per_second": 4.625, "step": 3320 }, { "epoch": 1.39, "learning_rate": 1e-06, "loss": 0.2738, "step": 3340 }, { "epoch": 1.39, "eval_accuracy": 0.9102083333333333, "eval_loss": 0.2725074291229248, "eval_runtime": 15.7189, "eval_samples_per_second": 1526.821, "eval_steps_per_second": 4.771, "step": 3340 }, { "epoch": 1.4, "learning_rate": 1e-06, "loss": 0.301, "step": 3360 }, { "epoch": 1.4, "eval_accuracy": 0.9102083333333333, "eval_loss": 0.27188044786453247, "eval_runtime": 15.3899, "eval_samples_per_second": 1559.463, "eval_steps_per_second": 4.873, "step": 3360 }, { "epoch": 1.41, "learning_rate": 1e-06, "loss": 0.3144, "step": 3380 }, { "epoch": 1.41, "eval_accuracy": 0.911125, "eval_loss": 0.269314169883728, "eval_runtime": 16.1663, "eval_samples_per_second": 1484.568, "eval_steps_per_second": 4.639, "step": 3380 }, { "epoch": 1.42, "learning_rate": 1e-06, "loss": 0.2914, "step": 3400 }, { "epoch": 1.42, "eval_accuracy": 0.9120416666666666, "eval_loss": 0.26871222257614136, "eval_runtime": 15.7364, "eval_samples_per_second": 1525.13, "eval_steps_per_second": 4.766, "step": 3400 }, { "epoch": 1.43, "learning_rate": 1e-06, "loss": 0.252, "step": 3420 }, { "epoch": 1.43, "eval_accuracy": 0.9114166666666667, "eval_loss": 0.26832154393196106, "eval_runtime": 16.1928, "eval_samples_per_second": 1482.143, "eval_steps_per_second": 4.632, "step": 3420 }, { "epoch": 1.43, "learning_rate": 1e-06, "loss": 0.2616, "step": 3440 }, { "epoch": 1.43, "eval_accuracy": 0.9119583333333333, "eval_loss": 0.2678380608558655, "eval_runtime": 15.9974, "eval_samples_per_second": 1500.246, "eval_steps_per_second": 4.688, "step": 3440 }, { "epoch": 1.44, "learning_rate": 1e-06, "loss": 0.247, "step": 3460 }, { "epoch": 1.44, "eval_accuracy": 0.9126666666666666, "eval_loss": 0.2679081857204437, "eval_runtime": 15.5041, "eval_samples_per_second": 1547.976, "eval_steps_per_second": 4.837, "step": 3460 }, { "epoch": 1.45, "learning_rate": 1e-06, "loss": 0.279, "step": 3480 }, { "epoch": 1.45, "eval_accuracy": 0.912, "eval_loss": 0.2675539553165436, "eval_runtime": 16.0139, "eval_samples_per_second": 1498.702, "eval_steps_per_second": 4.683, "step": 3480 }, { "epoch": 1.46, "learning_rate": 1e-06, "loss": 0.2823, "step": 3500 }, { "epoch": 1.46, "eval_accuracy": 0.9124166666666667, "eval_loss": 0.26708924770355225, "eval_runtime": 15.8242, "eval_samples_per_second": 1516.667, "eval_steps_per_second": 4.74, "step": 3500 }, { "epoch": 1.47, "learning_rate": 1e-06, "loss": 0.2769, "step": 3520 }, { "epoch": 1.47, "eval_accuracy": 0.9125, "eval_loss": 0.26735562086105347, "eval_runtime": 16.0609, "eval_samples_per_second": 1494.311, "eval_steps_per_second": 4.67, "step": 3520 }, { "epoch": 1.48, "learning_rate": 1e-06, "loss": 0.253, "step": 3540 }, { "epoch": 1.48, "eval_accuracy": 0.9122916666666666, "eval_loss": 0.2679881453514099, "eval_runtime": 15.8153, "eval_samples_per_second": 1517.514, "eval_steps_per_second": 4.742, "step": 3540 }, { "epoch": 1.48, "learning_rate": 1e-06, "loss": 0.2398, "step": 3560 }, { "epoch": 1.48, "eval_accuracy": 0.9125, "eval_loss": 0.26599201560020447, "eval_runtime": 15.854, "eval_samples_per_second": 1513.812, "eval_steps_per_second": 4.731, "step": 3560 }, { "epoch": 1.49, "learning_rate": 1e-06, "loss": 0.2524, "step": 3580 }, { "epoch": 1.49, "eval_accuracy": 0.912875, "eval_loss": 0.2660870850086212, "eval_runtime": 15.72, "eval_samples_per_second": 1526.713, "eval_steps_per_second": 4.771, "step": 3580 }, { "epoch": 1.5, "learning_rate": 1e-06, "loss": 0.2794, "step": 3600 }, { "epoch": 1.5, "eval_accuracy": 0.9122916666666666, "eval_loss": 0.2667754590511322, "eval_runtime": 15.9732, "eval_samples_per_second": 1502.513, "eval_steps_per_second": 4.695, "step": 3600 }, { "epoch": 1.51, "learning_rate": 1e-06, "loss": 0.2378, "step": 3620 }, { "epoch": 1.51, "eval_accuracy": 0.912375, "eval_loss": 0.26728901267051697, "eval_runtime": 15.3559, "eval_samples_per_second": 1562.918, "eval_steps_per_second": 4.884, "step": 3620 }, { "epoch": 1.52, "learning_rate": 1e-06, "loss": 0.2309, "step": 3640 }, { "epoch": 1.52, "eval_accuracy": 0.91175, "eval_loss": 0.2677074074745178, "eval_runtime": 15.7934, "eval_samples_per_second": 1519.623, "eval_steps_per_second": 4.749, "step": 3640 }, { "epoch": 1.52, "learning_rate": 1e-06, "loss": 0.2414, "step": 3660 }, { "epoch": 1.52, "eval_accuracy": 0.9127083333333333, "eval_loss": 0.2664356529712677, "eval_runtime": 16.3142, "eval_samples_per_second": 1471.115, "eval_steps_per_second": 4.597, "step": 3660 }, { "epoch": 1.53, "learning_rate": 1e-06, "loss": 0.2698, "step": 3680 }, { "epoch": 1.53, "eval_accuracy": 0.9130416666666666, "eval_loss": 0.26516926288604736, "eval_runtime": 15.5498, "eval_samples_per_second": 1543.428, "eval_steps_per_second": 4.823, "step": 3680 }, { "epoch": 1.54, "learning_rate": 1e-06, "loss": 0.2674, "step": 3700 }, { "epoch": 1.54, "eval_accuracy": 0.912875, "eval_loss": 0.26480814814567566, "eval_runtime": 15.9433, "eval_samples_per_second": 1505.331, "eval_steps_per_second": 4.704, "step": 3700 }, { "epoch": 1.55, "learning_rate": 1e-06, "loss": 0.3004, "step": 3720 }, { "epoch": 1.55, "eval_accuracy": 0.9120833333333334, "eval_loss": 0.26566416025161743, "eval_runtime": 16.4814, "eval_samples_per_second": 1456.188, "eval_steps_per_second": 4.551, "step": 3720 }, { "epoch": 1.56, "learning_rate": 1e-06, "loss": 0.2621, "step": 3740 }, { "epoch": 1.56, "eval_accuracy": 0.9130833333333334, "eval_loss": 0.2644825577735901, "eval_runtime": 15.8572, "eval_samples_per_second": 1513.507, "eval_steps_per_second": 4.73, "step": 3740 }, { "epoch": 1.57, "learning_rate": 1e-06, "loss": 0.2691, "step": 3760 }, { "epoch": 1.57, "eval_accuracy": 0.9137916666666667, "eval_loss": 0.26423123478889465, "eval_runtime": 15.9709, "eval_samples_per_second": 1502.733, "eval_steps_per_second": 4.696, "step": 3760 }, { "epoch": 1.57, "learning_rate": 1e-06, "loss": 0.2768, "step": 3780 }, { "epoch": 1.57, "eval_accuracy": 0.9114583333333334, "eval_loss": 0.2679901421070099, "eval_runtime": 15.6044, "eval_samples_per_second": 1538.026, "eval_steps_per_second": 4.806, "step": 3780 }, { "epoch": 1.58, "learning_rate": 1e-06, "loss": 0.2768, "step": 3800 }, { "epoch": 1.58, "eval_accuracy": 0.9123333333333333, "eval_loss": 0.2639557421207428, "eval_runtime": 16.2935, "eval_samples_per_second": 1472.978, "eval_steps_per_second": 4.603, "step": 3800 }, { "epoch": 1.59, "learning_rate": 1e-06, "loss": 0.2308, "step": 3820 }, { "epoch": 1.59, "eval_accuracy": 0.9118333333333334, "eval_loss": 0.2661728262901306, "eval_runtime": 15.4841, "eval_samples_per_second": 1549.978, "eval_steps_per_second": 4.844, "step": 3820 }, { "epoch": 1.6, "learning_rate": 1e-06, "loss": 0.2591, "step": 3840 }, { "epoch": 1.6, "eval_accuracy": 0.9131666666666667, "eval_loss": 0.2664397954940796, "eval_runtime": 15.6675, "eval_samples_per_second": 1531.834, "eval_steps_per_second": 4.787, "step": 3840 }, { "epoch": 1.61, "learning_rate": 1e-06, "loss": 0.2496, "step": 3860 }, { "epoch": 1.61, "eval_accuracy": 0.9125, "eval_loss": 0.2657550573348999, "eval_runtime": 15.6288, "eval_samples_per_second": 1535.631, "eval_steps_per_second": 4.799, "step": 3860 }, { "epoch": 1.62, "learning_rate": 1e-06, "loss": 0.2609, "step": 3880 }, { "epoch": 1.62, "eval_accuracy": 0.91225, "eval_loss": 0.26461654901504517, "eval_runtime": 15.6184, "eval_samples_per_second": 1536.644, "eval_steps_per_second": 4.802, "step": 3880 }, { "epoch": 1.62, "learning_rate": 1e-06, "loss": 0.2491, "step": 3900 }, { "epoch": 1.62, "eval_accuracy": 0.9132916666666666, "eval_loss": 0.2625100314617157, "eval_runtime": 15.868, "eval_samples_per_second": 1512.482, "eval_steps_per_second": 4.727, "step": 3900 }, { "epoch": 1.63, "learning_rate": 1e-06, "loss": 0.2609, "step": 3920 }, { "epoch": 1.63, "eval_accuracy": 0.91375, "eval_loss": 0.2650201916694641, "eval_runtime": 15.7536, "eval_samples_per_second": 1523.463, "eval_steps_per_second": 4.761, "step": 3920 }, { "epoch": 1.64, "learning_rate": 1e-06, "loss": 0.225, "step": 3940 }, { "epoch": 1.64, "eval_accuracy": 0.9124166666666667, "eval_loss": 0.262962281703949, "eval_runtime": 15.7571, "eval_samples_per_second": 1523.123, "eval_steps_per_second": 4.76, "step": 3940 }, { "epoch": 1.65, "learning_rate": 1e-06, "loss": 0.3099, "step": 3960 }, { "epoch": 1.65, "eval_accuracy": 0.9139583333333333, "eval_loss": 0.26255694031715393, "eval_runtime": 15.7275, "eval_samples_per_second": 1525.985, "eval_steps_per_second": 4.769, "step": 3960 }, { "epoch": 1.66, "learning_rate": 1e-06, "loss": 0.2627, "step": 3980 }, { "epoch": 1.66, "eval_accuracy": 0.91425, "eval_loss": 0.26082727313041687, "eval_runtime": 15.785, "eval_samples_per_second": 1520.427, "eval_steps_per_second": 4.751, "step": 3980 }, { "epoch": 1.67, "learning_rate": 1e-06, "loss": 0.2831, "step": 4000 }, { "epoch": 1.67, "eval_accuracy": 0.9128333333333334, "eval_loss": 0.2629248797893524, "eval_runtime": 15.9672, "eval_samples_per_second": 1503.078, "eval_steps_per_second": 4.697, "step": 4000 }, { "epoch": 1.68, "learning_rate": 1e-06, "loss": 0.2806, "step": 4020 }, { "epoch": 1.68, "eval_accuracy": 0.9131666666666667, "eval_loss": 0.2608546316623688, "eval_runtime": 15.7614, "eval_samples_per_second": 1522.706, "eval_steps_per_second": 4.758, "step": 4020 }, { "epoch": 1.68, "learning_rate": 1e-06, "loss": 0.3095, "step": 4040 }, { "epoch": 1.68, "eval_accuracy": 0.914125, "eval_loss": 0.263700008392334, "eval_runtime": 15.7288, "eval_samples_per_second": 1525.864, "eval_steps_per_second": 4.768, "step": 4040 }, { "epoch": 1.69, "learning_rate": 1e-06, "loss": 0.2637, "step": 4060 }, { "epoch": 1.69, "eval_accuracy": 0.9152083333333333, "eval_loss": 0.26146170496940613, "eval_runtime": 15.6613, "eval_samples_per_second": 1532.442, "eval_steps_per_second": 4.789, "step": 4060 }, { "epoch": 1.7, "learning_rate": 1e-06, "loss": 0.3105, "step": 4080 }, { "epoch": 1.7, "eval_accuracy": 0.9137083333333333, "eval_loss": 0.25999248027801514, "eval_runtime": 15.6145, "eval_samples_per_second": 1537.029, "eval_steps_per_second": 4.803, "step": 4080 }, { "epoch": 1.71, "learning_rate": 1e-06, "loss": 0.2665, "step": 4100 }, { "epoch": 1.71, "eval_accuracy": 0.912875, "eval_loss": 0.2612546980381012, "eval_runtime": 16.3137, "eval_samples_per_second": 1471.158, "eval_steps_per_second": 4.597, "step": 4100 }, { "epoch": 1.72, "learning_rate": 1e-06, "loss": 0.2582, "step": 4120 }, { "epoch": 1.72, "eval_accuracy": 0.9130416666666666, "eval_loss": 0.2623673975467682, "eval_runtime": 15.824, "eval_samples_per_second": 1516.686, "eval_steps_per_second": 4.74, "step": 4120 }, { "epoch": 1.73, "learning_rate": 1e-06, "loss": 0.2425, "step": 4140 }, { "epoch": 1.73, "eval_accuracy": 0.91425, "eval_loss": 0.25996851921081543, "eval_runtime": 16.2345, "eval_samples_per_second": 1478.333, "eval_steps_per_second": 4.62, "step": 4140 }, { "epoch": 1.73, "learning_rate": 1e-06, "loss": 0.2431, "step": 4160 }, { "epoch": 1.73, "eval_accuracy": 0.914, "eval_loss": 0.25984445214271545, "eval_runtime": 15.7995, "eval_samples_per_second": 1519.04, "eval_steps_per_second": 4.747, "step": 4160 }, { "epoch": 1.74, "learning_rate": 1e-06, "loss": 0.2423, "step": 4180 }, { "epoch": 1.74, "eval_accuracy": 0.9142916666666666, "eval_loss": 0.2597646713256836, "eval_runtime": 15.5687, "eval_samples_per_second": 1541.552, "eval_steps_per_second": 4.817, "step": 4180 }, { "epoch": 1.75, "learning_rate": 1e-06, "loss": 0.2594, "step": 4200 }, { "epoch": 1.75, "eval_accuracy": 0.9134583333333334, "eval_loss": 0.26066410541534424, "eval_runtime": 15.9904, "eval_samples_per_second": 1500.899, "eval_steps_per_second": 4.69, "step": 4200 }, { "epoch": 1.76, "learning_rate": 1e-06, "loss": 0.2647, "step": 4220 }, { "epoch": 1.76, "eval_accuracy": 0.914, "eval_loss": 0.25917357206344604, "eval_runtime": 15.5439, "eval_samples_per_second": 1544.015, "eval_steps_per_second": 4.825, "step": 4220 }, { "epoch": 1.77, "learning_rate": 1e-06, "loss": 0.282, "step": 4240 }, { "epoch": 1.77, "eval_accuracy": 0.9147083333333333, "eval_loss": 0.258999764919281, "eval_runtime": 15.9706, "eval_samples_per_second": 1502.761, "eval_steps_per_second": 4.696, "step": 4240 }, { "epoch": 1.77, "learning_rate": 1e-06, "loss": 0.246, "step": 4260 }, { "epoch": 1.77, "eval_accuracy": 0.9139166666666667, "eval_loss": 0.25847962498664856, "eval_runtime": 15.9552, "eval_samples_per_second": 1504.21, "eval_steps_per_second": 4.701, "step": 4260 }, { "epoch": 1.78, "learning_rate": 1e-06, "loss": 0.2558, "step": 4280 }, { "epoch": 1.78, "eval_accuracy": 0.9138333333333334, "eval_loss": 0.25929775834083557, "eval_runtime": 15.5972, "eval_samples_per_second": 1538.742, "eval_steps_per_second": 4.809, "step": 4280 }, { "epoch": 1.79, "learning_rate": 1e-06, "loss": 0.2249, "step": 4300 }, { "epoch": 1.79, "eval_accuracy": 0.9144166666666667, "eval_loss": 0.258432537317276, "eval_runtime": 15.9462, "eval_samples_per_second": 1505.062, "eval_steps_per_second": 4.703, "step": 4300 }, { "epoch": 1.8, "learning_rate": 1e-06, "loss": 0.2619, "step": 4320 }, { "epoch": 1.8, "eval_accuracy": 0.9135416666666667, "eval_loss": 0.25964200496673584, "eval_runtime": 15.7728, "eval_samples_per_second": 1521.607, "eval_steps_per_second": 4.755, "step": 4320 }, { "epoch": 1.81, "learning_rate": 1e-06, "loss": 0.2789, "step": 4340 }, { "epoch": 1.81, "eval_accuracy": 0.9133333333333333, "eval_loss": 0.26067379117012024, "eval_runtime": 16.1036, "eval_samples_per_second": 1490.346, "eval_steps_per_second": 4.657, "step": 4340 }, { "epoch": 1.82, "learning_rate": 1e-06, "loss": 0.2714, "step": 4360 }, { "epoch": 1.82, "eval_accuracy": 0.9144583333333334, "eval_loss": 0.25738218426704407, "eval_runtime": 15.9298, "eval_samples_per_second": 1506.611, "eval_steps_per_second": 4.708, "step": 4360 }, { "epoch": 1.82, "learning_rate": 1e-06, "loss": 0.2662, "step": 4380 }, { "epoch": 1.82, "eval_accuracy": 0.9156666666666666, "eval_loss": 0.2561679184436798, "eval_runtime": 16.0834, "eval_samples_per_second": 1492.218, "eval_steps_per_second": 4.663, "step": 4380 }, { "epoch": 1.83, "learning_rate": 1e-06, "loss": 0.2773, "step": 4400 }, { "epoch": 1.83, "eval_accuracy": 0.914, "eval_loss": 0.2571386694908142, "eval_runtime": 15.7636, "eval_samples_per_second": 1522.493, "eval_steps_per_second": 4.758, "step": 4400 }, { "epoch": 1.84, "learning_rate": 1e-06, "loss": 0.2478, "step": 4420 }, { "epoch": 1.84, "eval_accuracy": 0.9152916666666666, "eval_loss": 0.25946521759033203, "eval_runtime": 15.8816, "eval_samples_per_second": 1511.187, "eval_steps_per_second": 4.722, "step": 4420 }, { "epoch": 1.85, "learning_rate": 1e-06, "loss": 0.2517, "step": 4440 }, { "epoch": 1.85, "eval_accuracy": 0.9144583333333334, "eval_loss": 0.2570332884788513, "eval_runtime": 15.9553, "eval_samples_per_second": 1504.206, "eval_steps_per_second": 4.701, "step": 4440 }, { "epoch": 1.86, "learning_rate": 1e-06, "loss": 0.2539, "step": 4460 }, { "epoch": 1.86, "eval_accuracy": 0.9137916666666667, "eval_loss": 0.25786763429641724, "eval_runtime": 15.6911, "eval_samples_per_second": 1529.534, "eval_steps_per_second": 4.78, "step": 4460 }, { "epoch": 1.87, "learning_rate": 1e-06, "loss": 0.2635, "step": 4480 }, { "epoch": 1.87, "eval_accuracy": 0.9154166666666667, "eval_loss": 0.25599122047424316, "eval_runtime": 15.95, "eval_samples_per_second": 1504.705, "eval_steps_per_second": 4.702, "step": 4480 }, { "epoch": 1.88, "learning_rate": 1e-06, "loss": 0.3007, "step": 4500 }, { "epoch": 1.88, "eval_accuracy": 0.914625, "eval_loss": 0.25718793272972107, "eval_runtime": 16.0115, "eval_samples_per_second": 1498.923, "eval_steps_per_second": 4.684, "step": 4500 }, { "epoch": 1.88, "learning_rate": 1e-06, "loss": 0.2865, "step": 4520 }, { "epoch": 1.88, "eval_accuracy": 0.915125, "eval_loss": 0.25561970472335815, "eval_runtime": 15.2788, "eval_samples_per_second": 1570.809, "eval_steps_per_second": 4.909, "step": 4520 }, { "epoch": 1.89, "learning_rate": 1e-06, "loss": 0.2234, "step": 4540 }, { "epoch": 1.89, "eval_accuracy": 0.9139583333333333, "eval_loss": 0.2578865885734558, "eval_runtime": 15.8725, "eval_samples_per_second": 1512.049, "eval_steps_per_second": 4.725, "step": 4540 }, { "epoch": 1.9, "learning_rate": 1e-06, "loss": 0.2864, "step": 4560 }, { "epoch": 1.9, "eval_accuracy": 0.9142083333333333, "eval_loss": 0.25844380259513855, "eval_runtime": 15.9686, "eval_samples_per_second": 1502.947, "eval_steps_per_second": 4.697, "step": 4560 }, { "epoch": 1.91, "learning_rate": 1e-06, "loss": 0.229, "step": 4580 }, { "epoch": 1.91, "eval_accuracy": 0.9151666666666667, "eval_loss": 0.2548525631427765, "eval_runtime": 15.8099, "eval_samples_per_second": 1518.04, "eval_steps_per_second": 4.744, "step": 4580 }, { "epoch": 1.92, "learning_rate": 1e-06, "loss": 0.2584, "step": 4600 }, { "epoch": 1.92, "eval_accuracy": 0.9155, "eval_loss": 0.25401976704597473, "eval_runtime": 15.9608, "eval_samples_per_second": 1503.688, "eval_steps_per_second": 4.699, "step": 4600 }, { "epoch": 1.93, "learning_rate": 1e-06, "loss": 0.3175, "step": 4620 }, { "epoch": 1.93, "eval_accuracy": 0.9151666666666667, "eval_loss": 0.2564151883125305, "eval_runtime": 16.163, "eval_samples_per_second": 1484.877, "eval_steps_per_second": 4.64, "step": 4620 }, { "epoch": 1.93, "learning_rate": 1e-06, "loss": 0.3066, "step": 4640 }, { "epoch": 1.93, "eval_accuracy": 0.9160416666666666, "eval_loss": 0.2530251443386078, "eval_runtime": 15.8188, "eval_samples_per_second": 1517.184, "eval_steps_per_second": 4.741, "step": 4640 }, { "epoch": 1.94, "learning_rate": 1e-06, "loss": 0.2746, "step": 4660 }, { "epoch": 1.94, "eval_accuracy": 0.9144583333333334, "eval_loss": 0.25700142979621887, "eval_runtime": 15.9454, "eval_samples_per_second": 1505.134, "eval_steps_per_second": 4.704, "step": 4660 }, { "epoch": 1.95, "learning_rate": 1e-06, "loss": 0.2608, "step": 4680 }, { "epoch": 1.95, "eval_accuracy": 0.915625, "eval_loss": 0.25347229838371277, "eval_runtime": 15.8024, "eval_samples_per_second": 1518.76, "eval_steps_per_second": 4.746, "step": 4680 }, { "epoch": 1.96, "learning_rate": 1e-06, "loss": 0.233, "step": 4700 }, { "epoch": 1.96, "eval_accuracy": 0.9157083333333333, "eval_loss": 0.2529941201210022, "eval_runtime": 16.3246, "eval_samples_per_second": 1470.174, "eval_steps_per_second": 4.594, "step": 4700 }, { "epoch": 1.97, "learning_rate": 1e-06, "loss": 0.2556, "step": 4720 }, { "epoch": 1.97, "eval_accuracy": 0.9156666666666666, "eval_loss": 0.25271856784820557, "eval_runtime": 15.8607, "eval_samples_per_second": 1513.176, "eval_steps_per_second": 4.729, "step": 4720 }, { "epoch": 1.98, "learning_rate": 1e-06, "loss": 0.253, "step": 4740 }, { "epoch": 1.98, "eval_accuracy": 0.9163333333333333, "eval_loss": 0.251432865858078, "eval_runtime": 16.1859, "eval_samples_per_second": 1482.768, "eval_steps_per_second": 4.634, "step": 4740 }, { "epoch": 1.98, "learning_rate": 1e-06, "loss": 0.212, "step": 4760 }, { "epoch": 1.98, "eval_accuracy": 0.9162916666666666, "eval_loss": 0.25113439559936523, "eval_runtime": 15.5374, "eval_samples_per_second": 1544.657, "eval_steps_per_second": 4.827, "step": 4760 }, { "epoch": 1.99, "learning_rate": 1e-06, "loss": 0.2573, "step": 4780 }, { "epoch": 1.99, "eval_accuracy": 0.9160833333333334, "eval_loss": 0.25215280055999756, "eval_runtime": 16.3238, "eval_samples_per_second": 1470.242, "eval_steps_per_second": 4.595, "step": 4780 }, { "epoch": 2.0, "learning_rate": 1e-06, "loss": 0.2533, "step": 4800 }, { "epoch": 2.0, "eval_accuracy": 0.9161666666666667, "eval_loss": 0.25234469771385193, "eval_runtime": 15.7714, "eval_samples_per_second": 1521.744, "eval_steps_per_second": 4.755, "step": 4800 }, { "epoch": 2.01, "learning_rate": 1e-06, "loss": 0.2638, "step": 4820 }, { "epoch": 2.01, "eval_accuracy": 0.9162916666666666, "eval_loss": 0.25334304571151733, "eval_runtime": 16.0308, "eval_samples_per_second": 1497.122, "eval_steps_per_second": 4.679, "step": 4820 }, { "epoch": 2.02, "learning_rate": 1e-06, "loss": 0.2763, "step": 4840 }, { "epoch": 2.02, "eval_accuracy": 0.9144166666666667, "eval_loss": 0.25832322239875793, "eval_runtime": 15.8766, "eval_samples_per_second": 1511.661, "eval_steps_per_second": 4.724, "step": 4840 }, { "epoch": 2.02, "learning_rate": 1e-06, "loss": 0.2503, "step": 4860 }, { "epoch": 2.02, "eval_accuracy": 0.9145416666666667, "eval_loss": 0.2566870450973511, "eval_runtime": 15.4218, "eval_samples_per_second": 1556.237, "eval_steps_per_second": 4.863, "step": 4860 }, { "epoch": 2.03, "learning_rate": 1e-06, "loss": 0.2439, "step": 4880 }, { "epoch": 2.03, "eval_accuracy": 0.916, "eval_loss": 0.2518457770347595, "eval_runtime": 16.4312, "eval_samples_per_second": 1460.639, "eval_steps_per_second": 4.564, "step": 4880 }, { "epoch": 2.04, "learning_rate": 1e-06, "loss": 0.2539, "step": 4900 }, { "epoch": 2.04, "eval_accuracy": 0.9162083333333333, "eval_loss": 0.25036895275115967, "eval_runtime": 15.7834, "eval_samples_per_second": 1520.583, "eval_steps_per_second": 4.752, "step": 4900 }, { "epoch": 2.05, "learning_rate": 1e-06, "loss": 0.2508, "step": 4920 }, { "epoch": 2.05, "eval_accuracy": 0.9161666666666667, "eval_loss": 0.25022852420806885, "eval_runtime": 15.2801, "eval_samples_per_second": 1570.674, "eval_steps_per_second": 4.908, "step": 4920 }, { "epoch": 2.06, "learning_rate": 1e-06, "loss": 0.2793, "step": 4940 }, { "epoch": 2.06, "eval_accuracy": 0.917125, "eval_loss": 0.25100791454315186, "eval_runtime": 15.9757, "eval_samples_per_second": 1502.281, "eval_steps_per_second": 4.695, "step": 4940 }, { "epoch": 2.07, "learning_rate": 1e-06, "loss": 0.2369, "step": 4960 }, { "epoch": 2.07, "eval_accuracy": 0.9169166666666667, "eval_loss": 0.25172725319862366, "eval_runtime": 15.9075, "eval_samples_per_second": 1508.718, "eval_steps_per_second": 4.715, "step": 4960 }, { "epoch": 2.08, "learning_rate": 1e-06, "loss": 0.2337, "step": 4980 }, { "epoch": 2.08, "eval_accuracy": 0.9175833333333333, "eval_loss": 0.24899640679359436, "eval_runtime": 15.735, "eval_samples_per_second": 1525.264, "eval_steps_per_second": 4.766, "step": 4980 }, { "epoch": 2.08, "learning_rate": 1e-06, "loss": 0.2683, "step": 5000 }, { "epoch": 2.08, "eval_accuracy": 0.9167916666666667, "eval_loss": 0.24926243722438812, "eval_runtime": 16.2679, "eval_samples_per_second": 1475.302, "eval_steps_per_second": 4.61, "step": 5000 }, { "epoch": 2.09, "learning_rate": 1e-06, "loss": 0.2288, "step": 5020 }, { "epoch": 2.09, "eval_accuracy": 0.9171666666666667, "eval_loss": 0.24971893429756165, "eval_runtime": 15.7076, "eval_samples_per_second": 1527.924, "eval_steps_per_second": 4.775, "step": 5020 }, { "epoch": 2.1, "learning_rate": 1e-06, "loss": 0.209, "step": 5040 }, { "epoch": 2.1, "eval_accuracy": 0.9169583333333333, "eval_loss": 0.24874989688396454, "eval_runtime": 15.9175, "eval_samples_per_second": 1507.777, "eval_steps_per_second": 4.712, "step": 5040 }, { "epoch": 2.11, "learning_rate": 1e-06, "loss": 0.2272, "step": 5060 }, { "epoch": 2.11, "eval_accuracy": 0.9155416666666667, "eval_loss": 0.2508015036582947, "eval_runtime": 15.4995, "eval_samples_per_second": 1548.44, "eval_steps_per_second": 4.839, "step": 5060 }, { "epoch": 2.12, "learning_rate": 1e-06, "loss": 0.2537, "step": 5080 }, { "epoch": 2.12, "eval_accuracy": 0.9165833333333333, "eval_loss": 0.250166654586792, "eval_runtime": 16.1014, "eval_samples_per_second": 1490.551, "eval_steps_per_second": 4.658, "step": 5080 }, { "epoch": 2.12, "learning_rate": 1e-06, "loss": 0.3171, "step": 5100 }, { "epoch": 2.12, "eval_accuracy": 0.9159166666666667, "eval_loss": 0.25174057483673096, "eval_runtime": 15.868, "eval_samples_per_second": 1512.48, "eval_steps_per_second": 4.727, "step": 5100 }, { "epoch": 2.13, "learning_rate": 1e-06, "loss": 0.1955, "step": 5120 }, { "epoch": 2.13, "eval_accuracy": 0.9165833333333333, "eval_loss": 0.2481849193572998, "eval_runtime": 15.8739, "eval_samples_per_second": 1511.915, "eval_steps_per_second": 4.725, "step": 5120 }, { "epoch": 2.14, "learning_rate": 1e-06, "loss": 0.2618, "step": 5140 }, { "epoch": 2.14, "eval_accuracy": 0.9178333333333333, "eval_loss": 0.24656133353710175, "eval_runtime": 16.2181, "eval_samples_per_second": 1479.83, "eval_steps_per_second": 4.624, "step": 5140 }, { "epoch": 2.15, "learning_rate": 1e-06, "loss": 0.2334, "step": 5160 }, { "epoch": 2.15, "eval_accuracy": 0.9174583333333334, "eval_loss": 0.24714942276477814, "eval_runtime": 15.6119, "eval_samples_per_second": 1537.293, "eval_steps_per_second": 4.804, "step": 5160 }, { "epoch": 2.16, "learning_rate": 1e-06, "loss": 0.2253, "step": 5180 }, { "epoch": 2.16, "eval_accuracy": 0.917125, "eval_loss": 0.2473263293504715, "eval_runtime": 16.4346, "eval_samples_per_second": 1460.331, "eval_steps_per_second": 4.564, "step": 5180 }, { "epoch": 2.17, "learning_rate": 1e-06, "loss": 0.2735, "step": 5200 }, { "epoch": 2.17, "eval_accuracy": 0.9169583333333333, "eval_loss": 0.24832595884799957, "eval_runtime": 15.9352, "eval_samples_per_second": 1506.103, "eval_steps_per_second": 4.707, "step": 5200 }, { "epoch": 2.17, "learning_rate": 1e-06, "loss": 0.2119, "step": 5220 }, { "epoch": 2.17, "eval_accuracy": 0.9175, "eval_loss": 0.24637369811534882, "eval_runtime": 15.8502, "eval_samples_per_second": 1514.172, "eval_steps_per_second": 4.732, "step": 5220 }, { "epoch": 2.18, "learning_rate": 1e-06, "loss": 0.2338, "step": 5240 }, { "epoch": 2.18, "eval_accuracy": 0.9175416666666667, "eval_loss": 0.24698366224765778, "eval_runtime": 15.7545, "eval_samples_per_second": 1523.378, "eval_steps_per_second": 4.761, "step": 5240 }, { "epoch": 2.19, "learning_rate": 1e-06, "loss": 0.2233, "step": 5260 }, { "epoch": 2.19, "eval_accuracy": 0.9155, "eval_loss": 0.24797461926937103, "eval_runtime": 15.7338, "eval_samples_per_second": 1525.377, "eval_steps_per_second": 4.767, "step": 5260 }, { "epoch": 2.2, "learning_rate": 1e-06, "loss": 0.243, "step": 5280 }, { "epoch": 2.2, "eval_accuracy": 0.916375, "eval_loss": 0.24534855782985687, "eval_runtime": 15.9549, "eval_samples_per_second": 1504.244, "eval_steps_per_second": 4.701, "step": 5280 }, { "epoch": 2.21, "learning_rate": 1e-06, "loss": 0.2476, "step": 5300 }, { "epoch": 2.21, "eval_accuracy": 0.917125, "eval_loss": 0.24429555237293243, "eval_runtime": 16.2563, "eval_samples_per_second": 1476.355, "eval_steps_per_second": 4.614, "step": 5300 }, { "epoch": 2.22, "learning_rate": 1e-06, "loss": 0.2481, "step": 5320 }, { "epoch": 2.22, "eval_accuracy": 0.9174166666666667, "eval_loss": 0.24369333684444427, "eval_runtime": 15.6541, "eval_samples_per_second": 1533.143, "eval_steps_per_second": 4.791, "step": 5320 }, { "epoch": 2.23, "learning_rate": 1e-06, "loss": 0.2505, "step": 5340 }, { "epoch": 2.23, "eval_accuracy": 0.9179583333333333, "eval_loss": 0.24328631162643433, "eval_runtime": 15.9128, "eval_samples_per_second": 1508.223, "eval_steps_per_second": 4.713, "step": 5340 }, { "epoch": 2.23, "learning_rate": 1e-06, "loss": 0.2591, "step": 5360 }, { "epoch": 2.23, "eval_accuracy": 0.9164583333333334, "eval_loss": 0.24514025449752808, "eval_runtime": 16.021, "eval_samples_per_second": 1498.036, "eval_steps_per_second": 4.681, "step": 5360 }, { "epoch": 2.24, "learning_rate": 1e-06, "loss": 0.2342, "step": 5380 }, { "epoch": 2.24, "eval_accuracy": 0.9154166666666667, "eval_loss": 0.2472737431526184, "eval_runtime": 15.688, "eval_samples_per_second": 1529.832, "eval_steps_per_second": 4.781, "step": 5380 }, { "epoch": 2.25, "learning_rate": 1e-06, "loss": 0.2405, "step": 5400 }, { "epoch": 2.25, "eval_accuracy": 0.9179166666666667, "eval_loss": 0.24237360060214996, "eval_runtime": 16.4849, "eval_samples_per_second": 1455.882, "eval_steps_per_second": 4.55, "step": 5400 }, { "epoch": 2.26, "learning_rate": 1e-06, "loss": 0.2803, "step": 5420 }, { "epoch": 2.26, "eval_accuracy": 0.9185416666666667, "eval_loss": 0.2412734031677246, "eval_runtime": 15.7027, "eval_samples_per_second": 1528.403, "eval_steps_per_second": 4.776, "step": 5420 }, { "epoch": 2.27, "learning_rate": 1e-06, "loss": 0.2816, "step": 5440 }, { "epoch": 2.27, "eval_accuracy": 0.9178333333333333, "eval_loss": 0.2425076812505722, "eval_runtime": 15.8848, "eval_samples_per_second": 1510.874, "eval_steps_per_second": 4.721, "step": 5440 }, { "epoch": 2.27, "learning_rate": 1e-06, "loss": 0.2489, "step": 5460 }, { "epoch": 2.27, "eval_accuracy": 0.9172916666666666, "eval_loss": 0.24504542350769043, "eval_runtime": 16.3712, "eval_samples_per_second": 1465.986, "eval_steps_per_second": 4.581, "step": 5460 }, { "epoch": 2.28, "learning_rate": 1e-06, "loss": 0.2346, "step": 5480 }, { "epoch": 2.28, "eval_accuracy": 0.9180833333333334, "eval_loss": 0.2423253357410431, "eval_runtime": 15.5334, "eval_samples_per_second": 1545.062, "eval_steps_per_second": 4.828, "step": 5480 }, { "epoch": 2.29, "learning_rate": 1e-06, "loss": 0.251, "step": 5500 }, { "epoch": 2.29, "eval_accuracy": 0.9185, "eval_loss": 0.24269212782382965, "eval_runtime": 16.0617, "eval_samples_per_second": 1494.234, "eval_steps_per_second": 4.669, "step": 5500 }, { "epoch": 2.3, "learning_rate": 1e-06, "loss": 0.2574, "step": 5520 }, { "epoch": 2.3, "eval_accuracy": 0.9187083333333333, "eval_loss": 0.2401323914527893, "eval_runtime": 15.7783, "eval_samples_per_second": 1521.078, "eval_steps_per_second": 4.753, "step": 5520 }, { "epoch": 2.31, "learning_rate": 1e-06, "loss": 0.2227, "step": 5540 }, { "epoch": 2.31, "eval_accuracy": 0.9179583333333333, "eval_loss": 0.24057930707931519, "eval_runtime": 16.3047, "eval_samples_per_second": 1471.971, "eval_steps_per_second": 4.6, "step": 5540 }, { "epoch": 2.32, "learning_rate": 1e-06, "loss": 0.2362, "step": 5560 }, { "epoch": 2.32, "eval_accuracy": 0.9185833333333333, "eval_loss": 0.23938481509685516, "eval_runtime": 15.6702, "eval_samples_per_second": 1531.572, "eval_steps_per_second": 4.786, "step": 5560 }, { "epoch": 2.33, "learning_rate": 1e-06, "loss": 0.2346, "step": 5580 }, { "epoch": 2.33, "eval_accuracy": 0.918625, "eval_loss": 0.23980508744716644, "eval_runtime": 15.999, "eval_samples_per_second": 1500.095, "eval_steps_per_second": 4.688, "step": 5580 }, { "epoch": 2.33, "learning_rate": 1e-06, "loss": 0.2308, "step": 5600 }, { "epoch": 2.33, "eval_accuracy": 0.9172083333333333, "eval_loss": 0.24078369140625, "eval_runtime": 16.2118, "eval_samples_per_second": 1480.402, "eval_steps_per_second": 4.626, "step": 5600 }, { "epoch": 2.34, "learning_rate": 1e-06, "loss": 0.2668, "step": 5620 }, { "epoch": 2.34, "eval_accuracy": 0.9185833333333333, "eval_loss": 0.23922114074230194, "eval_runtime": 15.9955, "eval_samples_per_second": 1500.419, "eval_steps_per_second": 4.689, "step": 5620 }, { "epoch": 2.35, "learning_rate": 1e-06, "loss": 0.2363, "step": 5640 }, { "epoch": 2.35, "eval_accuracy": 0.9182083333333333, "eval_loss": 0.23886892199516296, "eval_runtime": 15.6875, "eval_samples_per_second": 1529.876, "eval_steps_per_second": 4.781, "step": 5640 }, { "epoch": 2.36, "learning_rate": 1e-06, "loss": 0.2152, "step": 5660 }, { "epoch": 2.36, "eval_accuracy": 0.918125, "eval_loss": 0.23896987736225128, "eval_runtime": 15.4793, "eval_samples_per_second": 1550.456, "eval_steps_per_second": 4.845, "step": 5660 }, { "epoch": 2.37, "learning_rate": 1e-06, "loss": 0.2759, "step": 5680 }, { "epoch": 2.37, "eval_accuracy": 0.9181666666666667, "eval_loss": 0.23786494135856628, "eval_runtime": 15.5978, "eval_samples_per_second": 1538.675, "eval_steps_per_second": 4.808, "step": 5680 }, { "epoch": 2.38, "learning_rate": 1e-06, "loss": 0.2274, "step": 5700 }, { "epoch": 2.38, "eval_accuracy": 0.9188333333333333, "eval_loss": 0.23672978579998016, "eval_runtime": 15.6992, "eval_samples_per_second": 1528.744, "eval_steps_per_second": 4.777, "step": 5700 }, { "epoch": 2.38, "learning_rate": 1e-06, "loss": 0.239, "step": 5720 }, { "epoch": 2.38, "eval_accuracy": 0.9190833333333334, "eval_loss": 0.23753681778907776, "eval_runtime": 15.6792, "eval_samples_per_second": 1530.692, "eval_steps_per_second": 4.783, "step": 5720 }, { "epoch": 2.39, "learning_rate": 1e-06, "loss": 0.2195, "step": 5740 }, { "epoch": 2.39, "eval_accuracy": 0.9187083333333333, "eval_loss": 0.2378937304019928, "eval_runtime": 16.0105, "eval_samples_per_second": 1499.013, "eval_steps_per_second": 4.684, "step": 5740 }, { "epoch": 2.4, "learning_rate": 1e-06, "loss": 0.2454, "step": 5760 }, { "epoch": 2.4, "eval_accuracy": 0.9187916666666667, "eval_loss": 0.23726117610931396, "eval_runtime": 15.4532, "eval_samples_per_second": 1553.077, "eval_steps_per_second": 4.853, "step": 5760 }, { "epoch": 2.41, "learning_rate": 1e-06, "loss": 0.2376, "step": 5780 }, { "epoch": 2.41, "eval_accuracy": 0.9192916666666666, "eval_loss": 0.23683802783489227, "eval_runtime": 16.4115, "eval_samples_per_second": 1462.385, "eval_steps_per_second": 4.57, "step": 5780 }, { "epoch": 2.42, "learning_rate": 1e-06, "loss": 0.253, "step": 5800 }, { "epoch": 2.42, "eval_accuracy": 0.9193333333333333, "eval_loss": 0.23695851862430573, "eval_runtime": 15.7863, "eval_samples_per_second": 1520.307, "eval_steps_per_second": 4.751, "step": 5800 }, { "epoch": 2.42, "learning_rate": 1e-06, "loss": 0.2365, "step": 5820 }, { "epoch": 2.42, "eval_accuracy": 0.919375, "eval_loss": 0.23560389876365662, "eval_runtime": 16.216, "eval_samples_per_second": 1480.022, "eval_steps_per_second": 4.625, "step": 5820 }, { "epoch": 2.43, "learning_rate": 1e-06, "loss": 0.2025, "step": 5840 }, { "epoch": 2.43, "eval_accuracy": 0.9199583333333333, "eval_loss": 0.2355504035949707, "eval_runtime": 15.4916, "eval_samples_per_second": 1549.222, "eval_steps_per_second": 4.841, "step": 5840 }, { "epoch": 2.44, "learning_rate": 1e-06, "loss": 0.2115, "step": 5860 }, { "epoch": 2.44, "eval_accuracy": 0.9187083333333333, "eval_loss": 0.23711217939853668, "eval_runtime": 15.6181, "eval_samples_per_second": 1536.68, "eval_steps_per_second": 4.802, "step": 5860 }, { "epoch": 2.45, "learning_rate": 1e-06, "loss": 0.191, "step": 5880 }, { "epoch": 2.45, "eval_accuracy": 0.9195, "eval_loss": 0.2373773753643036, "eval_runtime": 15.8036, "eval_samples_per_second": 1518.638, "eval_steps_per_second": 4.746, "step": 5880 }, { "epoch": 2.46, "learning_rate": 1e-06, "loss": 0.2228, "step": 5900 }, { "epoch": 2.46, "eval_accuracy": 0.9200416666666666, "eval_loss": 0.23531056940555573, "eval_runtime": 15.9661, "eval_samples_per_second": 1503.188, "eval_steps_per_second": 4.697, "step": 5900 }, { "epoch": 2.47, "learning_rate": 1e-06, "loss": 0.2151, "step": 5920 }, { "epoch": 2.47, "eval_accuracy": 0.919375, "eval_loss": 0.23577865958213806, "eval_runtime": 15.3031, "eval_samples_per_second": 1568.307, "eval_steps_per_second": 4.901, "step": 5920 }, { "epoch": 2.48, "learning_rate": 1e-06, "loss": 0.2351, "step": 5940 }, { "epoch": 2.48, "eval_accuracy": 0.9205, "eval_loss": 0.23470845818519592, "eval_runtime": 15.6649, "eval_samples_per_second": 1532.088, "eval_steps_per_second": 4.788, "step": 5940 }, { "epoch": 2.48, "learning_rate": 1e-06, "loss": 0.2535, "step": 5960 }, { "epoch": 2.48, "eval_accuracy": 0.9204166666666667, "eval_loss": 0.23469573259353638, "eval_runtime": 15.8743, "eval_samples_per_second": 1511.875, "eval_steps_per_second": 4.725, "step": 5960 }, { "epoch": 2.49, "learning_rate": 1e-06, "loss": 0.2646, "step": 5980 }, { "epoch": 2.49, "eval_accuracy": 0.9199166666666667, "eval_loss": 0.23572835326194763, "eval_runtime": 15.9499, "eval_samples_per_second": 1504.715, "eval_steps_per_second": 4.702, "step": 5980 }, { "epoch": 2.5, "learning_rate": 1e-06, "loss": 0.2495, "step": 6000 }, { "epoch": 2.5, "eval_accuracy": 0.9185, "eval_loss": 0.237389475107193, "eval_runtime": 15.8761, "eval_samples_per_second": 1511.71, "eval_steps_per_second": 4.724, "step": 6000 }, { "epoch": 2.51, "learning_rate": 1e-06, "loss": 0.2383, "step": 6020 }, { "epoch": 2.51, "eval_accuracy": 0.91875, "eval_loss": 0.23722399771213531, "eval_runtime": 16.1535, "eval_samples_per_second": 1485.747, "eval_steps_per_second": 4.643, "step": 6020 }, { "epoch": 2.52, "learning_rate": 1e-06, "loss": 0.2103, "step": 6040 }, { "epoch": 2.52, "eval_accuracy": 0.9197916666666667, "eval_loss": 0.23570208251476288, "eval_runtime": 15.6602, "eval_samples_per_second": 1532.549, "eval_steps_per_second": 4.789, "step": 6040 }, { "epoch": 2.52, "learning_rate": 1e-06, "loss": 0.2667, "step": 6060 }, { "epoch": 2.52, "eval_accuracy": 0.9205, "eval_loss": 0.2345227301120758, "eval_runtime": 15.9993, "eval_samples_per_second": 1500.065, "eval_steps_per_second": 4.688, "step": 6060 }, { "epoch": 2.53, "learning_rate": 1e-06, "loss": 0.2229, "step": 6080 }, { "epoch": 2.53, "eval_accuracy": 0.9203333333333333, "eval_loss": 0.23675419390201569, "eval_runtime": 16.0513, "eval_samples_per_second": 1495.205, "eval_steps_per_second": 4.673, "step": 6080 }, { "epoch": 2.54, "learning_rate": 1e-06, "loss": 0.2794, "step": 6100 }, { "epoch": 2.54, "eval_accuracy": 0.9181666666666667, "eval_loss": 0.2398298680782318, "eval_runtime": 15.8057, "eval_samples_per_second": 1518.443, "eval_steps_per_second": 4.745, "step": 6100 }, { "epoch": 2.55, "learning_rate": 1e-06, "loss": 0.2191, "step": 6120 }, { "epoch": 2.55, "eval_accuracy": 0.919125, "eval_loss": 0.23680013418197632, "eval_runtime": 16.0686, "eval_samples_per_second": 1493.6, "eval_steps_per_second": 4.668, "step": 6120 }, { "epoch": 2.56, "learning_rate": 1e-06, "loss": 0.263, "step": 6140 }, { "epoch": 2.56, "eval_accuracy": 0.9210833333333334, "eval_loss": 0.23318885266780853, "eval_runtime": 15.8077, "eval_samples_per_second": 1518.25, "eval_steps_per_second": 4.745, "step": 6140 }, { "epoch": 2.57, "learning_rate": 1e-06, "loss": 0.2008, "step": 6160 }, { "epoch": 2.57, "eval_accuracy": 0.9214166666666667, "eval_loss": 0.2328587919473648, "eval_runtime": 15.7426, "eval_samples_per_second": 1524.526, "eval_steps_per_second": 4.764, "step": 6160 }, { "epoch": 2.58, "learning_rate": 1e-06, "loss": 0.2226, "step": 6180 }, { "epoch": 2.58, "eval_accuracy": 0.9210416666666666, "eval_loss": 0.23366689682006836, "eval_runtime": 15.6914, "eval_samples_per_second": 1529.497, "eval_steps_per_second": 4.78, "step": 6180 }, { "epoch": 2.58, "learning_rate": 1e-06, "loss": 0.2261, "step": 6200 }, { "epoch": 2.58, "eval_accuracy": 0.9207083333333334, "eval_loss": 0.23435989022254944, "eval_runtime": 15.7413, "eval_samples_per_second": 1524.655, "eval_steps_per_second": 4.765, "step": 6200 }, { "epoch": 2.59, "learning_rate": 1e-06, "loss": 0.2481, "step": 6220 }, { "epoch": 2.59, "eval_accuracy": 0.9221666666666667, "eval_loss": 0.23215261101722717, "eval_runtime": 15.7855, "eval_samples_per_second": 1520.387, "eval_steps_per_second": 4.751, "step": 6220 }, { "epoch": 2.6, "learning_rate": 1e-06, "loss": 0.2235, "step": 6240 }, { "epoch": 2.6, "eval_accuracy": 0.9214583333333334, "eval_loss": 0.23404672741889954, "eval_runtime": 16.0667, "eval_samples_per_second": 1493.777, "eval_steps_per_second": 4.668, "step": 6240 }, { "epoch": 2.61, "learning_rate": 1e-06, "loss": 0.2821, "step": 6260 }, { "epoch": 2.61, "eval_accuracy": 0.9206666666666666, "eval_loss": 0.2340671271085739, "eval_runtime": 16.2416, "eval_samples_per_second": 1477.689, "eval_steps_per_second": 4.618, "step": 6260 }, { "epoch": 2.62, "learning_rate": 1e-06, "loss": 0.1988, "step": 6280 }, { "epoch": 2.62, "eval_accuracy": 0.9215, "eval_loss": 0.2360154539346695, "eval_runtime": 15.5902, "eval_samples_per_second": 1539.425, "eval_steps_per_second": 4.811, "step": 6280 }, { "epoch": 2.62, "learning_rate": 1e-06, "loss": 0.2407, "step": 6300 }, { "epoch": 2.62, "eval_accuracy": 0.9207083333333334, "eval_loss": 0.23405057191848755, "eval_runtime": 16.1083, "eval_samples_per_second": 1489.915, "eval_steps_per_second": 4.656, "step": 6300 }, { "epoch": 2.63, "learning_rate": 1e-06, "loss": 0.2138, "step": 6320 }, { "epoch": 2.63, "eval_accuracy": 0.9222916666666666, "eval_loss": 0.23155492544174194, "eval_runtime": 15.9219, "eval_samples_per_second": 1507.362, "eval_steps_per_second": 4.711, "step": 6320 }, { "epoch": 2.64, "learning_rate": 1e-06, "loss": 0.2171, "step": 6340 }, { "epoch": 2.64, "eval_accuracy": 0.9215833333333333, "eval_loss": 0.23096613585948944, "eval_runtime": 15.2563, "eval_samples_per_second": 1573.117, "eval_steps_per_second": 4.916, "step": 6340 }, { "epoch": 2.65, "learning_rate": 1e-06, "loss": 0.2302, "step": 6360 }, { "epoch": 2.65, "eval_accuracy": 0.9212083333333333, "eval_loss": 0.23060353100299835, "eval_runtime": 16.0532, "eval_samples_per_second": 1495.028, "eval_steps_per_second": 4.672, "step": 6360 }, { "epoch": 2.66, "learning_rate": 1e-06, "loss": 0.2394, "step": 6380 }, { "epoch": 2.66, "eval_accuracy": 0.9209166666666667, "eval_loss": 0.23088908195495605, "eval_runtime": 15.9256, "eval_samples_per_second": 1507.003, "eval_steps_per_second": 4.709, "step": 6380 }, { "epoch": 2.67, "learning_rate": 1e-06, "loss": 0.2321, "step": 6400 }, { "epoch": 2.67, "eval_accuracy": 0.9219166666666667, "eval_loss": 0.22929558157920837, "eval_runtime": 15.7096, "eval_samples_per_second": 1527.727, "eval_steps_per_second": 4.774, "step": 6400 }, { "epoch": 2.67, "learning_rate": 1e-06, "loss": 0.2011, "step": 6420 }, { "epoch": 2.67, "eval_accuracy": 0.92225, "eval_loss": 0.2290237993001938, "eval_runtime": 15.5552, "eval_samples_per_second": 1542.895, "eval_steps_per_second": 4.822, "step": 6420 }, { "epoch": 2.68, "learning_rate": 1e-06, "loss": 0.2235, "step": 6440 }, { "epoch": 2.68, "eval_accuracy": 0.9219583333333333, "eval_loss": 0.23050343990325928, "eval_runtime": 16.0981, "eval_samples_per_second": 1490.855, "eval_steps_per_second": 4.659, "step": 6440 }, { "epoch": 2.69, "learning_rate": 1e-06, "loss": 0.2638, "step": 6460 }, { "epoch": 2.69, "eval_accuracy": 0.9218333333333333, "eval_loss": 0.23081812262535095, "eval_runtime": 16.0097, "eval_samples_per_second": 1499.096, "eval_steps_per_second": 4.685, "step": 6460 }, { "epoch": 2.7, "learning_rate": 1e-06, "loss": 0.2767, "step": 6480 }, { "epoch": 2.7, "eval_accuracy": 0.92225, "eval_loss": 0.23322133719921112, "eval_runtime": 16.0242, "eval_samples_per_second": 1497.736, "eval_steps_per_second": 4.68, "step": 6480 }, { "epoch": 2.71, "learning_rate": 1e-06, "loss": 0.2332, "step": 6500 }, { "epoch": 2.71, "eval_accuracy": 0.9226666666666666, "eval_loss": 0.22899757325649261, "eval_runtime": 16.6235, "eval_samples_per_second": 1443.741, "eval_steps_per_second": 4.512, "step": 6500 }, { "epoch": 2.72, "learning_rate": 1e-06, "loss": 0.2104, "step": 6520 }, { "epoch": 2.72, "eval_accuracy": 0.9220833333333334, "eval_loss": 0.22903680801391602, "eval_runtime": 15.8598, "eval_samples_per_second": 1513.257, "eval_steps_per_second": 4.729, "step": 6520 }, { "epoch": 2.73, "learning_rate": 1e-06, "loss": 0.2148, "step": 6540 }, { "epoch": 2.73, "eval_accuracy": 0.9219166666666667, "eval_loss": 0.2291133552789688, "eval_runtime": 16.0042, "eval_samples_per_second": 1499.604, "eval_steps_per_second": 4.686, "step": 6540 }, { "epoch": 2.73, "learning_rate": 1e-06, "loss": 0.2708, "step": 6560 }, { "epoch": 2.73, "eval_accuracy": 0.922, "eval_loss": 0.22891123592853546, "eval_runtime": 15.8338, "eval_samples_per_second": 1515.741, "eval_steps_per_second": 4.737, "step": 6560 }, { "epoch": 2.74, "learning_rate": 1e-06, "loss": 0.2155, "step": 6580 }, { "epoch": 2.74, "eval_accuracy": 0.9213333333333333, "eval_loss": 0.2305193543434143, "eval_runtime": 15.7359, "eval_samples_per_second": 1525.179, "eval_steps_per_second": 4.766, "step": 6580 }, { "epoch": 2.75, "learning_rate": 1e-06, "loss": 0.2149, "step": 6600 }, { "epoch": 2.75, "eval_accuracy": 0.921375, "eval_loss": 0.22942766547203064, "eval_runtime": 15.983, "eval_samples_per_second": 1501.595, "eval_steps_per_second": 4.692, "step": 6600 }, { "epoch": 2.76, "learning_rate": 1e-06, "loss": 0.2396, "step": 6620 }, { "epoch": 2.76, "eval_accuracy": 0.9215, "eval_loss": 0.23061256110668182, "eval_runtime": 15.913, "eval_samples_per_second": 1508.203, "eval_steps_per_second": 4.713, "step": 6620 }, { "epoch": 2.77, "learning_rate": 1e-06, "loss": 0.2572, "step": 6640 }, { "epoch": 2.77, "eval_accuracy": 0.9215, "eval_loss": 0.23231017589569092, "eval_runtime": 15.4396, "eval_samples_per_second": 1554.44, "eval_steps_per_second": 4.858, "step": 6640 }, { "epoch": 2.77, "learning_rate": 1e-06, "loss": 0.2536, "step": 6660 }, { "epoch": 2.77, "eval_accuracy": 0.9224166666666667, "eval_loss": 0.2280372679233551, "eval_runtime": 15.4345, "eval_samples_per_second": 1554.958, "eval_steps_per_second": 4.859, "step": 6660 }, { "epoch": 2.78, "learning_rate": 1e-06, "loss": 0.1892, "step": 6680 }, { "epoch": 2.78, "eval_accuracy": 0.9220416666666666, "eval_loss": 0.22818611562252045, "eval_runtime": 16.0452, "eval_samples_per_second": 1495.777, "eval_steps_per_second": 4.674, "step": 6680 }, { "epoch": 2.79, "learning_rate": 1e-06, "loss": 0.2689, "step": 6700 }, { "epoch": 2.79, "eval_accuracy": 0.92125, "eval_loss": 0.229068323969841, "eval_runtime": 15.6442, "eval_samples_per_second": 1534.111, "eval_steps_per_second": 4.794, "step": 6700 }, { "epoch": 2.8, "learning_rate": 1e-06, "loss": 0.3018, "step": 6720 }, { "epoch": 2.8, "eval_accuracy": 0.9225, "eval_loss": 0.22712692618370056, "eval_runtime": 15.8562, "eval_samples_per_second": 1513.605, "eval_steps_per_second": 4.73, "step": 6720 }, { "epoch": 2.81, "learning_rate": 1e-06, "loss": 0.1971, "step": 6740 }, { "epoch": 2.81, "eval_accuracy": 0.9229583333333333, "eval_loss": 0.22696258127689362, "eval_runtime": 15.7829, "eval_samples_per_second": 1520.631, "eval_steps_per_second": 4.752, "step": 6740 }, { "epoch": 2.82, "learning_rate": 1e-06, "loss": 0.2276, "step": 6760 }, { "epoch": 2.82, "eval_accuracy": 0.923375, "eval_loss": 0.22678209841251373, "eval_runtime": 15.81, "eval_samples_per_second": 1518.027, "eval_steps_per_second": 4.744, "step": 6760 }, { "epoch": 2.83, "learning_rate": 1e-06, "loss": 0.2141, "step": 6780 }, { "epoch": 2.83, "eval_accuracy": 0.923, "eval_loss": 0.2268705815076828, "eval_runtime": 16.2609, "eval_samples_per_second": 1475.93, "eval_steps_per_second": 4.612, "step": 6780 }, { "epoch": 2.83, "learning_rate": 1e-06, "loss": 0.2376, "step": 6800 }, { "epoch": 2.83, "eval_accuracy": 0.9234583333333334, "eval_loss": 0.22707809507846832, "eval_runtime": 15.6779, "eval_samples_per_second": 1530.813, "eval_steps_per_second": 4.784, "step": 6800 }, { "epoch": 2.84, "learning_rate": 1e-06, "loss": 0.2237, "step": 6820 }, { "epoch": 2.84, "eval_accuracy": 0.9207916666666667, "eval_loss": 0.231236070394516, "eval_runtime": 16.1718, "eval_samples_per_second": 1484.064, "eval_steps_per_second": 4.638, "step": 6820 }, { "epoch": 2.85, "learning_rate": 1e-06, "loss": 0.2114, "step": 6840 }, { "epoch": 2.85, "eval_accuracy": 0.9222083333333333, "eval_loss": 0.2280959039926529, "eval_runtime": 15.646, "eval_samples_per_second": 1533.937, "eval_steps_per_second": 4.794, "step": 6840 }, { "epoch": 2.86, "learning_rate": 1e-06, "loss": 0.2037, "step": 6860 }, { "epoch": 2.86, "eval_accuracy": 0.9230833333333334, "eval_loss": 0.22740183770656586, "eval_runtime": 16.2073, "eval_samples_per_second": 1480.812, "eval_steps_per_second": 4.628, "step": 6860 }, { "epoch": 2.87, "learning_rate": 1e-06, "loss": 0.2412, "step": 6880 }, { "epoch": 2.87, "eval_accuracy": 0.9233333333333333, "eval_loss": 0.22586235404014587, "eval_runtime": 15.8595, "eval_samples_per_second": 1513.292, "eval_steps_per_second": 4.729, "step": 6880 }, { "epoch": 2.88, "learning_rate": 1e-06, "loss": 0.2105, "step": 6900 }, { "epoch": 2.88, "eval_accuracy": 0.9239583333333333, "eval_loss": 0.22604452073574066, "eval_runtime": 15.5265, "eval_samples_per_second": 1545.746, "eval_steps_per_second": 4.83, "step": 6900 }, { "epoch": 2.88, "learning_rate": 1e-06, "loss": 0.2209, "step": 6920 }, { "epoch": 2.88, "eval_accuracy": 0.922625, "eval_loss": 0.22909581661224365, "eval_runtime": 16.0771, "eval_samples_per_second": 1492.802, "eval_steps_per_second": 4.665, "step": 6920 }, { "epoch": 2.89, "learning_rate": 1e-06, "loss": 0.2323, "step": 6940 }, { "epoch": 2.89, "eval_accuracy": 0.9240833333333334, "eval_loss": 0.2258378565311432, "eval_runtime": 15.5648, "eval_samples_per_second": 1541.936, "eval_steps_per_second": 4.819, "step": 6940 }, { "epoch": 2.9, "learning_rate": 1e-06, "loss": 0.2416, "step": 6960 }, { "epoch": 2.9, "eval_accuracy": 0.9242916666666666, "eval_loss": 0.22519205510616302, "eval_runtime": 16.1579, "eval_samples_per_second": 1485.345, "eval_steps_per_second": 4.642, "step": 6960 }, { "epoch": 2.91, "learning_rate": 1e-06, "loss": 0.2369, "step": 6980 }, { "epoch": 2.91, "eval_accuracy": 0.9219166666666667, "eval_loss": 0.22782516479492188, "eval_runtime": 15.7414, "eval_samples_per_second": 1524.641, "eval_steps_per_second": 4.765, "step": 6980 }, { "epoch": 2.92, "learning_rate": 1e-06, "loss": 0.2218, "step": 7000 }, { "epoch": 2.92, "eval_accuracy": 0.91975, "eval_loss": 0.23249071836471558, "eval_runtime": 16.1495, "eval_samples_per_second": 1486.113, "eval_steps_per_second": 4.644, "step": 7000 }, { "epoch": 2.92, "learning_rate": 1e-06, "loss": 0.2479, "step": 7020 }, { "epoch": 2.92, "eval_accuracy": 0.922375, "eval_loss": 0.2269277721643448, "eval_runtime": 15.707, "eval_samples_per_second": 1527.979, "eval_steps_per_second": 4.775, "step": 7020 }, { "epoch": 2.93, "learning_rate": 1e-06, "loss": 0.2174, "step": 7040 }, { "epoch": 2.93, "eval_accuracy": 0.9234583333333334, "eval_loss": 0.22529129683971405, "eval_runtime": 15.7426, "eval_samples_per_second": 1524.524, "eval_steps_per_second": 4.764, "step": 7040 }, { "epoch": 2.94, "learning_rate": 1e-06, "loss": 0.2243, "step": 7060 }, { "epoch": 2.94, "eval_accuracy": 0.9230416666666666, "eval_loss": 0.22688570618629456, "eval_runtime": 16.3366, "eval_samples_per_second": 1469.094, "eval_steps_per_second": 4.591, "step": 7060 }, { "epoch": 2.95, "learning_rate": 1e-06, "loss": 0.2822, "step": 7080 }, { "epoch": 2.95, "eval_accuracy": 0.9227916666666667, "eval_loss": 0.23044590651988983, "eval_runtime": 15.8096, "eval_samples_per_second": 1518.063, "eval_steps_per_second": 4.744, "step": 7080 }, { "epoch": 2.96, "learning_rate": 1e-06, "loss": 0.2161, "step": 7100 }, { "epoch": 2.96, "eval_accuracy": 0.9220833333333334, "eval_loss": 0.22724518179893494, "eval_runtime": 16.2528, "eval_samples_per_second": 1476.67, "eval_steps_per_second": 4.615, "step": 7100 }, { "epoch": 2.97, "learning_rate": 1e-06, "loss": 0.238, "step": 7120 }, { "epoch": 2.97, "eval_accuracy": 0.9245, "eval_loss": 0.22448720037937164, "eval_runtime": 15.5514, "eval_samples_per_second": 1543.268, "eval_steps_per_second": 4.823, "step": 7120 }, { "epoch": 2.98, "learning_rate": 1e-06, "loss": 0.238, "step": 7140 }, { "epoch": 2.98, "eval_accuracy": 0.924125, "eval_loss": 0.22591613233089447, "eval_runtime": 15.8997, "eval_samples_per_second": 1509.461, "eval_steps_per_second": 4.717, "step": 7140 }, { "epoch": 2.98, "learning_rate": 1e-06, "loss": 0.1969, "step": 7160 }, { "epoch": 2.98, "eval_accuracy": 0.924125, "eval_loss": 0.22452926635742188, "eval_runtime": 15.5234, "eval_samples_per_second": 1546.054, "eval_steps_per_second": 4.831, "step": 7160 }, { "epoch": 2.99, "learning_rate": 1e-06, "loss": 0.1897, "step": 7180 }, { "epoch": 2.99, "eval_accuracy": 0.923625, "eval_loss": 0.22510305047035217, "eval_runtime": 15.9816, "eval_samples_per_second": 1501.726, "eval_steps_per_second": 4.693, "step": 7180 }, { "epoch": 3.0, "learning_rate": 1e-06, "loss": 0.2168, "step": 7200 }, { "epoch": 3.0, "eval_accuracy": 0.9232916666666666, "eval_loss": 0.22502471506595612, "eval_runtime": 15.6986, "eval_samples_per_second": 1528.802, "eval_steps_per_second": 4.778, "step": 7200 }, { "epoch": 3.01, "learning_rate": 1e-06, "loss": 0.251, "step": 7220 }, { "epoch": 3.01, "eval_accuracy": 0.9230833333333334, "eval_loss": 0.22653472423553467, "eval_runtime": 16.1799, "eval_samples_per_second": 1483.32, "eval_steps_per_second": 4.635, "step": 7220 }, { "epoch": 3.02, "learning_rate": 1e-06, "loss": 0.2315, "step": 7240 }, { "epoch": 3.02, "eval_accuracy": 0.923125, "eval_loss": 0.2254115790128708, "eval_runtime": 15.9152, "eval_samples_per_second": 1507.997, "eval_steps_per_second": 4.712, "step": 7240 }, { "epoch": 3.02, "learning_rate": 1e-06, "loss": 0.2055, "step": 7260 }, { "epoch": 3.02, "eval_accuracy": 0.9245416666666667, "eval_loss": 0.22353042662143707, "eval_runtime": 15.5398, "eval_samples_per_second": 1544.419, "eval_steps_per_second": 4.826, "step": 7260 }, { "epoch": 3.03, "learning_rate": 1e-06, "loss": 0.2277, "step": 7280 }, { "epoch": 3.03, "eval_accuracy": 0.9244583333333334, "eval_loss": 0.2269069403409958, "eval_runtime": 16.1706, "eval_samples_per_second": 1484.176, "eval_steps_per_second": 4.638, "step": 7280 }, { "epoch": 3.04, "learning_rate": 1e-06, "loss": 0.2379, "step": 7300 }, { "epoch": 3.04, "eval_accuracy": 0.9248333333333333, "eval_loss": 0.22591404616832733, "eval_runtime": 16.0606, "eval_samples_per_second": 1494.339, "eval_steps_per_second": 4.67, "step": 7300 }, { "epoch": 3.05, "learning_rate": 1e-06, "loss": 0.1889, "step": 7320 }, { "epoch": 3.05, "eval_accuracy": 0.9246666666666666, "eval_loss": 0.22398823499679565, "eval_runtime": 15.816, "eval_samples_per_second": 1517.45, "eval_steps_per_second": 4.742, "step": 7320 }, { "epoch": 3.06, "learning_rate": 1e-06, "loss": 0.2386, "step": 7340 }, { "epoch": 3.06, "eval_accuracy": 0.9240416666666667, "eval_loss": 0.22369486093521118, "eval_runtime": 16.3833, "eval_samples_per_second": 1464.908, "eval_steps_per_second": 4.578, "step": 7340 }, { "epoch": 3.07, "learning_rate": 1e-06, "loss": 0.2135, "step": 7360 }, { "epoch": 3.07, "eval_accuracy": 0.9237083333333334, "eval_loss": 0.22461163997650146, "eval_runtime": 15.8713, "eval_samples_per_second": 1512.165, "eval_steps_per_second": 4.726, "step": 7360 }, { "epoch": 3.08, "learning_rate": 1e-06, "loss": 0.2411, "step": 7380 }, { "epoch": 3.08, "eval_accuracy": 0.923875, "eval_loss": 0.2259892225265503, "eval_runtime": 16.3476, "eval_samples_per_second": 1468.103, "eval_steps_per_second": 4.588, "step": 7380 }, { "epoch": 3.08, "learning_rate": 1e-06, "loss": 0.2206, "step": 7400 }, { "epoch": 3.08, "eval_accuracy": 0.9233333333333333, "eval_loss": 0.22540873289108276, "eval_runtime": 15.7971, "eval_samples_per_second": 1519.265, "eval_steps_per_second": 4.748, "step": 7400 }, { "epoch": 3.09, "learning_rate": 1e-06, "loss": 0.221, "step": 7420 }, { "epoch": 3.09, "eval_accuracy": 0.9237083333333334, "eval_loss": 0.2240631878376007, "eval_runtime": 15.554, "eval_samples_per_second": 1543.013, "eval_steps_per_second": 4.822, "step": 7420 }, { "epoch": 3.1, "learning_rate": 1e-06, "loss": 0.2189, "step": 7440 }, { "epoch": 3.1, "eval_accuracy": 0.9245, "eval_loss": 0.2240666151046753, "eval_runtime": 15.7074, "eval_samples_per_second": 1527.938, "eval_steps_per_second": 4.775, "step": 7440 }, { "epoch": 3.11, "learning_rate": 1e-06, "loss": 0.2222, "step": 7460 }, { "epoch": 3.11, "eval_accuracy": 0.9242916666666666, "eval_loss": 0.22323697805404663, "eval_runtime": 15.5541, "eval_samples_per_second": 1543.003, "eval_steps_per_second": 4.822, "step": 7460 }, { "epoch": 3.12, "learning_rate": 1e-06, "loss": 0.2227, "step": 7480 }, { "epoch": 3.12, "eval_accuracy": 0.92575, "eval_loss": 0.221858412027359, "eval_runtime": 16.1116, "eval_samples_per_second": 1489.607, "eval_steps_per_second": 4.655, "step": 7480 }, { "epoch": 3.12, "learning_rate": 1e-06, "loss": 0.2375, "step": 7500 }, { "epoch": 3.12, "eval_accuracy": 0.9249166666666667, "eval_loss": 0.2222757488489151, "eval_runtime": 15.423, "eval_samples_per_second": 1556.117, "eval_steps_per_second": 4.863, "step": 7500 }, { "epoch": 3.13, "learning_rate": 1e-06, "loss": 0.2124, "step": 7520 }, { "epoch": 3.13, "eval_accuracy": 0.9242083333333333, "eval_loss": 0.2229667603969574, "eval_runtime": 16.1122, "eval_samples_per_second": 1489.556, "eval_steps_per_second": 4.655, "step": 7520 }, { "epoch": 3.14, "learning_rate": 1e-06, "loss": 0.2383, "step": 7540 }, { "epoch": 3.14, "eval_accuracy": 0.9243333333333333, "eval_loss": 0.2248881459236145, "eval_runtime": 15.7471, "eval_samples_per_second": 1524.086, "eval_steps_per_second": 4.763, "step": 7540 }, { "epoch": 3.15, "learning_rate": 1e-06, "loss": 0.2735, "step": 7560 }, { "epoch": 3.15, "eval_accuracy": 0.9245833333333333, "eval_loss": 0.22355443239212036, "eval_runtime": 16.0099, "eval_samples_per_second": 1499.071, "eval_steps_per_second": 4.685, "step": 7560 }, { "epoch": 3.16, "learning_rate": 1e-06, "loss": 0.195, "step": 7580 }, { "epoch": 3.16, "eval_accuracy": 0.9247916666666667, "eval_loss": 0.2215178906917572, "eval_runtime": 16.1768, "eval_samples_per_second": 1483.608, "eval_steps_per_second": 4.636, "step": 7580 }, { "epoch": 3.17, "learning_rate": 1e-06, "loss": 0.208, "step": 7600 }, { "epoch": 3.17, "eval_accuracy": 0.9236666666666666, "eval_loss": 0.22357864677906036, "eval_runtime": 15.8058, "eval_samples_per_second": 1518.434, "eval_steps_per_second": 4.745, "step": 7600 }, { "epoch": 3.17, "learning_rate": 1e-06, "loss": 0.2019, "step": 7620 }, { "epoch": 3.17, "eval_accuracy": 0.9232916666666666, "eval_loss": 0.2229277789592743, "eval_runtime": 16.1663, "eval_samples_per_second": 1484.567, "eval_steps_per_second": 4.639, "step": 7620 }, { "epoch": 3.18, "learning_rate": 1e-06, "loss": 0.242, "step": 7640 }, { "epoch": 3.18, "eval_accuracy": 0.925125, "eval_loss": 0.22199295461177826, "eval_runtime": 15.6259, "eval_samples_per_second": 1535.909, "eval_steps_per_second": 4.8, "step": 7640 }, { "epoch": 3.19, "learning_rate": 1e-06, "loss": 0.2209, "step": 7660 }, { "epoch": 3.19, "eval_accuracy": 0.924375, "eval_loss": 0.22275349497795105, "eval_runtime": 15.853, "eval_samples_per_second": 1513.909, "eval_steps_per_second": 4.731, "step": 7660 }, { "epoch": 3.2, "learning_rate": 1e-06, "loss": 0.263, "step": 7680 }, { "epoch": 3.2, "eval_accuracy": 0.9236666666666666, "eval_loss": 0.22205360233783722, "eval_runtime": 15.6683, "eval_samples_per_second": 1531.757, "eval_steps_per_second": 4.787, "step": 7680 }, { "epoch": 3.21, "learning_rate": 1e-06, "loss": 0.1923, "step": 7700 }, { "epoch": 3.21, "eval_accuracy": 0.9255, "eval_loss": 0.22050580382347107, "eval_runtime": 16.26, "eval_samples_per_second": 1476.012, "eval_steps_per_second": 4.613, "step": 7700 }, { "epoch": 3.22, "learning_rate": 1e-06, "loss": 0.2203, "step": 7720 }, { "epoch": 3.22, "eval_accuracy": 0.9250833333333334, "eval_loss": 0.22197723388671875, "eval_runtime": 16.2074, "eval_samples_per_second": 1480.801, "eval_steps_per_second": 4.628, "step": 7720 }, { "epoch": 3.23, "learning_rate": 1e-06, "loss": 0.2166, "step": 7740 }, { "epoch": 3.23, "eval_accuracy": 0.9254583333333334, "eval_loss": 0.2208959013223648, "eval_runtime": 16.0355, "eval_samples_per_second": 1496.675, "eval_steps_per_second": 4.677, "step": 7740 }, { "epoch": 3.23, "learning_rate": 1e-06, "loss": 0.2545, "step": 7760 }, { "epoch": 3.23, "eval_accuracy": 0.9252083333333333, "eval_loss": 0.22131887078285217, "eval_runtime": 15.9945, "eval_samples_per_second": 1500.516, "eval_steps_per_second": 4.689, "step": 7760 }, { "epoch": 3.24, "learning_rate": 1e-06, "loss": 0.1604, "step": 7780 }, { "epoch": 3.24, "eval_accuracy": 0.92425, "eval_loss": 0.22284860908985138, "eval_runtime": 16.005, "eval_samples_per_second": 1499.53, "eval_steps_per_second": 4.686, "step": 7780 }, { "epoch": 3.25, "learning_rate": 1e-06, "loss": 0.2, "step": 7800 }, { "epoch": 3.25, "eval_accuracy": 0.923875, "eval_loss": 0.22088997066020966, "eval_runtime": 15.4149, "eval_samples_per_second": 1556.938, "eval_steps_per_second": 4.865, "step": 7800 }, { "epoch": 3.26, "learning_rate": 1e-06, "loss": 0.2373, "step": 7820 }, { "epoch": 3.26, "eval_accuracy": 0.9215833333333333, "eval_loss": 0.2259444147348404, "eval_runtime": 16.167, "eval_samples_per_second": 1484.503, "eval_steps_per_second": 4.639, "step": 7820 }, { "epoch": 3.27, "learning_rate": 1e-06, "loss": 0.217, "step": 7840 }, { "epoch": 3.27, "eval_accuracy": 0.9252083333333333, "eval_loss": 0.22143015265464783, "eval_runtime": 15.727, "eval_samples_per_second": 1526.036, "eval_steps_per_second": 4.769, "step": 7840 }, { "epoch": 3.27, "learning_rate": 1e-06, "loss": 0.2172, "step": 7860 }, { "epoch": 3.27, "eval_accuracy": 0.9234583333333334, "eval_loss": 0.22263799607753754, "eval_runtime": 16.2672, "eval_samples_per_second": 1475.362, "eval_steps_per_second": 4.611, "step": 7860 }, { "epoch": 3.28, "learning_rate": 1e-06, "loss": 0.1959, "step": 7880 }, { "epoch": 3.28, "eval_accuracy": 0.9235833333333333, "eval_loss": 0.22203776240348816, "eval_runtime": 15.4597, "eval_samples_per_second": 1552.421, "eval_steps_per_second": 4.851, "step": 7880 }, { "epoch": 3.29, "learning_rate": 1e-06, "loss": 0.1781, "step": 7900 }, { "epoch": 3.29, "eval_accuracy": 0.9245, "eval_loss": 0.22064486145973206, "eval_runtime": 15.8085, "eval_samples_per_second": 1518.167, "eval_steps_per_second": 4.744, "step": 7900 }, { "epoch": 3.3, "learning_rate": 1e-06, "loss": 0.2024, "step": 7920 }, { "epoch": 3.3, "eval_accuracy": 0.9228333333333333, "eval_loss": 0.22356781363487244, "eval_runtime": 16.1166, "eval_samples_per_second": 1489.15, "eval_steps_per_second": 4.654, "step": 7920 }, { "epoch": 3.31, "learning_rate": 1e-06, "loss": 0.196, "step": 7940 }, { "epoch": 3.31, "eval_accuracy": 0.923375, "eval_loss": 0.22183743119239807, "eval_runtime": 15.9128, "eval_samples_per_second": 1508.22, "eval_steps_per_second": 4.713, "step": 7940 }, { "epoch": 3.32, "learning_rate": 1e-06, "loss": 0.2462, "step": 7960 }, { "epoch": 3.32, "eval_accuracy": 0.925875, "eval_loss": 0.21925699710845947, "eval_runtime": 15.9525, "eval_samples_per_second": 1504.469, "eval_steps_per_second": 4.701, "step": 7960 }, { "epoch": 3.33, "learning_rate": 1e-06, "loss": 0.205, "step": 7980 }, { "epoch": 3.33, "eval_accuracy": 0.9255833333333333, "eval_loss": 0.21881194412708282, "eval_runtime": 15.9116, "eval_samples_per_second": 1508.338, "eval_steps_per_second": 4.714, "step": 7980 }, { "epoch": 3.33, "learning_rate": 1e-06, "loss": 0.1793, "step": 8000 }, { "epoch": 3.33, "eval_accuracy": 0.9254583333333334, "eval_loss": 0.21929273009300232, "eval_runtime": 16.6645, "eval_samples_per_second": 1440.183, "eval_steps_per_second": 4.501, "step": 8000 }, { "epoch": 3.34, "learning_rate": 1e-06, "loss": 0.2551, "step": 8020 }, { "epoch": 3.34, "eval_accuracy": 0.9253333333333333, "eval_loss": 0.21935345232486725, "eval_runtime": 15.8842, "eval_samples_per_second": 1510.936, "eval_steps_per_second": 4.722, "step": 8020 }, { "epoch": 3.35, "learning_rate": 1e-06, "loss": 0.2471, "step": 8040 }, { "epoch": 3.35, "eval_accuracy": 0.9250416666666667, "eval_loss": 0.220913827419281, "eval_runtime": 15.7346, "eval_samples_per_second": 1525.304, "eval_steps_per_second": 4.767, "step": 8040 }, { "epoch": 3.36, "learning_rate": 1e-06, "loss": 0.1765, "step": 8060 }, { "epoch": 3.36, "eval_accuracy": 0.9254166666666667, "eval_loss": 0.22260655462741852, "eval_runtime": 15.7321, "eval_samples_per_second": 1525.546, "eval_steps_per_second": 4.767, "step": 8060 }, { "epoch": 3.37, "learning_rate": 1e-06, "loss": 0.161, "step": 8080 }, { "epoch": 3.37, "eval_accuracy": 0.9257083333333334, "eval_loss": 0.2232980579137802, "eval_runtime": 15.5673, "eval_samples_per_second": 1541.698, "eval_steps_per_second": 4.818, "step": 8080 }, { "epoch": 3.38, "learning_rate": 1e-06, "loss": 0.2243, "step": 8100 }, { "epoch": 3.38, "eval_accuracy": 0.9247083333333334, "eval_loss": 0.22126658260822296, "eval_runtime": 16.3031, "eval_samples_per_second": 1472.111, "eval_steps_per_second": 4.6, "step": 8100 }, { "epoch": 3.38, "learning_rate": 1e-06, "loss": 0.2044, "step": 8120 }, { "epoch": 3.38, "eval_accuracy": 0.92525, "eval_loss": 0.21990346908569336, "eval_runtime": 15.9234, "eval_samples_per_second": 1507.215, "eval_steps_per_second": 4.71, "step": 8120 }, { "epoch": 3.39, "learning_rate": 1e-06, "loss": 0.2213, "step": 8140 }, { "epoch": 3.39, "eval_accuracy": 0.9261666666666667, "eval_loss": 0.21853962540626526, "eval_runtime": 15.7101, "eval_samples_per_second": 1527.683, "eval_steps_per_second": 4.774, "step": 8140 }, { "epoch": 3.4, "learning_rate": 1e-06, "loss": 0.23, "step": 8160 }, { "epoch": 3.4, "eval_accuracy": 0.9267916666666667, "eval_loss": 0.21748250722885132, "eval_runtime": 15.4099, "eval_samples_per_second": 1557.437, "eval_steps_per_second": 4.867, "step": 8160 }, { "epoch": 3.41, "learning_rate": 1e-06, "loss": 0.2377, "step": 8180 }, { "epoch": 3.41, "eval_accuracy": 0.9244166666666667, "eval_loss": 0.21937525272369385, "eval_runtime": 15.8123, "eval_samples_per_second": 1517.802, "eval_steps_per_second": 4.743, "step": 8180 }, { "epoch": 3.42, "learning_rate": 1e-06, "loss": 0.2349, "step": 8200 }, { "epoch": 3.42, "eval_accuracy": 0.9235833333333333, "eval_loss": 0.21988487243652344, "eval_runtime": 15.604, "eval_samples_per_second": 1538.067, "eval_steps_per_second": 4.806, "step": 8200 }, { "epoch": 3.42, "learning_rate": 1e-06, "loss": 0.2217, "step": 8220 }, { "epoch": 3.42, "eval_accuracy": 0.9260416666666667, "eval_loss": 0.21746017038822174, "eval_runtime": 15.8834, "eval_samples_per_second": 1511.008, "eval_steps_per_second": 4.722, "step": 8220 }, { "epoch": 3.43, "learning_rate": 1e-06, "loss": 0.2005, "step": 8240 }, { "epoch": 3.43, "eval_accuracy": 0.9264166666666667, "eval_loss": 0.21770620346069336, "eval_runtime": 16.0248, "eval_samples_per_second": 1497.677, "eval_steps_per_second": 4.68, "step": 8240 }, { "epoch": 3.44, "learning_rate": 1e-06, "loss": 0.2193, "step": 8260 }, { "epoch": 3.44, "eval_accuracy": 0.9264583333333334, "eval_loss": 0.21887263655662537, "eval_runtime": 15.827, "eval_samples_per_second": 1516.398, "eval_steps_per_second": 4.739, "step": 8260 }, { "epoch": 3.45, "learning_rate": 1e-06, "loss": 0.1551, "step": 8280 }, { "epoch": 3.45, "eval_accuracy": 0.92625, "eval_loss": 0.22045257687568665, "eval_runtime": 15.6985, "eval_samples_per_second": 1528.808, "eval_steps_per_second": 4.778, "step": 8280 }, { "epoch": 3.46, "learning_rate": 1e-06, "loss": 0.2399, "step": 8300 }, { "epoch": 3.46, "eval_accuracy": 0.9254583333333334, "eval_loss": 0.21784846484661102, "eval_runtime": 15.9829, "eval_samples_per_second": 1501.6, "eval_steps_per_second": 4.693, "step": 8300 }, { "epoch": 3.47, "learning_rate": 1e-06, "loss": 0.2308, "step": 8320 }, { "epoch": 3.47, "eval_accuracy": 0.9246666666666666, "eval_loss": 0.21774353086948395, "eval_runtime": 15.8498, "eval_samples_per_second": 1514.211, "eval_steps_per_second": 4.732, "step": 8320 }, { "epoch": 3.48, "learning_rate": 1e-06, "loss": 0.1661, "step": 8340 }, { "epoch": 3.48, "eval_accuracy": 0.924625, "eval_loss": 0.21844609081745148, "eval_runtime": 16.0502, "eval_samples_per_second": 1495.308, "eval_steps_per_second": 4.673, "step": 8340 }, { "epoch": 3.48, "learning_rate": 1e-06, "loss": 0.2014, "step": 8360 }, { "epoch": 3.48, "eval_accuracy": 0.9242916666666666, "eval_loss": 0.21898412704467773, "eval_runtime": 15.9262, "eval_samples_per_second": 1506.951, "eval_steps_per_second": 4.709, "step": 8360 }, { "epoch": 3.49, "learning_rate": 1e-06, "loss": 0.2667, "step": 8380 }, { "epoch": 3.49, "eval_accuracy": 0.9242083333333333, "eval_loss": 0.21949926018714905, "eval_runtime": 15.9646, "eval_samples_per_second": 1503.329, "eval_steps_per_second": 4.698, "step": 8380 }, { "epoch": 3.5, "learning_rate": 1e-06, "loss": 0.205, "step": 8400 }, { "epoch": 3.5, "eval_accuracy": 0.9252916666666666, "eval_loss": 0.21843333542346954, "eval_runtime": 16.4182, "eval_samples_per_second": 1461.79, "eval_steps_per_second": 4.568, "step": 8400 }, { "epoch": 3.51, "learning_rate": 1e-06, "loss": 0.1982, "step": 8420 }, { "epoch": 3.51, "eval_accuracy": 0.9259166666666667, "eval_loss": 0.21639133989810944, "eval_runtime": 15.6132, "eval_samples_per_second": 1537.157, "eval_steps_per_second": 4.804, "step": 8420 }, { "epoch": 3.52, "learning_rate": 1e-06, "loss": 0.2511, "step": 8440 }, { "epoch": 3.52, "eval_accuracy": 0.926125, "eval_loss": 0.21578435599803925, "eval_runtime": 15.8122, "eval_samples_per_second": 1517.813, "eval_steps_per_second": 4.743, "step": 8440 }, { "epoch": 3.52, "learning_rate": 1e-06, "loss": 0.2627, "step": 8460 }, { "epoch": 3.52, "eval_accuracy": 0.9263333333333333, "eval_loss": 0.2152308076620102, "eval_runtime": 16.1172, "eval_samples_per_second": 1489.089, "eval_steps_per_second": 4.653, "step": 8460 }, { "epoch": 3.53, "learning_rate": 1e-06, "loss": 0.1905, "step": 8480 }, { "epoch": 3.53, "eval_accuracy": 0.9267083333333334, "eval_loss": 0.215366929769516, "eval_runtime": 16.1037, "eval_samples_per_second": 1490.344, "eval_steps_per_second": 4.657, "step": 8480 }, { "epoch": 3.54, "learning_rate": 1e-06, "loss": 0.2349, "step": 8500 }, { "epoch": 3.54, "eval_accuracy": 0.9254166666666667, "eval_loss": 0.216691255569458, "eval_runtime": 15.7789, "eval_samples_per_second": 1521.022, "eval_steps_per_second": 4.753, "step": 8500 }, { "epoch": 3.55, "learning_rate": 1e-06, "loss": 0.1732, "step": 8520 }, { "epoch": 3.55, "eval_accuracy": 0.9255833333333333, "eval_loss": 0.2171410173177719, "eval_runtime": 15.9111, "eval_samples_per_second": 1508.379, "eval_steps_per_second": 4.714, "step": 8520 }, { "epoch": 3.56, "learning_rate": 1e-06, "loss": 0.2152, "step": 8540 }, { "epoch": 3.56, "eval_accuracy": 0.9257916666666667, "eval_loss": 0.21639755368232727, "eval_runtime": 16.6264, "eval_samples_per_second": 1443.488, "eval_steps_per_second": 4.511, "step": 8540 }, { "epoch": 3.57, "learning_rate": 1e-06, "loss": 0.2487, "step": 8560 }, { "epoch": 3.57, "eval_accuracy": 0.92575, "eval_loss": 0.21681177616119385, "eval_runtime": 15.9061, "eval_samples_per_second": 1508.852, "eval_steps_per_second": 4.715, "step": 8560 }, { "epoch": 3.58, "learning_rate": 1e-06, "loss": 0.2411, "step": 8580 }, { "epoch": 3.58, "eval_accuracy": 0.9255833333333333, "eval_loss": 0.2175518274307251, "eval_runtime": 16.2026, "eval_samples_per_second": 1481.242, "eval_steps_per_second": 4.629, "step": 8580 }, { "epoch": 3.58, "learning_rate": 1e-06, "loss": 0.2113, "step": 8600 }, { "epoch": 3.58, "eval_accuracy": 0.9257916666666667, "eval_loss": 0.21555760502815247, "eval_runtime": 15.883, "eval_samples_per_second": 1511.052, "eval_steps_per_second": 4.722, "step": 8600 }, { "epoch": 3.59, "learning_rate": 1e-06, "loss": 0.2065, "step": 8620 }, { "epoch": 3.59, "eval_accuracy": 0.9265833333333333, "eval_loss": 0.21459566056728363, "eval_runtime": 15.8987, "eval_samples_per_second": 1509.554, "eval_steps_per_second": 4.717, "step": 8620 }, { "epoch": 3.6, "learning_rate": 1e-06, "loss": 0.2317, "step": 8640 }, { "epoch": 3.6, "eval_accuracy": 0.926, "eval_loss": 0.21562263369560242, "eval_runtime": 15.7909, "eval_samples_per_second": 1519.863, "eval_steps_per_second": 4.75, "step": 8640 }, { "epoch": 3.61, "learning_rate": 1e-06, "loss": 0.2217, "step": 8660 }, { "epoch": 3.61, "eval_accuracy": 0.9255416666666667, "eval_loss": 0.21699927747249603, "eval_runtime": 15.7983, "eval_samples_per_second": 1519.146, "eval_steps_per_second": 4.747, "step": 8660 }, { "epoch": 3.62, "learning_rate": 1e-06, "loss": 0.2296, "step": 8680 }, { "epoch": 3.62, "eval_accuracy": 0.9252916666666666, "eval_loss": 0.21904636919498444, "eval_runtime": 15.8426, "eval_samples_per_second": 1514.899, "eval_steps_per_second": 4.734, "step": 8680 }, { "epoch": 3.62, "learning_rate": 1e-06, "loss": 0.2143, "step": 8700 }, { "epoch": 3.62, "eval_accuracy": 0.9249166666666667, "eval_loss": 0.21984702348709106, "eval_runtime": 15.7532, "eval_samples_per_second": 1523.499, "eval_steps_per_second": 4.761, "step": 8700 }, { "epoch": 3.63, "learning_rate": 1e-06, "loss": 0.215, "step": 8720 }, { "epoch": 3.63, "eval_accuracy": 0.9260416666666667, "eval_loss": 0.21465341746807098, "eval_runtime": 15.8302, "eval_samples_per_second": 1516.094, "eval_steps_per_second": 4.738, "step": 8720 }, { "epoch": 3.64, "learning_rate": 1e-06, "loss": 0.2109, "step": 8740 }, { "epoch": 3.64, "eval_accuracy": 0.9271666666666667, "eval_loss": 0.21410001814365387, "eval_runtime": 15.7941, "eval_samples_per_second": 1519.554, "eval_steps_per_second": 4.749, "step": 8740 }, { "epoch": 3.65, "learning_rate": 1e-06, "loss": 0.1908, "step": 8760 }, { "epoch": 3.65, "eval_accuracy": 0.9242083333333333, "eval_loss": 0.21745717525482178, "eval_runtime": 16.1017, "eval_samples_per_second": 1490.524, "eval_steps_per_second": 4.658, "step": 8760 }, { "epoch": 3.66, "learning_rate": 1e-06, "loss": 0.1899, "step": 8780 }, { "epoch": 3.66, "eval_accuracy": 0.9242916666666666, "eval_loss": 0.21800926327705383, "eval_runtime": 15.5229, "eval_samples_per_second": 1546.1, "eval_steps_per_second": 4.832, "step": 8780 }, { "epoch": 3.67, "learning_rate": 1e-06, "loss": 0.1681, "step": 8800 }, { "epoch": 3.67, "eval_accuracy": 0.925625, "eval_loss": 0.2183951884508133, "eval_runtime": 15.9633, "eval_samples_per_second": 1503.444, "eval_steps_per_second": 4.698, "step": 8800 }, { "epoch": 3.67, "learning_rate": 1e-06, "loss": 0.2134, "step": 8820 }, { "epoch": 3.67, "eval_accuracy": 0.9258333333333333, "eval_loss": 0.21758043766021729, "eval_runtime": 15.6658, "eval_samples_per_second": 1532.001, "eval_steps_per_second": 4.788, "step": 8820 }, { "epoch": 3.68, "learning_rate": 1e-06, "loss": 0.1788, "step": 8840 }, { "epoch": 3.68, "eval_accuracy": 0.9257916666666667, "eval_loss": 0.2156967669725418, "eval_runtime": 16.1604, "eval_samples_per_second": 1485.116, "eval_steps_per_second": 4.641, "step": 8840 }, { "epoch": 3.69, "learning_rate": 1e-06, "loss": 0.1963, "step": 8860 }, { "epoch": 3.69, "eval_accuracy": 0.9265833333333333, "eval_loss": 0.21555079519748688, "eval_runtime": 15.9521, "eval_samples_per_second": 1504.506, "eval_steps_per_second": 4.702, "step": 8860 }, { "epoch": 3.7, "learning_rate": 1e-06, "loss": 0.1962, "step": 8880 }, { "epoch": 3.7, "eval_accuracy": 0.9252083333333333, "eval_loss": 0.2161911576986313, "eval_runtime": 16.0835, "eval_samples_per_second": 1492.21, "eval_steps_per_second": 4.663, "step": 8880 }, { "epoch": 3.71, "learning_rate": 1e-06, "loss": 0.1587, "step": 8900 }, { "epoch": 3.71, "eval_accuracy": 0.9257916666666667, "eval_loss": 0.21644917130470276, "eval_runtime": 16.0872, "eval_samples_per_second": 1491.867, "eval_steps_per_second": 4.662, "step": 8900 }, { "epoch": 3.72, "learning_rate": 1e-06, "loss": 0.1926, "step": 8920 }, { "epoch": 3.72, "eval_accuracy": 0.924875, "eval_loss": 0.21802347898483276, "eval_runtime": 15.7349, "eval_samples_per_second": 1525.267, "eval_steps_per_second": 4.766, "step": 8920 }, { "epoch": 3.73, "learning_rate": 1e-06, "loss": 0.2268, "step": 8940 }, { "epoch": 3.73, "eval_accuracy": 0.9250416666666667, "eval_loss": 0.21704040467739105, "eval_runtime": 16.0405, "eval_samples_per_second": 1496.209, "eval_steps_per_second": 4.676, "step": 8940 }, { "epoch": 3.73, "learning_rate": 1e-06, "loss": 0.191, "step": 8960 }, { "epoch": 3.73, "eval_accuracy": 0.9265416666666667, "eval_loss": 0.21474313735961914, "eval_runtime": 16.1261, "eval_samples_per_second": 1488.27, "eval_steps_per_second": 4.651, "step": 8960 }, { "epoch": 3.74, "learning_rate": 1e-06, "loss": 0.242, "step": 8980 }, { "epoch": 3.74, "eval_accuracy": 0.92675, "eval_loss": 0.21450480818748474, "eval_runtime": 15.8452, "eval_samples_per_second": 1514.651, "eval_steps_per_second": 4.733, "step": 8980 }, { "epoch": 3.75, "learning_rate": 1e-06, "loss": 0.2096, "step": 9000 }, { "epoch": 3.75, "eval_accuracy": 0.925625, "eval_loss": 0.2159881889820099, "eval_runtime": 15.8929, "eval_samples_per_second": 1510.111, "eval_steps_per_second": 4.719, "step": 9000 }, { "epoch": 3.76, "learning_rate": 1e-06, "loss": 0.1713, "step": 9020 }, { "epoch": 3.76, "eval_accuracy": 0.9254583333333334, "eval_loss": 0.2180025428533554, "eval_runtime": 16.0033, "eval_samples_per_second": 1499.686, "eval_steps_per_second": 4.687, "step": 9020 }, { "epoch": 3.77, "learning_rate": 1e-06, "loss": 0.2437, "step": 9040 }, { "epoch": 3.77, "eval_accuracy": 0.9272083333333333, "eval_loss": 0.21439102292060852, "eval_runtime": 15.8661, "eval_samples_per_second": 1512.663, "eval_steps_per_second": 4.727, "step": 9040 }, { "epoch": 3.77, "learning_rate": 1e-06, "loss": 0.2058, "step": 9060 }, { "epoch": 3.77, "eval_accuracy": 0.9265416666666667, "eval_loss": 0.21493494510650635, "eval_runtime": 16.417, "eval_samples_per_second": 1461.901, "eval_steps_per_second": 4.568, "step": 9060 }, { "epoch": 3.78, "learning_rate": 1e-06, "loss": 0.2107, "step": 9080 }, { "epoch": 3.78, "eval_accuracy": 0.9262083333333333, "eval_loss": 0.21356363594532013, "eval_runtime": 15.9759, "eval_samples_per_second": 1502.265, "eval_steps_per_second": 4.695, "step": 9080 }, { "epoch": 3.79, "learning_rate": 1e-06, "loss": 0.2274, "step": 9100 }, { "epoch": 3.79, "eval_accuracy": 0.9263333333333333, "eval_loss": 0.21457020938396454, "eval_runtime": 16.1086, "eval_samples_per_second": 1489.887, "eval_steps_per_second": 4.656, "step": 9100 }, { "epoch": 3.8, "learning_rate": 1e-06, "loss": 0.1802, "step": 9120 }, { "epoch": 3.8, "eval_accuracy": 0.9258333333333333, "eval_loss": 0.21796223521232605, "eval_runtime": 16.0162, "eval_samples_per_second": 1498.485, "eval_steps_per_second": 4.683, "step": 9120 }, { "epoch": 3.81, "learning_rate": 1e-06, "loss": 0.1901, "step": 9140 }, { "epoch": 3.81, "eval_accuracy": 0.9270416666666667, "eval_loss": 0.21377238631248474, "eval_runtime": 16.0799, "eval_samples_per_second": 1492.542, "eval_steps_per_second": 4.664, "step": 9140 }, { "epoch": 3.82, "learning_rate": 1e-06, "loss": 0.1868, "step": 9160 }, { "epoch": 3.82, "eval_accuracy": 0.9264583333333334, "eval_loss": 0.214362233877182, "eval_runtime": 15.7967, "eval_samples_per_second": 1519.305, "eval_steps_per_second": 4.748, "step": 9160 }, { "epoch": 3.83, "learning_rate": 1e-06, "loss": 0.159, "step": 9180 }, { "epoch": 3.83, "eval_accuracy": 0.927875, "eval_loss": 0.21380971372127533, "eval_runtime": 15.6723, "eval_samples_per_second": 1531.362, "eval_steps_per_second": 4.786, "step": 9180 }, { "epoch": 3.83, "learning_rate": 1e-06, "loss": 0.2448, "step": 9200 }, { "epoch": 3.83, "eval_accuracy": 0.9277083333333334, "eval_loss": 0.2141939252614975, "eval_runtime": 16.2577, "eval_samples_per_second": 1476.225, "eval_steps_per_second": 4.613, "step": 9200 }, { "epoch": 3.84, "learning_rate": 1e-06, "loss": 0.2412, "step": 9220 }, { "epoch": 3.84, "eval_accuracy": 0.9275416666666667, "eval_loss": 0.2140486091375351, "eval_runtime": 15.679, "eval_samples_per_second": 1530.709, "eval_steps_per_second": 4.783, "step": 9220 }, { "epoch": 3.85, "learning_rate": 1e-06, "loss": 0.2457, "step": 9240 }, { "epoch": 3.85, "eval_accuracy": 0.925375, "eval_loss": 0.21611127257347107, "eval_runtime": 16.288, "eval_samples_per_second": 1473.473, "eval_steps_per_second": 4.605, "step": 9240 }, { "epoch": 3.86, "learning_rate": 1e-06, "loss": 0.1918, "step": 9260 }, { "epoch": 3.86, "eval_accuracy": 0.92625, "eval_loss": 0.21398600935935974, "eval_runtime": 15.9302, "eval_samples_per_second": 1506.573, "eval_steps_per_second": 4.708, "step": 9260 }, { "epoch": 3.87, "learning_rate": 1e-06, "loss": 0.2424, "step": 9280 }, { "epoch": 3.87, "eval_accuracy": 0.92875, "eval_loss": 0.21145953238010406, "eval_runtime": 16.2556, "eval_samples_per_second": 1476.419, "eval_steps_per_second": 4.614, "step": 9280 }, { "epoch": 3.88, "learning_rate": 1e-06, "loss": 0.2059, "step": 9300 }, { "epoch": 3.88, "eval_accuracy": 0.9278333333333333, "eval_loss": 0.21115827560424805, "eval_runtime": 16.0018, "eval_samples_per_second": 1499.836, "eval_steps_per_second": 4.687, "step": 9300 }, { "epoch": 3.88, "learning_rate": 1e-06, "loss": 0.2415, "step": 9320 }, { "epoch": 3.88, "eval_accuracy": 0.9265, "eval_loss": 0.212614968419075, "eval_runtime": 15.9163, "eval_samples_per_second": 1507.888, "eval_steps_per_second": 4.712, "step": 9320 }, { "epoch": 3.89, "learning_rate": 1e-06, "loss": 0.2312, "step": 9340 }, { "epoch": 3.89, "eval_accuracy": 0.9279166666666666, "eval_loss": 0.21092422306537628, "eval_runtime": 15.7459, "eval_samples_per_second": 1524.21, "eval_steps_per_second": 4.763, "step": 9340 }, { "epoch": 3.9, "learning_rate": 1e-06, "loss": 0.2002, "step": 9360 }, { "epoch": 3.9, "eval_accuracy": 0.927, "eval_loss": 0.21157173812389374, "eval_runtime": 15.674, "eval_samples_per_second": 1531.197, "eval_steps_per_second": 4.785, "step": 9360 }, { "epoch": 3.91, "learning_rate": 1e-06, "loss": 0.2061, "step": 9380 }, { "epoch": 3.91, "eval_accuracy": 0.9255416666666667, "eval_loss": 0.21427378058433533, "eval_runtime": 16.16, "eval_samples_per_second": 1485.144, "eval_steps_per_second": 4.641, "step": 9380 }, { "epoch": 3.92, "learning_rate": 1e-06, "loss": 0.1892, "step": 9400 }, { "epoch": 3.92, "eval_accuracy": 0.9269583333333333, "eval_loss": 0.21478785574436188, "eval_runtime": 15.666, "eval_samples_per_second": 1531.978, "eval_steps_per_second": 4.787, "step": 9400 }, { "epoch": 3.92, "learning_rate": 1e-06, "loss": 0.242, "step": 9420 }, { "epoch": 3.92, "eval_accuracy": 0.9268333333333333, "eval_loss": 0.21459507942199707, "eval_runtime": 16.0867, "eval_samples_per_second": 1491.912, "eval_steps_per_second": 4.662, "step": 9420 }, { "epoch": 3.93, "learning_rate": 1e-06, "loss": 0.2151, "step": 9440 }, { "epoch": 3.93, "eval_accuracy": 0.9257916666666667, "eval_loss": 0.21316887438297272, "eval_runtime": 16.0112, "eval_samples_per_second": 1498.948, "eval_steps_per_second": 4.684, "step": 9440 }, { "epoch": 3.94, "learning_rate": 1e-06, "loss": 0.2057, "step": 9460 }, { "epoch": 3.94, "eval_accuracy": 0.927125, "eval_loss": 0.21309146285057068, "eval_runtime": 15.8401, "eval_samples_per_second": 1515.139, "eval_steps_per_second": 4.735, "step": 9460 }, { "epoch": 3.95, "learning_rate": 1e-06, "loss": 0.2165, "step": 9480 }, { "epoch": 3.95, "eval_accuracy": 0.9276666666666666, "eval_loss": 0.21158146858215332, "eval_runtime": 16.0289, "eval_samples_per_second": 1497.291, "eval_steps_per_second": 4.679, "step": 9480 }, { "epoch": 3.96, "learning_rate": 1e-06, "loss": 0.1845, "step": 9500 }, { "epoch": 3.96, "eval_accuracy": 0.9277083333333334, "eval_loss": 0.21126095950603485, "eval_runtime": 15.8162, "eval_samples_per_second": 1517.433, "eval_steps_per_second": 4.742, "step": 9500 }, { "epoch": 3.97, "learning_rate": 1e-06, "loss": 0.1787, "step": 9520 }, { "epoch": 3.97, "eval_accuracy": 0.927, "eval_loss": 0.21392786502838135, "eval_runtime": 16.2866, "eval_samples_per_second": 1473.607, "eval_steps_per_second": 4.605, "step": 9520 }, { "epoch": 3.98, "learning_rate": 1e-06, "loss": 0.1947, "step": 9540 }, { "epoch": 3.98, "eval_accuracy": 0.927875, "eval_loss": 0.2135414332151413, "eval_runtime": 15.7812, "eval_samples_per_second": 1520.796, "eval_steps_per_second": 4.752, "step": 9540 }, { "epoch": 3.98, "learning_rate": 1e-06, "loss": 0.1802, "step": 9560 }, { "epoch": 3.98, "eval_accuracy": 0.9288333333333333, "eval_loss": 0.2114471048116684, "eval_runtime": 15.9961, "eval_samples_per_second": 1500.37, "eval_steps_per_second": 4.689, "step": 9560 }, { "epoch": 3.99, "learning_rate": 1e-06, "loss": 0.1865, "step": 9580 }, { "epoch": 3.99, "eval_accuracy": 0.9280833333333334, "eval_loss": 0.21309266984462738, "eval_runtime": 16.0813, "eval_samples_per_second": 1492.415, "eval_steps_per_second": 4.664, "step": 9580 }, { "epoch": 4.0, "learning_rate": 1e-06, "loss": 0.2346, "step": 9600 }, { "epoch": 4.0, "eval_accuracy": 0.9282083333333333, "eval_loss": 0.21224650740623474, "eval_runtime": 15.806, "eval_samples_per_second": 1518.415, "eval_steps_per_second": 4.745, "step": 9600 }, { "epoch": 4.01, "learning_rate": 1e-06, "loss": 0.1973, "step": 9620 }, { "epoch": 4.01, "eval_accuracy": 0.9283333333333333, "eval_loss": 0.2113197296857834, "eval_runtime": 16.0449, "eval_samples_per_second": 1495.802, "eval_steps_per_second": 4.674, "step": 9620 }, { "epoch": 4.02, "learning_rate": 1e-06, "loss": 0.1873, "step": 9640 }, { "epoch": 4.02, "eval_accuracy": 0.928, "eval_loss": 0.21195828914642334, "eval_runtime": 16.0572, "eval_samples_per_second": 1494.657, "eval_steps_per_second": 4.671, "step": 9640 }, { "epoch": 4.03, "learning_rate": 1e-06, "loss": 0.1883, "step": 9660 }, { "epoch": 4.03, "eval_accuracy": 0.9275416666666667, "eval_loss": 0.21404027938842773, "eval_runtime": 16.1167, "eval_samples_per_second": 1489.14, "eval_steps_per_second": 4.654, "step": 9660 }, { "epoch": 4.03, "learning_rate": 1e-06, "loss": 0.2386, "step": 9680 }, { "epoch": 4.03, "eval_accuracy": 0.9255, "eval_loss": 0.21560032665729523, "eval_runtime": 15.8482, "eval_samples_per_second": 1514.369, "eval_steps_per_second": 4.732, "step": 9680 }, { "epoch": 4.04, "learning_rate": 1e-06, "loss": 0.1594, "step": 9700 }, { "epoch": 4.04, "eval_accuracy": 0.9266666666666666, "eval_loss": 0.2141159623861313, "eval_runtime": 15.8534, "eval_samples_per_second": 1513.868, "eval_steps_per_second": 4.731, "step": 9700 }, { "epoch": 4.05, "learning_rate": 1e-06, "loss": 0.1805, "step": 9720 }, { "epoch": 4.05, "eval_accuracy": 0.927875, "eval_loss": 0.21176785230636597, "eval_runtime": 15.5441, "eval_samples_per_second": 1543.994, "eval_steps_per_second": 4.825, "step": 9720 }, { "epoch": 4.06, "learning_rate": 1e-06, "loss": 0.2121, "step": 9740 }, { "epoch": 4.06, "eval_accuracy": 0.927, "eval_loss": 0.21185992658138275, "eval_runtime": 16.1338, "eval_samples_per_second": 1487.557, "eval_steps_per_second": 4.649, "step": 9740 }, { "epoch": 4.07, "learning_rate": 1e-06, "loss": 0.1764, "step": 9760 }, { "epoch": 4.07, "eval_accuracy": 0.926, "eval_loss": 0.21480970084667206, "eval_runtime": 15.7121, "eval_samples_per_second": 1527.483, "eval_steps_per_second": 4.773, "step": 9760 }, { "epoch": 4.08, "learning_rate": 1e-06, "loss": 0.2067, "step": 9780 }, { "epoch": 4.08, "eval_accuracy": 0.9278333333333333, "eval_loss": 0.21278834342956543, "eval_runtime": 15.7391, "eval_samples_per_second": 1524.869, "eval_steps_per_second": 4.765, "step": 9780 }, { "epoch": 4.08, "learning_rate": 1e-06, "loss": 0.2219, "step": 9800 }, { "epoch": 4.08, "eval_accuracy": 0.9279166666666666, "eval_loss": 0.21168003976345062, "eval_runtime": 15.591, "eval_samples_per_second": 1539.347, "eval_steps_per_second": 4.81, "step": 9800 }, { "epoch": 4.09, "learning_rate": 1e-06, "loss": 0.1931, "step": 9820 }, { "epoch": 4.09, "eval_accuracy": 0.9288333333333333, "eval_loss": 0.2115764170885086, "eval_runtime": 16.0212, "eval_samples_per_second": 1498.017, "eval_steps_per_second": 4.681, "step": 9820 }, { "epoch": 4.1, "learning_rate": 1e-06, "loss": 0.198, "step": 9840 }, { "epoch": 4.1, "eval_accuracy": 0.9270833333333334, "eval_loss": 0.21085327863693237, "eval_runtime": 16.2758, "eval_samples_per_second": 1474.58, "eval_steps_per_second": 4.608, "step": 9840 }, { "epoch": 4.11, "learning_rate": 1e-06, "loss": 0.1664, "step": 9860 }, { "epoch": 4.11, "eval_accuracy": 0.9275833333333333, "eval_loss": 0.21043312549591064, "eval_runtime": 15.7593, "eval_samples_per_second": 1522.91, "eval_steps_per_second": 4.759, "step": 9860 }, { "epoch": 4.12, "learning_rate": 1e-06, "loss": 0.1998, "step": 9880 }, { "epoch": 4.12, "eval_accuracy": 0.9276666666666666, "eval_loss": 0.21023598313331604, "eval_runtime": 15.8663, "eval_samples_per_second": 1512.639, "eval_steps_per_second": 4.727, "step": 9880 }, { "epoch": 4.12, "learning_rate": 1e-06, "loss": 0.2338, "step": 9900 }, { "epoch": 4.12, "eval_accuracy": 0.9267916666666667, "eval_loss": 0.21208404004573822, "eval_runtime": 15.987, "eval_samples_per_second": 1501.219, "eval_steps_per_second": 4.691, "step": 9900 }, { "epoch": 4.13, "learning_rate": 1e-06, "loss": 0.1807, "step": 9920 }, { "epoch": 4.13, "eval_accuracy": 0.9267083333333334, "eval_loss": 0.21226836740970612, "eval_runtime": 15.4893, "eval_samples_per_second": 1549.453, "eval_steps_per_second": 4.842, "step": 9920 }, { "epoch": 4.14, "learning_rate": 1e-06, "loss": 0.2055, "step": 9940 }, { "epoch": 4.14, "eval_accuracy": 0.92775, "eval_loss": 0.21014344692230225, "eval_runtime": 16.5702, "eval_samples_per_second": 1448.387, "eval_steps_per_second": 4.526, "step": 9940 }, { "epoch": 4.15, "learning_rate": 1e-06, "loss": 0.2094, "step": 9960 }, { "epoch": 4.15, "eval_accuracy": 0.9275, "eval_loss": 0.21054843068122864, "eval_runtime": 15.7855, "eval_samples_per_second": 1520.387, "eval_steps_per_second": 4.751, "step": 9960 }, { "epoch": 4.16, "learning_rate": 1e-06, "loss": 0.1675, "step": 9980 }, { "epoch": 4.16, "eval_accuracy": 0.9266666666666666, "eval_loss": 0.21459108591079712, "eval_runtime": 15.728, "eval_samples_per_second": 1525.936, "eval_steps_per_second": 4.769, "step": 9980 }, { "epoch": 4.17, "learning_rate": 1e-06, "loss": 0.2043, "step": 10000 }, { "epoch": 4.17, "eval_accuracy": 0.9267916666666667, "eval_loss": 0.2115868628025055, "eval_runtime": 16.2887, "eval_samples_per_second": 1473.412, "eval_steps_per_second": 4.604, "step": 10000 }, { "epoch": 4.17, "learning_rate": 1e-06, "loss": 0.1625, "step": 10020 }, { "epoch": 4.17, "eval_accuracy": 0.927625, "eval_loss": 0.21186378598213196, "eval_runtime": 15.6418, "eval_samples_per_second": 1534.346, "eval_steps_per_second": 4.795, "step": 10020 }, { "epoch": 4.18, "learning_rate": 1e-06, "loss": 0.1761, "step": 10040 }, { "epoch": 4.18, "eval_accuracy": 0.92725, "eval_loss": 0.2122122347354889, "eval_runtime": 16.1634, "eval_samples_per_second": 1484.835, "eval_steps_per_second": 4.64, "step": 10040 }, { "epoch": 4.19, "learning_rate": 1e-06, "loss": 0.1787, "step": 10060 }, { "epoch": 4.19, "eval_accuracy": 0.9278333333333333, "eval_loss": 0.21212342381477356, "eval_runtime": 15.87, "eval_samples_per_second": 1512.288, "eval_steps_per_second": 4.726, "step": 10060 }, { "epoch": 4.2, "learning_rate": 1e-06, "loss": 0.1773, "step": 10080 }, { "epoch": 4.2, "eval_accuracy": 0.927375, "eval_loss": 0.21214234828948975, "eval_runtime": 15.6627, "eval_samples_per_second": 1532.306, "eval_steps_per_second": 4.788, "step": 10080 }, { "epoch": 4.21, "learning_rate": 1e-06, "loss": 0.2317, "step": 10100 }, { "epoch": 4.21, "eval_accuracy": 0.9284166666666667, "eval_loss": 0.21087035536766052, "eval_runtime": 15.9309, "eval_samples_per_second": 1506.51, "eval_steps_per_second": 4.708, "step": 10100 }, { "epoch": 4.22, "learning_rate": 1e-06, "loss": 0.1909, "step": 10120 }, { "epoch": 4.22, "eval_accuracy": 0.9279583333333333, "eval_loss": 0.2117665410041809, "eval_runtime": 15.8849, "eval_samples_per_second": 1510.864, "eval_steps_per_second": 4.721, "step": 10120 }, { "epoch": 4.22, "learning_rate": 1e-06, "loss": 0.2192, "step": 10140 }, { "epoch": 4.22, "eval_accuracy": 0.9291666666666667, "eval_loss": 0.20962686836719513, "eval_runtime": 16.1628, "eval_samples_per_second": 1484.887, "eval_steps_per_second": 4.64, "step": 10140 }, { "epoch": 4.23, "learning_rate": 1e-06, "loss": 0.1977, "step": 10160 }, { "epoch": 4.23, "eval_accuracy": 0.9277083333333334, "eval_loss": 0.21077552437782288, "eval_runtime": 15.8089, "eval_samples_per_second": 1518.136, "eval_steps_per_second": 4.744, "step": 10160 }, { "epoch": 4.24, "learning_rate": 1e-06, "loss": 0.2138, "step": 10180 }, { "epoch": 4.24, "eval_accuracy": 0.9267083333333334, "eval_loss": 0.2138536125421524, "eval_runtime": 15.891, "eval_samples_per_second": 1510.292, "eval_steps_per_second": 4.72, "step": 10180 }, { "epoch": 4.25, "learning_rate": 1e-06, "loss": 0.1753, "step": 10200 }, { "epoch": 4.25, "eval_accuracy": 0.92675, "eval_loss": 0.21307241916656494, "eval_runtime": 16.0289, "eval_samples_per_second": 1497.293, "eval_steps_per_second": 4.679, "step": 10200 }, { "epoch": 4.26, "learning_rate": 1e-06, "loss": 0.2094, "step": 10220 }, { "epoch": 4.26, "eval_accuracy": 0.9283333333333333, "eval_loss": 0.21122166514396667, "eval_runtime": 16.1206, "eval_samples_per_second": 1488.781, "eval_steps_per_second": 4.652, "step": 10220 }, { "epoch": 4.27, "learning_rate": 1e-06, "loss": 0.204, "step": 10240 }, { "epoch": 4.27, "eval_accuracy": 0.92825, "eval_loss": 0.20935103297233582, "eval_runtime": 15.822, "eval_samples_per_second": 1516.879, "eval_steps_per_second": 4.74, "step": 10240 }, { "epoch": 4.28, "learning_rate": 1e-06, "loss": 0.2196, "step": 10260 }, { "epoch": 4.28, "eval_accuracy": 0.927, "eval_loss": 0.2119808942079544, "eval_runtime": 16.0008, "eval_samples_per_second": 1499.92, "eval_steps_per_second": 4.687, "step": 10260 }, { "epoch": 4.28, "learning_rate": 1e-06, "loss": 0.2122, "step": 10280 }, { "epoch": 4.28, "eval_accuracy": 0.9271666666666667, "eval_loss": 0.21251654624938965, "eval_runtime": 16.2621, "eval_samples_per_second": 1475.821, "eval_steps_per_second": 4.612, "step": 10280 }, { "epoch": 4.29, "learning_rate": 1e-06, "loss": 0.1534, "step": 10300 }, { "epoch": 4.29, "eval_accuracy": 0.9284166666666667, "eval_loss": 0.2099744975566864, "eval_runtime": 15.4991, "eval_samples_per_second": 1548.474, "eval_steps_per_second": 4.839, "step": 10300 }, { "epoch": 4.3, "learning_rate": 1e-06, "loss": 0.1917, "step": 10320 }, { "epoch": 4.3, "eval_accuracy": 0.9281666666666667, "eval_loss": 0.2101805955171585, "eval_runtime": 15.541, "eval_samples_per_second": 1544.303, "eval_steps_per_second": 4.826, "step": 10320 }, { "epoch": 4.31, "learning_rate": 1e-06, "loss": 0.201, "step": 10340 }, { "epoch": 4.31, "eval_accuracy": 0.928125, "eval_loss": 0.20908385515213013, "eval_runtime": 15.6138, "eval_samples_per_second": 1537.104, "eval_steps_per_second": 4.803, "step": 10340 }, { "epoch": 4.32, "learning_rate": 1e-06, "loss": 0.1736, "step": 10360 }, { "epoch": 4.32, "eval_accuracy": 0.9292083333333333, "eval_loss": 0.20927385985851288, "eval_runtime": 15.7173, "eval_samples_per_second": 1526.975, "eval_steps_per_second": 4.772, "step": 10360 }, { "epoch": 4.33, "learning_rate": 1e-06, "loss": 0.1948, "step": 10380 }, { "epoch": 4.33, "eval_accuracy": 0.928625, "eval_loss": 0.2104508876800537, "eval_runtime": 15.7589, "eval_samples_per_second": 1522.952, "eval_steps_per_second": 4.759, "step": 10380 }, { "epoch": 4.33, "learning_rate": 1e-06, "loss": 0.1967, "step": 10400 }, { "epoch": 4.33, "eval_accuracy": 0.9270833333333334, "eval_loss": 0.21193169057369232, "eval_runtime": 15.5786, "eval_samples_per_second": 1540.575, "eval_steps_per_second": 4.814, "step": 10400 }, { "epoch": 4.34, "learning_rate": 1e-06, "loss": 0.1722, "step": 10420 }, { "epoch": 4.34, "eval_accuracy": 0.9289583333333333, "eval_loss": 0.20838379859924316, "eval_runtime": 16.4064, "eval_samples_per_second": 1462.84, "eval_steps_per_second": 4.571, "step": 10420 }, { "epoch": 4.35, "learning_rate": 1e-06, "loss": 0.1855, "step": 10440 }, { "epoch": 4.35, "eval_accuracy": 0.928375, "eval_loss": 0.20829389989376068, "eval_runtime": 15.8035, "eval_samples_per_second": 1518.649, "eval_steps_per_second": 4.746, "step": 10440 }, { "epoch": 4.36, "learning_rate": 1e-06, "loss": 0.2067, "step": 10460 }, { "epoch": 4.36, "eval_accuracy": 0.92925, "eval_loss": 0.20744042098522186, "eval_runtime": 15.8104, "eval_samples_per_second": 1517.993, "eval_steps_per_second": 4.744, "step": 10460 }, { "epoch": 4.37, "learning_rate": 1e-06, "loss": 0.1925, "step": 10480 }, { "epoch": 4.37, "eval_accuracy": 0.9292916666666666, "eval_loss": 0.20747624337673187, "eval_runtime": 16.0704, "eval_samples_per_second": 1493.428, "eval_steps_per_second": 4.667, "step": 10480 }, { "epoch": 4.38, "learning_rate": 1e-06, "loss": 0.2027, "step": 10500 }, { "epoch": 4.38, "eval_accuracy": 0.92775, "eval_loss": 0.2088412493467331, "eval_runtime": 15.8843, "eval_samples_per_second": 1510.928, "eval_steps_per_second": 4.722, "step": 10500 }, { "epoch": 4.38, "learning_rate": 1e-06, "loss": 0.1763, "step": 10520 }, { "epoch": 4.38, "eval_accuracy": 0.928125, "eval_loss": 0.20948942005634308, "eval_runtime": 15.8584, "eval_samples_per_second": 1513.39, "eval_steps_per_second": 4.729, "step": 10520 }, { "epoch": 4.39, "learning_rate": 1e-06, "loss": 0.2019, "step": 10540 }, { "epoch": 4.39, "eval_accuracy": 0.9283333333333333, "eval_loss": 0.20972661674022675, "eval_runtime": 16.0436, "eval_samples_per_second": 1495.92, "eval_steps_per_second": 4.675, "step": 10540 }, { "epoch": 4.4, "learning_rate": 1e-06, "loss": 0.2411, "step": 10560 }, { "epoch": 4.4, "eval_accuracy": 0.9285, "eval_loss": 0.21000780165195465, "eval_runtime": 15.8037, "eval_samples_per_second": 1518.635, "eval_steps_per_second": 4.746, "step": 10560 }, { "epoch": 4.41, "learning_rate": 1e-06, "loss": 0.1896, "step": 10580 }, { "epoch": 4.41, "eval_accuracy": 0.9274583333333334, "eval_loss": 0.20897161960601807, "eval_runtime": 15.8212, "eval_samples_per_second": 1516.948, "eval_steps_per_second": 4.74, "step": 10580 }, { "epoch": 4.42, "learning_rate": 1e-06, "loss": 0.2153, "step": 10600 }, { "epoch": 4.42, "eval_accuracy": 0.9294166666666667, "eval_loss": 0.2074136584997177, "eval_runtime": 16.2219, "eval_samples_per_second": 1479.478, "eval_steps_per_second": 4.623, "step": 10600 }, { "epoch": 4.42, "learning_rate": 1e-06, "loss": 0.2157, "step": 10620 }, { "epoch": 4.42, "eval_accuracy": 0.9291666666666667, "eval_loss": 0.20682406425476074, "eval_runtime": 15.4922, "eval_samples_per_second": 1549.169, "eval_steps_per_second": 4.841, "step": 10620 }, { "epoch": 4.43, "learning_rate": 1e-06, "loss": 0.1676, "step": 10640 }, { "epoch": 4.43, "eval_accuracy": 0.9277916666666667, "eval_loss": 0.20833227038383484, "eval_runtime": 16.1598, "eval_samples_per_second": 1485.169, "eval_steps_per_second": 4.641, "step": 10640 }, { "epoch": 4.44, "learning_rate": 1e-06, "loss": 0.2071, "step": 10660 }, { "epoch": 4.44, "eval_accuracy": 0.9270833333333334, "eval_loss": 0.20793978869915009, "eval_runtime": 15.9207, "eval_samples_per_second": 1507.475, "eval_steps_per_second": 4.711, "step": 10660 }, { "epoch": 4.45, "learning_rate": 1e-06, "loss": 0.2311, "step": 10680 }, { "epoch": 4.45, "eval_accuracy": 0.9275, "eval_loss": 0.2090083658695221, "eval_runtime": 15.8466, "eval_samples_per_second": 1514.52, "eval_steps_per_second": 4.733, "step": 10680 }, { "epoch": 4.46, "learning_rate": 1e-06, "loss": 0.1938, "step": 10700 }, { "epoch": 4.46, "eval_accuracy": 0.9301666666666667, "eval_loss": 0.20576812326908112, "eval_runtime": 16.5431, "eval_samples_per_second": 1450.759, "eval_steps_per_second": 4.534, "step": 10700 }, { "epoch": 4.47, "learning_rate": 1e-06, "loss": 0.2202, "step": 10720 }, { "epoch": 4.47, "eval_accuracy": 0.9292083333333333, "eval_loss": 0.20580460131168365, "eval_runtime": 15.9051, "eval_samples_per_second": 1508.949, "eval_steps_per_second": 4.715, "step": 10720 }, { "epoch": 4.47, "learning_rate": 1e-06, "loss": 0.1872, "step": 10740 }, { "epoch": 4.47, "eval_accuracy": 0.9278333333333333, "eval_loss": 0.20827758312225342, "eval_runtime": 15.8978, "eval_samples_per_second": 1509.642, "eval_steps_per_second": 4.718, "step": 10740 }, { "epoch": 4.48, "learning_rate": 1e-06, "loss": 0.2034, "step": 10760 }, { "epoch": 4.48, "eval_accuracy": 0.927875, "eval_loss": 0.2091016322374344, "eval_runtime": 15.7146, "eval_samples_per_second": 1527.242, "eval_steps_per_second": 4.773, "step": 10760 }, { "epoch": 4.49, "learning_rate": 1e-06, "loss": 0.2204, "step": 10780 }, { "epoch": 4.49, "eval_accuracy": 0.9276666666666666, "eval_loss": 0.20921491086483002, "eval_runtime": 16.115, "eval_samples_per_second": 1489.295, "eval_steps_per_second": 4.654, "step": 10780 }, { "epoch": 4.5, "learning_rate": 1e-06, "loss": 0.216, "step": 10800 }, { "epoch": 4.5, "eval_accuracy": 0.927625, "eval_loss": 0.20790547132492065, "eval_runtime": 15.9443, "eval_samples_per_second": 1505.242, "eval_steps_per_second": 4.704, "step": 10800 }, { "epoch": 4.51, "learning_rate": 1e-06, "loss": 0.1824, "step": 10820 }, { "epoch": 4.51, "eval_accuracy": 0.9280833333333334, "eval_loss": 0.20656706392765045, "eval_runtime": 16.1164, "eval_samples_per_second": 1489.165, "eval_steps_per_second": 4.654, "step": 10820 }, { "epoch": 4.52, "learning_rate": 1e-06, "loss": 0.2098, "step": 10840 }, { "epoch": 4.52, "eval_accuracy": 0.9296666666666666, "eval_loss": 0.20553721487522125, "eval_runtime": 15.8252, "eval_samples_per_second": 1516.573, "eval_steps_per_second": 4.739, "step": 10840 }, { "epoch": 4.53, "learning_rate": 1e-06, "loss": 0.2258, "step": 10860 }, { "epoch": 4.53, "eval_accuracy": 0.9294583333333334, "eval_loss": 0.2055697739124298, "eval_runtime": 15.8329, "eval_samples_per_second": 1515.829, "eval_steps_per_second": 4.737, "step": 10860 }, { "epoch": 4.53, "learning_rate": 1e-06, "loss": 0.2086, "step": 10880 }, { "epoch": 4.53, "eval_accuracy": 0.9282083333333333, "eval_loss": 0.20601527392864227, "eval_runtime": 16.046, "eval_samples_per_second": 1495.703, "eval_steps_per_second": 4.674, "step": 10880 }, { "epoch": 4.54, "learning_rate": 1e-06, "loss": 0.1725, "step": 10900 }, { "epoch": 4.54, "eval_accuracy": 0.9290416666666667, "eval_loss": 0.20634058117866516, "eval_runtime": 15.8771, "eval_samples_per_second": 1511.608, "eval_steps_per_second": 4.724, "step": 10900 }, { "epoch": 4.55, "learning_rate": 1e-06, "loss": 0.1899, "step": 10920 }, { "epoch": 4.55, "eval_accuracy": 0.9292916666666666, "eval_loss": 0.20672395825386047, "eval_runtime": 16.1064, "eval_samples_per_second": 1490.089, "eval_steps_per_second": 4.657, "step": 10920 }, { "epoch": 4.56, "learning_rate": 1e-06, "loss": 0.1959, "step": 10940 }, { "epoch": 4.56, "eval_accuracy": 0.9266666666666666, "eval_loss": 0.21057891845703125, "eval_runtime": 15.9754, "eval_samples_per_second": 1502.309, "eval_steps_per_second": 4.695, "step": 10940 }, { "epoch": 4.57, "learning_rate": 1e-06, "loss": 0.2029, "step": 10960 }, { "epoch": 4.57, "eval_accuracy": 0.928875, "eval_loss": 0.20633479952812195, "eval_runtime": 15.7725, "eval_samples_per_second": 1521.637, "eval_steps_per_second": 4.755, "step": 10960 }, { "epoch": 4.58, "learning_rate": 1e-06, "loss": 0.1843, "step": 10980 }, { "epoch": 4.58, "eval_accuracy": 0.9300833333333334, "eval_loss": 0.2045080065727234, "eval_runtime": 16.6018, "eval_samples_per_second": 1445.625, "eval_steps_per_second": 4.518, "step": 10980 }, { "epoch": 4.58, "learning_rate": 1e-06, "loss": 0.2249, "step": 11000 }, { "epoch": 4.58, "eval_accuracy": 0.93025, "eval_loss": 0.20456919074058533, "eval_runtime": 15.8868, "eval_samples_per_second": 1510.688, "eval_steps_per_second": 4.721, "step": 11000 }, { "epoch": 4.59, "learning_rate": 1e-06, "loss": 0.1764, "step": 11020 }, { "epoch": 4.59, "eval_accuracy": 0.92925, "eval_loss": 0.20611906051635742, "eval_runtime": 16.0015, "eval_samples_per_second": 1499.856, "eval_steps_per_second": 4.687, "step": 11020 }, { "epoch": 4.6, "learning_rate": 1e-06, "loss": 0.2269, "step": 11040 }, { "epoch": 4.6, "eval_accuracy": 0.9290416666666667, "eval_loss": 0.20687995851039886, "eval_runtime": 15.9876, "eval_samples_per_second": 1501.163, "eval_steps_per_second": 4.691, "step": 11040 }, { "epoch": 4.61, "learning_rate": 1e-06, "loss": 0.234, "step": 11060 }, { "epoch": 4.61, "eval_accuracy": 0.92825, "eval_loss": 0.20855723321437836, "eval_runtime": 15.799, "eval_samples_per_second": 1519.081, "eval_steps_per_second": 4.747, "step": 11060 }, { "epoch": 4.62, "learning_rate": 1e-06, "loss": 0.1925, "step": 11080 }, { "epoch": 4.62, "eval_accuracy": 0.9294166666666667, "eval_loss": 0.2057270109653473, "eval_runtime": 15.765, "eval_samples_per_second": 1522.358, "eval_steps_per_second": 4.757, "step": 11080 }, { "epoch": 4.62, "learning_rate": 1e-06, "loss": 0.1949, "step": 11100 }, { "epoch": 4.62, "eval_accuracy": 0.9285, "eval_loss": 0.206070676445961, "eval_runtime": 16.1103, "eval_samples_per_second": 1489.726, "eval_steps_per_second": 4.655, "step": 11100 }, { "epoch": 4.63, "learning_rate": 1e-06, "loss": 0.1928, "step": 11120 }, { "epoch": 4.63, "eval_accuracy": 0.9290416666666667, "eval_loss": 0.20591707527637482, "eval_runtime": 15.8556, "eval_samples_per_second": 1513.659, "eval_steps_per_second": 4.73, "step": 11120 }, { "epoch": 4.64, "learning_rate": 1e-06, "loss": 0.1744, "step": 11140 }, { "epoch": 4.64, "eval_accuracy": 0.9286666666666666, "eval_loss": 0.20713801681995392, "eval_runtime": 16.1571, "eval_samples_per_second": 1485.413, "eval_steps_per_second": 4.642, "step": 11140 }, { "epoch": 4.65, "learning_rate": 1e-06, "loss": 0.2161, "step": 11160 }, { "epoch": 4.65, "eval_accuracy": 0.92925, "eval_loss": 0.20511361956596375, "eval_runtime": 16.4581, "eval_samples_per_second": 1458.25, "eval_steps_per_second": 4.557, "step": 11160 }, { "epoch": 4.66, "learning_rate": 1e-06, "loss": 0.168, "step": 11180 }, { "epoch": 4.66, "eval_accuracy": 0.92925, "eval_loss": 0.2065957635641098, "eval_runtime": 15.7867, "eval_samples_per_second": 1520.267, "eval_steps_per_second": 4.751, "step": 11180 }, { "epoch": 4.67, "learning_rate": 1e-06, "loss": 0.1739, "step": 11200 }, { "epoch": 4.67, "eval_accuracy": 0.9285833333333333, "eval_loss": 0.20561246573925018, "eval_runtime": 15.6163, "eval_samples_per_second": 1536.854, "eval_steps_per_second": 4.803, "step": 11200 }, { "epoch": 4.67, "learning_rate": 1e-06, "loss": 0.1816, "step": 11220 }, { "epoch": 4.67, "eval_accuracy": 0.9287916666666667, "eval_loss": 0.20476531982421875, "eval_runtime": 16.2877, "eval_samples_per_second": 1473.506, "eval_steps_per_second": 4.605, "step": 11220 }, { "epoch": 4.68, "learning_rate": 1e-06, "loss": 0.1931, "step": 11240 }, { "epoch": 4.68, "eval_accuracy": 0.9285416666666667, "eval_loss": 0.20711849629878998, "eval_runtime": 15.8294, "eval_samples_per_second": 1516.17, "eval_steps_per_second": 4.738, "step": 11240 }, { "epoch": 4.69, "learning_rate": 1e-06, "loss": 0.1945, "step": 11260 }, { "epoch": 4.69, "eval_accuracy": 0.9281666666666667, "eval_loss": 0.20717017352581024, "eval_runtime": 16.0348, "eval_samples_per_second": 1496.744, "eval_steps_per_second": 4.677, "step": 11260 }, { "epoch": 4.7, "learning_rate": 1e-06, "loss": 0.1929, "step": 11280 }, { "epoch": 4.7, "eval_accuracy": 0.928875, "eval_loss": 0.20755107700824738, "eval_runtime": 16.3373, "eval_samples_per_second": 1469.027, "eval_steps_per_second": 4.591, "step": 11280 }, { "epoch": 4.71, "learning_rate": 1e-06, "loss": 0.1903, "step": 11300 }, { "epoch": 4.71, "eval_accuracy": 0.929875, "eval_loss": 0.20404241979122162, "eval_runtime": 16.0151, "eval_samples_per_second": 1498.586, "eval_steps_per_second": 4.683, "step": 11300 }, { "epoch": 4.72, "learning_rate": 1e-06, "loss": 0.2051, "step": 11320 }, { "epoch": 4.72, "eval_accuracy": 0.9290416666666667, "eval_loss": 0.20393158495426178, "eval_runtime": 16.464, "eval_samples_per_second": 1457.729, "eval_steps_per_second": 4.555, "step": 11320 }, { "epoch": 4.72, "learning_rate": 1e-06, "loss": 0.1614, "step": 11340 }, { "epoch": 4.72, "eval_accuracy": 0.929125, "eval_loss": 0.20461545884609222, "eval_runtime": 15.8151, "eval_samples_per_second": 1517.534, "eval_steps_per_second": 4.742, "step": 11340 }, { "epoch": 4.73, "learning_rate": 1e-06, "loss": 0.1968, "step": 11360 }, { "epoch": 4.73, "eval_accuracy": 0.9283333333333333, "eval_loss": 0.20622652769088745, "eval_runtime": 15.9434, "eval_samples_per_second": 1505.328, "eval_steps_per_second": 4.704, "step": 11360 }, { "epoch": 4.74, "learning_rate": 1e-06, "loss": 0.2091, "step": 11380 }, { "epoch": 4.74, "eval_accuracy": 0.926375, "eval_loss": 0.2111745923757553, "eval_runtime": 15.9338, "eval_samples_per_second": 1506.236, "eval_steps_per_second": 4.707, "step": 11380 }, { "epoch": 4.75, "learning_rate": 1e-06, "loss": 0.2072, "step": 11400 }, { "epoch": 4.75, "eval_accuracy": 0.9277916666666667, "eval_loss": 0.2086385041475296, "eval_runtime": 15.7955, "eval_samples_per_second": 1519.425, "eval_steps_per_second": 4.748, "step": 11400 }, { "epoch": 4.76, "learning_rate": 1e-06, "loss": 0.2298, "step": 11420 }, { "epoch": 4.76, "eval_accuracy": 0.9287083333333334, "eval_loss": 0.2052876353263855, "eval_runtime": 16.0941, "eval_samples_per_second": 1491.226, "eval_steps_per_second": 4.66, "step": 11420 }, { "epoch": 4.77, "learning_rate": 1e-06, "loss": 0.1874, "step": 11440 }, { "epoch": 4.77, "eval_accuracy": 0.9292916666666666, "eval_loss": 0.20625039935112, "eval_runtime": 15.9003, "eval_samples_per_second": 1509.408, "eval_steps_per_second": 4.717, "step": 11440 }, { "epoch": 4.78, "learning_rate": 1e-06, "loss": 0.1838, "step": 11460 }, { "epoch": 4.78, "eval_accuracy": 0.928, "eval_loss": 0.20656456053256989, "eval_runtime": 16.1911, "eval_samples_per_second": 1482.299, "eval_steps_per_second": 4.632, "step": 11460 }, { "epoch": 4.78, "learning_rate": 1e-06, "loss": 0.2015, "step": 11480 }, { "epoch": 4.78, "eval_accuracy": 0.929625, "eval_loss": 0.20533688366413116, "eval_runtime": 15.5948, "eval_samples_per_second": 1538.974, "eval_steps_per_second": 4.809, "step": 11480 }, { "epoch": 4.79, "learning_rate": 1e-06, "loss": 0.1822, "step": 11500 }, { "epoch": 4.79, "eval_accuracy": 0.9284583333333334, "eval_loss": 0.20839504897594452, "eval_runtime": 15.9851, "eval_samples_per_second": 1501.396, "eval_steps_per_second": 4.692, "step": 11500 }, { "epoch": 4.8, "learning_rate": 1e-06, "loss": 0.2209, "step": 11520 }, { "epoch": 4.8, "eval_accuracy": 0.9295, "eval_loss": 0.20546448230743408, "eval_runtime": 15.8868, "eval_samples_per_second": 1510.688, "eval_steps_per_second": 4.721, "step": 11520 }, { "epoch": 4.81, "learning_rate": 1e-06, "loss": 0.1918, "step": 11540 }, { "epoch": 4.81, "eval_accuracy": 0.929625, "eval_loss": 0.20511560142040253, "eval_runtime": 16.5324, "eval_samples_per_second": 1451.696, "eval_steps_per_second": 4.537, "step": 11540 }, { "epoch": 4.82, "learning_rate": 1e-06, "loss": 0.2252, "step": 11560 }, { "epoch": 4.82, "eval_accuracy": 0.9294166666666667, "eval_loss": 0.2052285224199295, "eval_runtime": 15.5246, "eval_samples_per_second": 1545.931, "eval_steps_per_second": 4.831, "step": 11560 }, { "epoch": 4.83, "learning_rate": 1e-06, "loss": 0.1929, "step": 11580 }, { "epoch": 4.83, "eval_accuracy": 0.9300833333333334, "eval_loss": 0.20385289192199707, "eval_runtime": 15.4266, "eval_samples_per_second": 1555.758, "eval_steps_per_second": 4.862, "step": 11580 }, { "epoch": 4.83, "learning_rate": 1e-06, "loss": 0.1889, "step": 11600 }, { "epoch": 4.83, "eval_accuracy": 0.9298333333333333, "eval_loss": 0.2047443836927414, "eval_runtime": 15.9168, "eval_samples_per_second": 1507.841, "eval_steps_per_second": 4.712, "step": 11600 }, { "epoch": 4.84, "learning_rate": 1e-06, "loss": 0.1812, "step": 11620 }, { "epoch": 4.84, "eval_accuracy": 0.929, "eval_loss": 0.20585575699806213, "eval_runtime": 15.6349, "eval_samples_per_second": 1535.024, "eval_steps_per_second": 4.797, "step": 11620 }, { "epoch": 4.85, "learning_rate": 1e-06, "loss": 0.2473, "step": 11640 }, { "epoch": 4.85, "eval_accuracy": 0.9304166666666667, "eval_loss": 0.2033381462097168, "eval_runtime": 16.0063, "eval_samples_per_second": 1499.413, "eval_steps_per_second": 4.686, "step": 11640 }, { "epoch": 4.86, "learning_rate": 1e-06, "loss": 0.1757, "step": 11660 }, { "epoch": 4.86, "eval_accuracy": 0.9302083333333333, "eval_loss": 0.20283745229244232, "eval_runtime": 15.8227, "eval_samples_per_second": 1516.806, "eval_steps_per_second": 4.74, "step": 11660 }, { "epoch": 4.87, "learning_rate": 1e-06, "loss": 0.2138, "step": 11680 }, { "epoch": 4.87, "eval_accuracy": 0.9295833333333333, "eval_loss": 0.20379288494586945, "eval_runtime": 15.9957, "eval_samples_per_second": 1500.406, "eval_steps_per_second": 4.689, "step": 11680 }, { "epoch": 4.88, "learning_rate": 1e-06, "loss": 0.2594, "step": 11700 }, { "epoch": 4.88, "eval_accuracy": 0.9294166666666667, "eval_loss": 0.20390905439853668, "eval_runtime": 15.8805, "eval_samples_per_second": 1511.292, "eval_steps_per_second": 4.723, "step": 11700 }, { "epoch": 4.88, "learning_rate": 1e-06, "loss": 0.183, "step": 11720 }, { "epoch": 4.88, "eval_accuracy": 0.9290416666666667, "eval_loss": 0.2046515792608261, "eval_runtime": 16.3804, "eval_samples_per_second": 1465.166, "eval_steps_per_second": 4.579, "step": 11720 }, { "epoch": 4.89, "learning_rate": 1e-06, "loss": 0.1479, "step": 11740 }, { "epoch": 4.89, "eval_accuracy": 0.9282083333333333, "eval_loss": 0.20830100774765015, "eval_runtime": 15.9731, "eval_samples_per_second": 1502.525, "eval_steps_per_second": 4.695, "step": 11740 }, { "epoch": 4.9, "learning_rate": 1e-06, "loss": 0.1684, "step": 11760 }, { "epoch": 4.9, "eval_accuracy": 0.9296666666666666, "eval_loss": 0.20390328764915466, "eval_runtime": 15.9815, "eval_samples_per_second": 1501.737, "eval_steps_per_second": 4.693, "step": 11760 }, { "epoch": 4.91, "learning_rate": 1e-06, "loss": 0.1802, "step": 11780 }, { "epoch": 4.91, "eval_accuracy": 0.928125, "eval_loss": 0.2074248492717743, "eval_runtime": 16.0187, "eval_samples_per_second": 1498.245, "eval_steps_per_second": 4.682, "step": 11780 }, { "epoch": 4.92, "learning_rate": 1e-06, "loss": 0.1707, "step": 11800 }, { "epoch": 4.92, "eval_accuracy": 0.92875, "eval_loss": 0.20657067000865936, "eval_runtime": 15.9205, "eval_samples_per_second": 1507.488, "eval_steps_per_second": 4.711, "step": 11800 }, { "epoch": 4.92, "learning_rate": 1e-06, "loss": 0.2031, "step": 11820 }, { "epoch": 4.92, "eval_accuracy": 0.928375, "eval_loss": 0.21004259586334229, "eval_runtime": 15.5995, "eval_samples_per_second": 1538.507, "eval_steps_per_second": 4.808, "step": 11820 }, { "epoch": 4.93, "learning_rate": 1e-06, "loss": 0.2169, "step": 11840 }, { "epoch": 4.93, "eval_accuracy": 0.9290416666666667, "eval_loss": 0.20725244283676147, "eval_runtime": 15.6197, "eval_samples_per_second": 1536.517, "eval_steps_per_second": 4.802, "step": 11840 }, { "epoch": 4.94, "learning_rate": 1e-06, "loss": 0.1941, "step": 11860 }, { "epoch": 4.94, "eval_accuracy": 0.930125, "eval_loss": 0.2036478966474533, "eval_runtime": 16.2424, "eval_samples_per_second": 1477.617, "eval_steps_per_second": 4.618, "step": 11860 }, { "epoch": 4.95, "learning_rate": 1e-06, "loss": 0.2131, "step": 11880 }, { "epoch": 4.95, "eval_accuracy": 0.9295, "eval_loss": 0.2052011936903, "eval_runtime": 15.8259, "eval_samples_per_second": 1516.501, "eval_steps_per_second": 4.739, "step": 11880 }, { "epoch": 4.96, "learning_rate": 1e-06, "loss": 0.1867, "step": 11900 }, { "epoch": 4.96, "eval_accuracy": 0.9289583333333333, "eval_loss": 0.20610161125659943, "eval_runtime": 15.9156, "eval_samples_per_second": 1507.958, "eval_steps_per_second": 4.712, "step": 11900 }, { "epoch": 4.97, "learning_rate": 1e-06, "loss": 0.176, "step": 11920 }, { "epoch": 4.97, "eval_accuracy": 0.9302916666666666, "eval_loss": 0.20278537273406982, "eval_runtime": 16.3933, "eval_samples_per_second": 1464.01, "eval_steps_per_second": 4.575, "step": 11920 }, { "epoch": 4.97, "learning_rate": 1e-06, "loss": 0.1932, "step": 11940 }, { "epoch": 4.97, "eval_accuracy": 0.9305833333333333, "eval_loss": 0.2031938135623932, "eval_runtime": 15.7386, "eval_samples_per_second": 1524.912, "eval_steps_per_second": 4.765, "step": 11940 }, { "epoch": 4.98, "learning_rate": 1e-06, "loss": 0.2253, "step": 11960 }, { "epoch": 4.98, "eval_accuracy": 0.930125, "eval_loss": 0.20466017723083496, "eval_runtime": 15.9542, "eval_samples_per_second": 1504.308, "eval_steps_per_second": 4.701, "step": 11960 }, { "epoch": 4.99, "learning_rate": 1e-06, "loss": 0.2001, "step": 11980 }, { "epoch": 4.99, "eval_accuracy": 0.9303333333333333, "eval_loss": 0.20326323807239532, "eval_runtime": 16.2585, "eval_samples_per_second": 1476.147, "eval_steps_per_second": 4.613, "step": 11980 }, { "epoch": 5.0, "learning_rate": 1e-06, "loss": 0.2234, "step": 12000 }, { "epoch": 5.0, "eval_accuracy": 0.9305, "eval_loss": 0.20482754707336426, "eval_runtime": 15.5729, "eval_samples_per_second": 1541.134, "eval_steps_per_second": 4.816, "step": 12000 }, { "epoch": 5.01, "learning_rate": 1e-06, "loss": 0.2004, "step": 12020 }, { "epoch": 5.01, "eval_accuracy": 0.9305416666666667, "eval_loss": 0.20364505052566528, "eval_runtime": 16.9313, "eval_samples_per_second": 1417.493, "eval_steps_per_second": 4.43, "step": 12020 }, { "epoch": 5.02, "learning_rate": 1e-06, "loss": 0.1859, "step": 12040 }, { "epoch": 5.02, "eval_accuracy": 0.9309583333333333, "eval_loss": 0.2025202065706253, "eval_runtime": 16.472, "eval_samples_per_second": 1457.018, "eval_steps_per_second": 4.553, "step": 12040 }, { "epoch": 5.03, "learning_rate": 1e-06, "loss": 0.19, "step": 12060 }, { "epoch": 5.03, "eval_accuracy": 0.931375, "eval_loss": 0.20227618515491486, "eval_runtime": 15.727, "eval_samples_per_second": 1526.038, "eval_steps_per_second": 4.769, "step": 12060 }, { "epoch": 5.03, "learning_rate": 1e-06, "loss": 0.1645, "step": 12080 }, { "epoch": 5.03, "eval_accuracy": 0.9303333333333333, "eval_loss": 0.20448292791843414, "eval_runtime": 16.1326, "eval_samples_per_second": 1487.669, "eval_steps_per_second": 4.649, "step": 12080 }, { "epoch": 5.04, "learning_rate": 1e-06, "loss": 0.2003, "step": 12100 }, { "epoch": 5.04, "eval_accuracy": 0.931875, "eval_loss": 0.20208178460597992, "eval_runtime": 16.0837, "eval_samples_per_second": 1492.198, "eval_steps_per_second": 4.663, "step": 12100 }, { "epoch": 5.05, "learning_rate": 1e-06, "loss": 0.1788, "step": 12120 }, { "epoch": 5.05, "eval_accuracy": 0.9320833333333334, "eval_loss": 0.2024490088224411, "eval_runtime": 15.8984, "eval_samples_per_second": 1509.589, "eval_steps_per_second": 4.717, "step": 12120 }, { "epoch": 5.06, "learning_rate": 1e-06, "loss": 0.1697, "step": 12140 }, { "epoch": 5.06, "eval_accuracy": 0.9314583333333334, "eval_loss": 0.202731654047966, "eval_runtime": 15.9929, "eval_samples_per_second": 1500.664, "eval_steps_per_second": 4.69, "step": 12140 }, { "epoch": 5.07, "learning_rate": 1e-06, "loss": 0.1824, "step": 12160 }, { "epoch": 5.07, "eval_accuracy": 0.9303333333333333, "eval_loss": 0.2048906683921814, "eval_runtime": 15.963, "eval_samples_per_second": 1503.477, "eval_steps_per_second": 4.698, "step": 12160 }, { "epoch": 5.08, "learning_rate": 1e-06, "loss": 0.1773, "step": 12180 }, { "epoch": 5.08, "eval_accuracy": 0.9302916666666666, "eval_loss": 0.20466017723083496, "eval_runtime": 16.2544, "eval_samples_per_second": 1476.528, "eval_steps_per_second": 4.614, "step": 12180 }, { "epoch": 5.08, "learning_rate": 1e-06, "loss": 0.2324, "step": 12200 }, { "epoch": 5.08, "eval_accuracy": 0.9304166666666667, "eval_loss": 0.20540772378444672, "eval_runtime": 16.2048, "eval_samples_per_second": 1481.039, "eval_steps_per_second": 4.628, "step": 12200 }, { "epoch": 5.09, "learning_rate": 1e-06, "loss": 0.1513, "step": 12220 }, { "epoch": 5.09, "eval_accuracy": 0.9298333333333333, "eval_loss": 0.20598597824573517, "eval_runtime": 15.5756, "eval_samples_per_second": 1540.872, "eval_steps_per_second": 4.815, "step": 12220 }, { "epoch": 5.1, "learning_rate": 1e-06, "loss": 0.1975, "step": 12240 }, { "epoch": 5.1, "eval_accuracy": 0.9305833333333333, "eval_loss": 0.20424893498420715, "eval_runtime": 15.9543, "eval_samples_per_second": 1504.299, "eval_steps_per_second": 4.701, "step": 12240 }, { "epoch": 5.11, "learning_rate": 1e-06, "loss": 0.2149, "step": 12260 }, { "epoch": 5.11, "eval_accuracy": 0.9297916666666667, "eval_loss": 0.205413356423378, "eval_runtime": 15.6474, "eval_samples_per_second": 1533.804, "eval_steps_per_second": 4.793, "step": 12260 }, { "epoch": 5.12, "learning_rate": 1e-06, "loss": 0.1685, "step": 12280 }, { "epoch": 5.12, "eval_accuracy": 0.9296666666666666, "eval_loss": 0.20603446662425995, "eval_runtime": 16.0146, "eval_samples_per_second": 1498.636, "eval_steps_per_second": 4.683, "step": 12280 }, { "epoch": 5.12, "learning_rate": 1e-06, "loss": 0.2097, "step": 12300 }, { "epoch": 5.12, "eval_accuracy": 0.93125, "eval_loss": 0.20300358533859253, "eval_runtime": 15.8596, "eval_samples_per_second": 1513.277, "eval_steps_per_second": 4.729, "step": 12300 }, { "epoch": 5.13, "learning_rate": 1e-06, "loss": 0.2318, "step": 12320 }, { "epoch": 5.13, "eval_accuracy": 0.931375, "eval_loss": 0.20188292860984802, "eval_runtime": 15.9958, "eval_samples_per_second": 1500.398, "eval_steps_per_second": 4.689, "step": 12320 }, { "epoch": 5.14, "learning_rate": 1e-06, "loss": 0.1662, "step": 12340 }, { "epoch": 5.14, "eval_accuracy": 0.9291666666666667, "eval_loss": 0.20399628579616547, "eval_runtime": 15.942, "eval_samples_per_second": 1505.459, "eval_steps_per_second": 4.705, "step": 12340 }, { "epoch": 5.15, "learning_rate": 1e-06, "loss": 0.1792, "step": 12360 }, { "epoch": 5.15, "eval_accuracy": 0.92925, "eval_loss": 0.205734983086586, "eval_runtime": 16.3128, "eval_samples_per_second": 1471.239, "eval_steps_per_second": 4.598, "step": 12360 }, { "epoch": 5.16, "learning_rate": 1e-06, "loss": 0.1762, "step": 12380 }, { "epoch": 5.16, "eval_accuracy": 0.9287916666666667, "eval_loss": 0.2092132568359375, "eval_runtime": 15.9137, "eval_samples_per_second": 1508.133, "eval_steps_per_second": 4.713, "step": 12380 }, { "epoch": 5.17, "learning_rate": 1e-06, "loss": 0.1693, "step": 12400 }, { "epoch": 5.17, "eval_accuracy": 0.929375, "eval_loss": 0.20484225451946259, "eval_runtime": 16.0154, "eval_samples_per_second": 1498.562, "eval_steps_per_second": 4.683, "step": 12400 }, { "epoch": 5.17, "learning_rate": 1e-06, "loss": 0.1659, "step": 12420 }, { "epoch": 5.17, "eval_accuracy": 0.9309166666666666, "eval_loss": 0.2023383527994156, "eval_runtime": 16.3394, "eval_samples_per_second": 1468.841, "eval_steps_per_second": 4.59, "step": 12420 }, { "epoch": 5.18, "learning_rate": 1e-06, "loss": 0.1561, "step": 12440 }, { "epoch": 5.18, "eval_accuracy": 0.930875, "eval_loss": 0.20242756605148315, "eval_runtime": 15.5429, "eval_samples_per_second": 1544.111, "eval_steps_per_second": 4.825, "step": 12440 }, { "epoch": 5.19, "learning_rate": 1e-06, "loss": 0.1826, "step": 12460 }, { "epoch": 5.19, "eval_accuracy": 0.9317916666666667, "eval_loss": 0.2020563930273056, "eval_runtime": 16.0095, "eval_samples_per_second": 1499.108, "eval_steps_per_second": 4.685, "step": 12460 }, { "epoch": 5.2, "learning_rate": 1e-06, "loss": 0.1544, "step": 12480 }, { "epoch": 5.2, "eval_accuracy": 0.93075, "eval_loss": 0.2041223645210266, "eval_runtime": 15.9404, "eval_samples_per_second": 1505.609, "eval_steps_per_second": 4.705, "step": 12480 }, { "epoch": 5.21, "learning_rate": 1e-06, "loss": 0.1836, "step": 12500 }, { "epoch": 5.21, "eval_accuracy": 0.9310833333333334, "eval_loss": 0.20302866399288177, "eval_runtime": 15.672, "eval_samples_per_second": 1531.396, "eval_steps_per_second": 4.786, "step": 12500 }, { "epoch": 5.22, "learning_rate": 1e-06, "loss": 0.1792, "step": 12520 }, { "epoch": 5.22, "eval_accuracy": 0.93125, "eval_loss": 0.20235809683799744, "eval_runtime": 16.0568, "eval_samples_per_second": 1494.695, "eval_steps_per_second": 4.671, "step": 12520 }, { "epoch": 5.22, "learning_rate": 1e-06, "loss": 0.1666, "step": 12540 }, { "epoch": 5.22, "eval_accuracy": 0.931875, "eval_loss": 0.2016390711069107, "eval_runtime": 15.811, "eval_samples_per_second": 1517.93, "eval_steps_per_second": 4.744, "step": 12540 }, { "epoch": 5.23, "learning_rate": 1e-06, "loss": 0.1714, "step": 12560 }, { "epoch": 5.23, "eval_accuracy": 0.9312916666666666, "eval_loss": 0.20161676406860352, "eval_runtime": 16.4914, "eval_samples_per_second": 1455.303, "eval_steps_per_second": 4.548, "step": 12560 }, { "epoch": 5.24, "learning_rate": 1e-06, "loss": 0.2023, "step": 12580 }, { "epoch": 5.24, "eval_accuracy": 0.930125, "eval_loss": 0.20450520515441895, "eval_runtime": 16.0345, "eval_samples_per_second": 1496.773, "eval_steps_per_second": 4.677, "step": 12580 }, { "epoch": 5.25, "learning_rate": 1e-06, "loss": 0.1996, "step": 12600 }, { "epoch": 5.25, "eval_accuracy": 0.9287916666666667, "eval_loss": 0.20813672244548798, "eval_runtime": 15.8781, "eval_samples_per_second": 1511.512, "eval_steps_per_second": 4.723, "step": 12600 }, { "epoch": 5.26, "learning_rate": 1e-06, "loss": 0.2113, "step": 12620 }, { "epoch": 5.26, "eval_accuracy": 0.931125, "eval_loss": 0.20202632248401642, "eval_runtime": 15.9398, "eval_samples_per_second": 1505.669, "eval_steps_per_second": 4.705, "step": 12620 }, { "epoch": 5.27, "learning_rate": 1e-06, "loss": 0.1826, "step": 12640 }, { "epoch": 5.27, "eval_accuracy": 0.9315416666666667, "eval_loss": 0.2010980248451233, "eval_runtime": 15.6531, "eval_samples_per_second": 1533.238, "eval_steps_per_second": 4.791, "step": 12640 }, { "epoch": 5.28, "learning_rate": 1e-06, "loss": 0.2069, "step": 12660 }, { "epoch": 5.28, "eval_accuracy": 0.9294583333333334, "eval_loss": 0.20446471869945526, "eval_runtime": 15.9931, "eval_samples_per_second": 1500.646, "eval_steps_per_second": 4.69, "step": 12660 }, { "epoch": 5.28, "learning_rate": 1e-06, "loss": 0.1621, "step": 12680 }, { "epoch": 5.28, "eval_accuracy": 0.9308333333333333, "eval_loss": 0.20204661786556244, "eval_runtime": 16.4396, "eval_samples_per_second": 1459.894, "eval_steps_per_second": 4.562, "step": 12680 }, { "epoch": 5.29, "learning_rate": 1e-06, "loss": 0.16, "step": 12700 }, { "epoch": 5.29, "eval_accuracy": 0.93075, "eval_loss": 0.20316839218139648, "eval_runtime": 15.9576, "eval_samples_per_second": 1503.99, "eval_steps_per_second": 4.7, "step": 12700 }, { "epoch": 5.3, "learning_rate": 1e-06, "loss": 0.165, "step": 12720 }, { "epoch": 5.3, "eval_accuracy": 0.9314583333333334, "eval_loss": 0.20250557363033295, "eval_runtime": 15.8087, "eval_samples_per_second": 1518.148, "eval_steps_per_second": 4.744, "step": 12720 }, { "epoch": 5.31, "learning_rate": 1e-06, "loss": 0.1894, "step": 12740 }, { "epoch": 5.31, "eval_accuracy": 0.931625, "eval_loss": 0.20032641291618347, "eval_runtime": 15.4644, "eval_samples_per_second": 1551.949, "eval_steps_per_second": 4.85, "step": 12740 }, { "epoch": 5.32, "learning_rate": 1e-06, "loss": 0.1633, "step": 12760 }, { "epoch": 5.32, "eval_accuracy": 0.9314166666666667, "eval_loss": 0.20086656510829926, "eval_runtime": 16.2759, "eval_samples_per_second": 1474.571, "eval_steps_per_second": 4.608, "step": 12760 }, { "epoch": 5.33, "learning_rate": 1e-06, "loss": 0.2289, "step": 12780 }, { "epoch": 5.33, "eval_accuracy": 0.932625, "eval_loss": 0.20015648007392883, "eval_runtime": 16.3054, "eval_samples_per_second": 1471.905, "eval_steps_per_second": 4.6, "step": 12780 }, { "epoch": 5.33, "learning_rate": 1e-06, "loss": 0.1571, "step": 12800 }, { "epoch": 5.33, "eval_accuracy": 0.932, "eval_loss": 0.20201164484024048, "eval_runtime": 16.2407, "eval_samples_per_second": 1477.773, "eval_steps_per_second": 4.618, "step": 12800 }, { "epoch": 5.34, "learning_rate": 1e-06, "loss": 0.2073, "step": 12820 }, { "epoch": 5.34, "eval_accuracy": 0.9295416666666667, "eval_loss": 0.2069837599992752, "eval_runtime": 17.0384, "eval_samples_per_second": 1408.581, "eval_steps_per_second": 4.402, "step": 12820 }, { "epoch": 5.35, "learning_rate": 1e-06, "loss": 0.1605, "step": 12840 }, { "epoch": 5.35, "eval_accuracy": 0.93075, "eval_loss": 0.20616813004016876, "eval_runtime": 16.8287, "eval_samples_per_second": 1426.138, "eval_steps_per_second": 4.457, "step": 12840 }, { "epoch": 5.36, "learning_rate": 1e-06, "loss": 0.1879, "step": 12860 }, { "epoch": 5.36, "eval_accuracy": 0.9306666666666666, "eval_loss": 0.20293764770030975, "eval_runtime": 17.5436, "eval_samples_per_second": 1368.02, "eval_steps_per_second": 4.275, "step": 12860 }, { "epoch": 5.37, "learning_rate": 1e-06, "loss": 0.2104, "step": 12880 }, { "epoch": 5.37, "eval_accuracy": 0.9311666666666667, "eval_loss": 0.20171169936656952, "eval_runtime": 16.6339, "eval_samples_per_second": 1442.838, "eval_steps_per_second": 4.509, "step": 12880 }, { "epoch": 5.38, "learning_rate": 1e-06, "loss": 0.2058, "step": 12900 }, { "epoch": 5.38, "eval_accuracy": 0.9315416666666667, "eval_loss": 0.20113909244537354, "eval_runtime": 16.0022, "eval_samples_per_second": 1499.798, "eval_steps_per_second": 4.687, "step": 12900 }, { "epoch": 5.38, "learning_rate": 1e-06, "loss": 0.1624, "step": 12920 }, { "epoch": 5.38, "eval_accuracy": 0.9311666666666667, "eval_loss": 0.20383241772651672, "eval_runtime": 15.8073, "eval_samples_per_second": 1518.289, "eval_steps_per_second": 4.745, "step": 12920 }, { "epoch": 5.39, "learning_rate": 1e-06, "loss": 0.1765, "step": 12940 }, { "epoch": 5.39, "eval_accuracy": 0.9310416666666667, "eval_loss": 0.20215220749378204, "eval_runtime": 16.3386, "eval_samples_per_second": 1468.917, "eval_steps_per_second": 4.59, "step": 12940 }, { "epoch": 5.4, "learning_rate": 1e-06, "loss": 0.187, "step": 12960 }, { "epoch": 5.4, "eval_accuracy": 0.931375, "eval_loss": 0.20157550275325775, "eval_runtime": 16.0967, "eval_samples_per_second": 1490.991, "eval_steps_per_second": 4.659, "step": 12960 }, { "epoch": 5.41, "learning_rate": 1e-06, "loss": 0.1565, "step": 12980 }, { "epoch": 5.41, "eval_accuracy": 0.9317083333333334, "eval_loss": 0.20059089362621307, "eval_runtime": 15.7591, "eval_samples_per_second": 1522.928, "eval_steps_per_second": 4.759, "step": 12980 }, { "epoch": 5.42, "learning_rate": 1e-06, "loss": 0.1901, "step": 13000 }, { "epoch": 5.42, "eval_accuracy": 0.9322916666666666, "eval_loss": 0.20068366825580597, "eval_runtime": 15.8354, "eval_samples_per_second": 1515.591, "eval_steps_per_second": 4.736, "step": 13000 }, { "epoch": 5.42, "learning_rate": 1e-06, "loss": 0.2473, "step": 13020 }, { "epoch": 5.42, "eval_accuracy": 0.9309583333333333, "eval_loss": 0.20135918259620667, "eval_runtime": 15.7908, "eval_samples_per_second": 1519.875, "eval_steps_per_second": 4.75, "step": 13020 }, { "epoch": 5.43, "learning_rate": 1e-06, "loss": 0.1836, "step": 13040 }, { "epoch": 5.43, "eval_accuracy": 0.9314583333333334, "eval_loss": 0.20122550427913666, "eval_runtime": 17.2214, "eval_samples_per_second": 1393.613, "eval_steps_per_second": 4.355, "step": 13040 }, { "epoch": 5.44, "learning_rate": 1e-06, "loss": 0.1759, "step": 13060 }, { "epoch": 5.44, "eval_accuracy": 0.9310416666666667, "eval_loss": 0.20236973464488983, "eval_runtime": 17.2846, "eval_samples_per_second": 1388.518, "eval_steps_per_second": 4.339, "step": 13060 }, { "epoch": 5.45, "learning_rate": 1e-06, "loss": 0.2385, "step": 13080 }, { "epoch": 5.45, "eval_accuracy": 0.9303333333333333, "eval_loss": 0.202514186501503, "eval_runtime": 17.5809, "eval_samples_per_second": 1365.119, "eval_steps_per_second": 4.266, "step": 13080 }, { "epoch": 5.46, "learning_rate": 1e-06, "loss": 0.1586, "step": 13100 }, { "epoch": 5.46, "eval_accuracy": 0.9314583333333334, "eval_loss": 0.20214223861694336, "eval_runtime": 17.3368, "eval_samples_per_second": 1384.339, "eval_steps_per_second": 4.326, "step": 13100 }, { "epoch": 5.47, "learning_rate": 1e-06, "loss": 0.1734, "step": 13120 }, { "epoch": 5.47, "eval_accuracy": 0.9310833333333334, "eval_loss": 0.20263217389583588, "eval_runtime": 15.8849, "eval_samples_per_second": 1510.867, "eval_steps_per_second": 4.721, "step": 13120 }, { "epoch": 5.47, "learning_rate": 1e-06, "loss": 0.1888, "step": 13140 }, { "epoch": 5.47, "eval_accuracy": 0.9295833333333333, "eval_loss": 0.20405107736587524, "eval_runtime": 15.9922, "eval_samples_per_second": 1500.728, "eval_steps_per_second": 4.69, "step": 13140 }, { "epoch": 5.48, "learning_rate": 1e-06, "loss": 0.1676, "step": 13160 }, { "epoch": 5.48, "eval_accuracy": 0.9306666666666666, "eval_loss": 0.20299804210662842, "eval_runtime": 15.9348, "eval_samples_per_second": 1506.14, "eval_steps_per_second": 4.707, "step": 13160 }, { "epoch": 5.49, "learning_rate": 1e-06, "loss": 0.2462, "step": 13180 }, { "epoch": 5.49, "eval_accuracy": 0.9300833333333334, "eval_loss": 0.20278708636760712, "eval_runtime": 16.0737, "eval_samples_per_second": 1493.119, "eval_steps_per_second": 4.666, "step": 13180 }, { "epoch": 5.5, "learning_rate": 1e-06, "loss": 0.1751, "step": 13200 }, { "epoch": 5.5, "eval_accuracy": 0.93125, "eval_loss": 0.20159900188446045, "eval_runtime": 15.4631, "eval_samples_per_second": 1552.08, "eval_steps_per_second": 4.85, "step": 13200 }, { "epoch": 5.51, "learning_rate": 1e-06, "loss": 0.2113, "step": 13220 }, { "epoch": 5.51, "eval_accuracy": 0.929125, "eval_loss": 0.20587190985679626, "eval_runtime": 15.9217, "eval_samples_per_second": 1507.375, "eval_steps_per_second": 4.711, "step": 13220 }, { "epoch": 5.52, "learning_rate": 1e-06, "loss": 0.1884, "step": 13240 }, { "epoch": 5.52, "eval_accuracy": 0.92875, "eval_loss": 0.20635303854942322, "eval_runtime": 15.8747, "eval_samples_per_second": 1511.836, "eval_steps_per_second": 4.724, "step": 13240 }, { "epoch": 5.53, "learning_rate": 1e-06, "loss": 0.1545, "step": 13260 }, { "epoch": 5.53, "eval_accuracy": 0.9289166666666666, "eval_loss": 0.20559687912464142, "eval_runtime": 15.9786, "eval_samples_per_second": 1502.007, "eval_steps_per_second": 4.694, "step": 13260 }, { "epoch": 5.53, "learning_rate": 1e-06, "loss": 0.209, "step": 13280 }, { "epoch": 5.53, "eval_accuracy": 0.9307916666666667, "eval_loss": 0.20248527824878693, "eval_runtime": 15.7154, "eval_samples_per_second": 1527.166, "eval_steps_per_second": 4.772, "step": 13280 }, { "epoch": 5.54, "learning_rate": 1e-06, "loss": 0.1843, "step": 13300 }, { "epoch": 5.54, "eval_accuracy": 0.9307916666666667, "eval_loss": 0.20135876536369324, "eval_runtime": 16.3985, "eval_samples_per_second": 1463.552, "eval_steps_per_second": 4.574, "step": 13300 }, { "epoch": 5.55, "learning_rate": 1e-06, "loss": 0.2096, "step": 13320 }, { "epoch": 5.55, "eval_accuracy": 0.9314583333333334, "eval_loss": 0.20034638047218323, "eval_runtime": 15.8661, "eval_samples_per_second": 1512.659, "eval_steps_per_second": 4.727, "step": 13320 }, { "epoch": 5.56, "learning_rate": 1e-06, "loss": 0.1821, "step": 13340 }, { "epoch": 5.56, "eval_accuracy": 0.9300416666666667, "eval_loss": 0.20381678640842438, "eval_runtime": 16.2244, "eval_samples_per_second": 1479.257, "eval_steps_per_second": 4.623, "step": 13340 }, { "epoch": 5.57, "learning_rate": 1e-06, "loss": 0.1898, "step": 13360 }, { "epoch": 5.57, "eval_accuracy": 0.9309166666666666, "eval_loss": 0.20166015625, "eval_runtime": 15.944, "eval_samples_per_second": 1505.264, "eval_steps_per_second": 4.704, "step": 13360 }, { "epoch": 5.58, "learning_rate": 1e-06, "loss": 0.2068, "step": 13380 }, { "epoch": 5.58, "eval_accuracy": 0.9309583333333333, "eval_loss": 0.2012653946876526, "eval_runtime": 16.2049, "eval_samples_per_second": 1481.035, "eval_steps_per_second": 4.628, "step": 13380 }, { "epoch": 5.58, "learning_rate": 1e-06, "loss": 0.1822, "step": 13400 }, { "epoch": 5.58, "eval_accuracy": 0.931625, "eval_loss": 0.20043647289276123, "eval_runtime": 15.8256, "eval_samples_per_second": 1516.532, "eval_steps_per_second": 4.739, "step": 13400 }, { "epoch": 5.59, "learning_rate": 1e-06, "loss": 0.1974, "step": 13420 }, { "epoch": 5.59, "eval_accuracy": 0.9320416666666667, "eval_loss": 0.19964149594306946, "eval_runtime": 15.6329, "eval_samples_per_second": 1535.22, "eval_steps_per_second": 4.798, "step": 13420 }, { "epoch": 5.6, "learning_rate": 1e-06, "loss": 0.1634, "step": 13440 }, { "epoch": 5.6, "eval_accuracy": 0.93, "eval_loss": 0.2027622014284134, "eval_runtime": 15.6182, "eval_samples_per_second": 1536.672, "eval_steps_per_second": 4.802, "step": 13440 }, { "epoch": 5.61, "learning_rate": 1e-06, "loss": 0.2075, "step": 13460 }, { "epoch": 5.61, "eval_accuracy": 0.92925, "eval_loss": 0.2044484168291092, "eval_runtime": 15.6933, "eval_samples_per_second": 1529.315, "eval_steps_per_second": 4.779, "step": 13460 }, { "epoch": 5.62, "learning_rate": 1e-06, "loss": 0.1775, "step": 13480 }, { "epoch": 5.62, "eval_accuracy": 0.9289583333333333, "eval_loss": 0.20532788336277008, "eval_runtime": 15.7257, "eval_samples_per_second": 1526.161, "eval_steps_per_second": 4.769, "step": 13480 }, { "epoch": 5.62, "learning_rate": 1e-06, "loss": 0.1648, "step": 13500 }, { "epoch": 5.62, "eval_accuracy": 0.9272083333333333, "eval_loss": 0.2097957581281662, "eval_runtime": 15.4877, "eval_samples_per_second": 1549.617, "eval_steps_per_second": 4.843, "step": 13500 }, { "epoch": 5.63, "learning_rate": 1e-06, "loss": 0.1702, "step": 13520 }, { "epoch": 5.63, "eval_accuracy": 0.9284166666666667, "eval_loss": 0.2071061134338379, "eval_runtime": 15.7223, "eval_samples_per_second": 1526.495, "eval_steps_per_second": 4.77, "step": 13520 }, { "epoch": 5.64, "learning_rate": 1e-06, "loss": 0.1701, "step": 13540 }, { "epoch": 5.64, "eval_accuracy": 0.9302083333333333, "eval_loss": 0.205330029129982, "eval_runtime": 16.1803, "eval_samples_per_second": 1483.284, "eval_steps_per_second": 4.635, "step": 13540 }, { "epoch": 5.65, "learning_rate": 1e-06, "loss": 0.2158, "step": 13560 }, { "epoch": 5.65, "eval_accuracy": 0.929375, "eval_loss": 0.20697665214538574, "eval_runtime": 15.6178, "eval_samples_per_second": 1536.706, "eval_steps_per_second": 4.802, "step": 13560 }, { "epoch": 5.66, "learning_rate": 1e-06, "loss": 0.1622, "step": 13580 }, { "epoch": 5.66, "eval_accuracy": 0.9305, "eval_loss": 0.2030411958694458, "eval_runtime": 15.9497, "eval_samples_per_second": 1504.728, "eval_steps_per_second": 4.702, "step": 13580 }, { "epoch": 5.67, "learning_rate": 1e-06, "loss": 0.1764, "step": 13600 }, { "epoch": 5.67, "eval_accuracy": 0.9311666666666667, "eval_loss": 0.2010955512523651, "eval_runtime": 15.6626, "eval_samples_per_second": 1532.316, "eval_steps_per_second": 4.788, "step": 13600 }, { "epoch": 5.67, "learning_rate": 1e-06, "loss": 0.2191, "step": 13620 }, { "epoch": 5.67, "eval_accuracy": 0.93125, "eval_loss": 0.1993076354265213, "eval_runtime": 16.3922, "eval_samples_per_second": 1464.11, "eval_steps_per_second": 4.575, "step": 13620 }, { "epoch": 5.68, "learning_rate": 1e-06, "loss": 0.1962, "step": 13640 }, { "epoch": 5.68, "eval_accuracy": 0.931625, "eval_loss": 0.19903969764709473, "eval_runtime": 16.0762, "eval_samples_per_second": 1492.89, "eval_steps_per_second": 4.665, "step": 13640 }, { "epoch": 5.69, "learning_rate": 1e-06, "loss": 0.1954, "step": 13660 }, { "epoch": 5.69, "eval_accuracy": 0.9315833333333333, "eval_loss": 0.20040030777454376, "eval_runtime": 16.0741, "eval_samples_per_second": 1493.088, "eval_steps_per_second": 4.666, "step": 13660 }, { "epoch": 5.7, "learning_rate": 1e-06, "loss": 0.1903, "step": 13680 }, { "epoch": 5.7, "eval_accuracy": 0.9295, "eval_loss": 0.20218642055988312, "eval_runtime": 17.6769, "eval_samples_per_second": 1357.707, "eval_steps_per_second": 4.243, "step": 13680 }, { "epoch": 5.71, "learning_rate": 1e-06, "loss": 0.1867, "step": 13700 }, { "epoch": 5.71, "eval_accuracy": 0.930375, "eval_loss": 0.20176221430301666, "eval_runtime": 16.5988, "eval_samples_per_second": 1445.891, "eval_steps_per_second": 4.518, "step": 13700 }, { "epoch": 5.72, "learning_rate": 1e-06, "loss": 0.1991, "step": 13720 }, { "epoch": 5.72, "eval_accuracy": 0.93025, "eval_loss": 0.20237615704536438, "eval_runtime": 16.9965, "eval_samples_per_second": 1412.055, "eval_steps_per_second": 4.413, "step": 13720 }, { "epoch": 5.72, "learning_rate": 1e-06, "loss": 0.1646, "step": 13740 }, { "epoch": 5.72, "eval_accuracy": 0.9304583333333334, "eval_loss": 0.20343907177448273, "eval_runtime": 17.7322, "eval_samples_per_second": 1353.468, "eval_steps_per_second": 4.23, "step": 13740 }, { "epoch": 5.73, "learning_rate": 1e-06, "loss": 0.2051, "step": 13760 }, { "epoch": 5.73, "eval_accuracy": 0.9300416666666667, "eval_loss": 0.2030051052570343, "eval_runtime": 17.541, "eval_samples_per_second": 1368.227, "eval_steps_per_second": 4.276, "step": 13760 }, { "epoch": 5.74, "learning_rate": 1e-06, "loss": 0.1693, "step": 13780 }, { "epoch": 5.74, "eval_accuracy": 0.931, "eval_loss": 0.1998627781867981, "eval_runtime": 16.6335, "eval_samples_per_second": 1442.869, "eval_steps_per_second": 4.509, "step": 13780 }, { "epoch": 5.75, "learning_rate": 1e-06, "loss": 0.1663, "step": 13800 }, { "epoch": 5.75, "eval_accuracy": 0.9325, "eval_loss": 0.1989893913269043, "eval_runtime": 17.515, "eval_samples_per_second": 1370.253, "eval_steps_per_second": 4.282, "step": 13800 }, { "epoch": 5.76, "learning_rate": 1e-06, "loss": 0.1452, "step": 13820 }, { "epoch": 5.76, "eval_accuracy": 0.9319583333333333, "eval_loss": 0.1993263214826584, "eval_runtime": 17.7237, "eval_samples_per_second": 1354.116, "eval_steps_per_second": 4.232, "step": 13820 }, { "epoch": 5.77, "learning_rate": 1e-06, "loss": 0.1535, "step": 13840 }, { "epoch": 5.77, "eval_accuracy": 0.932125, "eval_loss": 0.19914114475250244, "eval_runtime": 17.0395, "eval_samples_per_second": 1408.496, "eval_steps_per_second": 4.402, "step": 13840 }, { "epoch": 5.78, "learning_rate": 1e-06, "loss": 0.2072, "step": 13860 }, { "epoch": 5.78, "eval_accuracy": 0.9300833333333334, "eval_loss": 0.20136070251464844, "eval_runtime": 17.6045, "eval_samples_per_second": 1363.289, "eval_steps_per_second": 4.26, "step": 13860 }, { "epoch": 5.78, "learning_rate": 1e-06, "loss": 0.2069, "step": 13880 }, { "epoch": 5.78, "eval_accuracy": 0.9298333333333333, "eval_loss": 0.2024083137512207, "eval_runtime": 17.3144, "eval_samples_per_second": 1386.128, "eval_steps_per_second": 4.332, "step": 13880 }, { "epoch": 5.79, "learning_rate": 1e-06, "loss": 0.2298, "step": 13900 }, { "epoch": 5.79, "eval_accuracy": 0.93225, "eval_loss": 0.19839055836200714, "eval_runtime": 18.3284, "eval_samples_per_second": 1309.44, "eval_steps_per_second": 4.092, "step": 13900 }, { "epoch": 5.8, "learning_rate": 1e-06, "loss": 0.201, "step": 13920 }, { "epoch": 5.8, "eval_accuracy": 0.932125, "eval_loss": 0.1982399821281433, "eval_runtime": 18.1938, "eval_samples_per_second": 1319.131, "eval_steps_per_second": 4.122, "step": 13920 }, { "epoch": 5.81, "learning_rate": 1e-06, "loss": 0.184, "step": 13940 }, { "epoch": 5.81, "eval_accuracy": 0.9317083333333334, "eval_loss": 0.1989286094903946, "eval_runtime": 18.406, "eval_samples_per_second": 1303.923, "eval_steps_per_second": 4.075, "step": 13940 }, { "epoch": 5.82, "learning_rate": 1e-06, "loss": 0.1942, "step": 13960 }, { "epoch": 5.82, "eval_accuracy": 0.9319166666666666, "eval_loss": 0.19913487136363983, "eval_runtime": 18.2526, "eval_samples_per_second": 1314.879, "eval_steps_per_second": 4.109, "step": 13960 }, { "epoch": 5.83, "learning_rate": 1e-06, "loss": 0.2085, "step": 13980 }, { "epoch": 5.83, "eval_accuracy": 0.932375, "eval_loss": 0.19867363572120667, "eval_runtime": 17.554, "eval_samples_per_second": 1367.21, "eval_steps_per_second": 4.273, "step": 13980 }, { "epoch": 5.83, "learning_rate": 1e-06, "loss": 0.177, "step": 14000 }, { "epoch": 5.83, "eval_accuracy": 0.9326666666666666, "eval_loss": 0.19808466732501984, "eval_runtime": 16.6018, "eval_samples_per_second": 1445.627, "eval_steps_per_second": 4.518, "step": 14000 }, { "epoch": 5.84, "learning_rate": 1e-06, "loss": 0.1746, "step": 14020 }, { "epoch": 5.84, "eval_accuracy": 0.9325416666666667, "eval_loss": 0.19782015681266785, "eval_runtime": 17.9546, "eval_samples_per_second": 1336.706, "eval_steps_per_second": 4.177, "step": 14020 }, { "epoch": 5.85, "learning_rate": 1e-06, "loss": 0.1731, "step": 14040 }, { "epoch": 5.85, "eval_accuracy": 0.9310416666666667, "eval_loss": 0.20012834668159485, "eval_runtime": 17.9142, "eval_samples_per_second": 1339.719, "eval_steps_per_second": 4.187, "step": 14040 }, { "epoch": 5.86, "learning_rate": 1e-06, "loss": 0.1888, "step": 14060 }, { "epoch": 5.86, "eval_accuracy": 0.93075, "eval_loss": 0.20140434801578522, "eval_runtime": 18.4741, "eval_samples_per_second": 1299.116, "eval_steps_per_second": 4.06, "step": 14060 }, { "epoch": 5.87, "learning_rate": 1e-06, "loss": 0.1708, "step": 14080 }, { "epoch": 5.87, "eval_accuracy": 0.9315416666666667, "eval_loss": 0.19907809793949127, "eval_runtime": 18.0588, "eval_samples_per_second": 1328.991, "eval_steps_per_second": 4.153, "step": 14080 }, { "epoch": 5.88, "learning_rate": 1e-06, "loss": 0.1927, "step": 14100 }, { "epoch": 5.88, "eval_accuracy": 0.9303333333333333, "eval_loss": 0.2014717161655426, "eval_runtime": 17.5458, "eval_samples_per_second": 1367.846, "eval_steps_per_second": 4.275, "step": 14100 }, { "epoch": 5.88, "learning_rate": 1e-06, "loss": 0.1756, "step": 14120 }, { "epoch": 5.88, "eval_accuracy": 0.9304583333333334, "eval_loss": 0.20116497576236725, "eval_runtime": 16.0925, "eval_samples_per_second": 1491.379, "eval_steps_per_second": 4.661, "step": 14120 }, { "epoch": 5.89, "learning_rate": 1e-06, "loss": 0.1829, "step": 14140 }, { "epoch": 5.89, "eval_accuracy": 0.9317083333333334, "eval_loss": 0.2000524401664734, "eval_runtime": 16.8659, "eval_samples_per_second": 1422.991, "eval_steps_per_second": 4.447, "step": 14140 }, { "epoch": 5.9, "learning_rate": 1e-06, "loss": 0.1348, "step": 14160 }, { "epoch": 5.9, "eval_accuracy": 0.932375, "eval_loss": 0.19840501248836517, "eval_runtime": 16.5412, "eval_samples_per_second": 1450.921, "eval_steps_per_second": 4.534, "step": 14160 }, { "epoch": 5.91, "learning_rate": 1e-06, "loss": 0.1773, "step": 14180 }, { "epoch": 5.91, "eval_accuracy": 0.9315416666666667, "eval_loss": 0.20062309503555298, "eval_runtime": 16.2967, "eval_samples_per_second": 1472.691, "eval_steps_per_second": 4.602, "step": 14180 }, { "epoch": 5.92, "learning_rate": 1e-06, "loss": 0.1639, "step": 14200 }, { "epoch": 5.92, "eval_accuracy": 0.9306666666666666, "eval_loss": 0.2011023461818695, "eval_runtime": 16.2819, "eval_samples_per_second": 1474.03, "eval_steps_per_second": 4.606, "step": 14200 }, { "epoch": 5.92, "learning_rate": 1e-06, "loss": 0.2275, "step": 14220 }, { "epoch": 5.92, "eval_accuracy": 0.9318333333333333, "eval_loss": 0.19922685623168945, "eval_runtime": 17.1405, "eval_samples_per_second": 1400.191, "eval_steps_per_second": 4.376, "step": 14220 }, { "epoch": 5.93, "learning_rate": 1e-06, "loss": 0.1934, "step": 14240 }, { "epoch": 5.93, "eval_accuracy": 0.932125, "eval_loss": 0.19886505603790283, "eval_runtime": 16.0782, "eval_samples_per_second": 1492.703, "eval_steps_per_second": 4.665, "step": 14240 }, { "epoch": 5.94, "learning_rate": 1e-06, "loss": 0.1847, "step": 14260 }, { "epoch": 5.94, "eval_accuracy": 0.9330833333333334, "eval_loss": 0.19734573364257812, "eval_runtime": 17.2888, "eval_samples_per_second": 1388.183, "eval_steps_per_second": 4.338, "step": 14260 }, { "epoch": 5.95, "learning_rate": 1e-06, "loss": 0.1572, "step": 14280 }, { "epoch": 5.95, "eval_accuracy": 0.9324166666666667, "eval_loss": 0.1978907287120819, "eval_runtime": 19.318, "eval_samples_per_second": 1242.362, "eval_steps_per_second": 3.882, "step": 14280 }, { "epoch": 5.96, "learning_rate": 1e-06, "loss": 0.2191, "step": 14300 }, { "epoch": 5.96, "eval_accuracy": 0.9322083333333333, "eval_loss": 0.19908125698566437, "eval_runtime": 17.7053, "eval_samples_per_second": 1355.53, "eval_steps_per_second": 4.236, "step": 14300 }, { "epoch": 5.97, "learning_rate": 1e-06, "loss": 0.2022, "step": 14320 }, { "epoch": 5.97, "eval_accuracy": 0.9321666666666667, "eval_loss": 0.19820664823055267, "eval_runtime": 17.7778, "eval_samples_per_second": 1350.001, "eval_steps_per_second": 4.219, "step": 14320 }, { "epoch": 5.97, "learning_rate": 1e-06, "loss": 0.1575, "step": 14340 }, { "epoch": 5.97, "eval_accuracy": 0.9321666666666667, "eval_loss": 0.19856464862823486, "eval_runtime": 17.5559, "eval_samples_per_second": 1367.059, "eval_steps_per_second": 4.272, "step": 14340 }, { "epoch": 5.98, "learning_rate": 1e-06, "loss": 0.1826, "step": 14360 }, { "epoch": 5.98, "eval_accuracy": 0.9322916666666666, "eval_loss": 0.19818070530891418, "eval_runtime": 17.7144, "eval_samples_per_second": 1354.827, "eval_steps_per_second": 4.234, "step": 14360 }, { "epoch": 5.99, "learning_rate": 1e-06, "loss": 0.1855, "step": 14380 }, { "epoch": 5.99, "eval_accuracy": 0.9326666666666666, "eval_loss": 0.19788843393325806, "eval_runtime": 17.5812, "eval_samples_per_second": 1365.094, "eval_steps_per_second": 4.266, "step": 14380 }, { "epoch": 6.0, "learning_rate": 1e-06, "loss": 0.2027, "step": 14400 }, { "epoch": 6.0, "eval_accuracy": 0.9319583333333333, "eval_loss": 0.19935990869998932, "eval_runtime": 17.5236, "eval_samples_per_second": 1369.581, "eval_steps_per_second": 4.28, "step": 14400 }, { "epoch": 6.01, "learning_rate": 1e-06, "loss": 0.1585, "step": 14420 }, { "epoch": 6.01, "eval_accuracy": 0.9324583333333333, "eval_loss": 0.20092272758483887, "eval_runtime": 17.5852, "eval_samples_per_second": 1364.781, "eval_steps_per_second": 4.265, "step": 14420 }, { "epoch": 6.02, "learning_rate": 1e-06, "loss": 0.2092, "step": 14440 }, { "epoch": 6.02, "eval_accuracy": 0.9339583333333333, "eval_loss": 0.1970815807580948, "eval_runtime": 17.1214, "eval_samples_per_second": 1401.757, "eval_steps_per_second": 4.38, "step": 14440 }, { "epoch": 6.03, "learning_rate": 1e-06, "loss": 0.1773, "step": 14460 }, { "epoch": 6.03, "eval_accuracy": 0.9334583333333333, "eval_loss": 0.19676099717617035, "eval_runtime": 16.7515, "eval_samples_per_second": 1432.707, "eval_steps_per_second": 4.477, "step": 14460 }, { "epoch": 6.03, "learning_rate": 1e-06, "loss": 0.186, "step": 14480 }, { "epoch": 6.03, "eval_accuracy": 0.9323333333333333, "eval_loss": 0.19818897545337677, "eval_runtime": 18.0357, "eval_samples_per_second": 1330.693, "eval_steps_per_second": 4.158, "step": 14480 }, { "epoch": 6.04, "learning_rate": 1e-06, "loss": 0.1759, "step": 14500 }, { "epoch": 6.04, "eval_accuracy": 0.9315, "eval_loss": 0.19834274053573608, "eval_runtime": 16.9189, "eval_samples_per_second": 1418.535, "eval_steps_per_second": 4.433, "step": 14500 }, { "epoch": 6.05, "learning_rate": 1e-06, "loss": 0.168, "step": 14520 }, { "epoch": 6.05, "eval_accuracy": 0.9335416666666667, "eval_loss": 0.19659017026424408, "eval_runtime": 16.8237, "eval_samples_per_second": 1426.555, "eval_steps_per_second": 4.458, "step": 14520 }, { "epoch": 6.06, "learning_rate": 1e-06, "loss": 0.168, "step": 14540 }, { "epoch": 6.06, "eval_accuracy": 0.9322083333333333, "eval_loss": 0.1991969645023346, "eval_runtime": 17.2701, "eval_samples_per_second": 1389.683, "eval_steps_per_second": 4.343, "step": 14540 }, { "epoch": 6.07, "learning_rate": 1e-06, "loss": 0.2085, "step": 14560 }, { "epoch": 6.07, "eval_accuracy": 0.9324583333333333, "eval_loss": 0.19824150204658508, "eval_runtime": 17.4283, "eval_samples_per_second": 1377.067, "eval_steps_per_second": 4.303, "step": 14560 }, { "epoch": 6.08, "learning_rate": 1e-06, "loss": 0.1866, "step": 14580 }, { "epoch": 6.08, "eval_accuracy": 0.93325, "eval_loss": 0.19639739394187927, "eval_runtime": 17.7717, "eval_samples_per_second": 1350.465, "eval_steps_per_second": 4.22, "step": 14580 }, { "epoch": 6.08, "learning_rate": 1e-06, "loss": 0.1772, "step": 14600 }, { "epoch": 6.08, "eval_accuracy": 0.93325, "eval_loss": 0.19745591282844543, "eval_runtime": 16.2574, "eval_samples_per_second": 1476.246, "eval_steps_per_second": 4.613, "step": 14600 }, { "epoch": 6.09, "learning_rate": 1e-06, "loss": 0.1772, "step": 14620 }, { "epoch": 6.09, "eval_accuracy": 0.933375, "eval_loss": 0.1968727856874466, "eval_runtime": 16.8404, "eval_samples_per_second": 1425.142, "eval_steps_per_second": 4.454, "step": 14620 }, { "epoch": 6.1, "learning_rate": 1e-06, "loss": 0.1442, "step": 14640 }, { "epoch": 6.1, "eval_accuracy": 0.9330416666666667, "eval_loss": 0.19765588641166687, "eval_runtime": 16.5146, "eval_samples_per_second": 1453.262, "eval_steps_per_second": 4.541, "step": 14640 }, { "epoch": 6.11, "learning_rate": 1e-06, "loss": 0.1531, "step": 14660 }, { "epoch": 6.11, "eval_accuracy": 0.9324166666666667, "eval_loss": 0.19997546076774597, "eval_runtime": 16.1908, "eval_samples_per_second": 1482.324, "eval_steps_per_second": 4.632, "step": 14660 }, { "epoch": 6.12, "learning_rate": 1e-06, "loss": 0.2187, "step": 14680 }, { "epoch": 6.12, "eval_accuracy": 0.931625, "eval_loss": 0.20033469796180725, "eval_runtime": 16.76, "eval_samples_per_second": 1431.985, "eval_steps_per_second": 4.475, "step": 14680 }, { "epoch": 6.12, "learning_rate": 1e-06, "loss": 0.1788, "step": 14700 }, { "epoch": 6.12, "eval_accuracy": 0.9303333333333333, "eval_loss": 0.2029111534357071, "eval_runtime": 16.3811, "eval_samples_per_second": 1465.1, "eval_steps_per_second": 4.578, "step": 14700 }, { "epoch": 6.13, "learning_rate": 1e-06, "loss": 0.2018, "step": 14720 }, { "epoch": 6.13, "eval_accuracy": 0.9309583333333333, "eval_loss": 0.20074012875556946, "eval_runtime": 16.4407, "eval_samples_per_second": 1459.794, "eval_steps_per_second": 4.562, "step": 14720 }, { "epoch": 6.14, "learning_rate": 1e-06, "loss": 0.1636, "step": 14740 }, { "epoch": 6.14, "eval_accuracy": 0.9325833333333333, "eval_loss": 0.1991911381483078, "eval_runtime": 16.8493, "eval_samples_per_second": 1424.389, "eval_steps_per_second": 4.451, "step": 14740 }, { "epoch": 6.15, "learning_rate": 1e-06, "loss": 0.1622, "step": 14760 }, { "epoch": 6.15, "eval_accuracy": 0.9322916666666666, "eval_loss": 0.19850997626781464, "eval_runtime": 16.3552, "eval_samples_per_second": 1467.424, "eval_steps_per_second": 4.586, "step": 14760 }, { "epoch": 6.16, "learning_rate": 1e-06, "loss": 0.1947, "step": 14780 }, { "epoch": 6.16, "eval_accuracy": 0.9335833333333333, "eval_loss": 0.19700346887111664, "eval_runtime": 16.8167, "eval_samples_per_second": 1427.151, "eval_steps_per_second": 4.46, "step": 14780 }, { "epoch": 6.17, "learning_rate": 1e-06, "loss": 0.198, "step": 14800 }, { "epoch": 6.17, "eval_accuracy": 0.933875, "eval_loss": 0.19635510444641113, "eval_runtime": 17.4336, "eval_samples_per_second": 1376.652, "eval_steps_per_second": 4.302, "step": 14800 }, { "epoch": 6.17, "learning_rate": 1e-06, "loss": 0.2261, "step": 14820 }, { "epoch": 6.17, "eval_accuracy": 0.9331666666666667, "eval_loss": 0.19697488844394684, "eval_runtime": 16.4872, "eval_samples_per_second": 1455.677, "eval_steps_per_second": 4.549, "step": 14820 }, { "epoch": 6.18, "learning_rate": 1e-06, "loss": 0.2095, "step": 14840 }, { "epoch": 6.18, "eval_accuracy": 0.9303333333333333, "eval_loss": 0.2015983760356903, "eval_runtime": 16.9074, "eval_samples_per_second": 1419.494, "eval_steps_per_second": 4.436, "step": 14840 }, { "epoch": 6.19, "learning_rate": 1e-06, "loss": 0.1589, "step": 14860 }, { "epoch": 6.19, "eval_accuracy": 0.9330833333333334, "eval_loss": 0.19708770513534546, "eval_runtime": 16.2449, "eval_samples_per_second": 1477.385, "eval_steps_per_second": 4.617, "step": 14860 }, { "epoch": 6.2, "learning_rate": 1e-06, "loss": 0.191, "step": 14880 }, { "epoch": 6.2, "eval_accuracy": 0.9324166666666667, "eval_loss": 0.19845397770404816, "eval_runtime": 17.192, "eval_samples_per_second": 1395.999, "eval_steps_per_second": 4.362, "step": 14880 }, { "epoch": 6.21, "learning_rate": 1e-06, "loss": 0.1484, "step": 14900 }, { "epoch": 6.21, "eval_accuracy": 0.9337083333333334, "eval_loss": 0.19584734737873077, "eval_runtime": 17.8255, "eval_samples_per_second": 1346.386, "eval_steps_per_second": 4.207, "step": 14900 }, { "epoch": 6.22, "learning_rate": 1e-06, "loss": 0.1791, "step": 14920 }, { "epoch": 6.22, "eval_accuracy": 0.9330416666666667, "eval_loss": 0.19735968112945557, "eval_runtime": 17.6186, "eval_samples_per_second": 1362.195, "eval_steps_per_second": 4.257, "step": 14920 }, { "epoch": 6.22, "learning_rate": 1e-06, "loss": 0.2077, "step": 14940 }, { "epoch": 6.22, "eval_accuracy": 0.932375, "eval_loss": 0.19711482524871826, "eval_runtime": 17.4097, "eval_samples_per_second": 1378.545, "eval_steps_per_second": 4.308, "step": 14940 }, { "epoch": 6.23, "learning_rate": 1e-06, "loss": 0.1918, "step": 14960 }, { "epoch": 6.23, "eval_accuracy": 0.9330416666666667, "eval_loss": 0.19635051488876343, "eval_runtime": 16.6079, "eval_samples_per_second": 1445.094, "eval_steps_per_second": 4.516, "step": 14960 }, { "epoch": 6.24, "learning_rate": 1e-06, "loss": 0.2291, "step": 14980 }, { "epoch": 6.24, "eval_accuracy": 0.9330833333333334, "eval_loss": 0.19559039175510406, "eval_runtime": 16.5817, "eval_samples_per_second": 1447.382, "eval_steps_per_second": 4.523, "step": 14980 }, { "epoch": 6.25, "learning_rate": 1e-06, "loss": 0.1597, "step": 15000 }, { "epoch": 6.25, "eval_accuracy": 0.9320833333333334, "eval_loss": 0.1965901404619217, "eval_runtime": 16.2198, "eval_samples_per_second": 1479.673, "eval_steps_per_second": 4.624, "step": 15000 }, { "epoch": 6.26, "learning_rate": 1e-06, "loss": 0.1844, "step": 15020 }, { "epoch": 6.26, "eval_accuracy": 0.9323333333333333, "eval_loss": 0.19741342961788177, "eval_runtime": 16.1794, "eval_samples_per_second": 1483.371, "eval_steps_per_second": 4.636, "step": 15020 }, { "epoch": 6.27, "learning_rate": 1e-06, "loss": 0.1653, "step": 15040 }, { "epoch": 6.27, "eval_accuracy": 0.9330416666666667, "eval_loss": 0.1963772028684616, "eval_runtime": 16.9569, "eval_samples_per_second": 1415.356, "eval_steps_per_second": 4.423, "step": 15040 }, { "epoch": 6.28, "learning_rate": 1e-06, "loss": 0.158, "step": 15060 }, { "epoch": 6.28, "eval_accuracy": 0.9309583333333333, "eval_loss": 0.20028692483901978, "eval_runtime": 16.1902, "eval_samples_per_second": 1482.38, "eval_steps_per_second": 4.632, "step": 15060 }, { "epoch": 6.28, "learning_rate": 1e-06, "loss": 0.1602, "step": 15080 }, { "epoch": 6.28, "eval_accuracy": 0.932625, "eval_loss": 0.19667039811611176, "eval_runtime": 16.4266, "eval_samples_per_second": 1461.041, "eval_steps_per_second": 4.566, "step": 15080 }, { "epoch": 6.29, "learning_rate": 1e-06, "loss": 0.1656, "step": 15100 }, { "epoch": 6.29, "eval_accuracy": 0.9329166666666666, "eval_loss": 0.19664861261844635, "eval_runtime": 17.1479, "eval_samples_per_second": 1399.589, "eval_steps_per_second": 4.374, "step": 15100 }, { "epoch": 6.3, "learning_rate": 1e-06, "loss": 0.1691, "step": 15120 }, { "epoch": 6.3, "eval_accuracy": 0.9322916666666666, "eval_loss": 0.19764386117458344, "eval_runtime": 17.5239, "eval_samples_per_second": 1369.555, "eval_steps_per_second": 4.28, "step": 15120 }, { "epoch": 6.31, "learning_rate": 1e-06, "loss": 0.1598, "step": 15140 }, { "epoch": 6.31, "eval_accuracy": 0.9333333333333333, "eval_loss": 0.1968608945608139, "eval_runtime": 17.6137, "eval_samples_per_second": 1362.576, "eval_steps_per_second": 4.258, "step": 15140 }, { "epoch": 6.32, "learning_rate": 1e-06, "loss": 0.1768, "step": 15160 }, { "epoch": 6.32, "eval_accuracy": 0.9334583333333333, "eval_loss": 0.19633811712265015, "eval_runtime": 16.2937, "eval_samples_per_second": 1472.96, "eval_steps_per_second": 4.603, "step": 15160 }, { "epoch": 6.33, "learning_rate": 1e-06, "loss": 0.1653, "step": 15180 }, { "epoch": 6.33, "eval_accuracy": 0.9334166666666667, "eval_loss": 0.1963397115468979, "eval_runtime": 16.377, "eval_samples_per_second": 1465.471, "eval_steps_per_second": 4.58, "step": 15180 }, { "epoch": 6.33, "learning_rate": 1e-06, "loss": 0.1822, "step": 15200 }, { "epoch": 6.33, "eval_accuracy": 0.9312916666666666, "eval_loss": 0.20008063316345215, "eval_runtime": 16.4032, "eval_samples_per_second": 1463.126, "eval_steps_per_second": 4.572, "step": 15200 }, { "epoch": 6.34, "learning_rate": 1e-06, "loss": 0.1895, "step": 15220 }, { "epoch": 6.34, "eval_accuracy": 0.9325416666666667, "eval_loss": 0.19740186631679535, "eval_runtime": 17.3348, "eval_samples_per_second": 1384.502, "eval_steps_per_second": 4.327, "step": 15220 }, { "epoch": 6.35, "learning_rate": 1e-06, "loss": 0.1622, "step": 15240 }, { "epoch": 6.35, "eval_accuracy": 0.9315, "eval_loss": 0.19918110966682434, "eval_runtime": 16.9401, "eval_samples_per_second": 1416.753, "eval_steps_per_second": 4.427, "step": 15240 }, { "epoch": 6.36, "learning_rate": 1e-06, "loss": 0.1982, "step": 15260 }, { "epoch": 6.36, "eval_accuracy": 0.93225, "eval_loss": 0.19843068718910217, "eval_runtime": 17.3631, "eval_samples_per_second": 1382.241, "eval_steps_per_second": 4.32, "step": 15260 }, { "epoch": 6.37, "learning_rate": 1e-06, "loss": 0.172, "step": 15280 }, { "epoch": 6.37, "eval_accuracy": 0.9322916666666666, "eval_loss": 0.19916561245918274, "eval_runtime": 17.0127, "eval_samples_per_second": 1410.711, "eval_steps_per_second": 4.408, "step": 15280 }, { "epoch": 6.38, "learning_rate": 1e-06, "loss": 0.1573, "step": 15300 }, { "epoch": 6.38, "eval_accuracy": 0.9306666666666666, "eval_loss": 0.20143656432628632, "eval_runtime": 17.4933, "eval_samples_per_second": 1371.953, "eval_steps_per_second": 4.287, "step": 15300 }, { "epoch": 6.38, "learning_rate": 1e-06, "loss": 0.158, "step": 15320 }, { "epoch": 6.38, "eval_accuracy": 0.9324583333333333, "eval_loss": 0.19875669479370117, "eval_runtime": 16.9838, "eval_samples_per_second": 1413.115, "eval_steps_per_second": 4.416, "step": 15320 }, { "epoch": 6.39, "learning_rate": 1e-06, "loss": 0.1359, "step": 15340 }, { "epoch": 6.39, "eval_accuracy": 0.9317916666666667, "eval_loss": 0.19885526597499847, "eval_runtime": 16.3343, "eval_samples_per_second": 1469.303, "eval_steps_per_second": 4.592, "step": 15340 }, { "epoch": 6.4, "learning_rate": 1e-06, "loss": 0.2088, "step": 15360 }, { "epoch": 6.4, "eval_accuracy": 0.931625, "eval_loss": 0.19839046895503998, "eval_runtime": 16.2551, "eval_samples_per_second": 1476.461, "eval_steps_per_second": 4.614, "step": 15360 }, { "epoch": 6.41, "learning_rate": 1e-06, "loss": 0.1952, "step": 15380 }, { "epoch": 6.41, "eval_accuracy": 0.9315833333333333, "eval_loss": 0.1983661949634552, "eval_runtime": 16.7153, "eval_samples_per_second": 1435.806, "eval_steps_per_second": 4.487, "step": 15380 }, { "epoch": 6.42, "learning_rate": 1e-06, "loss": 0.1516, "step": 15400 }, { "epoch": 6.42, "eval_accuracy": 0.9327083333333334, "eval_loss": 0.1965571790933609, "eval_runtime": 16.5815, "eval_samples_per_second": 1447.4, "eval_steps_per_second": 4.523, "step": 15400 }, { "epoch": 6.42, "learning_rate": 1e-06, "loss": 0.2063, "step": 15420 }, { "epoch": 6.42, "eval_accuracy": 0.9333333333333333, "eval_loss": 0.19558171927928925, "eval_runtime": 17.0847, "eval_samples_per_second": 1404.766, "eval_steps_per_second": 4.39, "step": 15420 }, { "epoch": 6.43, "learning_rate": 1e-06, "loss": 0.1766, "step": 15440 }, { "epoch": 6.43, "eval_accuracy": 0.932875, "eval_loss": 0.19576992094516754, "eval_runtime": 17.3112, "eval_samples_per_second": 1386.388, "eval_steps_per_second": 4.332, "step": 15440 }, { "epoch": 6.44, "learning_rate": 1e-06, "loss": 0.1711, "step": 15460 }, { "epoch": 6.44, "eval_accuracy": 0.93275, "eval_loss": 0.1965101659297943, "eval_runtime": 16.7762, "eval_samples_per_second": 1430.595, "eval_steps_per_second": 4.471, "step": 15460 }, { "epoch": 6.45, "learning_rate": 1e-06, "loss": 0.1621, "step": 15480 }, { "epoch": 6.45, "eval_accuracy": 0.932375, "eval_loss": 0.19722963869571686, "eval_runtime": 16.5923, "eval_samples_per_second": 1446.456, "eval_steps_per_second": 4.52, "step": 15480 }, { "epoch": 6.46, "learning_rate": 1e-06, "loss": 0.1854, "step": 15500 }, { "epoch": 6.46, "eval_accuracy": 0.9300833333333334, "eval_loss": 0.20203134417533875, "eval_runtime": 16.701, "eval_samples_per_second": 1437.04, "eval_steps_per_second": 4.491, "step": 15500 }, { "epoch": 6.47, "learning_rate": 1e-06, "loss": 0.1731, "step": 15520 }, { "epoch": 6.47, "eval_accuracy": 0.9314583333333334, "eval_loss": 0.19797883927822113, "eval_runtime": 17.3415, "eval_samples_per_second": 1383.962, "eval_steps_per_second": 4.325, "step": 15520 }, { "epoch": 6.47, "learning_rate": 1e-06, "loss": 0.1948, "step": 15540 }, { "epoch": 6.47, "eval_accuracy": 0.9307916666666667, "eval_loss": 0.20005568861961365, "eval_runtime": 17.5386, "eval_samples_per_second": 1368.409, "eval_steps_per_second": 4.276, "step": 15540 }, { "epoch": 6.48, "learning_rate": 1e-06, "loss": 0.2325, "step": 15560 }, { "epoch": 6.48, "eval_accuracy": 0.93075, "eval_loss": 0.1986798346042633, "eval_runtime": 17.0336, "eval_samples_per_second": 1408.979, "eval_steps_per_second": 4.403, "step": 15560 }, { "epoch": 6.49, "learning_rate": 1e-06, "loss": 0.1913, "step": 15580 }, { "epoch": 6.49, "eval_accuracy": 0.9315833333333333, "eval_loss": 0.19669051468372345, "eval_runtime": 16.2579, "eval_samples_per_second": 1476.206, "eval_steps_per_second": 4.613, "step": 15580 }, { "epoch": 6.5, "learning_rate": 1e-06, "loss": 0.1755, "step": 15600 }, { "epoch": 6.5, "eval_accuracy": 0.9321666666666667, "eval_loss": 0.1981978416442871, "eval_runtime": 16.3857, "eval_samples_per_second": 1464.688, "eval_steps_per_second": 4.577, "step": 15600 }, { "epoch": 6.51, "learning_rate": 1e-06, "loss": 0.1944, "step": 15620 }, { "epoch": 6.51, "eval_accuracy": 0.9316666666666666, "eval_loss": 0.20202693343162537, "eval_runtime": 16.8881, "eval_samples_per_second": 1421.117, "eval_steps_per_second": 4.441, "step": 15620 }, { "epoch": 6.52, "learning_rate": 1e-06, "loss": 0.1487, "step": 15640 }, { "epoch": 6.52, "eval_accuracy": 0.9341666666666667, "eval_loss": 0.19385740160942078, "eval_runtime": 16.5171, "eval_samples_per_second": 1453.042, "eval_steps_per_second": 4.541, "step": 15640 }, { "epoch": 6.53, "learning_rate": 1e-06, "loss": 0.1854, "step": 15660 }, { "epoch": 6.53, "eval_accuracy": 0.9324166666666667, "eval_loss": 0.19531066715717316, "eval_runtime": 17.796, "eval_samples_per_second": 1348.616, "eval_steps_per_second": 4.214, "step": 15660 }, { "epoch": 6.53, "learning_rate": 1e-06, "loss": 0.1331, "step": 15680 }, { "epoch": 6.53, "eval_accuracy": 0.933125, "eval_loss": 0.1954515278339386, "eval_runtime": 17.1458, "eval_samples_per_second": 1399.762, "eval_steps_per_second": 4.374, "step": 15680 }, { "epoch": 6.54, "learning_rate": 1e-06, "loss": 0.2017, "step": 15700 }, { "epoch": 6.54, "eval_accuracy": 0.93275, "eval_loss": 0.19526571035385132, "eval_runtime": 17.0288, "eval_samples_per_second": 1409.381, "eval_steps_per_second": 4.404, "step": 15700 }, { "epoch": 6.55, "learning_rate": 1e-06, "loss": 0.1507, "step": 15720 }, { "epoch": 6.55, "eval_accuracy": 0.9327083333333334, "eval_loss": 0.19576336443424225, "eval_runtime": 16.53, "eval_samples_per_second": 1451.904, "eval_steps_per_second": 4.537, "step": 15720 }, { "epoch": 6.56, "learning_rate": 1e-06, "loss": 0.1459, "step": 15740 }, { "epoch": 6.56, "eval_accuracy": 0.9308333333333333, "eval_loss": 0.19955700635910034, "eval_runtime": 16.5253, "eval_samples_per_second": 1452.316, "eval_steps_per_second": 4.538, "step": 15740 }, { "epoch": 6.57, "learning_rate": 1e-06, "loss": 0.1585, "step": 15760 }, { "epoch": 6.57, "eval_accuracy": 0.932625, "eval_loss": 0.1977219432592392, "eval_runtime": 16.8034, "eval_samples_per_second": 1428.285, "eval_steps_per_second": 4.463, "step": 15760 }, { "epoch": 6.58, "learning_rate": 1e-06, "loss": 0.1814, "step": 15780 }, { "epoch": 6.58, "eval_accuracy": 0.93375, "eval_loss": 0.19515223801136017, "eval_runtime": 17.0293, "eval_samples_per_second": 1409.337, "eval_steps_per_second": 4.404, "step": 15780 }, { "epoch": 6.58, "learning_rate": 1e-06, "loss": 0.1383, "step": 15800 }, { "epoch": 6.58, "eval_accuracy": 0.9335833333333333, "eval_loss": 0.19543632864952087, "eval_runtime": 17.0436, "eval_samples_per_second": 1408.152, "eval_steps_per_second": 4.4, "step": 15800 }, { "epoch": 6.59, "learning_rate": 1e-06, "loss": 0.1981, "step": 15820 }, { "epoch": 6.59, "eval_accuracy": 0.9323333333333333, "eval_loss": 0.19736984372138977, "eval_runtime": 16.4411, "eval_samples_per_second": 1459.753, "eval_steps_per_second": 4.562, "step": 15820 }, { "epoch": 6.6, "learning_rate": 1e-06, "loss": 0.1715, "step": 15840 }, { "epoch": 6.6, "eval_accuracy": 0.93225, "eval_loss": 0.1977250874042511, "eval_runtime": 16.9027, "eval_samples_per_second": 1419.887, "eval_steps_per_second": 4.437, "step": 15840 }, { "epoch": 6.61, "learning_rate": 1e-06, "loss": 0.1392, "step": 15860 }, { "epoch": 6.61, "eval_accuracy": 0.9314166666666667, "eval_loss": 0.19858193397521973, "eval_runtime": 16.2175, "eval_samples_per_second": 1479.884, "eval_steps_per_second": 4.625, "step": 15860 }, { "epoch": 6.62, "learning_rate": 1e-06, "loss": 0.1587, "step": 15880 }, { "epoch": 6.62, "eval_accuracy": 0.93225, "eval_loss": 0.19824790954589844, "eval_runtime": 16.4062, "eval_samples_per_second": 1462.863, "eval_steps_per_second": 4.571, "step": 15880 }, { "epoch": 6.62, "learning_rate": 1e-06, "loss": 0.1697, "step": 15900 }, { "epoch": 6.62, "eval_accuracy": 0.9299583333333333, "eval_loss": 0.2014349400997162, "eval_runtime": 16.3856, "eval_samples_per_second": 1464.702, "eval_steps_per_second": 4.577, "step": 15900 }, { "epoch": 6.63, "learning_rate": 1e-06, "loss": 0.1861, "step": 15920 }, { "epoch": 6.63, "eval_accuracy": 0.9314166666666667, "eval_loss": 0.1988927125930786, "eval_runtime": 16.3893, "eval_samples_per_second": 1464.366, "eval_steps_per_second": 4.576, "step": 15920 }, { "epoch": 6.64, "learning_rate": 1e-06, "loss": 0.174, "step": 15940 }, { "epoch": 6.64, "eval_accuracy": 0.931375, "eval_loss": 0.1998647302389145, "eval_runtime": 16.7731, "eval_samples_per_second": 1430.861, "eval_steps_per_second": 4.471, "step": 15940 }, { "epoch": 6.65, "learning_rate": 1e-06, "loss": 0.1641, "step": 15960 }, { "epoch": 6.65, "eval_accuracy": 0.9314166666666667, "eval_loss": 0.20076820254325867, "eval_runtime": 16.2481, "eval_samples_per_second": 1477.098, "eval_steps_per_second": 4.616, "step": 15960 }, { "epoch": 6.66, "learning_rate": 1e-06, "loss": 0.1408, "step": 15980 }, { "epoch": 6.66, "eval_accuracy": 0.9344583333333333, "eval_loss": 0.19549201428890228, "eval_runtime": 16.2222, "eval_samples_per_second": 1479.458, "eval_steps_per_second": 4.623, "step": 15980 }, { "epoch": 6.67, "learning_rate": 1e-06, "loss": 0.1878, "step": 16000 }, { "epoch": 6.67, "eval_accuracy": 0.9340833333333334, "eval_loss": 0.1959511786699295, "eval_runtime": 16.3546, "eval_samples_per_second": 1467.474, "eval_steps_per_second": 4.586, "step": 16000 }, { "epoch": 6.67, "learning_rate": 1e-06, "loss": 0.1425, "step": 16020 }, { "epoch": 6.67, "eval_accuracy": 0.9337916666666667, "eval_loss": 0.1953056901693344, "eval_runtime": 16.458, "eval_samples_per_second": 1458.255, "eval_steps_per_second": 4.557, "step": 16020 }, { "epoch": 6.68, "learning_rate": 1e-06, "loss": 0.184, "step": 16040 }, { "epoch": 6.68, "eval_accuracy": 0.931625, "eval_loss": 0.19988182187080383, "eval_runtime": 17.0453, "eval_samples_per_second": 1408.016, "eval_steps_per_second": 4.4, "step": 16040 }, { "epoch": 6.69, "learning_rate": 1e-06, "loss": 0.1609, "step": 16060 }, { "epoch": 6.69, "eval_accuracy": 0.9342916666666666, "eval_loss": 0.19486786425113678, "eval_runtime": 16.3548, "eval_samples_per_second": 1467.457, "eval_steps_per_second": 4.586, "step": 16060 }, { "epoch": 6.7, "learning_rate": 1e-06, "loss": 0.1757, "step": 16080 }, { "epoch": 6.7, "eval_accuracy": 0.93375, "eval_loss": 0.195814847946167, "eval_runtime": 16.6686, "eval_samples_per_second": 1439.835, "eval_steps_per_second": 4.499, "step": 16080 }, { "epoch": 6.71, "learning_rate": 1e-06, "loss": 0.1657, "step": 16100 }, { "epoch": 6.71, "eval_accuracy": 0.9322083333333333, "eval_loss": 0.19810351729393005, "eval_runtime": 17.5937, "eval_samples_per_second": 1364.125, "eval_steps_per_second": 4.263, "step": 16100 }, { "epoch": 6.72, "learning_rate": 1e-06, "loss": 0.1496, "step": 16120 }, { "epoch": 6.72, "eval_accuracy": 0.933875, "eval_loss": 0.19454562664031982, "eval_runtime": 17.5068, "eval_samples_per_second": 1370.9, "eval_steps_per_second": 4.284, "step": 16120 }, { "epoch": 6.72, "learning_rate": 1e-06, "loss": 0.2053, "step": 16140 }, { "epoch": 6.72, "eval_accuracy": 0.9334166666666667, "eval_loss": 0.19530512392520905, "eval_runtime": 17.6899, "eval_samples_per_second": 1356.703, "eval_steps_per_second": 4.24, "step": 16140 }, { "epoch": 6.73, "learning_rate": 1e-06, "loss": 0.1905, "step": 16160 }, { "epoch": 6.73, "eval_accuracy": 0.933375, "eval_loss": 0.19494038820266724, "eval_runtime": 17.39, "eval_samples_per_second": 1380.102, "eval_steps_per_second": 4.313, "step": 16160 }, { "epoch": 6.74, "learning_rate": 1e-06, "loss": 0.1515, "step": 16180 }, { "epoch": 6.74, "eval_accuracy": 0.9335833333333333, "eval_loss": 0.19500760734081268, "eval_runtime": 17.8113, "eval_samples_per_second": 1347.457, "eval_steps_per_second": 4.211, "step": 16180 }, { "epoch": 6.75, "learning_rate": 1e-06, "loss": 0.1705, "step": 16200 }, { "epoch": 6.75, "eval_accuracy": 0.9324166666666667, "eval_loss": 0.19622166454792023, "eval_runtime": 17.3835, "eval_samples_per_second": 1380.617, "eval_steps_per_second": 4.314, "step": 16200 }, { "epoch": 6.76, "learning_rate": 1e-06, "loss": 0.1756, "step": 16220 }, { "epoch": 6.76, "eval_accuracy": 0.9332916666666666, "eval_loss": 0.1943114697933197, "eval_runtime": 17.2208, "eval_samples_per_second": 1393.664, "eval_steps_per_second": 4.355, "step": 16220 }, { "epoch": 6.77, "learning_rate": 1e-06, "loss": 0.1729, "step": 16240 }, { "epoch": 6.77, "eval_accuracy": 0.9343333333333333, "eval_loss": 0.19441558420658112, "eval_runtime": 17.5483, "eval_samples_per_second": 1367.653, "eval_steps_per_second": 4.274, "step": 16240 }, { "epoch": 6.78, "learning_rate": 1e-06, "loss": 0.1526, "step": 16260 }, { "epoch": 6.78, "eval_accuracy": 0.9337083333333334, "eval_loss": 0.1942491978406906, "eval_runtime": 16.3394, "eval_samples_per_second": 1468.842, "eval_steps_per_second": 4.59, "step": 16260 }, { "epoch": 6.78, "learning_rate": 1e-06, "loss": 0.1684, "step": 16280 }, { "epoch": 6.78, "eval_accuracy": 0.9335833333333333, "eval_loss": 0.1948528289794922, "eval_runtime": 16.0087, "eval_samples_per_second": 1499.183, "eval_steps_per_second": 4.685, "step": 16280 }, { "epoch": 6.79, "learning_rate": 1e-06, "loss": 0.1756, "step": 16300 }, { "epoch": 6.79, "eval_accuracy": 0.9330416666666667, "eval_loss": 0.19495390355587006, "eval_runtime": 16.737, "eval_samples_per_second": 1433.945, "eval_steps_per_second": 4.481, "step": 16300 }, { "epoch": 6.8, "learning_rate": 1e-06, "loss": 0.1706, "step": 16320 }, { "epoch": 6.8, "eval_accuracy": 0.9318333333333333, "eval_loss": 0.19855649769306183, "eval_runtime": 16.285, "eval_samples_per_second": 1473.752, "eval_steps_per_second": 4.605, "step": 16320 }, { "epoch": 6.81, "learning_rate": 1e-06, "loss": 0.1655, "step": 16340 }, { "epoch": 6.81, "eval_accuracy": 0.9319166666666666, "eval_loss": 0.19920918345451355, "eval_runtime": 17.3955, "eval_samples_per_second": 1379.671, "eval_steps_per_second": 4.311, "step": 16340 }, { "epoch": 6.82, "learning_rate": 1e-06, "loss": 0.1858, "step": 16360 }, { "epoch": 6.82, "eval_accuracy": 0.9325, "eval_loss": 0.19792011380195618, "eval_runtime": 17.319, "eval_samples_per_second": 1385.762, "eval_steps_per_second": 4.331, "step": 16360 }, { "epoch": 6.83, "learning_rate": 1e-06, "loss": 0.1926, "step": 16380 }, { "epoch": 6.83, "eval_accuracy": 0.9332083333333333, "eval_loss": 0.19599467515945435, "eval_runtime": 16.5773, "eval_samples_per_second": 1447.759, "eval_steps_per_second": 4.524, "step": 16380 }, { "epoch": 6.83, "learning_rate": 1e-06, "loss": 0.162, "step": 16400 }, { "epoch": 6.83, "eval_accuracy": 0.9325, "eval_loss": 0.1975654661655426, "eval_runtime": 16.0954, "eval_samples_per_second": 1491.106, "eval_steps_per_second": 4.66, "step": 16400 }, { "epoch": 6.84, "learning_rate": 1e-06, "loss": 0.2168, "step": 16420 }, { "epoch": 6.84, "eval_accuracy": 0.9322083333333333, "eval_loss": 0.19847214221954346, "eval_runtime": 16.3923, "eval_samples_per_second": 1464.098, "eval_steps_per_second": 4.575, "step": 16420 }, { "epoch": 6.85, "learning_rate": 1e-06, "loss": 0.173, "step": 16440 }, { "epoch": 6.85, "eval_accuracy": 0.9312916666666666, "eval_loss": 0.2017899602651596, "eval_runtime": 16.2391, "eval_samples_per_second": 1477.915, "eval_steps_per_second": 4.618, "step": 16440 }, { "epoch": 6.86, "learning_rate": 1e-06, "loss": 0.1891, "step": 16460 }, { "epoch": 6.86, "eval_accuracy": 0.93225, "eval_loss": 0.19827592372894287, "eval_runtime": 16.3796, "eval_samples_per_second": 1465.236, "eval_steps_per_second": 4.579, "step": 16460 }, { "epoch": 6.87, "learning_rate": 1e-06, "loss": 0.1619, "step": 16480 }, { "epoch": 6.87, "eval_accuracy": 0.9332916666666666, "eval_loss": 0.19625312089920044, "eval_runtime": 17.7432, "eval_samples_per_second": 1352.634, "eval_steps_per_second": 4.227, "step": 16480 }, { "epoch": 6.88, "learning_rate": 1e-06, "loss": 0.1884, "step": 16500 }, { "epoch": 6.88, "eval_accuracy": 0.9324166666666667, "eval_loss": 0.19771744310855865, "eval_runtime": 16.6178, "eval_samples_per_second": 1444.233, "eval_steps_per_second": 4.513, "step": 16500 }, { "epoch": 6.88, "learning_rate": 1e-06, "loss": 0.1735, "step": 16520 }, { "epoch": 6.88, "eval_accuracy": 0.9327916666666667, "eval_loss": 0.19762367010116577, "eval_runtime": 16.9624, "eval_samples_per_second": 1414.896, "eval_steps_per_second": 4.422, "step": 16520 }, { "epoch": 6.89, "learning_rate": 1e-06, "loss": 0.1949, "step": 16540 }, { "epoch": 6.89, "eval_accuracy": 0.9330416666666667, "eval_loss": 0.19725541770458221, "eval_runtime": 16.0777, "eval_samples_per_second": 1492.755, "eval_steps_per_second": 4.665, "step": 16540 }, { "epoch": 6.9, "learning_rate": 1e-06, "loss": 0.2021, "step": 16560 }, { "epoch": 6.9, "eval_accuracy": 0.9340833333333334, "eval_loss": 0.1951657384634018, "eval_runtime": 16.8714, "eval_samples_per_second": 1422.525, "eval_steps_per_second": 4.445, "step": 16560 }, { "epoch": 6.91, "learning_rate": 1e-06, "loss": 0.1742, "step": 16580 }, { "epoch": 6.91, "eval_accuracy": 0.9345833333333333, "eval_loss": 0.19460086524486542, "eval_runtime": 16.336, "eval_samples_per_second": 1469.149, "eval_steps_per_second": 4.591, "step": 16580 }, { "epoch": 6.92, "learning_rate": 1e-06, "loss": 0.1935, "step": 16600 }, { "epoch": 6.92, "eval_accuracy": 0.9344583333333333, "eval_loss": 0.19422036409378052, "eval_runtime": 16.245, "eval_samples_per_second": 1477.38, "eval_steps_per_second": 4.617, "step": 16600 }, { "epoch": 6.92, "learning_rate": 1e-06, "loss": 0.1709, "step": 16620 }, { "epoch": 6.92, "eval_accuracy": 0.9335, "eval_loss": 0.1949995458126068, "eval_runtime": 16.3728, "eval_samples_per_second": 1465.845, "eval_steps_per_second": 4.581, "step": 16620 }, { "epoch": 6.93, "learning_rate": 1e-06, "loss": 0.1675, "step": 16640 }, { "epoch": 6.93, "eval_accuracy": 0.9334166666666667, "eval_loss": 0.19600391387939453, "eval_runtime": 16.421, "eval_samples_per_second": 1461.543, "eval_steps_per_second": 4.567, "step": 16640 }, { "epoch": 6.94, "learning_rate": 1e-06, "loss": 0.1648, "step": 16660 }, { "epoch": 6.94, "eval_accuracy": 0.9329583333333333, "eval_loss": 0.19674494862556458, "eval_runtime": 16.3324, "eval_samples_per_second": 1469.47, "eval_steps_per_second": 4.592, "step": 16660 }, { "epoch": 6.95, "learning_rate": 1e-06, "loss": 0.1757, "step": 16680 }, { "epoch": 6.95, "eval_accuracy": 0.9335833333333333, "eval_loss": 0.1960502564907074, "eval_runtime": 17.1637, "eval_samples_per_second": 1398.302, "eval_steps_per_second": 4.37, "step": 16680 }, { "epoch": 6.96, "learning_rate": 1e-06, "loss": 0.1743, "step": 16700 }, { "epoch": 6.96, "eval_accuracy": 0.9329583333333333, "eval_loss": 0.19483575224876404, "eval_runtime": 15.8961, "eval_samples_per_second": 1509.806, "eval_steps_per_second": 4.718, "step": 16700 }, { "epoch": 6.97, "learning_rate": 1e-06, "loss": 0.17, "step": 16720 }, { "epoch": 6.97, "eval_accuracy": 0.9332083333333333, "eval_loss": 0.19447939097881317, "eval_runtime": 16.8287, "eval_samples_per_second": 1426.132, "eval_steps_per_second": 4.457, "step": 16720 }, { "epoch": 6.97, "learning_rate": 1e-06, "loss": 0.1625, "step": 16740 }, { "epoch": 6.97, "eval_accuracy": 0.933875, "eval_loss": 0.1952148824930191, "eval_runtime": 16.0124, "eval_samples_per_second": 1498.839, "eval_steps_per_second": 4.684, "step": 16740 }, { "epoch": 6.98, "learning_rate": 1e-06, "loss": 0.1802, "step": 16760 }, { "epoch": 6.98, "eval_accuracy": 0.9338333333333333, "eval_loss": 0.19577091932296753, "eval_runtime": 16.0326, "eval_samples_per_second": 1496.951, "eval_steps_per_second": 4.678, "step": 16760 }, { "epoch": 6.99, "learning_rate": 1e-06, "loss": 0.1855, "step": 16780 }, { "epoch": 6.99, "eval_accuracy": 0.932625, "eval_loss": 0.19726844131946564, "eval_runtime": 17.465, "eval_samples_per_second": 1374.176, "eval_steps_per_second": 4.294, "step": 16780 }, { "epoch": 7.0, "learning_rate": 1e-06, "loss": 0.1623, "step": 16800 }, { "epoch": 7.0, "eval_accuracy": 0.9333333333333333, "eval_loss": 0.19629396498203278, "eval_runtime": 17.5957, "eval_samples_per_second": 1363.968, "eval_steps_per_second": 4.262, "step": 16800 }, { "epoch": 7.01, "learning_rate": 1e-06, "loss": 0.1521, "step": 16820 }, { "epoch": 7.01, "eval_accuracy": 0.931625, "eval_loss": 0.1997893750667572, "eval_runtime": 17.3287, "eval_samples_per_second": 1384.986, "eval_steps_per_second": 4.328, "step": 16820 }, { "epoch": 7.02, "learning_rate": 1e-06, "loss": 0.2071, "step": 16840 }, { "epoch": 7.02, "eval_accuracy": 0.9344166666666667, "eval_loss": 0.1939399242401123, "eval_runtime": 16.6723, "eval_samples_per_second": 1439.518, "eval_steps_per_second": 4.498, "step": 16840 }, { "epoch": 7.03, "learning_rate": 1e-06, "loss": 0.1669, "step": 16860 }, { "epoch": 7.03, "eval_accuracy": 0.9325833333333333, "eval_loss": 0.1989278346300125, "eval_runtime": 16.9163, "eval_samples_per_second": 1418.746, "eval_steps_per_second": 4.434, "step": 16860 }, { "epoch": 7.03, "learning_rate": 1e-06, "loss": 0.1695, "step": 16880 }, { "epoch": 7.03, "eval_accuracy": 0.9313333333333333, "eval_loss": 0.20084674656391144, "eval_runtime": 17.4712, "eval_samples_per_second": 1373.69, "eval_steps_per_second": 4.293, "step": 16880 }, { "epoch": 7.04, "learning_rate": 1e-06, "loss": 0.2092, "step": 16900 }, { "epoch": 7.04, "eval_accuracy": 0.9324166666666667, "eval_loss": 0.19925980269908905, "eval_runtime": 17.5432, "eval_samples_per_second": 1368.052, "eval_steps_per_second": 4.275, "step": 16900 }, { "epoch": 7.05, "learning_rate": 1e-06, "loss": 0.1859, "step": 16920 }, { "epoch": 7.05, "eval_accuracy": 0.9331666666666667, "eval_loss": 0.19550496339797974, "eval_runtime": 17.387, "eval_samples_per_second": 1380.338, "eval_steps_per_second": 4.314, "step": 16920 }, { "epoch": 7.06, "learning_rate": 1e-06, "loss": 0.1407, "step": 16940 }, { "epoch": 7.06, "eval_accuracy": 0.9350833333333334, "eval_loss": 0.19279736280441284, "eval_runtime": 17.6476, "eval_samples_per_second": 1359.958, "eval_steps_per_second": 4.25, "step": 16940 }, { "epoch": 7.07, "learning_rate": 1e-06, "loss": 0.1564, "step": 16960 }, { "epoch": 7.07, "eval_accuracy": 0.9344583333333333, "eval_loss": 0.19368121027946472, "eval_runtime": 17.8587, "eval_samples_per_second": 1343.884, "eval_steps_per_second": 4.2, "step": 16960 }, { "epoch": 7.08, "learning_rate": 1e-06, "loss": 0.1514, "step": 16980 }, { "epoch": 7.08, "eval_accuracy": 0.9342083333333333, "eval_loss": 0.19475901126861572, "eval_runtime": 17.1608, "eval_samples_per_second": 1398.536, "eval_steps_per_second": 4.37, "step": 16980 }, { "epoch": 7.08, "learning_rate": 1e-06, "loss": 0.1425, "step": 17000 }, { "epoch": 7.08, "eval_accuracy": 0.932125, "eval_loss": 0.19865868985652924, "eval_runtime": 17.0882, "eval_samples_per_second": 1404.476, "eval_steps_per_second": 4.389, "step": 17000 }, { "epoch": 7.09, "learning_rate": 1e-06, "loss": 0.1849, "step": 17020 }, { "epoch": 7.09, "eval_accuracy": 0.9346666666666666, "eval_loss": 0.1954139769077301, "eval_runtime": 17.452, "eval_samples_per_second": 1375.197, "eval_steps_per_second": 4.297, "step": 17020 }, { "epoch": 7.1, "learning_rate": 1e-06, "loss": 0.1662, "step": 17040 }, { "epoch": 7.1, "eval_accuracy": 0.934875, "eval_loss": 0.19401773810386658, "eval_runtime": 16.9959, "eval_samples_per_second": 1412.107, "eval_steps_per_second": 4.413, "step": 17040 }, { "epoch": 7.11, "learning_rate": 1e-06, "loss": 0.1887, "step": 17060 }, { "epoch": 7.11, "eval_accuracy": 0.9336666666666666, "eval_loss": 0.1946951448917389, "eval_runtime": 16.4323, "eval_samples_per_second": 1460.542, "eval_steps_per_second": 4.564, "step": 17060 }, { "epoch": 7.12, "learning_rate": 1e-06, "loss": 0.1704, "step": 17080 }, { "epoch": 7.12, "eval_accuracy": 0.9337083333333334, "eval_loss": 0.1954944133758545, "eval_runtime": 17.7938, "eval_samples_per_second": 1348.781, "eval_steps_per_second": 4.215, "step": 17080 }, { "epoch": 7.12, "learning_rate": 1e-06, "loss": 0.2087, "step": 17100 }, { "epoch": 7.12, "eval_accuracy": 0.9334166666666667, "eval_loss": 0.19571976363658905, "eval_runtime": 16.3621, "eval_samples_per_second": 1466.803, "eval_steps_per_second": 4.584, "step": 17100 }, { "epoch": 7.13, "learning_rate": 1e-06, "loss": 0.1576, "step": 17120 }, { "epoch": 7.13, "eval_accuracy": 0.934875, "eval_loss": 0.19232991337776184, "eval_runtime": 17.0875, "eval_samples_per_second": 1404.537, "eval_steps_per_second": 4.389, "step": 17120 }, { "epoch": 7.14, "learning_rate": 1e-06, "loss": 0.1837, "step": 17140 }, { "epoch": 7.14, "eval_accuracy": 0.9337083333333334, "eval_loss": 0.19416970014572144, "eval_runtime": 17.2421, "eval_samples_per_second": 1391.941, "eval_steps_per_second": 4.35, "step": 17140 }, { "epoch": 7.15, "learning_rate": 1e-06, "loss": 0.1771, "step": 17160 }, { "epoch": 7.15, "eval_accuracy": 0.9344583333333333, "eval_loss": 0.19288769364356995, "eval_runtime": 15.9944, "eval_samples_per_second": 1500.527, "eval_steps_per_second": 4.689, "step": 17160 }, { "epoch": 7.16, "learning_rate": 1e-06, "loss": 0.1661, "step": 17180 }, { "epoch": 7.16, "eval_accuracy": 0.9346666666666666, "eval_loss": 0.19377556443214417, "eval_runtime": 16.1242, "eval_samples_per_second": 1488.45, "eval_steps_per_second": 4.651, "step": 17180 }, { "epoch": 7.17, "learning_rate": 1e-06, "loss": 0.1839, "step": 17200 }, { "epoch": 7.17, "eval_accuracy": 0.9345833333333333, "eval_loss": 0.19333358108997345, "eval_runtime": 16.31, "eval_samples_per_second": 1471.488, "eval_steps_per_second": 4.598, "step": 17200 }, { "epoch": 7.17, "learning_rate": 1e-06, "loss": 0.172, "step": 17220 }, { "epoch": 7.17, "eval_accuracy": 0.9345416666666667, "eval_loss": 0.19157427549362183, "eval_runtime": 16.7874, "eval_samples_per_second": 1429.64, "eval_steps_per_second": 4.468, "step": 17220 }, { "epoch": 7.18, "learning_rate": 1e-06, "loss": 0.1563, "step": 17240 }, { "epoch": 7.18, "eval_accuracy": 0.9334583333333333, "eval_loss": 0.19396959245204926, "eval_runtime": 16.9407, "eval_samples_per_second": 1416.708, "eval_steps_per_second": 4.427, "step": 17240 }, { "epoch": 7.19, "learning_rate": 1e-06, "loss": 0.1835, "step": 17260 }, { "epoch": 7.19, "eval_accuracy": 0.9312916666666666, "eval_loss": 0.20071102678775787, "eval_runtime": 17.2109, "eval_samples_per_second": 1394.462, "eval_steps_per_second": 4.358, "step": 17260 }, { "epoch": 7.2, "learning_rate": 1e-06, "loss": 0.1794, "step": 17280 }, { "epoch": 7.2, "eval_accuracy": 0.932375, "eval_loss": 0.1961183249950409, "eval_runtime": 16.6977, "eval_samples_per_second": 1437.327, "eval_steps_per_second": 4.492, "step": 17280 }, { "epoch": 7.21, "learning_rate": 1e-06, "loss": 0.2048, "step": 17300 }, { "epoch": 7.21, "eval_accuracy": 0.934125, "eval_loss": 0.1937197893857956, "eval_runtime": 17.0498, "eval_samples_per_second": 1407.643, "eval_steps_per_second": 4.399, "step": 17300 }, { "epoch": 7.22, "learning_rate": 1e-06, "loss": 0.1609, "step": 17320 }, { "epoch": 7.22, "eval_accuracy": 0.9335, "eval_loss": 0.1952856183052063, "eval_runtime": 16.2943, "eval_samples_per_second": 1472.908, "eval_steps_per_second": 4.603, "step": 17320 }, { "epoch": 7.22, "learning_rate": 1e-06, "loss": 0.1588, "step": 17340 }, { "epoch": 7.22, "eval_accuracy": 0.932375, "eval_loss": 0.19826608896255493, "eval_runtime": 16.8162, "eval_samples_per_second": 1427.195, "eval_steps_per_second": 4.46, "step": 17340 }, { "epoch": 7.23, "learning_rate": 1e-06, "loss": 0.1796, "step": 17360 }, { "epoch": 7.23, "eval_accuracy": 0.932375, "eval_loss": 0.1968182921409607, "eval_runtime": 17.6501, "eval_samples_per_second": 1359.765, "eval_steps_per_second": 4.249, "step": 17360 }, { "epoch": 7.24, "learning_rate": 1e-06, "loss": 0.1834, "step": 17380 }, { "epoch": 7.24, "eval_accuracy": 0.932625, "eval_loss": 0.19632968306541443, "eval_runtime": 17.2336, "eval_samples_per_second": 1392.628, "eval_steps_per_second": 4.352, "step": 17380 }, { "epoch": 7.25, "learning_rate": 1e-06, "loss": 0.1564, "step": 17400 }, { "epoch": 7.25, "eval_accuracy": 0.9337916666666667, "eval_loss": 0.19396378099918365, "eval_runtime": 17.7707, "eval_samples_per_second": 1350.539, "eval_steps_per_second": 4.22, "step": 17400 }, { "epoch": 7.26, "learning_rate": 1e-06, "loss": 0.1513, "step": 17420 }, { "epoch": 7.26, "eval_accuracy": 0.932875, "eval_loss": 0.19559015333652496, "eval_runtime": 17.1532, "eval_samples_per_second": 1399.156, "eval_steps_per_second": 4.372, "step": 17420 }, { "epoch": 7.27, "learning_rate": 1e-06, "loss": 0.1568, "step": 17440 }, { "epoch": 7.27, "eval_accuracy": 0.935125, "eval_loss": 0.19202855229377747, "eval_runtime": 17.1051, "eval_samples_per_second": 1403.089, "eval_steps_per_second": 4.385, "step": 17440 }, { "epoch": 7.28, "learning_rate": 1e-06, "loss": 0.1748, "step": 17460 }, { "epoch": 7.28, "eval_accuracy": 0.9349583333333333, "eval_loss": 0.19286368787288666, "eval_runtime": 15.5136, "eval_samples_per_second": 1547.025, "eval_steps_per_second": 4.834, "step": 17460 }, { "epoch": 7.28, "learning_rate": 1e-06, "loss": 0.1578, "step": 17480 }, { "epoch": 7.28, "eval_accuracy": 0.9335, "eval_loss": 0.194309800863266, "eval_runtime": 16.1632, "eval_samples_per_second": 1484.852, "eval_steps_per_second": 4.64, "step": 17480 }, { "epoch": 7.29, "learning_rate": 1e-06, "loss": 0.1321, "step": 17500 }, { "epoch": 7.29, "eval_accuracy": 0.9351666666666667, "eval_loss": 0.19201384484767914, "eval_runtime": 16.5178, "eval_samples_per_second": 1452.974, "eval_steps_per_second": 4.541, "step": 17500 }, { "epoch": 7.3, "learning_rate": 1e-06, "loss": 0.1963, "step": 17520 }, { "epoch": 7.3, "eval_accuracy": 0.9339166666666666, "eval_loss": 0.19566002488136292, "eval_runtime": 16.995, "eval_samples_per_second": 1412.184, "eval_steps_per_second": 4.413, "step": 17520 }, { "epoch": 7.31, "learning_rate": 1e-06, "loss": 0.1927, "step": 17540 }, { "epoch": 7.31, "eval_accuracy": 0.9330416666666667, "eval_loss": 0.19617126882076263, "eval_runtime": 17.1067, "eval_samples_per_second": 1402.962, "eval_steps_per_second": 4.384, "step": 17540 }, { "epoch": 7.32, "learning_rate": 1e-06, "loss": 0.1658, "step": 17560 }, { "epoch": 7.32, "eval_accuracy": 0.9332916666666666, "eval_loss": 0.195390984416008, "eval_runtime": 17.0629, "eval_samples_per_second": 1406.565, "eval_steps_per_second": 4.396, "step": 17560 }, { "epoch": 7.33, "learning_rate": 1e-06, "loss": 0.1452, "step": 17580 }, { "epoch": 7.33, "eval_accuracy": 0.9345833333333333, "eval_loss": 0.19409912824630737, "eval_runtime": 16.3617, "eval_samples_per_second": 1466.84, "eval_steps_per_second": 4.584, "step": 17580 }, { "epoch": 7.33, "learning_rate": 1e-06, "loss": 0.1992, "step": 17600 }, { "epoch": 7.33, "eval_accuracy": 0.934125, "eval_loss": 0.1932896226644516, "eval_runtime": 16.2659, "eval_samples_per_second": 1475.476, "eval_steps_per_second": 4.611, "step": 17600 }, { "epoch": 7.34, "learning_rate": 1e-06, "loss": 0.1824, "step": 17620 }, { "epoch": 7.34, "eval_accuracy": 0.934625, "eval_loss": 0.1922486424446106, "eval_runtime": 16.3428, "eval_samples_per_second": 1468.54, "eval_steps_per_second": 4.589, "step": 17620 }, { "epoch": 7.35, "learning_rate": 1e-06, "loss": 0.1388, "step": 17640 }, { "epoch": 7.35, "eval_accuracy": 0.9345, "eval_loss": 0.19244614243507385, "eval_runtime": 17.0407, "eval_samples_per_second": 1408.391, "eval_steps_per_second": 4.401, "step": 17640 }, { "epoch": 7.36, "learning_rate": 1e-06, "loss": 0.1732, "step": 17660 }, { "epoch": 7.36, "eval_accuracy": 0.9343333333333333, "eval_loss": 0.19296354055404663, "eval_runtime": 16.3329, "eval_samples_per_second": 1469.426, "eval_steps_per_second": 4.592, "step": 17660 }, { "epoch": 7.37, "learning_rate": 1e-06, "loss": 0.1824, "step": 17680 }, { "epoch": 7.37, "eval_accuracy": 0.9335, "eval_loss": 0.19450555741786957, "eval_runtime": 16.422, "eval_samples_per_second": 1461.453, "eval_steps_per_second": 4.567, "step": 17680 }, { "epoch": 7.38, "learning_rate": 1e-06, "loss": 0.1715, "step": 17700 }, { "epoch": 7.38, "eval_accuracy": 0.9337916666666667, "eval_loss": 0.19444973766803741, "eval_runtime": 17.29, "eval_samples_per_second": 1388.083, "eval_steps_per_second": 4.338, "step": 17700 }, { "epoch": 7.38, "learning_rate": 1e-06, "loss": 0.1228, "step": 17720 }, { "epoch": 7.38, "eval_accuracy": 0.9344583333333333, "eval_loss": 0.1941564828157425, "eval_runtime": 17.8125, "eval_samples_per_second": 1347.37, "eval_steps_per_second": 4.211, "step": 17720 }, { "epoch": 7.39, "learning_rate": 1e-06, "loss": 0.1787, "step": 17740 }, { "epoch": 7.39, "eval_accuracy": 0.93425, "eval_loss": 0.19363898038864136, "eval_runtime": 16.6565, "eval_samples_per_second": 1440.875, "eval_steps_per_second": 4.503, "step": 17740 }, { "epoch": 7.4, "learning_rate": 1e-06, "loss": 0.1422, "step": 17760 }, { "epoch": 7.4, "eval_accuracy": 0.9340833333333334, "eval_loss": 0.19510914385318756, "eval_runtime": 16.7029, "eval_samples_per_second": 1436.875, "eval_steps_per_second": 4.49, "step": 17760 }, { "epoch": 7.41, "learning_rate": 1e-06, "loss": 0.1541, "step": 17780 }, { "epoch": 7.41, "eval_accuracy": 0.934, "eval_loss": 0.19492153823375702, "eval_runtime": 16.117, "eval_samples_per_second": 1489.115, "eval_steps_per_second": 4.653, "step": 17780 }, { "epoch": 7.42, "learning_rate": 1e-06, "loss": 0.188, "step": 17800 }, { "epoch": 7.42, "eval_accuracy": 0.9337083333333334, "eval_loss": 0.1961117386817932, "eval_runtime": 16.3107, "eval_samples_per_second": 1471.429, "eval_steps_per_second": 4.598, "step": 17800 }, { "epoch": 7.42, "learning_rate": 1e-06, "loss": 0.1591, "step": 17820 }, { "epoch": 7.42, "eval_accuracy": 0.93475, "eval_loss": 0.19403943419456482, "eval_runtime": 16.2708, "eval_samples_per_second": 1475.037, "eval_steps_per_second": 4.609, "step": 17820 }, { "epoch": 7.43, "learning_rate": 1e-06, "loss": 0.1526, "step": 17840 }, { "epoch": 7.43, "eval_accuracy": 0.933, "eval_loss": 0.19669285416603088, "eval_runtime": 16.4196, "eval_samples_per_second": 1461.668, "eval_steps_per_second": 4.568, "step": 17840 }, { "epoch": 7.44, "learning_rate": 1e-06, "loss": 0.1988, "step": 17860 }, { "epoch": 7.44, "eval_accuracy": 0.933, "eval_loss": 0.19575349986553192, "eval_runtime": 17.4894, "eval_samples_per_second": 1372.256, "eval_steps_per_second": 4.288, "step": 17860 }, { "epoch": 7.45, "learning_rate": 1e-06, "loss": 0.1471, "step": 17880 }, { "epoch": 7.45, "eval_accuracy": 0.9339166666666666, "eval_loss": 0.19493769109249115, "eval_runtime": 17.3622, "eval_samples_per_second": 1382.312, "eval_steps_per_second": 4.32, "step": 17880 }, { "epoch": 7.46, "learning_rate": 1e-06, "loss": 0.1631, "step": 17900 }, { "epoch": 7.46, "eval_accuracy": 0.9326666666666666, "eval_loss": 0.1954115778207779, "eval_runtime": 16.3935, "eval_samples_per_second": 1463.997, "eval_steps_per_second": 4.575, "step": 17900 }, { "epoch": 7.47, "learning_rate": 1e-06, "loss": 0.2076, "step": 17920 }, { "epoch": 7.47, "eval_accuracy": 0.9314583333333334, "eval_loss": 0.20004239678382874, "eval_runtime": 16.8491, "eval_samples_per_second": 1424.406, "eval_steps_per_second": 4.451, "step": 17920 }, { "epoch": 7.47, "learning_rate": 1e-06, "loss": 0.1541, "step": 17940 }, { "epoch": 7.47, "eval_accuracy": 0.9330833333333334, "eval_loss": 0.1946871429681778, "eval_runtime": 16.9255, "eval_samples_per_second": 1417.978, "eval_steps_per_second": 4.431, "step": 17940 }, { "epoch": 7.48, "learning_rate": 1e-06, "loss": 0.2057, "step": 17960 }, { "epoch": 7.48, "eval_accuracy": 0.9335833333333333, "eval_loss": 0.194530189037323, "eval_runtime": 17.2774, "eval_samples_per_second": 1389.094, "eval_steps_per_second": 4.341, "step": 17960 }, { "epoch": 7.49, "learning_rate": 1e-06, "loss": 0.1721, "step": 17980 }, { "epoch": 7.49, "eval_accuracy": 0.934, "eval_loss": 0.19205187261104584, "eval_runtime": 16.4737, "eval_samples_per_second": 1456.864, "eval_steps_per_second": 4.553, "step": 17980 }, { "epoch": 7.5, "learning_rate": 1e-06, "loss": 0.1528, "step": 18000 }, { "epoch": 7.5, "eval_accuracy": 0.934375, "eval_loss": 0.19169993698596954, "eval_runtime": 16.6059, "eval_samples_per_second": 1445.27, "eval_steps_per_second": 4.516, "step": 18000 }, { "epoch": 7.51, "learning_rate": 1e-06, "loss": 0.1758, "step": 18020 }, { "epoch": 7.51, "eval_accuracy": 0.9336666666666666, "eval_loss": 0.19537770748138428, "eval_runtime": 16.2403, "eval_samples_per_second": 1477.804, "eval_steps_per_second": 4.618, "step": 18020 }, { "epoch": 7.52, "learning_rate": 1e-06, "loss": 0.1757, "step": 18040 }, { "epoch": 7.52, "eval_accuracy": 0.9337083333333334, "eval_loss": 0.19484874606132507, "eval_runtime": 16.9236, "eval_samples_per_second": 1418.134, "eval_steps_per_second": 4.432, "step": 18040 }, { "epoch": 7.53, "learning_rate": 1e-06, "loss": 0.1499, "step": 18060 }, { "epoch": 7.53, "eval_accuracy": 0.9348333333333333, "eval_loss": 0.19193783402442932, "eval_runtime": 16.6031, "eval_samples_per_second": 1445.509, "eval_steps_per_second": 4.517, "step": 18060 }, { "epoch": 7.53, "learning_rate": 1e-06, "loss": 0.2056, "step": 18080 }, { "epoch": 7.53, "eval_accuracy": 0.9350416666666667, "eval_loss": 0.1917405128479004, "eval_runtime": 17.4615, "eval_samples_per_second": 1374.451, "eval_steps_per_second": 4.295, "step": 18080 }, { "epoch": 7.54, "learning_rate": 1e-06, "loss": 0.1646, "step": 18100 }, { "epoch": 7.54, "eval_accuracy": 0.9340833333333334, "eval_loss": 0.1928911805152893, "eval_runtime": 17.1226, "eval_samples_per_second": 1401.658, "eval_steps_per_second": 4.38, "step": 18100 }, { "epoch": 7.55, "learning_rate": 1e-06, "loss": 0.1312, "step": 18120 }, { "epoch": 7.55, "eval_accuracy": 0.9336666666666666, "eval_loss": 0.19554804265499115, "eval_runtime": 16.8658, "eval_samples_per_second": 1422.998, "eval_steps_per_second": 4.447, "step": 18120 }, { "epoch": 7.56, "learning_rate": 1e-06, "loss": 0.1759, "step": 18140 }, { "epoch": 7.56, "eval_accuracy": 0.9339166666666666, "eval_loss": 0.1938326209783554, "eval_runtime": 16.5184, "eval_samples_per_second": 1452.925, "eval_steps_per_second": 4.54, "step": 18140 }, { "epoch": 7.57, "learning_rate": 1e-06, "loss": 0.1907, "step": 18160 }, { "epoch": 7.57, "eval_accuracy": 0.934125, "eval_loss": 0.19356830418109894, "eval_runtime": 16.2934, "eval_samples_per_second": 1472.985, "eval_steps_per_second": 4.603, "step": 18160 }, { "epoch": 7.58, "learning_rate": 1e-06, "loss": 0.156, "step": 18180 }, { "epoch": 7.58, "eval_accuracy": 0.932625, "eval_loss": 0.19519713521003723, "eval_runtime": 16.5186, "eval_samples_per_second": 1452.905, "eval_steps_per_second": 4.54, "step": 18180 }, { "epoch": 7.58, "learning_rate": 1e-06, "loss": 0.1656, "step": 18200 }, { "epoch": 7.58, "eval_accuracy": 0.934, "eval_loss": 0.19366022944450378, "eval_runtime": 17.0576, "eval_samples_per_second": 1406.998, "eval_steps_per_second": 4.397, "step": 18200 }, { "epoch": 7.59, "learning_rate": 1e-06, "loss": 0.1321, "step": 18220 }, { "epoch": 7.59, "eval_accuracy": 0.9335, "eval_loss": 0.19374357163906097, "eval_runtime": 16.6623, "eval_samples_per_second": 1440.375, "eval_steps_per_second": 4.501, "step": 18220 }, { "epoch": 7.6, "learning_rate": 1e-06, "loss": 0.1367, "step": 18240 }, { "epoch": 7.6, "eval_accuracy": 0.934875, "eval_loss": 0.1920449137687683, "eval_runtime": 17.6693, "eval_samples_per_second": 1358.287, "eval_steps_per_second": 4.245, "step": 18240 }, { "epoch": 7.61, "learning_rate": 1e-06, "loss": 0.1935, "step": 18260 }, { "epoch": 7.61, "eval_accuracy": 0.9331666666666667, "eval_loss": 0.19506241381168365, "eval_runtime": 16.8545, "eval_samples_per_second": 1423.948, "eval_steps_per_second": 4.45, "step": 18260 }, { "epoch": 7.62, "learning_rate": 1e-06, "loss": 0.1625, "step": 18280 }, { "epoch": 7.62, "eval_accuracy": 0.9312083333333333, "eval_loss": 0.20022885501384735, "eval_runtime": 15.6569, "eval_samples_per_second": 1532.875, "eval_steps_per_second": 4.79, "step": 18280 }, { "epoch": 7.62, "learning_rate": 1e-06, "loss": 0.1959, "step": 18300 }, { "epoch": 7.62, "eval_accuracy": 0.932, "eval_loss": 0.19843071699142456, "eval_runtime": 17.5133, "eval_samples_per_second": 1370.39, "eval_steps_per_second": 4.282, "step": 18300 }, { "epoch": 7.63, "learning_rate": 1e-06, "loss": 0.1523, "step": 18320 }, { "epoch": 7.63, "eval_accuracy": 0.9332916666666666, "eval_loss": 0.19627678394317627, "eval_runtime": 17.5177, "eval_samples_per_second": 1370.041, "eval_steps_per_second": 4.281, "step": 18320 }, { "epoch": 7.64, "learning_rate": 1e-06, "loss": 0.1248, "step": 18340 }, { "epoch": 7.64, "eval_accuracy": 0.9340416666666667, "eval_loss": 0.1947159618139267, "eval_runtime": 17.1988, "eval_samples_per_second": 1395.448, "eval_steps_per_second": 4.361, "step": 18340 }, { "epoch": 7.65, "learning_rate": 1e-06, "loss": 0.1575, "step": 18360 }, { "epoch": 7.65, "eval_accuracy": 0.9345416666666667, "eval_loss": 0.1939171701669693, "eval_runtime": 16.9923, "eval_samples_per_second": 1412.401, "eval_steps_per_second": 4.414, "step": 18360 }, { "epoch": 7.66, "learning_rate": 1e-06, "loss": 0.145, "step": 18380 }, { "epoch": 7.66, "eval_accuracy": 0.9348333333333333, "eval_loss": 0.19376207888126373, "eval_runtime": 16.8495, "eval_samples_per_second": 1424.374, "eval_steps_per_second": 4.451, "step": 18380 }, { "epoch": 7.67, "learning_rate": 1e-06, "loss": 0.1981, "step": 18400 }, { "epoch": 7.67, "eval_accuracy": 0.9344166666666667, "eval_loss": 0.1934199035167694, "eval_runtime": 16.679, "eval_samples_per_second": 1438.939, "eval_steps_per_second": 4.497, "step": 18400 }, { "epoch": 7.67, "learning_rate": 1e-06, "loss": 0.1237, "step": 18420 }, { "epoch": 7.67, "eval_accuracy": 0.9329166666666666, "eval_loss": 0.19676241278648376, "eval_runtime": 16.3441, "eval_samples_per_second": 1468.422, "eval_steps_per_second": 4.589, "step": 18420 }, { "epoch": 7.68, "learning_rate": 1e-06, "loss": 0.1649, "step": 18440 }, { "epoch": 7.68, "eval_accuracy": 0.9333333333333333, "eval_loss": 0.19533593952655792, "eval_runtime": 16.5476, "eval_samples_per_second": 1450.361, "eval_steps_per_second": 4.532, "step": 18440 }, { "epoch": 7.69, "learning_rate": 1e-06, "loss": 0.1832, "step": 18460 }, { "epoch": 7.69, "eval_accuracy": 0.9344166666666667, "eval_loss": 0.19351038336753845, "eval_runtime": 16.5442, "eval_samples_per_second": 1450.662, "eval_steps_per_second": 4.533, "step": 18460 }, { "epoch": 7.7, "learning_rate": 1e-06, "loss": 0.1337, "step": 18480 }, { "epoch": 7.7, "eval_accuracy": 0.9326666666666666, "eval_loss": 0.197190523147583, "eval_runtime": 16.6092, "eval_samples_per_second": 1444.979, "eval_steps_per_second": 4.516, "step": 18480 }, { "epoch": 7.71, "learning_rate": 1e-06, "loss": 0.1407, "step": 18500 }, { "epoch": 7.71, "eval_accuracy": 0.9353333333333333, "eval_loss": 0.19291214644908905, "eval_runtime": 16.5801, "eval_samples_per_second": 1447.52, "eval_steps_per_second": 4.523, "step": 18500 }, { "epoch": 7.72, "learning_rate": 1e-06, "loss": 0.1489, "step": 18520 }, { "epoch": 7.72, "eval_accuracy": 0.9338333333333333, "eval_loss": 0.19552947580814362, "eval_runtime": 16.9798, "eval_samples_per_second": 1413.444, "eval_steps_per_second": 4.417, "step": 18520 }, { "epoch": 7.72, "learning_rate": 1e-06, "loss": 0.1603, "step": 18540 }, { "epoch": 7.72, "eval_accuracy": 0.9324583333333333, "eval_loss": 0.19699038565158844, "eval_runtime": 17.4729, "eval_samples_per_second": 1373.558, "eval_steps_per_second": 4.292, "step": 18540 }, { "epoch": 7.73, "learning_rate": 1e-06, "loss": 0.1468, "step": 18560 }, { "epoch": 7.73, "eval_accuracy": 0.9337083333333334, "eval_loss": 0.19590060412883759, "eval_runtime": 16.5983, "eval_samples_per_second": 1445.927, "eval_steps_per_second": 4.519, "step": 18560 }, { "epoch": 7.74, "learning_rate": 1e-06, "loss": 0.213, "step": 18580 }, { "epoch": 7.74, "eval_accuracy": 0.934125, "eval_loss": 0.1953480988740921, "eval_runtime": 15.9179, "eval_samples_per_second": 1507.733, "eval_steps_per_second": 4.712, "step": 18580 }, { "epoch": 7.75, "learning_rate": 1e-06, "loss": 0.2005, "step": 18600 }, { "epoch": 7.75, "eval_accuracy": 0.934875, "eval_loss": 0.1933307647705078, "eval_runtime": 16.5952, "eval_samples_per_second": 1446.199, "eval_steps_per_second": 4.519, "step": 18600 }, { "epoch": 7.76, "learning_rate": 1e-06, "loss": 0.1741, "step": 18620 }, { "epoch": 7.76, "eval_accuracy": 0.9349583333333333, "eval_loss": 0.19205695390701294, "eval_runtime": 16.7425, "eval_samples_per_second": 1433.48, "eval_steps_per_second": 4.48, "step": 18620 }, { "epoch": 7.77, "learning_rate": 1e-06, "loss": 0.1618, "step": 18640 }, { "epoch": 7.77, "eval_accuracy": 0.93375, "eval_loss": 0.19280995428562164, "eval_runtime": 16.6122, "eval_samples_per_second": 1444.719, "eval_steps_per_second": 4.515, "step": 18640 }, { "epoch": 7.78, "learning_rate": 1e-06, "loss": 0.2304, "step": 18660 }, { "epoch": 7.78, "eval_accuracy": 0.93325, "eval_loss": 0.1952376365661621, "eval_runtime": 17.2502, "eval_samples_per_second": 1391.287, "eval_steps_per_second": 4.348, "step": 18660 }, { "epoch": 7.78, "learning_rate": 1e-06, "loss": 0.1729, "step": 18680 }, { "epoch": 7.78, "eval_accuracy": 0.933875, "eval_loss": 0.1918487548828125, "eval_runtime": 16.6725, "eval_samples_per_second": 1439.495, "eval_steps_per_second": 4.498, "step": 18680 }, { "epoch": 7.79, "learning_rate": 1e-06, "loss": 0.1632, "step": 18700 }, { "epoch": 7.79, "eval_accuracy": 0.9339583333333333, "eval_loss": 0.193745419383049, "eval_runtime": 17.0655, "eval_samples_per_second": 1406.35, "eval_steps_per_second": 4.395, "step": 18700 }, { "epoch": 7.8, "learning_rate": 1e-06, "loss": 0.1381, "step": 18720 }, { "epoch": 7.8, "eval_accuracy": 0.9337083333333334, "eval_loss": 0.19378027319908142, "eval_runtime": 16.1642, "eval_samples_per_second": 1484.763, "eval_steps_per_second": 4.64, "step": 18720 }, { "epoch": 7.81, "learning_rate": 1e-06, "loss": 0.1334, "step": 18740 }, { "epoch": 7.81, "eval_accuracy": 0.9339166666666666, "eval_loss": 0.19540290534496307, "eval_runtime": 17.2682, "eval_samples_per_second": 1389.836, "eval_steps_per_second": 4.343, "step": 18740 }, { "epoch": 7.82, "learning_rate": 1e-06, "loss": 0.1706, "step": 18760 }, { "epoch": 7.82, "eval_accuracy": 0.9347083333333334, "eval_loss": 0.19170990586280823, "eval_runtime": 17.5111, "eval_samples_per_second": 1370.562, "eval_steps_per_second": 4.283, "step": 18760 }, { "epoch": 7.83, "learning_rate": 1e-06, "loss": 0.1774, "step": 18780 }, { "epoch": 7.83, "eval_accuracy": 0.93425, "eval_loss": 0.19449305534362793, "eval_runtime": 18.1073, "eval_samples_per_second": 1325.429, "eval_steps_per_second": 4.142, "step": 18780 }, { "epoch": 7.83, "learning_rate": 1e-06, "loss": 0.1891, "step": 18800 }, { "epoch": 7.83, "eval_accuracy": 0.935125, "eval_loss": 0.19196221232414246, "eval_runtime": 16.1253, "eval_samples_per_second": 1488.341, "eval_steps_per_second": 4.651, "step": 18800 }, { "epoch": 7.84, "learning_rate": 1e-06, "loss": 0.1949, "step": 18820 }, { "epoch": 7.84, "eval_accuracy": 0.9347083333333334, "eval_loss": 0.1915530413389206, "eval_runtime": 16.928, "eval_samples_per_second": 1417.773, "eval_steps_per_second": 4.431, "step": 18820 }, { "epoch": 7.85, "learning_rate": 1e-06, "loss": 0.1511, "step": 18840 }, { "epoch": 7.85, "eval_accuracy": 0.9354583333333333, "eval_loss": 0.1908595710992813, "eval_runtime": 16.4669, "eval_samples_per_second": 1457.473, "eval_steps_per_second": 4.555, "step": 18840 }, { "epoch": 7.86, "learning_rate": 1e-06, "loss": 0.1501, "step": 18860 }, { "epoch": 7.86, "eval_accuracy": 0.9355, "eval_loss": 0.19139742851257324, "eval_runtime": 16.7079, "eval_samples_per_second": 1436.446, "eval_steps_per_second": 4.489, "step": 18860 }, { "epoch": 7.87, "learning_rate": 1e-06, "loss": 0.1367, "step": 18880 }, { "epoch": 7.87, "eval_accuracy": 0.9359166666666666, "eval_loss": 0.19081765413284302, "eval_runtime": 16.8019, "eval_samples_per_second": 1428.413, "eval_steps_per_second": 4.464, "step": 18880 }, { "epoch": 7.88, "learning_rate": 1e-06, "loss": 0.179, "step": 18900 }, { "epoch": 7.88, "eval_accuracy": 0.936, "eval_loss": 0.1911996752023697, "eval_runtime": 17.4687, "eval_samples_per_second": 1373.888, "eval_steps_per_second": 4.293, "step": 18900 }, { "epoch": 7.88, "learning_rate": 1e-06, "loss": 0.1737, "step": 18920 }, { "epoch": 7.88, "eval_accuracy": 0.9356666666666666, "eval_loss": 0.19142092764377594, "eval_runtime": 17.9819, "eval_samples_per_second": 1334.677, "eval_steps_per_second": 4.171, "step": 18920 }, { "epoch": 7.89, "learning_rate": 1e-06, "loss": 0.1821, "step": 18940 }, { "epoch": 7.89, "eval_accuracy": 0.9349166666666666, "eval_loss": 0.19184531271457672, "eval_runtime": 17.6224, "eval_samples_per_second": 1361.903, "eval_steps_per_second": 4.256, "step": 18940 }, { "epoch": 7.9, "learning_rate": 1e-06, "loss": 0.1611, "step": 18960 }, { "epoch": 7.9, "eval_accuracy": 0.9360416666666667, "eval_loss": 0.19008147716522217, "eval_runtime": 16.936, "eval_samples_per_second": 1417.103, "eval_steps_per_second": 4.428, "step": 18960 }, { "epoch": 7.91, "learning_rate": 1e-06, "loss": 0.1389, "step": 18980 }, { "epoch": 7.91, "eval_accuracy": 0.935875, "eval_loss": 0.1900467723608017, "eval_runtime": 17.5135, "eval_samples_per_second": 1370.37, "eval_steps_per_second": 4.282, "step": 18980 }, { "epoch": 7.92, "learning_rate": 1e-06, "loss": 0.1751, "step": 19000 }, { "epoch": 7.92, "eval_accuracy": 0.936875, "eval_loss": 0.19042351841926575, "eval_runtime": 17.8277, "eval_samples_per_second": 1346.22, "eval_steps_per_second": 4.207, "step": 19000 }, { "epoch": 7.92, "learning_rate": 1e-06, "loss": 0.1955, "step": 19020 }, { "epoch": 7.92, "eval_accuracy": 0.9355, "eval_loss": 0.19257663190364838, "eval_runtime": 17.5409, "eval_samples_per_second": 1368.229, "eval_steps_per_second": 4.276, "step": 19020 }, { "epoch": 7.93, "learning_rate": 1e-06, "loss": 0.1762, "step": 19040 }, { "epoch": 7.93, "eval_accuracy": 0.9362083333333333, "eval_loss": 0.19056767225265503, "eval_runtime": 17.5352, "eval_samples_per_second": 1368.676, "eval_steps_per_second": 4.277, "step": 19040 }, { "epoch": 7.94, "learning_rate": 1e-06, "loss": 0.1417, "step": 19060 }, { "epoch": 7.94, "eval_accuracy": 0.935625, "eval_loss": 0.19115488231182098, "eval_runtime": 17.4988, "eval_samples_per_second": 1371.52, "eval_steps_per_second": 4.286, "step": 19060 }, { "epoch": 7.95, "learning_rate": 1e-06, "loss": 0.1602, "step": 19080 }, { "epoch": 7.95, "eval_accuracy": 0.9348333333333333, "eval_loss": 0.19205690920352936, "eval_runtime": 17.3557, "eval_samples_per_second": 1382.829, "eval_steps_per_second": 4.321, "step": 19080 }, { "epoch": 7.96, "learning_rate": 1e-06, "loss": 0.1355, "step": 19100 }, { "epoch": 7.96, "eval_accuracy": 0.9320833333333334, "eval_loss": 0.1974695324897766, "eval_runtime": 17.4599, "eval_samples_per_second": 1374.582, "eval_steps_per_second": 4.296, "step": 19100 }, { "epoch": 7.97, "learning_rate": 1e-06, "loss": 0.1488, "step": 19120 }, { "epoch": 7.97, "eval_accuracy": 0.9339166666666666, "eval_loss": 0.19483338296413422, "eval_runtime": 17.4251, "eval_samples_per_second": 1377.325, "eval_steps_per_second": 4.304, "step": 19120 }, { "epoch": 7.97, "learning_rate": 1e-06, "loss": 0.2128, "step": 19140 }, { "epoch": 7.97, "eval_accuracy": 0.9356666666666666, "eval_loss": 0.19111751019954681, "eval_runtime": 17.5678, "eval_samples_per_second": 1366.137, "eval_steps_per_second": 4.269, "step": 19140 }, { "epoch": 7.98, "learning_rate": 1e-06, "loss": 0.1223, "step": 19160 }, { "epoch": 7.98, "eval_accuracy": 0.9352083333333333, "eval_loss": 0.19168196618556976, "eval_runtime": 17.4393, "eval_samples_per_second": 1376.199, "eval_steps_per_second": 4.301, "step": 19160 }, { "epoch": 7.99, "learning_rate": 1e-06, "loss": 0.1564, "step": 19180 }, { "epoch": 7.99, "eval_accuracy": 0.934, "eval_loss": 0.19453711807727814, "eval_runtime": 16.8108, "eval_samples_per_second": 1427.651, "eval_steps_per_second": 4.461, "step": 19180 }, { "epoch": 8.0, "learning_rate": 1e-06, "loss": 0.1961, "step": 19200 }, { "epoch": 8.0, "eval_accuracy": 0.934125, "eval_loss": 0.19330745935440063, "eval_runtime": 16.8924, "eval_samples_per_second": 1420.757, "eval_steps_per_second": 4.44, "step": 19200 }, { "epoch": 8.01, "learning_rate": 1e-06, "loss": 0.1791, "step": 19220 }, { "epoch": 8.01, "eval_accuracy": 0.9350416666666667, "eval_loss": 0.19044369459152222, "eval_runtime": 16.3873, "eval_samples_per_second": 1464.552, "eval_steps_per_second": 4.577, "step": 19220 }, { "epoch": 8.02, "learning_rate": 1e-06, "loss": 0.1466, "step": 19240 }, { "epoch": 8.02, "eval_accuracy": 0.935875, "eval_loss": 0.19002403318881989, "eval_runtime": 16.82, "eval_samples_per_second": 1426.875, "eval_steps_per_second": 4.459, "step": 19240 }, { "epoch": 8.03, "learning_rate": 1e-06, "loss": 0.135, "step": 19260 }, { "epoch": 8.03, "eval_accuracy": 0.9357083333333334, "eval_loss": 0.19046556949615479, "eval_runtime": 16.49, "eval_samples_per_second": 1455.424, "eval_steps_per_second": 4.548, "step": 19260 }, { "epoch": 8.03, "learning_rate": 1e-06, "loss": 0.129, "step": 19280 }, { "epoch": 8.03, "eval_accuracy": 0.935125, "eval_loss": 0.1913367062807083, "eval_runtime": 17.0223, "eval_samples_per_second": 1409.919, "eval_steps_per_second": 4.406, "step": 19280 }, { "epoch": 8.04, "learning_rate": 1e-06, "loss": 0.1778, "step": 19300 }, { "epoch": 8.04, "eval_accuracy": 0.9329166666666666, "eval_loss": 0.1969105303287506, "eval_runtime": 16.1602, "eval_samples_per_second": 1485.13, "eval_steps_per_second": 4.641, "step": 19300 }, { "epoch": 8.05, "learning_rate": 1e-06, "loss": 0.1362, "step": 19320 }, { "epoch": 8.05, "eval_accuracy": 0.9340416666666667, "eval_loss": 0.19413326680660248, "eval_runtime": 17.3298, "eval_samples_per_second": 1384.896, "eval_steps_per_second": 4.328, "step": 19320 }, { "epoch": 8.06, "learning_rate": 1e-06, "loss": 0.157, "step": 19340 }, { "epoch": 8.06, "eval_accuracy": 0.93275, "eval_loss": 0.19760599732398987, "eval_runtime": 17.4331, "eval_samples_per_second": 1376.69, "eval_steps_per_second": 4.302, "step": 19340 }, { "epoch": 8.07, "learning_rate": 1e-06, "loss": 0.1544, "step": 19360 }, { "epoch": 8.07, "eval_accuracy": 0.935125, "eval_loss": 0.1911655217409134, "eval_runtime": 16.3075, "eval_samples_per_second": 1471.715, "eval_steps_per_second": 4.599, "step": 19360 }, { "epoch": 8.07, "learning_rate": 1e-06, "loss": 0.1566, "step": 19380 }, { "epoch": 8.07, "eval_accuracy": 0.9355, "eval_loss": 0.19158610701560974, "eval_runtime": 16.834, "eval_samples_per_second": 1425.685, "eval_steps_per_second": 4.455, "step": 19380 }, { "epoch": 8.08, "learning_rate": 1e-06, "loss": 0.1572, "step": 19400 }, { "epoch": 8.08, "eval_accuracy": 0.93525, "eval_loss": 0.19257643818855286, "eval_runtime": 16.0408, "eval_samples_per_second": 1496.18, "eval_steps_per_second": 4.676, "step": 19400 }, { "epoch": 8.09, "learning_rate": 1e-06, "loss": 0.1077, "step": 19420 }, { "epoch": 8.09, "eval_accuracy": 0.9349583333333333, "eval_loss": 0.1925935298204422, "eval_runtime": 17.6153, "eval_samples_per_second": 1362.454, "eval_steps_per_second": 4.258, "step": 19420 }, { "epoch": 8.1, "learning_rate": 1e-06, "loss": 0.1365, "step": 19440 }, { "epoch": 8.1, "eval_accuracy": 0.9346666666666666, "eval_loss": 0.19453419744968414, "eval_runtime": 16.5289, "eval_samples_per_second": 1451.999, "eval_steps_per_second": 4.537, "step": 19440 }, { "epoch": 8.11, "learning_rate": 1e-06, "loss": 0.1526, "step": 19460 }, { "epoch": 8.11, "eval_accuracy": 0.9355833333333333, "eval_loss": 0.19276762008666992, "eval_runtime": 16.3532, "eval_samples_per_second": 1467.604, "eval_steps_per_second": 4.586, "step": 19460 }, { "epoch": 8.12, "learning_rate": 1e-06, "loss": 0.1794, "step": 19480 }, { "epoch": 8.12, "eval_accuracy": 0.9357083333333334, "eval_loss": 0.19133615493774414, "eval_runtime": 16.7591, "eval_samples_per_second": 1432.06, "eval_steps_per_second": 4.475, "step": 19480 }, { "epoch": 8.12, "learning_rate": 1e-06, "loss": 0.127, "step": 19500 }, { "epoch": 8.12, "eval_accuracy": 0.9350833333333334, "eval_loss": 0.19240260124206543, "eval_runtime": 17.3908, "eval_samples_per_second": 1380.04, "eval_steps_per_second": 4.313, "step": 19500 }, { "epoch": 8.13, "learning_rate": 1e-06, "loss": 0.1636, "step": 19520 }, { "epoch": 8.13, "eval_accuracy": 0.9345, "eval_loss": 0.19413629174232483, "eval_runtime": 17.1984, "eval_samples_per_second": 1395.482, "eval_steps_per_second": 4.361, "step": 19520 }, { "epoch": 8.14, "learning_rate": 1e-06, "loss": 0.1826, "step": 19540 }, { "epoch": 8.14, "eval_accuracy": 0.934625, "eval_loss": 0.19350507855415344, "eval_runtime": 16.3498, "eval_samples_per_second": 1467.911, "eval_steps_per_second": 4.587, "step": 19540 }, { "epoch": 8.15, "learning_rate": 1e-06, "loss": 0.1524, "step": 19560 }, { "epoch": 8.15, "eval_accuracy": 0.933375, "eval_loss": 0.19596153497695923, "eval_runtime": 16.6776, "eval_samples_per_second": 1439.057, "eval_steps_per_second": 4.497, "step": 19560 }, { "epoch": 8.16, "learning_rate": 1e-06, "loss": 0.1208, "step": 19580 }, { "epoch": 8.16, "eval_accuracy": 0.9338333333333333, "eval_loss": 0.19459760189056396, "eval_runtime": 16.3642, "eval_samples_per_second": 1466.618, "eval_steps_per_second": 4.583, "step": 19580 }, { "epoch": 8.17, "learning_rate": 1e-06, "loss": 0.1516, "step": 19600 }, { "epoch": 8.17, "eval_accuracy": 0.9324583333333333, "eval_loss": 0.19873768091201782, "eval_runtime": 16.1804, "eval_samples_per_second": 1483.275, "eval_steps_per_second": 4.635, "step": 19600 }, { "epoch": 8.18, "learning_rate": 1e-06, "loss": 0.1581, "step": 19620 }, { "epoch": 8.18, "eval_accuracy": 0.935375, "eval_loss": 0.19273337721824646, "eval_runtime": 16.628, "eval_samples_per_second": 1443.351, "eval_steps_per_second": 4.51, "step": 19620 }, { "epoch": 8.18, "learning_rate": 1e-06, "loss": 0.1537, "step": 19640 }, { "epoch": 8.18, "eval_accuracy": 0.935625, "eval_loss": 0.1912853717803955, "eval_runtime": 16.501, "eval_samples_per_second": 1454.455, "eval_steps_per_second": 4.545, "step": 19640 }, { "epoch": 8.19, "learning_rate": 1e-06, "loss": 0.1165, "step": 19660 }, { "epoch": 8.19, "eval_accuracy": 0.9357083333333334, "eval_loss": 0.19141745567321777, "eval_runtime": 16.8771, "eval_samples_per_second": 1422.043, "eval_steps_per_second": 4.444, "step": 19660 }, { "epoch": 8.2, "learning_rate": 1e-06, "loss": 0.1374, "step": 19680 }, { "epoch": 8.2, "eval_accuracy": 0.9356666666666666, "eval_loss": 0.1914907842874527, "eval_runtime": 15.9508, "eval_samples_per_second": 1504.626, "eval_steps_per_second": 4.702, "step": 19680 }, { "epoch": 8.21, "learning_rate": 1e-06, "loss": 0.1806, "step": 19700 }, { "epoch": 8.21, "eval_accuracy": 0.9340416666666667, "eval_loss": 0.1957368105649948, "eval_runtime": 17.0085, "eval_samples_per_second": 1411.059, "eval_steps_per_second": 4.41, "step": 19700 }, { "epoch": 8.22, "learning_rate": 1e-06, "loss": 0.2198, "step": 19720 }, { "epoch": 8.22, "eval_accuracy": 0.9335, "eval_loss": 0.19590935111045837, "eval_runtime": 17.4075, "eval_samples_per_second": 1378.713, "eval_steps_per_second": 4.308, "step": 19720 }, { "epoch": 8.22, "learning_rate": 1e-06, "loss": 0.177, "step": 19740 }, { "epoch": 8.22, "eval_accuracy": 0.93525, "eval_loss": 0.19214990735054016, "eval_runtime": 17.3221, "eval_samples_per_second": 1385.511, "eval_steps_per_second": 4.33, "step": 19740 }, { "epoch": 8.23, "learning_rate": 1e-06, "loss": 0.1417, "step": 19760 }, { "epoch": 8.23, "eval_accuracy": 0.9346666666666666, "eval_loss": 0.1939082145690918, "eval_runtime": 17.0832, "eval_samples_per_second": 1404.892, "eval_steps_per_second": 4.39, "step": 19760 }, { "epoch": 8.24, "learning_rate": 1e-06, "loss": 0.2049, "step": 19780 }, { "epoch": 8.24, "eval_accuracy": 0.9353333333333333, "eval_loss": 0.19216576218605042, "eval_runtime": 16.5657, "eval_samples_per_second": 1448.776, "eval_steps_per_second": 4.527, "step": 19780 }, { "epoch": 8.25, "learning_rate": 1e-06, "loss": 0.1704, "step": 19800 }, { "epoch": 8.25, "eval_accuracy": 0.9356666666666666, "eval_loss": 0.1919444501399994, "eval_runtime": 17.0092, "eval_samples_per_second": 1410.998, "eval_steps_per_second": 4.409, "step": 19800 }, { "epoch": 8.26, "learning_rate": 1e-06, "loss": 0.1448, "step": 19820 }, { "epoch": 8.26, "eval_accuracy": 0.9349166666666666, "eval_loss": 0.19339194893836975, "eval_runtime": 16.9285, "eval_samples_per_second": 1417.726, "eval_steps_per_second": 4.43, "step": 19820 }, { "epoch": 8.27, "learning_rate": 1e-06, "loss": 0.1578, "step": 19840 }, { "epoch": 8.27, "eval_accuracy": 0.9350833333333334, "eval_loss": 0.19213108718395233, "eval_runtime": 16.9637, "eval_samples_per_second": 1414.783, "eval_steps_per_second": 4.421, "step": 19840 }, { "epoch": 8.28, "learning_rate": 1e-06, "loss": 0.2108, "step": 19860 }, { "epoch": 8.28, "eval_accuracy": 0.9352916666666666, "eval_loss": 0.1917588859796524, "eval_runtime": 16.9832, "eval_samples_per_second": 1413.159, "eval_steps_per_second": 4.416, "step": 19860 }, { "epoch": 8.28, "learning_rate": 1e-06, "loss": 0.1945, "step": 19880 }, { "epoch": 8.28, "eval_accuracy": 0.9352916666666666, "eval_loss": 0.1911618411540985, "eval_runtime": 17.2548, "eval_samples_per_second": 1390.917, "eval_steps_per_second": 4.347, "step": 19880 }, { "epoch": 8.29, "learning_rate": 1e-06, "loss": 0.1808, "step": 19900 }, { "epoch": 8.29, "eval_accuracy": 0.935125, "eval_loss": 0.19140420854091644, "eval_runtime": 16.7213, "eval_samples_per_second": 1435.291, "eval_steps_per_second": 4.485, "step": 19900 }, { "epoch": 8.3, "learning_rate": 1e-06, "loss": 0.153, "step": 19920 }, { "epoch": 8.3, "eval_accuracy": 0.9332916666666666, "eval_loss": 0.19377712905406952, "eval_runtime": 17.2963, "eval_samples_per_second": 1387.582, "eval_steps_per_second": 4.336, "step": 19920 }, { "epoch": 8.31, "learning_rate": 1e-06, "loss": 0.1723, "step": 19940 }, { "epoch": 8.31, "eval_accuracy": 0.9352083333333333, "eval_loss": 0.19146102666854858, "eval_runtime": 16.9128, "eval_samples_per_second": 1419.047, "eval_steps_per_second": 4.435, "step": 19940 }, { "epoch": 8.32, "learning_rate": 1e-06, "loss": 0.1505, "step": 19960 }, { "epoch": 8.32, "eval_accuracy": 0.9335, "eval_loss": 0.1947011649608612, "eval_runtime": 17.0812, "eval_samples_per_second": 1405.052, "eval_steps_per_second": 4.391, "step": 19960 }, { "epoch": 8.32, "learning_rate": 1e-06, "loss": 0.1784, "step": 19980 }, { "epoch": 8.32, "eval_accuracy": 0.935, "eval_loss": 0.19037066400051117, "eval_runtime": 16.5401, "eval_samples_per_second": 1451.021, "eval_steps_per_second": 4.534, "step": 19980 }, { "epoch": 8.33, "learning_rate": 1e-06, "loss": 0.1611, "step": 20000 }, { "epoch": 8.33, "eval_accuracy": 0.9344583333333333, "eval_loss": 0.1925501823425293, "eval_runtime": 17.2658, "eval_samples_per_second": 1390.034, "eval_steps_per_second": 4.344, "step": 20000 }, { "epoch": 8.34, "learning_rate": 1e-06, "loss": 0.1505, "step": 20020 }, { "epoch": 8.34, "eval_accuracy": 0.9349583333333333, "eval_loss": 0.19143855571746826, "eval_runtime": 16.5059, "eval_samples_per_second": 1454.028, "eval_steps_per_second": 4.544, "step": 20020 }, { "epoch": 8.35, "learning_rate": 1e-06, "loss": 0.148, "step": 20040 }, { "epoch": 8.35, "eval_accuracy": 0.9352083333333333, "eval_loss": 0.1917656511068344, "eval_runtime": 16.2857, "eval_samples_per_second": 1473.686, "eval_steps_per_second": 4.605, "step": 20040 }, { "epoch": 8.36, "learning_rate": 1e-06, "loss": 0.1958, "step": 20060 }, { "epoch": 8.36, "eval_accuracy": 0.9360833333333334, "eval_loss": 0.1908416450023651, "eval_runtime": 16.8917, "eval_samples_per_second": 1420.819, "eval_steps_per_second": 4.44, "step": 20060 }, { "epoch": 8.37, "learning_rate": 1e-06, "loss": 0.1007, "step": 20080 }, { "epoch": 8.37, "eval_accuracy": 0.93475, "eval_loss": 0.19260205328464508, "eval_runtime": 16.2197, "eval_samples_per_second": 1479.681, "eval_steps_per_second": 4.624, "step": 20080 }, { "epoch": 8.38, "learning_rate": 1e-06, "loss": 0.1829, "step": 20100 }, { "epoch": 8.38, "eval_accuracy": 0.93425, "eval_loss": 0.19410496950149536, "eval_runtime": 16.7814, "eval_samples_per_second": 1430.153, "eval_steps_per_second": 4.469, "step": 20100 }, { "epoch": 8.38, "learning_rate": 1e-06, "loss": 0.1413, "step": 20120 }, { "epoch": 8.38, "eval_accuracy": 0.93475, "eval_loss": 0.19318026304244995, "eval_runtime": 16.4709, "eval_samples_per_second": 1457.115, "eval_steps_per_second": 4.553, "step": 20120 }, { "epoch": 8.39, "learning_rate": 1e-06, "loss": 0.1769, "step": 20140 }, { "epoch": 8.39, "eval_accuracy": 0.9340833333333334, "eval_loss": 0.19297830760478973, "eval_runtime": 16.9359, "eval_samples_per_second": 1417.108, "eval_steps_per_second": 4.428, "step": 20140 }, { "epoch": 8.4, "learning_rate": 1e-06, "loss": 0.1843, "step": 20160 }, { "epoch": 8.4, "eval_accuracy": 0.9353333333333333, "eval_loss": 0.19084292650222778, "eval_runtime": 17.2429, "eval_samples_per_second": 1391.878, "eval_steps_per_second": 4.35, "step": 20160 }, { "epoch": 8.41, "learning_rate": 1e-06, "loss": 0.2067, "step": 20180 }, { "epoch": 8.41, "eval_accuracy": 0.9352916666666666, "eval_loss": 0.19021664559841156, "eval_runtime": 16.663, "eval_samples_per_second": 1440.319, "eval_steps_per_second": 4.501, "step": 20180 }, { "epoch": 8.42, "learning_rate": 1e-06, "loss": 0.1183, "step": 20200 }, { "epoch": 8.42, "eval_accuracy": 0.9347083333333334, "eval_loss": 0.19230937957763672, "eval_runtime": 16.3196, "eval_samples_per_second": 1470.622, "eval_steps_per_second": 4.596, "step": 20200 }, { "epoch": 8.43, "learning_rate": 1e-06, "loss": 0.2065, "step": 20220 }, { "epoch": 8.43, "eval_accuracy": 0.9350833333333334, "eval_loss": 0.19128607213497162, "eval_runtime": 16.6007, "eval_samples_per_second": 1445.718, "eval_steps_per_second": 4.518, "step": 20220 }, { "epoch": 8.43, "learning_rate": 1e-06, "loss": 0.1466, "step": 20240 }, { "epoch": 8.43, "eval_accuracy": 0.9354166666666667, "eval_loss": 0.19124051928520203, "eval_runtime": 17.0198, "eval_samples_per_second": 1410.12, "eval_steps_per_second": 4.407, "step": 20240 }, { "epoch": 8.44, "learning_rate": 1e-06, "loss": 0.1557, "step": 20260 }, { "epoch": 8.44, "eval_accuracy": 0.9355833333333333, "eval_loss": 0.19068805873394012, "eval_runtime": 16.4856, "eval_samples_per_second": 1455.816, "eval_steps_per_second": 4.549, "step": 20260 }, { "epoch": 8.45, "learning_rate": 1e-06, "loss": 0.1437, "step": 20280 }, { "epoch": 8.45, "eval_accuracy": 0.934875, "eval_loss": 0.1921066790819168, "eval_runtime": 16.6454, "eval_samples_per_second": 1441.842, "eval_steps_per_second": 4.506, "step": 20280 }, { "epoch": 8.46, "learning_rate": 1e-06, "loss": 0.1567, "step": 20300 }, { "epoch": 8.46, "eval_accuracy": 0.9345833333333333, "eval_loss": 0.1937917321920395, "eval_runtime": 16.4297, "eval_samples_per_second": 1460.769, "eval_steps_per_second": 4.565, "step": 20300 }, { "epoch": 8.47, "learning_rate": 1e-06, "loss": 0.1363, "step": 20320 }, { "epoch": 8.47, "eval_accuracy": 0.935625, "eval_loss": 0.19153279066085815, "eval_runtime": 16.7967, "eval_samples_per_second": 1428.851, "eval_steps_per_second": 4.465, "step": 20320 }, { "epoch": 8.47, "learning_rate": 1e-06, "loss": 0.2041, "step": 20340 }, { "epoch": 8.47, "eval_accuracy": 0.9349166666666666, "eval_loss": 0.19226667284965515, "eval_runtime": 16.8781, "eval_samples_per_second": 1421.958, "eval_steps_per_second": 4.444, "step": 20340 }, { "epoch": 8.48, "learning_rate": 1e-06, "loss": 0.1656, "step": 20360 }, { "epoch": 8.48, "eval_accuracy": 0.933875, "eval_loss": 0.19749757647514343, "eval_runtime": 16.5317, "eval_samples_per_second": 1451.756, "eval_steps_per_second": 4.537, "step": 20360 }, { "epoch": 8.49, "learning_rate": 1e-06, "loss": 0.1774, "step": 20380 }, { "epoch": 8.49, "eval_accuracy": 0.9335416666666667, "eval_loss": 0.1959267109632492, "eval_runtime": 16.9568, "eval_samples_per_second": 1415.361, "eval_steps_per_second": 4.423, "step": 20380 }, { "epoch": 8.5, "learning_rate": 1e-06, "loss": 0.1472, "step": 20400 }, { "epoch": 8.5, "eval_accuracy": 0.9355833333333333, "eval_loss": 0.19094142317771912, "eval_runtime": 16.4125, "eval_samples_per_second": 1462.3, "eval_steps_per_second": 4.57, "step": 20400 }, { "epoch": 8.51, "learning_rate": 1e-06, "loss": 0.178, "step": 20420 }, { "epoch": 8.51, "eval_accuracy": 0.9337083333333334, "eval_loss": 0.19597363471984863, "eval_runtime": 16.6336, "eval_samples_per_second": 1442.864, "eval_steps_per_second": 4.509, "step": 20420 }, { "epoch": 8.52, "learning_rate": 1e-06, "loss": 0.1561, "step": 20440 }, { "epoch": 8.52, "eval_accuracy": 0.9354583333333333, "eval_loss": 0.19258776307106018, "eval_runtime": 17.0606, "eval_samples_per_second": 1406.748, "eval_steps_per_second": 4.396, "step": 20440 }, { "epoch": 8.53, "learning_rate": 1e-06, "loss": 0.1726, "step": 20460 }, { "epoch": 8.53, "eval_accuracy": 0.9355416666666667, "eval_loss": 0.19182823598384857, "eval_runtime": 17.3, "eval_samples_per_second": 1387.283, "eval_steps_per_second": 4.335, "step": 20460 }, { "epoch": 8.53, "learning_rate": 1e-06, "loss": 0.1465, "step": 20480 }, { "epoch": 8.53, "eval_accuracy": 0.9356666666666666, "eval_loss": 0.19172848761081696, "eval_runtime": 16.287, "eval_samples_per_second": 1473.569, "eval_steps_per_second": 4.605, "step": 20480 }, { "epoch": 8.54, "learning_rate": 1e-06, "loss": 0.1504, "step": 20500 }, { "epoch": 8.54, "eval_accuracy": 0.9354166666666667, "eval_loss": 0.19191785156726837, "eval_runtime": 15.5845, "eval_samples_per_second": 1539.99, "eval_steps_per_second": 4.812, "step": 20500 }, { "epoch": 8.55, "learning_rate": 1e-06, "loss": 0.1593, "step": 20520 }, { "epoch": 8.55, "eval_accuracy": 0.9358333333333333, "eval_loss": 0.1908203661441803, "eval_runtime": 16.4463, "eval_samples_per_second": 1459.298, "eval_steps_per_second": 4.56, "step": 20520 }, { "epoch": 8.56, "learning_rate": 1e-06, "loss": 0.1346, "step": 20540 }, { "epoch": 8.56, "eval_accuracy": 0.9362916666666666, "eval_loss": 0.19051861763000488, "eval_runtime": 15.646, "eval_samples_per_second": 1533.937, "eval_steps_per_second": 4.794, "step": 20540 }, { "epoch": 8.57, "learning_rate": 1e-06, "loss": 0.1509, "step": 20560 }, { "epoch": 8.57, "eval_accuracy": 0.9364166666666667, "eval_loss": 0.18970748782157898, "eval_runtime": 15.4588, "eval_samples_per_second": 1552.512, "eval_steps_per_second": 4.852, "step": 20560 }, { "epoch": 8.57, "learning_rate": 1e-06, "loss": 0.1419, "step": 20580 }, { "epoch": 8.57, "eval_accuracy": 0.936375, "eval_loss": 0.18967027962207794, "eval_runtime": 15.484, "eval_samples_per_second": 1549.985, "eval_steps_per_second": 4.844, "step": 20580 }, { "epoch": 8.58, "learning_rate": 1e-06, "loss": 0.1477, "step": 20600 }, { "epoch": 8.58, "eval_accuracy": 0.93525, "eval_loss": 0.1920623481273651, "eval_runtime": 15.7145, "eval_samples_per_second": 1527.252, "eval_steps_per_second": 4.773, "step": 20600 }, { "epoch": 8.59, "learning_rate": 1e-06, "loss": 0.1791, "step": 20620 }, { "epoch": 8.59, "eval_accuracy": 0.9349583333333333, "eval_loss": 0.19344042241573334, "eval_runtime": 15.3045, "eval_samples_per_second": 1568.164, "eval_steps_per_second": 4.901, "step": 20620 }, { "epoch": 8.6, "learning_rate": 1e-06, "loss": 0.1848, "step": 20640 }, { "epoch": 8.6, "eval_accuracy": 0.935, "eval_loss": 0.1935572475194931, "eval_runtime": 15.6596, "eval_samples_per_second": 1532.606, "eval_steps_per_second": 4.789, "step": 20640 }, { "epoch": 8.61, "learning_rate": 1e-06, "loss": 0.1561, "step": 20660 }, { "epoch": 8.61, "eval_accuracy": 0.9360833333333334, "eval_loss": 0.19057251513004303, "eval_runtime": 15.8036, "eval_samples_per_second": 1518.644, "eval_steps_per_second": 4.746, "step": 20660 }, { "epoch": 8.62, "learning_rate": 1e-06, "loss": 0.1619, "step": 20680 }, { "epoch": 8.62, "eval_accuracy": 0.9355, "eval_loss": 0.1917581558227539, "eval_runtime": 15.4829, "eval_samples_per_second": 1550.099, "eval_steps_per_second": 4.844, "step": 20680 }, { "epoch": 8.62, "learning_rate": 1e-06, "loss": 0.1778, "step": 20700 }, { "epoch": 8.62, "eval_accuracy": 0.9360833333333334, "eval_loss": 0.18889638781547546, "eval_runtime": 15.6817, "eval_samples_per_second": 1530.446, "eval_steps_per_second": 4.783, "step": 20700 }, { "epoch": 8.63, "learning_rate": 1e-06, "loss": 0.1892, "step": 20720 }, { "epoch": 8.63, "eval_accuracy": 0.9360416666666667, "eval_loss": 0.18932241201400757, "eval_runtime": 15.6011, "eval_samples_per_second": 1538.354, "eval_steps_per_second": 4.807, "step": 20720 }, { "epoch": 8.64, "learning_rate": 1e-06, "loss": 0.1358, "step": 20740 }, { "epoch": 8.64, "eval_accuracy": 0.9362083333333333, "eval_loss": 0.18881624937057495, "eval_runtime": 15.4873, "eval_samples_per_second": 1549.656, "eval_steps_per_second": 4.843, "step": 20740 }, { "epoch": 8.65, "learning_rate": 1e-06, "loss": 0.1466, "step": 20760 }, { "epoch": 8.65, "eval_accuracy": 0.935625, "eval_loss": 0.19135870039463043, "eval_runtime": 15.7754, "eval_samples_per_second": 1521.355, "eval_steps_per_second": 4.754, "step": 20760 }, { "epoch": 8.66, "learning_rate": 1e-06, "loss": 0.1536, "step": 20780 }, { "epoch": 8.66, "eval_accuracy": 0.9358333333333333, "eval_loss": 0.19162118434906006, "eval_runtime": 15.6911, "eval_samples_per_second": 1529.529, "eval_steps_per_second": 4.78, "step": 20780 }, { "epoch": 8.67, "learning_rate": 1e-06, "loss": 0.1417, "step": 20800 }, { "epoch": 8.67, "eval_accuracy": 0.9351666666666667, "eval_loss": 0.19261544942855835, "eval_runtime": 15.318, "eval_samples_per_second": 1566.781, "eval_steps_per_second": 4.896, "step": 20800 }, { "epoch": 8.68, "learning_rate": 1e-06, "loss": 0.1335, "step": 20820 }, { "epoch": 8.68, "eval_accuracy": 0.9355, "eval_loss": 0.19034945964813232, "eval_runtime": 15.7498, "eval_samples_per_second": 1523.829, "eval_steps_per_second": 4.762, "step": 20820 }, { "epoch": 8.68, "learning_rate": 1e-06, "loss": 0.1698, "step": 20840 }, { "epoch": 8.68, "eval_accuracy": 0.935, "eval_loss": 0.19306336343288422, "eval_runtime": 15.5793, "eval_samples_per_second": 1540.501, "eval_steps_per_second": 4.814, "step": 20840 }, { "epoch": 8.69, "learning_rate": 1e-06, "loss": 0.1394, "step": 20860 }, { "epoch": 8.69, "eval_accuracy": 0.9354166666666667, "eval_loss": 0.19079020619392395, "eval_runtime": 15.4491, "eval_samples_per_second": 1553.491, "eval_steps_per_second": 4.855, "step": 20860 }, { "epoch": 8.7, "learning_rate": 1e-06, "loss": 0.1467, "step": 20880 }, { "epoch": 8.7, "eval_accuracy": 0.9353333333333333, "eval_loss": 0.1910465806722641, "eval_runtime": 15.5826, "eval_samples_per_second": 1540.175, "eval_steps_per_second": 4.813, "step": 20880 }, { "epoch": 8.71, "learning_rate": 1e-06, "loss": 0.1307, "step": 20900 }, { "epoch": 8.71, "eval_accuracy": 0.9344166666666667, "eval_loss": 0.1931913048028946, "eval_runtime": 15.7361, "eval_samples_per_second": 1525.151, "eval_steps_per_second": 4.766, "step": 20900 }, { "epoch": 8.72, "learning_rate": 1e-06, "loss": 0.1566, "step": 20920 }, { "epoch": 8.72, "eval_accuracy": 0.934375, "eval_loss": 0.19242696464061737, "eval_runtime": 16.0207, "eval_samples_per_second": 1498.065, "eval_steps_per_second": 4.681, "step": 20920 }, { "epoch": 8.72, "learning_rate": 1e-06, "loss": 0.1916, "step": 20940 }, { "epoch": 8.72, "eval_accuracy": 0.9332083333333333, "eval_loss": 0.19626210629940033, "eval_runtime": 15.5195, "eval_samples_per_second": 1546.445, "eval_steps_per_second": 4.833, "step": 20940 }, { "epoch": 8.73, "learning_rate": 1e-06, "loss": 0.1366, "step": 20960 }, { "epoch": 8.73, "eval_accuracy": 0.934375, "eval_loss": 0.1936860978603363, "eval_runtime": 16.4371, "eval_samples_per_second": 1460.113, "eval_steps_per_second": 4.563, "step": 20960 }, { "epoch": 8.74, "learning_rate": 1e-06, "loss": 0.1532, "step": 20980 }, { "epoch": 8.74, "eval_accuracy": 0.934, "eval_loss": 0.19347819685935974, "eval_runtime": 16.8336, "eval_samples_per_second": 1425.717, "eval_steps_per_second": 4.455, "step": 20980 }, { "epoch": 8.75, "learning_rate": 1e-06, "loss": 0.1524, "step": 21000 }, { "epoch": 8.75, "eval_accuracy": 0.9351666666666667, "eval_loss": 0.19147901237010956, "eval_runtime": 17.5314, "eval_samples_per_second": 1368.97, "eval_steps_per_second": 4.278, "step": 21000 }, { "epoch": 8.76, "learning_rate": 1e-06, "loss": 0.1331, "step": 21020 }, { "epoch": 8.76, "eval_accuracy": 0.9347083333333334, "eval_loss": 0.19231154024600983, "eval_runtime": 17.9038, "eval_samples_per_second": 1340.496, "eval_steps_per_second": 4.189, "step": 21020 }, { "epoch": 8.77, "learning_rate": 1e-06, "loss": 0.1776, "step": 21040 }, { "epoch": 8.77, "eval_accuracy": 0.935125, "eval_loss": 0.19299417734146118, "eval_runtime": 17.3849, "eval_samples_per_second": 1380.506, "eval_steps_per_second": 4.314, "step": 21040 }, { "epoch": 8.78, "learning_rate": 1e-06, "loss": 0.1772, "step": 21060 }, { "epoch": 8.78, "eval_accuracy": 0.9354583333333333, "eval_loss": 0.1907561719417572, "eval_runtime": 17.0346, "eval_samples_per_second": 1408.894, "eval_steps_per_second": 4.403, "step": 21060 }, { "epoch": 8.78, "learning_rate": 1e-06, "loss": 0.1501, "step": 21080 }, { "epoch": 8.78, "eval_accuracy": 0.934125, "eval_loss": 0.19375211000442505, "eval_runtime": 17.0995, "eval_samples_per_second": 1403.552, "eval_steps_per_second": 4.386, "step": 21080 }, { "epoch": 8.79, "learning_rate": 1e-06, "loss": 0.1544, "step": 21100 }, { "epoch": 8.79, "eval_accuracy": 0.936125, "eval_loss": 0.18984746932983398, "eval_runtime": 16.9332, "eval_samples_per_second": 1417.338, "eval_steps_per_second": 4.429, "step": 21100 }, { "epoch": 8.8, "learning_rate": 1e-06, "loss": 0.1852, "step": 21120 }, { "epoch": 8.8, "eval_accuracy": 0.9360833333333334, "eval_loss": 0.18966814875602722, "eval_runtime": 16.7173, "eval_samples_per_second": 1435.64, "eval_steps_per_second": 4.486, "step": 21120 }, { "epoch": 8.81, "learning_rate": 1e-06, "loss": 0.1483, "step": 21140 }, { "epoch": 8.81, "eval_accuracy": 0.9352083333333333, "eval_loss": 0.19126974046230316, "eval_runtime": 16.9887, "eval_samples_per_second": 1412.7, "eval_steps_per_second": 4.415, "step": 21140 }, { "epoch": 8.82, "learning_rate": 1e-06, "loss": 0.1434, "step": 21160 }, { "epoch": 8.82, "eval_accuracy": 0.9355416666666667, "eval_loss": 0.19145381450653076, "eval_runtime": 17.2555, "eval_samples_per_second": 1390.863, "eval_steps_per_second": 4.346, "step": 21160 }, { "epoch": 8.82, "learning_rate": 1e-06, "loss": 0.2023, "step": 21180 }, { "epoch": 8.82, "eval_accuracy": 0.9348333333333333, "eval_loss": 0.19298754632472992, "eval_runtime": 16.4729, "eval_samples_per_second": 1456.937, "eval_steps_per_second": 4.553, "step": 21180 }, { "epoch": 8.83, "learning_rate": 1e-06, "loss": 0.1471, "step": 21200 }, { "epoch": 8.83, "eval_accuracy": 0.933625, "eval_loss": 0.19501623511314392, "eval_runtime": 16.7339, "eval_samples_per_second": 1434.212, "eval_steps_per_second": 4.482, "step": 21200 }, { "epoch": 8.84, "learning_rate": 1e-06, "loss": 0.1606, "step": 21220 }, { "epoch": 8.84, "eval_accuracy": 0.934375, "eval_loss": 0.19614289700984955, "eval_runtime": 16.3664, "eval_samples_per_second": 1466.416, "eval_steps_per_second": 4.583, "step": 21220 }, { "epoch": 8.85, "learning_rate": 1e-06, "loss": 0.1873, "step": 21240 }, { "epoch": 8.85, "eval_accuracy": 0.9364166666666667, "eval_loss": 0.19085697829723358, "eval_runtime": 16.7737, "eval_samples_per_second": 1430.812, "eval_steps_per_second": 4.471, "step": 21240 }, { "epoch": 8.86, "learning_rate": 1e-06, "loss": 0.1676, "step": 21260 }, { "epoch": 8.86, "eval_accuracy": 0.936625, "eval_loss": 0.18988537788391113, "eval_runtime": 16.3918, "eval_samples_per_second": 1464.148, "eval_steps_per_second": 4.575, "step": 21260 }, { "epoch": 8.87, "learning_rate": 1e-06, "loss": 0.1421, "step": 21280 }, { "epoch": 8.87, "eval_accuracy": 0.93625, "eval_loss": 0.1904492825269699, "eval_runtime": 16.5295, "eval_samples_per_second": 1451.948, "eval_steps_per_second": 4.537, "step": 21280 }, { "epoch": 8.88, "learning_rate": 1e-06, "loss": 0.1475, "step": 21300 }, { "epoch": 8.88, "eval_accuracy": 0.935, "eval_loss": 0.1930071860551834, "eval_runtime": 16.4595, "eval_samples_per_second": 1458.128, "eval_steps_per_second": 4.557, "step": 21300 }, { "epoch": 8.88, "learning_rate": 1e-06, "loss": 0.1928, "step": 21320 }, { "epoch": 8.88, "eval_accuracy": 0.934875, "eval_loss": 0.19229546189308167, "eval_runtime": 16.6828, "eval_samples_per_second": 1438.611, "eval_steps_per_second": 4.496, "step": 21320 }, { "epoch": 8.89, "learning_rate": 1e-06, "loss": 0.1633, "step": 21340 }, { "epoch": 8.89, "eval_accuracy": 0.9347916666666667, "eval_loss": 0.19193056225776672, "eval_runtime": 16.5324, "eval_samples_per_second": 1451.694, "eval_steps_per_second": 4.537, "step": 21340 }, { "epoch": 8.9, "learning_rate": 1e-06, "loss": 0.1504, "step": 21360 }, { "epoch": 8.9, "eval_accuracy": 0.935375, "eval_loss": 0.1920657902956009, "eval_runtime": 16.6038, "eval_samples_per_second": 1445.452, "eval_steps_per_second": 4.517, "step": 21360 }, { "epoch": 8.91, "learning_rate": 1e-06, "loss": 0.1489, "step": 21380 }, { "epoch": 8.91, "eval_accuracy": 0.9335833333333333, "eval_loss": 0.19495506584644318, "eval_runtime": 17.9072, "eval_samples_per_second": 1340.243, "eval_steps_per_second": 4.188, "step": 21380 }, { "epoch": 8.92, "learning_rate": 1e-06, "loss": 0.1646, "step": 21400 }, { "epoch": 8.92, "eval_accuracy": 0.9352916666666666, "eval_loss": 0.19053585827350616, "eval_runtime": 17.2542, "eval_samples_per_second": 1390.963, "eval_steps_per_second": 4.347, "step": 21400 }, { "epoch": 8.93, "learning_rate": 1e-06, "loss": 0.161, "step": 21420 }, { "epoch": 8.93, "eval_accuracy": 0.9343333333333333, "eval_loss": 0.19274799525737762, "eval_runtime": 16.8753, "eval_samples_per_second": 1422.195, "eval_steps_per_second": 4.444, "step": 21420 }, { "epoch": 8.93, "learning_rate": 1e-06, "loss": 0.1885, "step": 21440 }, { "epoch": 8.93, "eval_accuracy": 0.9355833333333333, "eval_loss": 0.18927617371082306, "eval_runtime": 17.7571, "eval_samples_per_second": 1351.572, "eval_steps_per_second": 4.224, "step": 21440 }, { "epoch": 8.94, "learning_rate": 1e-06, "loss": 0.1789, "step": 21460 }, { "epoch": 8.94, "eval_accuracy": 0.9364583333333333, "eval_loss": 0.18856315314769745, "eval_runtime": 16.8879, "eval_samples_per_second": 1421.133, "eval_steps_per_second": 4.441, "step": 21460 }, { "epoch": 8.95, "learning_rate": 1e-06, "loss": 0.1831, "step": 21480 }, { "epoch": 8.95, "eval_accuracy": 0.9367083333333334, "eval_loss": 0.18908809125423431, "eval_runtime": 16.5935, "eval_samples_per_second": 1446.346, "eval_steps_per_second": 4.52, "step": 21480 }, { "epoch": 8.96, "learning_rate": 1e-06, "loss": 0.1553, "step": 21500 }, { "epoch": 8.96, "eval_accuracy": 0.9363333333333334, "eval_loss": 0.1887378990650177, "eval_runtime": 17.029, "eval_samples_per_second": 1409.364, "eval_steps_per_second": 4.404, "step": 21500 }, { "epoch": 8.97, "learning_rate": 1e-06, "loss": 0.1609, "step": 21520 }, { "epoch": 8.97, "eval_accuracy": 0.936625, "eval_loss": 0.1878993958234787, "eval_runtime": 17.4532, "eval_samples_per_second": 1375.109, "eval_steps_per_second": 4.297, "step": 21520 }, { "epoch": 8.97, "learning_rate": 1e-06, "loss": 0.1616, "step": 21540 }, { "epoch": 8.97, "eval_accuracy": 0.93625, "eval_loss": 0.18860691785812378, "eval_runtime": 16.6603, "eval_samples_per_second": 1440.553, "eval_steps_per_second": 4.502, "step": 21540 }, { "epoch": 8.98, "learning_rate": 1e-06, "loss": 0.1623, "step": 21560 }, { "epoch": 8.98, "eval_accuracy": 0.934375, "eval_loss": 0.19211074709892273, "eval_runtime": 16.5367, "eval_samples_per_second": 1451.315, "eval_steps_per_second": 4.535, "step": 21560 }, { "epoch": 8.99, "learning_rate": 1e-06, "loss": 0.1485, "step": 21580 }, { "epoch": 8.99, "eval_accuracy": 0.93575, "eval_loss": 0.1881760060787201, "eval_runtime": 16.5469, "eval_samples_per_second": 1450.422, "eval_steps_per_second": 4.533, "step": 21580 }, { "epoch": 9.0, "learning_rate": 1e-06, "loss": 0.1922, "step": 21600 }, { "epoch": 9.0, "eval_accuracy": 0.9357916666666667, "eval_loss": 0.18824529647827148, "eval_runtime": 17.1765, "eval_samples_per_second": 1397.261, "eval_steps_per_second": 4.366, "step": 21600 }, { "epoch": 9.01, "learning_rate": 1e-06, "loss": 0.1314, "step": 21620 }, { "epoch": 9.01, "eval_accuracy": 0.9365, "eval_loss": 0.18808509409427643, "eval_runtime": 16.3714, "eval_samples_per_second": 1465.972, "eval_steps_per_second": 4.581, "step": 21620 }, { "epoch": 9.02, "learning_rate": 1e-06, "loss": 0.1579, "step": 21640 }, { "epoch": 9.02, "eval_accuracy": 0.935, "eval_loss": 0.1919311285018921, "eval_runtime": 16.3749, "eval_samples_per_second": 1465.66, "eval_steps_per_second": 4.58, "step": 21640 }, { "epoch": 9.03, "learning_rate": 1e-06, "loss": 0.1449, "step": 21660 }, { "epoch": 9.03, "eval_accuracy": 0.9358333333333333, "eval_loss": 0.19011430442333221, "eval_runtime": 16.428, "eval_samples_per_second": 1460.921, "eval_steps_per_second": 4.565, "step": 21660 }, { "epoch": 9.03, "learning_rate": 1e-06, "loss": 0.1394, "step": 21680 }, { "epoch": 9.03, "eval_accuracy": 0.936875, "eval_loss": 0.19003826379776, "eval_runtime": 17.008, "eval_samples_per_second": 1411.103, "eval_steps_per_second": 4.41, "step": 21680 }, { "epoch": 9.04, "learning_rate": 1e-06, "loss": 0.1411, "step": 21700 }, { "epoch": 9.04, "eval_accuracy": 0.9344583333333333, "eval_loss": 0.19364295899868011, "eval_runtime": 17.6973, "eval_samples_per_second": 1356.136, "eval_steps_per_second": 4.238, "step": 21700 }, { "epoch": 9.05, "learning_rate": 1e-06, "loss": 0.1622, "step": 21720 }, { "epoch": 9.05, "eval_accuracy": 0.9360833333333334, "eval_loss": 0.190113365650177, "eval_runtime": 17.0907, "eval_samples_per_second": 1404.273, "eval_steps_per_second": 4.388, "step": 21720 }, { "epoch": 9.06, "learning_rate": 1e-06, "loss": 0.1758, "step": 21740 }, { "epoch": 9.06, "eval_accuracy": 0.9359166666666666, "eval_loss": 0.19059090316295624, "eval_runtime": 17.4852, "eval_samples_per_second": 1372.587, "eval_steps_per_second": 4.289, "step": 21740 }, { "epoch": 9.07, "learning_rate": 1e-06, "loss": 0.1469, "step": 21760 }, { "epoch": 9.07, "eval_accuracy": 0.9352916666666666, "eval_loss": 0.19149552285671234, "eval_runtime": 15.6633, "eval_samples_per_second": 1532.24, "eval_steps_per_second": 4.788, "step": 21760 }, { "epoch": 9.07, "learning_rate": 1e-06, "loss": 0.1543, "step": 21780 }, { "epoch": 9.07, "eval_accuracy": 0.9346666666666666, "eval_loss": 0.1922057569026947, "eval_runtime": 15.6817, "eval_samples_per_second": 1530.451, "eval_steps_per_second": 4.783, "step": 21780 }, { "epoch": 9.08, "learning_rate": 1e-06, "loss": 0.1411, "step": 21800 }, { "epoch": 9.08, "eval_accuracy": 0.9354166666666667, "eval_loss": 0.19251596927642822, "eval_runtime": 15.8744, "eval_samples_per_second": 1511.864, "eval_steps_per_second": 4.725, "step": 21800 }, { "epoch": 9.09, "learning_rate": 1e-06, "loss": 0.1686, "step": 21820 }, { "epoch": 9.09, "eval_accuracy": 0.937125, "eval_loss": 0.1892337054014206, "eval_runtime": 16.2971, "eval_samples_per_second": 1472.653, "eval_steps_per_second": 4.602, "step": 21820 }, { "epoch": 9.1, "learning_rate": 1e-06, "loss": 0.1332, "step": 21840 }, { "epoch": 9.1, "eval_accuracy": 0.9366666666666666, "eval_loss": 0.1890205591917038, "eval_runtime": 15.4417, "eval_samples_per_second": 1554.228, "eval_steps_per_second": 4.857, "step": 21840 }, { "epoch": 9.11, "learning_rate": 1e-06, "loss": 0.1217, "step": 21860 }, { "epoch": 9.11, "eval_accuracy": 0.9357083333333334, "eval_loss": 0.1898655891418457, "eval_runtime": 16.1103, "eval_samples_per_second": 1489.727, "eval_steps_per_second": 4.655, "step": 21860 }, { "epoch": 9.12, "learning_rate": 1e-06, "loss": 0.1765, "step": 21880 }, { "epoch": 9.12, "eval_accuracy": 0.9354166666666667, "eval_loss": 0.19171109795570374, "eval_runtime": 16.5889, "eval_samples_per_second": 1446.753, "eval_steps_per_second": 4.521, "step": 21880 }, { "epoch": 9.12, "learning_rate": 1e-06, "loss": 0.1733, "step": 21900 }, { "epoch": 9.12, "eval_accuracy": 0.9349583333333333, "eval_loss": 0.19177711009979248, "eval_runtime": 17.5638, "eval_samples_per_second": 1366.449, "eval_steps_per_second": 4.27, "step": 21900 }, { "epoch": 9.13, "learning_rate": 1e-06, "loss": 0.1424, "step": 21920 }, { "epoch": 9.13, "eval_accuracy": 0.934625, "eval_loss": 0.19338135421276093, "eval_runtime": 17.301, "eval_samples_per_second": 1387.204, "eval_steps_per_second": 4.335, "step": 21920 }, { "epoch": 9.14, "learning_rate": 1e-06, "loss": 0.1389, "step": 21940 }, { "epoch": 9.14, "eval_accuracy": 0.9360416666666667, "eval_loss": 0.19054777920246124, "eval_runtime": 16.9906, "eval_samples_per_second": 1412.546, "eval_steps_per_second": 4.414, "step": 21940 }, { "epoch": 9.15, "learning_rate": 1e-06, "loss": 0.1426, "step": 21960 }, { "epoch": 9.15, "eval_accuracy": 0.9355, "eval_loss": 0.1908566951751709, "eval_runtime": 17.0809, "eval_samples_per_second": 1405.076, "eval_steps_per_second": 4.391, "step": 21960 }, { "epoch": 9.16, "learning_rate": 1e-06, "loss": 0.1376, "step": 21980 }, { "epoch": 9.16, "eval_accuracy": 0.936375, "eval_loss": 0.18984192609786987, "eval_runtime": 16.3175, "eval_samples_per_second": 1470.81, "eval_steps_per_second": 4.596, "step": 21980 }, { "epoch": 9.17, "learning_rate": 1e-06, "loss": 0.162, "step": 22000 }, { "epoch": 9.17, "eval_accuracy": 0.9356666666666666, "eval_loss": 0.1910352259874344, "eval_runtime": 16.1268, "eval_samples_per_second": 1488.209, "eval_steps_per_second": 4.651, "step": 22000 }, { "epoch": 9.18, "learning_rate": 1e-06, "loss": 0.1555, "step": 22020 }, { "epoch": 9.18, "eval_accuracy": 0.93475, "eval_loss": 0.19323420524597168, "eval_runtime": 16.5391, "eval_samples_per_second": 1451.109, "eval_steps_per_second": 4.535, "step": 22020 }, { "epoch": 9.18, "learning_rate": 1e-06, "loss": 0.1613, "step": 22040 }, { "epoch": 9.18, "eval_accuracy": 0.9364583333333333, "eval_loss": 0.1894889771938324, "eval_runtime": 16.1517, "eval_samples_per_second": 1485.912, "eval_steps_per_second": 4.643, "step": 22040 }, { "epoch": 9.19, "learning_rate": 1e-06, "loss": 0.1407, "step": 22060 }, { "epoch": 9.19, "eval_accuracy": 0.9354583333333333, "eval_loss": 0.1917874664068222, "eval_runtime": 17.701, "eval_samples_per_second": 1355.857, "eval_steps_per_second": 4.237, "step": 22060 }, { "epoch": 9.2, "learning_rate": 1e-06, "loss": 0.1709, "step": 22080 }, { "epoch": 9.2, "eval_accuracy": 0.93425, "eval_loss": 0.19433771073818207, "eval_runtime": 17.0414, "eval_samples_per_second": 1408.333, "eval_steps_per_second": 4.401, "step": 22080 }, { "epoch": 9.21, "learning_rate": 1e-06, "loss": 0.1435, "step": 22100 }, { "epoch": 9.21, "eval_accuracy": 0.93575, "eval_loss": 0.19121021032333374, "eval_runtime": 17.2755, "eval_samples_per_second": 1389.249, "eval_steps_per_second": 4.341, "step": 22100 }, { "epoch": 9.22, "learning_rate": 1e-06, "loss": 0.1778, "step": 22120 }, { "epoch": 9.22, "eval_accuracy": 0.9360833333333334, "eval_loss": 0.1902051866054535, "eval_runtime": 16.2731, "eval_samples_per_second": 1474.822, "eval_steps_per_second": 4.609, "step": 22120 }, { "epoch": 9.22, "learning_rate": 1e-06, "loss": 0.1413, "step": 22140 }, { "epoch": 9.22, "eval_accuracy": 0.9355416666666667, "eval_loss": 0.1910925656557083, "eval_runtime": 16.4198, "eval_samples_per_second": 1461.653, "eval_steps_per_second": 4.568, "step": 22140 }, { "epoch": 9.23, "learning_rate": 1e-06, "loss": 0.1772, "step": 22160 }, { "epoch": 9.23, "eval_accuracy": 0.9365, "eval_loss": 0.1890135258436203, "eval_runtime": 16.5257, "eval_samples_per_second": 1452.285, "eval_steps_per_second": 4.538, "step": 22160 }, { "epoch": 9.24, "learning_rate": 1e-06, "loss": 0.1545, "step": 22180 }, { "epoch": 9.24, "eval_accuracy": 0.9360416666666667, "eval_loss": 0.19075444340705872, "eval_runtime": 16.9041, "eval_samples_per_second": 1419.773, "eval_steps_per_second": 4.437, "step": 22180 }, { "epoch": 9.25, "learning_rate": 1e-06, "loss": 0.1434, "step": 22200 }, { "epoch": 9.25, "eval_accuracy": 0.9331666666666667, "eval_loss": 0.19980815052986145, "eval_runtime": 16.2294, "eval_samples_per_second": 1478.797, "eval_steps_per_second": 4.621, "step": 22200 }, { "epoch": 9.26, "learning_rate": 1e-06, "loss": 0.1981, "step": 22220 }, { "epoch": 9.26, "eval_accuracy": 0.9367916666666667, "eval_loss": 0.1893180012702942, "eval_runtime": 16.4467, "eval_samples_per_second": 1459.263, "eval_steps_per_second": 4.56, "step": 22220 }, { "epoch": 9.27, "learning_rate": 1e-06, "loss": 0.1585, "step": 22240 }, { "epoch": 9.27, "eval_accuracy": 0.936, "eval_loss": 0.18898563086986542, "eval_runtime": 16.2988, "eval_samples_per_second": 1472.504, "eval_steps_per_second": 4.602, "step": 22240 }, { "epoch": 9.28, "learning_rate": 1e-06, "loss": 0.1361, "step": 22260 }, { "epoch": 9.28, "eval_accuracy": 0.9357916666666667, "eval_loss": 0.1903260499238968, "eval_runtime": 16.4778, "eval_samples_per_second": 1456.508, "eval_steps_per_second": 4.552, "step": 22260 }, { "epoch": 9.28, "learning_rate": 1e-06, "loss": 0.1666, "step": 22280 }, { "epoch": 9.28, "eval_accuracy": 0.9356666666666666, "eval_loss": 0.19420108199119568, "eval_runtime": 16.7685, "eval_samples_per_second": 1431.255, "eval_steps_per_second": 4.473, "step": 22280 }, { "epoch": 9.29, "learning_rate": 1e-06, "loss": 0.1855, "step": 22300 }, { "epoch": 9.29, "eval_accuracy": 0.9355416666666667, "eval_loss": 0.19144552946090698, "eval_runtime": 16.1429, "eval_samples_per_second": 1486.719, "eval_steps_per_second": 4.646, "step": 22300 }, { "epoch": 9.3, "learning_rate": 1e-06, "loss": 0.126, "step": 22320 }, { "epoch": 9.3, "eval_accuracy": 0.935875, "eval_loss": 0.1904752403497696, "eval_runtime": 16.2377, "eval_samples_per_second": 1478.039, "eval_steps_per_second": 4.619, "step": 22320 }, { "epoch": 9.31, "learning_rate": 1e-06, "loss": 0.1244, "step": 22340 }, { "epoch": 9.31, "eval_accuracy": 0.9362916666666666, "eval_loss": 0.19049489498138428, "eval_runtime": 16.6183, "eval_samples_per_second": 1444.194, "eval_steps_per_second": 4.513, "step": 22340 }, { "epoch": 9.32, "learning_rate": 1e-06, "loss": 0.1737, "step": 22360 }, { "epoch": 9.32, "eval_accuracy": 0.93575, "eval_loss": 0.19159464538097382, "eval_runtime": 16.5579, "eval_samples_per_second": 1449.463, "eval_steps_per_second": 4.53, "step": 22360 }, { "epoch": 9.32, "learning_rate": 1e-06, "loss": 0.1568, "step": 22380 }, { "epoch": 9.32, "eval_accuracy": 0.9359166666666666, "eval_loss": 0.19013747572898865, "eval_runtime": 16.3587, "eval_samples_per_second": 1467.107, "eval_steps_per_second": 4.585, "step": 22380 }, { "epoch": 9.33, "learning_rate": 1e-06, "loss": 0.1501, "step": 22400 }, { "epoch": 9.33, "eval_accuracy": 0.936625, "eval_loss": 0.1898564100265503, "eval_runtime": 16.4813, "eval_samples_per_second": 1456.2, "eval_steps_per_second": 4.551, "step": 22400 }, { "epoch": 9.34, "learning_rate": 1e-06, "loss": 0.133, "step": 22420 }, { "epoch": 9.34, "eval_accuracy": 0.935875, "eval_loss": 0.19129687547683716, "eval_runtime": 17.9192, "eval_samples_per_second": 1339.343, "eval_steps_per_second": 4.185, "step": 22420 }, { "epoch": 9.35, "learning_rate": 1e-06, "loss": 0.1577, "step": 22440 }, { "epoch": 9.35, "eval_accuracy": 0.9364583333333333, "eval_loss": 0.19107209146022797, "eval_runtime": 17.3207, "eval_samples_per_second": 1385.625, "eval_steps_per_second": 4.33, "step": 22440 }, { "epoch": 9.36, "learning_rate": 1e-06, "loss": 0.1463, "step": 22460 }, { "epoch": 9.36, "eval_accuracy": 0.935875, "eval_loss": 0.19182544946670532, "eval_runtime": 17.9384, "eval_samples_per_second": 1337.915, "eval_steps_per_second": 4.181, "step": 22460 }, { "epoch": 9.37, "learning_rate": 1e-06, "loss": 0.1466, "step": 22480 }, { "epoch": 9.37, "eval_accuracy": 0.93225, "eval_loss": 0.20088885724544525, "eval_runtime": 17.7162, "eval_samples_per_second": 1354.693, "eval_steps_per_second": 4.233, "step": 22480 }, { "epoch": 9.38, "learning_rate": 1e-06, "loss": 0.1537, "step": 22500 }, { "epoch": 9.38, "eval_accuracy": 0.9361666666666667, "eval_loss": 0.19151511788368225, "eval_runtime": 17.6, "eval_samples_per_second": 1363.639, "eval_steps_per_second": 4.261, "step": 22500 }, { "epoch": 9.38, "learning_rate": 1e-06, "loss": 0.1463, "step": 22520 }, { "epoch": 9.38, "eval_accuracy": 0.933875, "eval_loss": 0.1945749670267105, "eval_runtime": 15.7657, "eval_samples_per_second": 1522.29, "eval_steps_per_second": 4.757, "step": 22520 }, { "epoch": 9.39, "learning_rate": 1e-06, "loss": 0.1763, "step": 22540 }, { "epoch": 9.39, "eval_accuracy": 0.9354166666666667, "eval_loss": 0.19165822863578796, "eval_runtime": 17.4199, "eval_samples_per_second": 1377.737, "eval_steps_per_second": 4.305, "step": 22540 }, { "epoch": 9.4, "learning_rate": 1e-06, "loss": 0.1375, "step": 22560 }, { "epoch": 9.4, "eval_accuracy": 0.935625, "eval_loss": 0.19137227535247803, "eval_runtime": 17.3107, "eval_samples_per_second": 1386.428, "eval_steps_per_second": 4.333, "step": 22560 }, { "epoch": 9.41, "learning_rate": 1e-06, "loss": 0.1616, "step": 22580 }, { "epoch": 9.41, "eval_accuracy": 0.93575, "eval_loss": 0.19043239951133728, "eval_runtime": 17.1156, "eval_samples_per_second": 1402.228, "eval_steps_per_second": 4.382, "step": 22580 }, { "epoch": 9.42, "learning_rate": 1e-06, "loss": 0.1809, "step": 22600 }, { "epoch": 9.42, "eval_accuracy": 0.9346666666666666, "eval_loss": 0.19357599318027496, "eval_runtime": 17.8709, "eval_samples_per_second": 1342.962, "eval_steps_per_second": 4.197, "step": 22600 }, { "epoch": 9.43, "learning_rate": 1e-06, "loss": 0.1436, "step": 22620 }, { "epoch": 9.43, "eval_accuracy": 0.935625, "eval_loss": 0.18992185592651367, "eval_runtime": 16.5938, "eval_samples_per_second": 1446.323, "eval_steps_per_second": 4.52, "step": 22620 }, { "epoch": 9.43, "learning_rate": 1e-06, "loss": 0.1831, "step": 22640 }, { "epoch": 9.43, "eval_accuracy": 0.9361666666666667, "eval_loss": 0.19047614932060242, "eval_runtime": 17.0882, "eval_samples_per_second": 1404.476, "eval_steps_per_second": 4.389, "step": 22640 }, { "epoch": 9.44, "learning_rate": 1e-06, "loss": 0.135, "step": 22660 }, { "epoch": 9.44, "eval_accuracy": 0.9363333333333334, "eval_loss": 0.1894386261701584, "eval_runtime": 17.1834, "eval_samples_per_second": 1396.7, "eval_steps_per_second": 4.365, "step": 22660 }, { "epoch": 9.45, "learning_rate": 1e-06, "loss": 0.198, "step": 22680 }, { "epoch": 9.45, "eval_accuracy": 0.93575, "eval_loss": 0.18925711512565613, "eval_runtime": 16.7997, "eval_samples_per_second": 1428.594, "eval_steps_per_second": 4.464, "step": 22680 }, { "epoch": 9.46, "learning_rate": 1e-06, "loss": 0.1595, "step": 22700 }, { "epoch": 9.46, "eval_accuracy": 0.936, "eval_loss": 0.19023475050926208, "eval_runtime": 17.1467, "eval_samples_per_second": 1399.688, "eval_steps_per_second": 4.374, "step": 22700 }, { "epoch": 9.47, "learning_rate": 1e-06, "loss": 0.1594, "step": 22720 }, { "epoch": 9.47, "eval_accuracy": 0.9362083333333333, "eval_loss": 0.18937428295612335, "eval_runtime": 16.3859, "eval_samples_per_second": 1464.678, "eval_steps_per_second": 4.577, "step": 22720 }, { "epoch": 9.47, "learning_rate": 1e-06, "loss": 0.1741, "step": 22740 }, { "epoch": 9.47, "eval_accuracy": 0.9365416666666667, "eval_loss": 0.18718321621418, "eval_runtime": 16.7434, "eval_samples_per_second": 1433.4, "eval_steps_per_second": 4.479, "step": 22740 }, { "epoch": 9.48, "learning_rate": 1e-06, "loss": 0.1526, "step": 22760 }, { "epoch": 9.48, "eval_accuracy": 0.936125, "eval_loss": 0.18912020325660706, "eval_runtime": 16.0636, "eval_samples_per_second": 1494.062, "eval_steps_per_second": 4.669, "step": 22760 }, { "epoch": 9.49, "learning_rate": 1e-06, "loss": 0.1785, "step": 22780 }, { "epoch": 9.49, "eval_accuracy": 0.9362083333333333, "eval_loss": 0.1892082542181015, "eval_runtime": 16.7318, "eval_samples_per_second": 1434.395, "eval_steps_per_second": 4.482, "step": 22780 }, { "epoch": 9.5, "learning_rate": 1e-06, "loss": 0.1487, "step": 22800 }, { "epoch": 9.5, "eval_accuracy": 0.93375, "eval_loss": 0.1960740089416504, "eval_runtime": 16.2354, "eval_samples_per_second": 1478.256, "eval_steps_per_second": 4.62, "step": 22800 }, { "epoch": 9.51, "learning_rate": 1e-06, "loss": 0.151, "step": 22820 }, { "epoch": 9.51, "eval_accuracy": 0.9355, "eval_loss": 0.19206097722053528, "eval_runtime": 16.4046, "eval_samples_per_second": 1463.004, "eval_steps_per_second": 4.572, "step": 22820 }, { "epoch": 9.52, "learning_rate": 1e-06, "loss": 0.1641, "step": 22840 }, { "epoch": 9.52, "eval_accuracy": 0.9347083333333334, "eval_loss": 0.1922009140253067, "eval_runtime": 16.3419, "eval_samples_per_second": 1468.617, "eval_steps_per_second": 4.589, "step": 22840 }, { "epoch": 9.53, "learning_rate": 1e-06, "loss": 0.1403, "step": 22860 }, { "epoch": 9.53, "eval_accuracy": 0.936625, "eval_loss": 0.18889163434505463, "eval_runtime": 16.6017, "eval_samples_per_second": 1445.631, "eval_steps_per_second": 4.518, "step": 22860 }, { "epoch": 9.53, "learning_rate": 1e-06, "loss": 0.182, "step": 22880 }, { "epoch": 9.53, "eval_accuracy": 0.9352083333333333, "eval_loss": 0.19116635620594025, "eval_runtime": 16.607, "eval_samples_per_second": 1445.171, "eval_steps_per_second": 4.516, "step": 22880 }, { "epoch": 9.54, "learning_rate": 1e-06, "loss": 0.1655, "step": 22900 }, { "epoch": 9.54, "eval_accuracy": 0.9345833333333333, "eval_loss": 0.1919187605381012, "eval_runtime": 16.4494, "eval_samples_per_second": 1459.021, "eval_steps_per_second": 4.559, "step": 22900 }, { "epoch": 9.55, "learning_rate": 1e-06, "loss": 0.1651, "step": 22920 }, { "epoch": 9.55, "eval_accuracy": 0.9361666666666667, "eval_loss": 0.1887436956167221, "eval_runtime": 16.2982, "eval_samples_per_second": 1472.556, "eval_steps_per_second": 4.602, "step": 22920 }, { "epoch": 9.56, "learning_rate": 1e-06, "loss": 0.1493, "step": 22940 }, { "epoch": 9.56, "eval_accuracy": 0.9370416666666667, "eval_loss": 0.18585175275802612, "eval_runtime": 16.7173, "eval_samples_per_second": 1435.637, "eval_steps_per_second": 4.486, "step": 22940 }, { "epoch": 9.57, "learning_rate": 1e-06, "loss": 0.1563, "step": 22960 }, { "epoch": 9.57, "eval_accuracy": 0.9357916666666667, "eval_loss": 0.18925468623638153, "eval_runtime": 17.5796, "eval_samples_per_second": 1365.215, "eval_steps_per_second": 4.266, "step": 22960 }, { "epoch": 9.57, "learning_rate": 1e-06, "loss": 0.1498, "step": 22980 }, { "epoch": 9.57, "eval_accuracy": 0.93575, "eval_loss": 0.19032344222068787, "eval_runtime": 15.9593, "eval_samples_per_second": 1503.826, "eval_steps_per_second": 4.699, "step": 22980 }, { "epoch": 9.58, "learning_rate": 1e-06, "loss": 0.131, "step": 23000 }, { "epoch": 9.58, "eval_accuracy": 0.9360833333333334, "eval_loss": 0.18864993751049042, "eval_runtime": 15.7391, "eval_samples_per_second": 1524.867, "eval_steps_per_second": 4.765, "step": 23000 }, { "epoch": 9.59, "learning_rate": 1e-06, "loss": 0.1588, "step": 23020 }, { "epoch": 9.59, "eval_accuracy": 0.9375416666666667, "eval_loss": 0.18759743869304657, "eval_runtime": 82.369, "eval_samples_per_second": 291.372, "eval_steps_per_second": 0.911, "step": 23020 }, { "epoch": 9.6, "learning_rate": 1e-06, "loss": 0.1555, "step": 23040 }, { "epoch": 9.6, "eval_accuracy": 0.9354583333333333, "eval_loss": 0.1895967274904251, "eval_runtime": 16.3955, "eval_samples_per_second": 1463.818, "eval_steps_per_second": 4.574, "step": 23040 }, { "epoch": 9.61, "learning_rate": 1e-06, "loss": 0.1625, "step": 23060 }, { "epoch": 9.61, "eval_accuracy": 0.9355416666666667, "eval_loss": 0.1896596997976303, "eval_runtime": 17.4896, "eval_samples_per_second": 1372.245, "eval_steps_per_second": 4.288, "step": 23060 }, { "epoch": 9.62, "learning_rate": 1e-06, "loss": 0.1183, "step": 23080 }, { "epoch": 9.62, "eval_accuracy": 0.93425, "eval_loss": 0.19340163469314575, "eval_runtime": 17.2768, "eval_samples_per_second": 1389.142, "eval_steps_per_second": 4.341, "step": 23080 }, { "epoch": 9.62, "learning_rate": 1e-06, "loss": 0.1668, "step": 23100 }, { "epoch": 9.62, "eval_accuracy": 0.93425, "eval_loss": 0.1944810450077057, "eval_runtime": 17.3938, "eval_samples_per_second": 1379.802, "eval_steps_per_second": 4.312, "step": 23100 }, { "epoch": 9.63, "learning_rate": 1e-06, "loss": 0.1514, "step": 23120 }, { "epoch": 9.63, "eval_accuracy": 0.9350416666666667, "eval_loss": 0.19056230783462524, "eval_runtime": 17.3545, "eval_samples_per_second": 1382.923, "eval_steps_per_second": 4.322, "step": 23120 }, { "epoch": 9.64, "learning_rate": 1e-06, "loss": 0.0979, "step": 23140 }, { "epoch": 9.64, "eval_accuracy": 0.935375, "eval_loss": 0.18961113691329956, "eval_runtime": 17.9278, "eval_samples_per_second": 1338.707, "eval_steps_per_second": 4.183, "step": 23140 }, { "epoch": 9.65, "learning_rate": 1e-06, "loss": 0.1414, "step": 23160 }, { "epoch": 9.65, "eval_accuracy": 0.93675, "eval_loss": 0.18856357038021088, "eval_runtime": 17.2651, "eval_samples_per_second": 1390.087, "eval_steps_per_second": 4.344, "step": 23160 }, { "epoch": 9.66, "learning_rate": 1e-06, "loss": 0.1359, "step": 23180 }, { "epoch": 9.66, "eval_accuracy": 0.937625, "eval_loss": 0.18765127658843994, "eval_runtime": 17.5512, "eval_samples_per_second": 1367.431, "eval_steps_per_second": 4.273, "step": 23180 }, { "epoch": 9.67, "learning_rate": 1e-06, "loss": 0.1342, "step": 23200 }, { "epoch": 9.67, "eval_accuracy": 0.9369166666666666, "eval_loss": 0.1894044429063797, "eval_runtime": 17.3459, "eval_samples_per_second": 1383.612, "eval_steps_per_second": 4.324, "step": 23200 }, { "epoch": 9.68, "learning_rate": 1e-06, "loss": 0.156, "step": 23220 }, { "epoch": 9.68, "eval_accuracy": 0.9369166666666666, "eval_loss": 0.18876871466636658, "eval_runtime": 18.1165, "eval_samples_per_second": 1324.757, "eval_steps_per_second": 4.14, "step": 23220 }, { "epoch": 9.68, "learning_rate": 1e-06, "loss": 0.1527, "step": 23240 }, { "epoch": 9.68, "eval_accuracy": 0.937375, "eval_loss": 0.18748903274536133, "eval_runtime": 17.4524, "eval_samples_per_second": 1375.169, "eval_steps_per_second": 4.297, "step": 23240 }, { "epoch": 9.69, "learning_rate": 1e-06, "loss": 0.1608, "step": 23260 }, { "epoch": 9.69, "eval_accuracy": 0.9370416666666667, "eval_loss": 0.18767432868480682, "eval_runtime": 17.2496, "eval_samples_per_second": 1391.339, "eval_steps_per_second": 4.348, "step": 23260 }, { "epoch": 9.7, "learning_rate": 1e-06, "loss": 0.1576, "step": 23280 }, { "epoch": 9.7, "eval_accuracy": 0.9365, "eval_loss": 0.18888776004314423, "eval_runtime": 17.5705, "eval_samples_per_second": 1365.923, "eval_steps_per_second": 4.269, "step": 23280 }, { "epoch": 9.71, "learning_rate": 1e-06, "loss": 0.1618, "step": 23300 }, { "epoch": 9.71, "eval_accuracy": 0.9361666666666667, "eval_loss": 0.19041982293128967, "eval_runtime": 16.6527, "eval_samples_per_second": 1441.209, "eval_steps_per_second": 4.504, "step": 23300 }, { "epoch": 9.72, "learning_rate": 1e-06, "loss": 0.1551, "step": 23320 }, { "epoch": 9.72, "eval_accuracy": 0.9355833333333333, "eval_loss": 0.19049932062625885, "eval_runtime": 16.922, "eval_samples_per_second": 1418.271, "eval_steps_per_second": 4.432, "step": 23320 }, { "epoch": 9.72, "learning_rate": 1e-06, "loss": 0.1311, "step": 23340 }, { "epoch": 9.72, "eval_accuracy": 0.9359166666666666, "eval_loss": 0.18927621841430664, "eval_runtime": 16.8033, "eval_samples_per_second": 1428.291, "eval_steps_per_second": 4.463, "step": 23340 }, { "epoch": 9.73, "learning_rate": 1e-06, "loss": 0.128, "step": 23360 }, { "epoch": 9.73, "eval_accuracy": 0.93625, "eval_loss": 0.1896050125360489, "eval_runtime": 16.664, "eval_samples_per_second": 1440.231, "eval_steps_per_second": 4.501, "step": 23360 }, { "epoch": 9.74, "learning_rate": 1e-06, "loss": 0.1733, "step": 23380 }, { "epoch": 9.74, "eval_accuracy": 0.934375, "eval_loss": 0.19381864368915558, "eval_runtime": 17.6268, "eval_samples_per_second": 1361.562, "eval_steps_per_second": 4.255, "step": 23380 }, { "epoch": 9.75, "learning_rate": 1e-06, "loss": 0.1546, "step": 23400 }, { "epoch": 9.75, "eval_accuracy": 0.93575, "eval_loss": 0.19130919873714447, "eval_runtime": 17.4959, "eval_samples_per_second": 1371.752, "eval_steps_per_second": 4.287, "step": 23400 }, { "epoch": 9.76, "learning_rate": 1e-06, "loss": 0.2157, "step": 23420 }, { "epoch": 9.76, "eval_accuracy": 0.9359166666666666, "eval_loss": 0.18996796011924744, "eval_runtime": 17.4322, "eval_samples_per_second": 1376.762, "eval_steps_per_second": 4.302, "step": 23420 }, { "epoch": 9.77, "learning_rate": 1e-06, "loss": 0.1345, "step": 23440 }, { "epoch": 9.77, "eval_accuracy": 0.93425, "eval_loss": 0.19452691078186035, "eval_runtime": 17.4799, "eval_samples_per_second": 1373.007, "eval_steps_per_second": 4.291, "step": 23440 }, { "epoch": 9.78, "learning_rate": 1e-06, "loss": 0.1598, "step": 23460 }, { "epoch": 9.78, "eval_accuracy": 0.9352916666666666, "eval_loss": 0.19014814496040344, "eval_runtime": 16.5871, "eval_samples_per_second": 1446.91, "eval_steps_per_second": 4.522, "step": 23460 }, { "epoch": 9.78, "learning_rate": 1e-06, "loss": 0.1189, "step": 23480 }, { "epoch": 9.78, "eval_accuracy": 0.936875, "eval_loss": 0.1875523328781128, "eval_runtime": 15.4296, "eval_samples_per_second": 1555.449, "eval_steps_per_second": 4.861, "step": 23480 }, { "epoch": 9.79, "learning_rate": 1e-06, "loss": 0.1406, "step": 23500 }, { "epoch": 9.79, "eval_accuracy": 0.9364583333333333, "eval_loss": 0.19084198772907257, "eval_runtime": 15.6993, "eval_samples_per_second": 1528.734, "eval_steps_per_second": 4.777, "step": 23500 }, { "epoch": 9.8, "learning_rate": 1e-06, "loss": 0.1624, "step": 23520 }, { "epoch": 9.8, "eval_accuracy": 0.9345, "eval_loss": 0.19372619688510895, "eval_runtime": 15.5172, "eval_samples_per_second": 1546.671, "eval_steps_per_second": 4.833, "step": 23520 }, { "epoch": 9.81, "learning_rate": 1e-06, "loss": 0.1422, "step": 23540 }, { "epoch": 9.81, "eval_accuracy": 0.9336666666666666, "eval_loss": 0.19716989994049072, "eval_runtime": 15.6871, "eval_samples_per_second": 1529.92, "eval_steps_per_second": 4.781, "step": 23540 }, { "epoch": 9.82, "learning_rate": 1e-06, "loss": 0.149, "step": 23560 }, { "epoch": 9.82, "eval_accuracy": 0.9368333333333333, "eval_loss": 0.1898457407951355, "eval_runtime": 15.5314, "eval_samples_per_second": 1545.261, "eval_steps_per_second": 4.829, "step": 23560 }, { "epoch": 9.82, "learning_rate": 1e-06, "loss": 0.1635, "step": 23580 }, { "epoch": 9.82, "eval_accuracy": 0.935625, "eval_loss": 0.19061589241027832, "eval_runtime": 15.9229, "eval_samples_per_second": 1507.261, "eval_steps_per_second": 4.71, "step": 23580 }, { "epoch": 9.83, "learning_rate": 1e-06, "loss": 0.1648, "step": 23600 }, { "epoch": 9.83, "eval_accuracy": 0.9361666666666667, "eval_loss": 0.18975524604320526, "eval_runtime": 15.3175, "eval_samples_per_second": 1566.837, "eval_steps_per_second": 4.896, "step": 23600 }, { "epoch": 9.84, "learning_rate": 1e-06, "loss": 0.1577, "step": 23620 }, { "epoch": 9.84, "eval_accuracy": 0.9354166666666667, "eval_loss": 0.1917409747838974, "eval_runtime": 15.4166, "eval_samples_per_second": 1556.767, "eval_steps_per_second": 4.865, "step": 23620 }, { "epoch": 9.85, "learning_rate": 1e-06, "loss": 0.1604, "step": 23640 }, { "epoch": 9.85, "eval_accuracy": 0.9365, "eval_loss": 0.19033245742321014, "eval_runtime": 15.7074, "eval_samples_per_second": 1527.939, "eval_steps_per_second": 4.775, "step": 23640 }, { "epoch": 9.86, "learning_rate": 1e-06, "loss": 0.1477, "step": 23660 }, { "epoch": 9.86, "eval_accuracy": 0.9347916666666667, "eval_loss": 0.19305865466594696, "eval_runtime": 15.5818, "eval_samples_per_second": 1540.26, "eval_steps_per_second": 4.813, "step": 23660 }, { "epoch": 9.87, "learning_rate": 1e-06, "loss": 0.1374, "step": 23680 }, { "epoch": 9.87, "eval_accuracy": 0.9369166666666666, "eval_loss": 0.18935944139957428, "eval_runtime": 15.6633, "eval_samples_per_second": 1532.247, "eval_steps_per_second": 4.788, "step": 23680 }, { "epoch": 9.88, "learning_rate": 1e-06, "loss": 0.1524, "step": 23700 }, { "epoch": 9.88, "eval_accuracy": 0.93625, "eval_loss": 0.19033636152744293, "eval_runtime": 15.5153, "eval_samples_per_second": 1546.863, "eval_steps_per_second": 4.834, "step": 23700 }, { "epoch": 9.88, "learning_rate": 1e-06, "loss": 0.1899, "step": 23720 }, { "epoch": 9.88, "eval_accuracy": 0.937, "eval_loss": 0.18940836191177368, "eval_runtime": 15.9167, "eval_samples_per_second": 1507.855, "eval_steps_per_second": 4.712, "step": 23720 }, { "epoch": 9.89, "learning_rate": 1e-06, "loss": 0.1542, "step": 23740 }, { "epoch": 9.89, "eval_accuracy": 0.9356666666666666, "eval_loss": 0.1927276849746704, "eval_runtime": 15.6406, "eval_samples_per_second": 1534.465, "eval_steps_per_second": 4.795, "step": 23740 }, { "epoch": 9.9, "learning_rate": 1e-06, "loss": 0.1709, "step": 23760 }, { "epoch": 9.9, "eval_accuracy": 0.936, "eval_loss": 0.19114099442958832, "eval_runtime": 16.1487, "eval_samples_per_second": 1486.188, "eval_steps_per_second": 4.644, "step": 23760 }, { "epoch": 9.91, "learning_rate": 1e-06, "loss": 0.1555, "step": 23780 }, { "epoch": 9.91, "eval_accuracy": 0.93725, "eval_loss": 0.18706054985523224, "eval_runtime": 15.697, "eval_samples_per_second": 1528.954, "eval_steps_per_second": 4.778, "step": 23780 }, { "epoch": 9.92, "learning_rate": 1e-06, "loss": 0.1723, "step": 23800 }, { "epoch": 9.92, "eval_accuracy": 0.9372916666666666, "eval_loss": 0.18747729063034058, "eval_runtime": 15.4803, "eval_samples_per_second": 1550.356, "eval_steps_per_second": 4.845, "step": 23800 }, { "epoch": 9.93, "learning_rate": 1e-06, "loss": 0.1736, "step": 23820 }, { "epoch": 9.93, "eval_accuracy": 0.935875, "eval_loss": 0.19002044200897217, "eval_runtime": 15.9218, "eval_samples_per_second": 1507.368, "eval_steps_per_second": 4.711, "step": 23820 }, { "epoch": 9.93, "learning_rate": 1e-06, "loss": 0.1354, "step": 23840 }, { "epoch": 9.93, "eval_accuracy": 0.9373333333333334, "eval_loss": 0.18683619797229767, "eval_runtime": 15.4111, "eval_samples_per_second": 1557.318, "eval_steps_per_second": 4.867, "step": 23840 }, { "epoch": 9.94, "learning_rate": 1e-06, "loss": 0.1798, "step": 23860 }, { "epoch": 9.94, "eval_accuracy": 0.937375, "eval_loss": 0.1869436800479889, "eval_runtime": 16.0901, "eval_samples_per_second": 1491.604, "eval_steps_per_second": 4.661, "step": 23860 }, { "epoch": 9.95, "learning_rate": 1e-06, "loss": 0.1258, "step": 23880 }, { "epoch": 9.95, "eval_accuracy": 0.937, "eval_loss": 0.18814752995967865, "eval_runtime": 15.464, "eval_samples_per_second": 1551.992, "eval_steps_per_second": 4.85, "step": 23880 }, { "epoch": 9.96, "learning_rate": 1e-06, "loss": 0.1389, "step": 23900 }, { "epoch": 9.96, "eval_accuracy": 0.9367083333333334, "eval_loss": 0.18954576551914215, "eval_runtime": 15.4402, "eval_samples_per_second": 1554.384, "eval_steps_per_second": 4.857, "step": 23900 }, { "epoch": 9.97, "learning_rate": 1e-06, "loss": 0.1307, "step": 23920 }, { "epoch": 9.97, "eval_accuracy": 0.93675, "eval_loss": 0.189622163772583, "eval_runtime": 15.4543, "eval_samples_per_second": 1552.963, "eval_steps_per_second": 4.853, "step": 23920 }, { "epoch": 9.97, "learning_rate": 1e-06, "loss": 0.1579, "step": 23940 }, { "epoch": 9.97, "eval_accuracy": 0.937125, "eval_loss": 0.1884654462337494, "eval_runtime": 15.6522, "eval_samples_per_second": 1533.332, "eval_steps_per_second": 4.792, "step": 23940 }, { "epoch": 9.98, "learning_rate": 1e-06, "loss": 0.1667, "step": 23960 }, { "epoch": 9.98, "eval_accuracy": 0.937, "eval_loss": 0.18847951292991638, "eval_runtime": 15.9741, "eval_samples_per_second": 1502.433, "eval_steps_per_second": 4.695, "step": 23960 }, { "epoch": 9.99, "learning_rate": 1e-06, "loss": 0.1407, "step": 23980 }, { "epoch": 9.99, "eval_accuracy": 0.9372916666666666, "eval_loss": 0.1892939805984497, "eval_runtime": 15.4305, "eval_samples_per_second": 1555.363, "eval_steps_per_second": 4.861, "step": 23980 }, { "epoch": 10.0, "learning_rate": 1e-06, "loss": 0.1624, "step": 24000 }, { "epoch": 10.0, "eval_accuracy": 0.9372083333333333, "eval_loss": 0.18687449395656586, "eval_runtime": 15.6942, "eval_samples_per_second": 1529.227, "eval_steps_per_second": 4.779, "step": 24000 } ], "max_steps": 24000, "num_train_epochs": 10, "total_flos": 3.005791444826016e+16, "trial_name": null, "trial_params": null }