llama3_truth_model / trainer_log.jsonl
Ogamon's picture
Initial commit
f11a4a4 verified
{"current_steps": 1, "total_steps": 385, "loss": 7.8034, "learning_rate": 8.333333333333335e-09, "epoch": 0.012861736334405145, "percentage": 0.26, "elapsed_time": "0:00:16", "remaining_time": "1:47:01", "throughput": "803.70", "total_tokens": 13440}
{"current_steps": 2, "total_steps": 385, "loss": 7.7577, "learning_rate": 1.666666666666667e-08, "epoch": 0.02572347266881029, "percentage": 0.52, "elapsed_time": "0:00:29", "remaining_time": "1:35:05", "throughput": "905.46", "total_tokens": 26976}
{"current_steps": 3, "total_steps": 385, "loss": 7.8132, "learning_rate": 2.5000000000000002e-08, "epoch": 0.03858520900321544, "percentage": 0.78, "elapsed_time": "0:00:42", "remaining_time": "1:30:54", "throughput": "952.51", "total_tokens": 40800}
{"current_steps": 4, "total_steps": 385, "loss": 7.7906, "learning_rate": 3.333333333333334e-08, "epoch": 0.05144694533762058, "percentage": 1.04, "elapsed_time": "0:00:55", "remaining_time": "1:28:42", "throughput": "967.80", "total_tokens": 54080}
{"current_steps": 5, "total_steps": 385, "loss": 7.7763, "learning_rate": 4.166666666666667e-08, "epoch": 0.06430868167202572, "percentage": 1.3, "elapsed_time": "0:01:08", "remaining_time": "1:27:17", "throughput": "980.16", "total_tokens": 67552}
{"current_steps": 6, "total_steps": 385, "loss": 7.7824, "learning_rate": 5.0000000000000004e-08, "epoch": 0.07717041800643087, "percentage": 1.56, "elapsed_time": "0:01:21", "remaining_time": "1:26:19", "throughput": "995.20", "total_tokens": 81600}
{"current_steps": 7, "total_steps": 385, "loss": 7.833, "learning_rate": 5.833333333333334e-08, "epoch": 0.09003215434083602, "percentage": 1.82, "elapsed_time": "0:01:35", "remaining_time": "1:25:32", "throughput": "996.54", "total_tokens": 94720}
{"current_steps": 8, "total_steps": 385, "loss": 7.6949, "learning_rate": 6.666666666666668e-08, "epoch": 0.10289389067524116, "percentage": 2.08, "elapsed_time": "0:01:48", "remaining_time": "1:24:54", "throughput": "994.24", "total_tokens": 107488}
{"current_steps": 9, "total_steps": 385, "loss": 7.8336, "learning_rate": 7.500000000000001e-08, "epoch": 0.1157556270096463, "percentage": 2.34, "elapsed_time": "0:02:01", "remaining_time": "1:24:23", "throughput": "998.22", "total_tokens": 120992}
{"current_steps": 10, "total_steps": 385, "loss": 7.7282, "learning_rate": 8.333333333333334e-08, "epoch": 0.12861736334405144, "percentage": 2.6, "elapsed_time": "0:02:14", "remaining_time": "1:23:53", "throughput": "1000.27", "total_tokens": 134272}
{"current_steps": 11, "total_steps": 385, "loss": 7.6916, "learning_rate": 9.166666666666668e-08, "epoch": 0.1414790996784566, "percentage": 2.86, "elapsed_time": "0:02:27", "remaining_time": "1:23:27", "throughput": "1005.49", "total_tokens": 148096}
{"current_steps": 12, "total_steps": 385, "loss": 7.7333, "learning_rate": 1.0000000000000001e-07, "epoch": 0.15434083601286175, "percentage": 3.12, "elapsed_time": "0:02:40", "remaining_time": "1:23:02", "throughput": "1002.74", "total_tokens": 160736}
{"current_steps": 13, "total_steps": 385, "loss": 7.6017, "learning_rate": 1.0833333333333335e-07, "epoch": 0.16720257234726688, "percentage": 3.38, "elapsed_time": "0:02:53", "remaining_time": "1:22:40", "throughput": "1000.21", "total_tokens": 173376}
{"current_steps": 14, "total_steps": 385, "loss": 7.644, "learning_rate": 1.1666666666666668e-07, "epoch": 0.18006430868167203, "percentage": 3.64, "elapsed_time": "0:03:06", "remaining_time": "1:22:19", "throughput": "1004.07", "total_tokens": 187136}
{"current_steps": 15, "total_steps": 385, "loss": 7.5965, "learning_rate": 1.2500000000000002e-07, "epoch": 0.19292604501607716, "percentage": 3.9, "elapsed_time": "0:03:19", "remaining_time": "1:22:00", "throughput": "1003.49", "total_tokens": 200192}
{"current_steps": 16, "total_steps": 385, "loss": 7.5883, "learning_rate": 1.3333333333333336e-07, "epoch": 0.2057877813504823, "percentage": 4.16, "elapsed_time": "0:03:32", "remaining_time": "1:21:42", "throughput": "1006.03", "total_tokens": 213856}
{"current_steps": 17, "total_steps": 385, "loss": 7.2464, "learning_rate": 1.4166666666666668e-07, "epoch": 0.21864951768488747, "percentage": 4.42, "elapsed_time": "0:03:45", "remaining_time": "1:21:24", "throughput": "1005.85", "total_tokens": 226944}
{"current_steps": 18, "total_steps": 385, "loss": 7.3133, "learning_rate": 1.5000000000000002e-07, "epoch": 0.2315112540192926, "percentage": 4.68, "elapsed_time": "0:03:58", "remaining_time": "1:21:05", "throughput": "1008.31", "total_tokens": 240640}
{"current_steps": 19, "total_steps": 385, "loss": 7.2133, "learning_rate": 1.5833333333333336e-07, "epoch": 0.24437299035369775, "percentage": 4.94, "elapsed_time": "0:04:11", "remaining_time": "1:20:48", "throughput": "1006.28", "total_tokens": 253280}
{"current_steps": 20, "total_steps": 385, "loss": 7.2431, "learning_rate": 1.6666666666666668e-07, "epoch": 0.2572347266881029, "percentage": 5.19, "elapsed_time": "0:04:24", "remaining_time": "1:20:31", "throughput": "1006.36", "total_tokens": 266400}
{"current_steps": 21, "total_steps": 385, "loss": 7.1875, "learning_rate": 1.7500000000000002e-07, "epoch": 0.27009646302250806, "percentage": 5.45, "elapsed_time": "0:04:37", "remaining_time": "1:20:15", "throughput": "1009.41", "total_tokens": 280416}
{"current_steps": 22, "total_steps": 385, "loss": 7.0659, "learning_rate": 1.8333333333333336e-07, "epoch": 0.2829581993569132, "percentage": 5.71, "elapsed_time": "0:04:50", "remaining_time": "1:19:59", "throughput": "1011.98", "total_tokens": 294368}
{"current_steps": 23, "total_steps": 385, "loss": 6.3595, "learning_rate": 1.9166666666666668e-07, "epoch": 0.2958199356913183, "percentage": 5.97, "elapsed_time": "0:05:04", "remaining_time": "1:19:44", "throughput": "1012.60", "total_tokens": 307840}
{"current_steps": 24, "total_steps": 385, "loss": 6.0417, "learning_rate": 2.0000000000000002e-07, "epoch": 0.3086816720257235, "percentage": 6.23, "elapsed_time": "0:05:17", "remaining_time": "1:19:29", "throughput": "1015.95", "total_tokens": 322144}
{"current_steps": 25, "total_steps": 385, "loss": 5.9894, "learning_rate": 2.0833333333333333e-07, "epoch": 0.3215434083601286, "percentage": 6.49, "elapsed_time": "0:05:30", "remaining_time": "1:19:13", "throughput": "1016.08", "total_tokens": 335424}
{"current_steps": 26, "total_steps": 385, "loss": 5.9259, "learning_rate": 2.166666666666667e-07, "epoch": 0.33440514469453375, "percentage": 6.75, "elapsed_time": "0:05:43", "remaining_time": "1:18:58", "throughput": "1018.08", "total_tokens": 349376}
{"current_steps": 27, "total_steps": 385, "loss": 5.8983, "learning_rate": 2.2500000000000002e-07, "epoch": 0.34726688102893893, "percentage": 7.01, "elapsed_time": "0:05:56", "remaining_time": "1:18:43", "throughput": "1018.88", "total_tokens": 362944}
{"current_steps": 28, "total_steps": 385, "loss": 5.6848, "learning_rate": 2.3333333333333336e-07, "epoch": 0.36012861736334406, "percentage": 7.27, "elapsed_time": "0:06:09", "remaining_time": "1:18:28", "throughput": "1019.07", "total_tokens": 376352}
{"current_steps": 29, "total_steps": 385, "loss": 5.5649, "learning_rate": 2.416666666666667e-07, "epoch": 0.3729903536977492, "percentage": 7.53, "elapsed_time": "0:06:22", "remaining_time": "1:18:13", "throughput": "1018.19", "total_tokens": 389312}
{"current_steps": 30, "total_steps": 385, "loss": 5.4642, "learning_rate": 2.5000000000000004e-07, "epoch": 0.3858520900321543, "percentage": 7.79, "elapsed_time": "0:06:35", "remaining_time": "1:17:59", "throughput": "1018.74", "total_tokens": 402848}
{"current_steps": 31, "total_steps": 385, "loss": 4.7955, "learning_rate": 2.5833333333333333e-07, "epoch": 0.3987138263665595, "percentage": 8.05, "elapsed_time": "0:06:48", "remaining_time": "1:17:45", "throughput": "1019.13", "total_tokens": 416352}
{"current_steps": 32, "total_steps": 385, "loss": 2.8339, "learning_rate": 2.666666666666667e-07, "epoch": 0.4115755627009646, "percentage": 8.31, "elapsed_time": "0:07:01", "remaining_time": "1:17:30", "throughput": "1019.78", "total_tokens": 429920}
{"current_steps": 33, "total_steps": 385, "loss": 2.4477, "learning_rate": 2.75e-07, "epoch": 0.42443729903536975, "percentage": 8.57, "elapsed_time": "0:07:14", "remaining_time": "1:17:16", "throughput": "1021.60", "total_tokens": 444064}
{"current_steps": 34, "total_steps": 385, "loss": 2.3331, "learning_rate": 2.8333333333333336e-07, "epoch": 0.43729903536977494, "percentage": 8.83, "elapsed_time": "0:07:27", "remaining_time": "1:17:02", "throughput": "1023.57", "total_tokens": 458304}
{"current_steps": 35, "total_steps": 385, "loss": 2.2143, "learning_rate": 2.916666666666667e-07, "epoch": 0.45016077170418006, "percentage": 9.09, "elapsed_time": "0:07:40", "remaining_time": "1:16:47", "throughput": "1024.12", "total_tokens": 471904}
{"current_steps": 36, "total_steps": 385, "loss": 2.0067, "learning_rate": 3.0000000000000004e-07, "epoch": 0.4630225080385852, "percentage": 9.35, "elapsed_time": "0:07:53", "remaining_time": "1:16:33", "throughput": "1023.09", "total_tokens": 484768}
{"current_steps": 37, "total_steps": 385, "loss": 1.7702, "learning_rate": 3.083333333333334e-07, "epoch": 0.4758842443729904, "percentage": 9.61, "elapsed_time": "0:08:06", "remaining_time": "1:16:19", "throughput": "1022.13", "total_tokens": 497696}
{"current_steps": 38, "total_steps": 385, "loss": 1.5557, "learning_rate": 3.166666666666667e-07, "epoch": 0.4887459807073955, "percentage": 9.87, "elapsed_time": "0:08:19", "remaining_time": "1:16:05", "throughput": "1022.16", "total_tokens": 511072}
{"current_steps": 39, "total_steps": 385, "loss": 1.3024, "learning_rate": 3.25e-07, "epoch": 0.5016077170418006, "percentage": 10.13, "elapsed_time": "0:08:33", "remaining_time": "1:15:51", "throughput": "1022.51", "total_tokens": 524576}
{"current_steps": 40, "total_steps": 385, "loss": 1.1652, "learning_rate": 3.3333333333333335e-07, "epoch": 0.5144694533762058, "percentage": 10.39, "elapsed_time": "0:08:46", "remaining_time": "1:15:37", "throughput": "1022.95", "total_tokens": 538112}
{"current_steps": 41, "total_steps": 385, "loss": 0.6839, "learning_rate": 3.416666666666667e-07, "epoch": 0.5273311897106109, "percentage": 10.65, "elapsed_time": "0:08:59", "remaining_time": "1:15:23", "throughput": "1024.05", "total_tokens": 552096}
{"current_steps": 42, "total_steps": 385, "loss": 0.4774, "learning_rate": 3.5000000000000004e-07, "epoch": 0.5401929260450161, "percentage": 10.91, "elapsed_time": "0:09:12", "remaining_time": "1:15:09", "throughput": "1024.88", "total_tokens": 565920}
{"current_steps": 43, "total_steps": 385, "loss": 0.3841, "learning_rate": 3.583333333333334e-07, "epoch": 0.5530546623794212, "percentage": 11.17, "elapsed_time": "0:09:25", "remaining_time": "1:14:55", "throughput": "1024.68", "total_tokens": 579200}
{"current_steps": 44, "total_steps": 385, "loss": 0.3588, "learning_rate": 3.666666666666667e-07, "epoch": 0.5659163987138264, "percentage": 11.43, "elapsed_time": "0:09:38", "remaining_time": "1:14:41", "throughput": "1025.16", "total_tokens": 592864}
{"current_steps": 45, "total_steps": 385, "loss": 0.3628, "learning_rate": 3.75e-07, "epoch": 0.5787781350482315, "percentage": 11.69, "elapsed_time": "0:09:51", "remaining_time": "1:14:28", "throughput": "1025.76", "total_tokens": 606656}
{"current_steps": 46, "total_steps": 385, "loss": 0.3426, "learning_rate": 3.8333333333333335e-07, "epoch": 0.5916398713826366, "percentage": 11.95, "elapsed_time": "0:10:04", "remaining_time": "1:14:14", "throughput": "1025.64", "total_tokens": 619968}
{"current_steps": 47, "total_steps": 385, "loss": 0.3279, "learning_rate": 3.9166666666666675e-07, "epoch": 0.6045016077170418, "percentage": 12.21, "elapsed_time": "0:10:17", "remaining_time": "1:14:01", "throughput": "1026.06", "total_tokens": 633632}
{"current_steps": 48, "total_steps": 385, "loss": 0.3947, "learning_rate": 4.0000000000000003e-07, "epoch": 0.617363344051447, "percentage": 12.47, "elapsed_time": "0:10:30", "remaining_time": "1:13:47", "throughput": "1026.56", "total_tokens": 647360}
{"current_steps": 49, "total_steps": 385, "loss": 0.3075, "learning_rate": 4.083333333333334e-07, "epoch": 0.6302250803858521, "percentage": 12.73, "elapsed_time": "0:10:43", "remaining_time": "1:13:33", "throughput": "1027.66", "total_tokens": 661504}
{"current_steps": 50, "total_steps": 385, "loss": 0.3236, "learning_rate": 4.1666666666666667e-07, "epoch": 0.6430868167202572, "percentage": 12.99, "elapsed_time": "0:10:56", "remaining_time": "1:13:20", "throughput": "1028.43", "total_tokens": 675424}
{"current_steps": 51, "total_steps": 385, "loss": 0.3557, "learning_rate": 4.2500000000000006e-07, "epoch": 0.6559485530546624, "percentage": 13.25, "elapsed_time": "0:11:09", "remaining_time": "1:13:06", "throughput": "1028.37", "total_tokens": 688800}
{"current_steps": 52, "total_steps": 385, "loss": 0.4008, "learning_rate": 4.333333333333334e-07, "epoch": 0.6688102893890675, "percentage": 13.51, "elapsed_time": "0:11:22", "remaining_time": "1:12:53", "throughput": "1028.30", "total_tokens": 702208}
{"current_steps": 53, "total_steps": 385, "loss": 0.3586, "learning_rate": 4.416666666666667e-07, "epoch": 0.6816720257234726, "percentage": 13.77, "elapsed_time": "0:11:35", "remaining_time": "1:12:39", "throughput": "1028.93", "total_tokens": 716096}
{"current_steps": 54, "total_steps": 385, "loss": 0.3023, "learning_rate": 4.5000000000000003e-07, "epoch": 0.6945337620578779, "percentage": 14.03, "elapsed_time": "0:11:48", "remaining_time": "1:12:25", "throughput": "1027.78", "total_tokens": 728672}
{"current_steps": 55, "total_steps": 385, "loss": 0.3547, "learning_rate": 4.583333333333333e-07, "epoch": 0.707395498392283, "percentage": 14.29, "elapsed_time": "0:12:02", "remaining_time": "1:12:12", "throughput": "1028.91", "total_tokens": 742912}
{"current_steps": 56, "total_steps": 385, "loss": 0.3846, "learning_rate": 4.666666666666667e-07, "epoch": 0.7202572347266881, "percentage": 14.55, "elapsed_time": "0:12:15", "remaining_time": "1:11:58", "throughput": "1028.23", "total_tokens": 755808}
{"current_steps": 57, "total_steps": 385, "loss": 0.3743, "learning_rate": 4.7500000000000006e-07, "epoch": 0.7331189710610932, "percentage": 14.81, "elapsed_time": "0:12:28", "remaining_time": "1:11:44", "throughput": "1028.85", "total_tokens": 769696}
{"current_steps": 58, "total_steps": 385, "loss": 0.3091, "learning_rate": 4.833333333333334e-07, "epoch": 0.7459807073954984, "percentage": 15.06, "elapsed_time": "0:12:41", "remaining_time": "1:11:31", "throughput": "1029.54", "total_tokens": 783680}
{"current_steps": 59, "total_steps": 385, "loss": 0.3094, "learning_rate": 4.916666666666667e-07, "epoch": 0.7588424437299035, "percentage": 15.32, "elapsed_time": "0:12:54", "remaining_time": "1:11:18", "throughput": "1028.92", "total_tokens": 796672}
{"current_steps": 60, "total_steps": 385, "loss": 0.3309, "learning_rate": 5.000000000000001e-07, "epoch": 0.7717041800643086, "percentage": 15.58, "elapsed_time": "0:13:07", "remaining_time": "1:11:04", "throughput": "1029.14", "total_tokens": 810304}
{"current_steps": 61, "total_steps": 385, "loss": 0.3276, "learning_rate": 5.083333333333334e-07, "epoch": 0.7845659163987139, "percentage": 15.84, "elapsed_time": "0:13:20", "remaining_time": "1:10:51", "throughput": "1029.68", "total_tokens": 824160}
{"current_steps": 62, "total_steps": 385, "loss": 0.3084, "learning_rate": 5.166666666666667e-07, "epoch": 0.797427652733119, "percentage": 16.1, "elapsed_time": "0:13:33", "remaining_time": "1:10:37", "throughput": "1029.34", "total_tokens": 837312}
{"current_steps": 63, "total_steps": 385, "loss": 0.3182, "learning_rate": 5.250000000000001e-07, "epoch": 0.8102893890675241, "percentage": 16.36, "elapsed_time": "0:13:46", "remaining_time": "1:10:24", "throughput": "1029.83", "total_tokens": 851168}
{"current_steps": 64, "total_steps": 385, "loss": 0.3469, "learning_rate": 5.333333333333335e-07, "epoch": 0.8231511254019293, "percentage": 16.62, "elapsed_time": "0:13:59", "remaining_time": "1:10:10", "throughput": "1029.25", "total_tokens": 864096}
{"current_steps": 65, "total_steps": 385, "loss": 0.3253, "learning_rate": 5.416666666666667e-07, "epoch": 0.8360128617363344, "percentage": 16.88, "elapsed_time": "0:14:12", "remaining_time": "1:09:57", "throughput": "1029.85", "total_tokens": 878048}
{"current_steps": 66, "total_steps": 385, "loss": 0.2746, "learning_rate": 5.5e-07, "epoch": 0.8488745980707395, "percentage": 17.14, "elapsed_time": "0:14:25", "remaining_time": "1:09:44", "throughput": "1030.30", "total_tokens": 891904}
{"current_steps": 67, "total_steps": 385, "loss": 0.2893, "learning_rate": 5.583333333333333e-07, "epoch": 0.8617363344051447, "percentage": 17.4, "elapsed_time": "0:14:38", "remaining_time": "1:09:30", "throughput": "1031.01", "total_tokens": 906016}
{"current_steps": 68, "total_steps": 385, "loss": 0.2827, "learning_rate": 5.666666666666667e-07, "epoch": 0.8745980707395499, "percentage": 17.66, "elapsed_time": "0:14:51", "remaining_time": "1:09:17", "throughput": "1030.43", "total_tokens": 918944}
{"current_steps": 69, "total_steps": 385, "loss": 0.2978, "learning_rate": 5.750000000000001e-07, "epoch": 0.887459807073955, "percentage": 17.92, "elapsed_time": "0:15:04", "remaining_time": "1:09:04", "throughput": "1030.71", "total_tokens": 932672}
{"current_steps": 70, "total_steps": 385, "loss": 0.2703, "learning_rate": 5.833333333333334e-07, "epoch": 0.9003215434083601, "percentage": 18.18, "elapsed_time": "0:15:17", "remaining_time": "1:08:50", "throughput": "1029.78", "total_tokens": 945248}
{"current_steps": 71, "total_steps": 385, "loss": 0.2968, "learning_rate": 5.916666666666667e-07, "epoch": 0.9131832797427653, "percentage": 18.44, "elapsed_time": "0:15:30", "remaining_time": "1:08:37", "throughput": "1029.88", "total_tokens": 958784}
{"current_steps": 72, "total_steps": 385, "loss": 0.3035, "learning_rate": 6.000000000000001e-07, "epoch": 0.9260450160771704, "percentage": 18.7, "elapsed_time": "0:15:44", "remaining_time": "1:08:23", "throughput": "1030.06", "total_tokens": 972416}
{"current_steps": 73, "total_steps": 385, "loss": 0.3211, "learning_rate": 6.083333333333334e-07, "epoch": 0.9389067524115756, "percentage": 18.96, "elapsed_time": "0:15:57", "remaining_time": "1:08:10", "throughput": "1030.07", "total_tokens": 985888}
{"current_steps": 74, "total_steps": 385, "loss": 0.2913, "learning_rate": 6.166666666666668e-07, "epoch": 0.9517684887459807, "percentage": 19.22, "elapsed_time": "0:16:10", "remaining_time": "1:07:57", "throughput": "1030.00", "total_tokens": 999296}
{"current_steps": 75, "total_steps": 385, "loss": 0.2817, "learning_rate": 6.25e-07, "epoch": 0.9646302250803859, "percentage": 19.48, "elapsed_time": "0:16:23", "remaining_time": "1:07:44", "throughput": "1029.88", "total_tokens": 1012640}
{"current_steps": 76, "total_steps": 385, "loss": 0.2827, "learning_rate": 6.333333333333334e-07, "epoch": 0.977491961414791, "percentage": 19.74, "elapsed_time": "0:16:36", "remaining_time": "1:07:30", "throughput": "1029.93", "total_tokens": 1026112}
{"current_steps": 77, "total_steps": 385, "loss": 0.229, "learning_rate": 6.416666666666667e-07, "epoch": 0.9903536977491961, "percentage": 20.0, "elapsed_time": "0:16:49", "remaining_time": "1:07:17", "throughput": "1029.81", "total_tokens": 1039424}
{"current_steps": 78, "total_steps": 385, "loss": 0.2503, "learning_rate": 6.5e-07, "epoch": 1.0032154340836013, "percentage": 20.26, "elapsed_time": "0:17:02", "remaining_time": "1:07:04", "throughput": "1030.33", "total_tokens": 1053408}
{"current_steps": 79, "total_steps": 385, "loss": 0.2453, "learning_rate": 6.583333333333333e-07, "epoch": 1.0160771704180065, "percentage": 20.52, "elapsed_time": "0:17:15", "remaining_time": "1:06:50", "throughput": "1030.76", "total_tokens": 1067328}
{"current_steps": 80, "total_steps": 385, "loss": 0.2167, "learning_rate": 6.666666666666667e-07, "epoch": 1.0289389067524115, "percentage": 20.78, "elapsed_time": "0:17:28", "remaining_time": "1:06:37", "throughput": "1030.29", "total_tokens": 1080288}
{"current_steps": 81, "total_steps": 385, "loss": 0.2361, "learning_rate": 6.750000000000001e-07, "epoch": 1.0418006430868167, "percentage": 21.04, "elapsed_time": "0:17:41", "remaining_time": "1:06:24", "throughput": "1030.47", "total_tokens": 1093952}
{"current_steps": 82, "total_steps": 385, "loss": 0.2248, "learning_rate": 6.833333333333334e-07, "epoch": 1.0546623794212218, "percentage": 21.3, "elapsed_time": "0:17:54", "remaining_time": "1:06:11", "throughput": "1030.91", "total_tokens": 1107904}
{"current_steps": 83, "total_steps": 385, "loss": 0.2491, "learning_rate": 6.916666666666668e-07, "epoch": 1.067524115755627, "percentage": 21.56, "elapsed_time": "0:18:07", "remaining_time": "1:05:57", "throughput": "1030.71", "total_tokens": 1121120}
{"current_steps": 84, "total_steps": 385, "loss": 0.2352, "learning_rate": 7.000000000000001e-07, "epoch": 1.0803858520900322, "percentage": 21.82, "elapsed_time": "0:18:20", "remaining_time": "1:05:44", "throughput": "1031.10", "total_tokens": 1135040}
{"current_steps": 85, "total_steps": 385, "loss": 0.2365, "learning_rate": 7.083333333333334e-07, "epoch": 1.0932475884244373, "percentage": 22.08, "elapsed_time": "0:18:33", "remaining_time": "1:05:31", "throughput": "1031.51", "total_tokens": 1148992}
{"current_steps": 86, "total_steps": 385, "loss": 0.217, "learning_rate": 7.166666666666668e-07, "epoch": 1.1061093247588425, "percentage": 22.34, "elapsed_time": "0:18:46", "remaining_time": "1:05:18", "throughput": "1031.65", "total_tokens": 1162592}
{"current_steps": 87, "total_steps": 385, "loss": 0.2258, "learning_rate": 7.25e-07, "epoch": 1.1189710610932475, "percentage": 22.6, "elapsed_time": "0:18:59", "remaining_time": "1:05:04", "throughput": "1030.83", "total_tokens": 1175104}
{"current_steps": 88, "total_steps": 385, "loss": 0.245, "learning_rate": 7.333333333333334e-07, "epoch": 1.1318327974276527, "percentage": 22.86, "elapsed_time": "0:19:13", "remaining_time": "1:04:51", "throughput": "1031.53", "total_tokens": 1189376}
{"current_steps": 89, "total_steps": 385, "loss": 0.3132, "learning_rate": 7.416666666666668e-07, "epoch": 1.144694533762058, "percentage": 23.12, "elapsed_time": "0:19:26", "remaining_time": "1:04:38", "throughput": "1031.26", "total_tokens": 1202560}
{"current_steps": 90, "total_steps": 385, "loss": 0.284, "learning_rate": 7.5e-07, "epoch": 1.157556270096463, "percentage": 23.38, "elapsed_time": "0:19:39", "remaining_time": "1:04:25", "throughput": "1031.93", "total_tokens": 1216832}
{"current_steps": 91, "total_steps": 385, "loss": 0.1933, "learning_rate": 7.583333333333334e-07, "epoch": 1.1704180064308682, "percentage": 23.64, "elapsed_time": "0:19:52", "remaining_time": "1:04:11", "throughput": "1032.15", "total_tokens": 1230528}
{"current_steps": 92, "total_steps": 385, "loss": 0.2154, "learning_rate": 7.666666666666667e-07, "epoch": 1.1832797427652733, "percentage": 23.9, "elapsed_time": "0:20:05", "remaining_time": "1:03:58", "throughput": "1032.42", "total_tokens": 1244352}
{"current_steps": 93, "total_steps": 385, "loss": 0.2064, "learning_rate": 7.750000000000001e-07, "epoch": 1.1961414790996785, "percentage": 24.16, "elapsed_time": "0:20:18", "remaining_time": "1:03:45", "throughput": "1032.18", "total_tokens": 1257472}
{"current_steps": 94, "total_steps": 385, "loss": 0.2038, "learning_rate": 7.833333333333335e-07, "epoch": 1.2090032154340835, "percentage": 24.42, "elapsed_time": "0:20:31", "remaining_time": "1:03:31", "throughput": "1032.52", "total_tokens": 1271392}
{"current_steps": 95, "total_steps": 385, "loss": 0.2152, "learning_rate": 7.916666666666667e-07, "epoch": 1.2218649517684887, "percentage": 24.68, "elapsed_time": "0:20:44", "remaining_time": "1:03:18", "throughput": "1033.74", "total_tokens": 1286432}
{"current_steps": 96, "total_steps": 385, "loss": 0.1961, "learning_rate": 8.000000000000001e-07, "epoch": 1.234726688102894, "percentage": 24.94, "elapsed_time": "0:20:57", "remaining_time": "1:03:05", "throughput": "1033.85", "total_tokens": 1300096}
{"current_steps": 97, "total_steps": 385, "loss": 0.1772, "learning_rate": 8.083333333333334e-07, "epoch": 1.247588424437299, "percentage": 25.19, "elapsed_time": "0:21:10", "remaining_time": "1:02:52", "throughput": "1033.81", "total_tokens": 1313568}
{"current_steps": 98, "total_steps": 385, "loss": 0.1846, "learning_rate": 8.166666666666668e-07, "epoch": 1.2604501607717042, "percentage": 25.45, "elapsed_time": "0:21:23", "remaining_time": "1:02:39", "throughput": "1033.97", "total_tokens": 1327328}
{"current_steps": 99, "total_steps": 385, "loss": 0.1823, "learning_rate": 8.250000000000001e-07, "epoch": 1.2733118971061093, "percentage": 25.71, "elapsed_time": "0:21:36", "remaining_time": "1:02:26", "throughput": "1034.09", "total_tokens": 1340960}
{"current_steps": 100, "total_steps": 385, "loss": 0.1794, "learning_rate": 8.333333333333333e-07, "epoch": 1.2861736334405145, "percentage": 25.97, "elapsed_time": "0:21:49", "remaining_time": "1:02:12", "throughput": "1033.35", "total_tokens": 1353440}
{"current_steps": 101, "total_steps": 385, "loss": 0.2106, "learning_rate": 8.416666666666667e-07, "epoch": 1.2990353697749195, "percentage": 26.23, "elapsed_time": "0:22:02", "remaining_time": "1:01:59", "throughput": "1033.90", "total_tokens": 1367680}
{"current_steps": 102, "total_steps": 385, "loss": 0.2123, "learning_rate": 8.500000000000001e-07, "epoch": 1.3118971061093248, "percentage": 26.49, "elapsed_time": "0:22:15", "remaining_time": "1:01:46", "throughput": "1033.68", "total_tokens": 1380864}
{"current_steps": 103, "total_steps": 385, "loss": 0.2413, "learning_rate": 8.583333333333334e-07, "epoch": 1.32475884244373, "percentage": 26.75, "elapsed_time": "0:22:28", "remaining_time": "1:01:33", "throughput": "1033.33", "total_tokens": 1393888}
{"current_steps": 104, "total_steps": 385, "loss": 0.2334, "learning_rate": 8.666666666666668e-07, "epoch": 1.337620578778135, "percentage": 27.01, "elapsed_time": "0:22:42", "remaining_time": "1:01:20", "throughput": "1032.97", "total_tokens": 1406912}
{"current_steps": 105, "total_steps": 385, "loss": 0.2069, "learning_rate": 8.75e-07, "epoch": 1.3504823151125402, "percentage": 27.27, "elapsed_time": "0:22:55", "remaining_time": "1:01:06", "throughput": "1032.70", "total_tokens": 1420000}
{"current_steps": 106, "total_steps": 385, "loss": 0.2262, "learning_rate": 8.833333333333334e-07, "epoch": 1.3633440514469453, "percentage": 27.53, "elapsed_time": "0:23:08", "remaining_time": "1:00:53", "throughput": "1032.67", "total_tokens": 1433440}
{"current_steps": 107, "total_steps": 385, "loss": 0.1718, "learning_rate": 8.916666666666668e-07, "epoch": 1.3762057877813505, "percentage": 27.79, "elapsed_time": "0:23:21", "remaining_time": "1:00:40", "throughput": "1032.39", "total_tokens": 1446560}
{"current_steps": 108, "total_steps": 385, "loss": 0.204, "learning_rate": 9.000000000000001e-07, "epoch": 1.3890675241157555, "percentage": 28.05, "elapsed_time": "0:23:34", "remaining_time": "1:00:27", "throughput": "1032.58", "total_tokens": 1460320}
{"current_steps": 109, "total_steps": 385, "loss": 0.1849, "learning_rate": 9.083333333333335e-07, "epoch": 1.4019292604501608, "percentage": 28.31, "elapsed_time": "0:23:47", "remaining_time": "1:00:14", "throughput": "1032.91", "total_tokens": 1474272}
{"current_steps": 110, "total_steps": 385, "loss": 0.2028, "learning_rate": 9.166666666666666e-07, "epoch": 1.414790996784566, "percentage": 28.57, "elapsed_time": "0:24:00", "remaining_time": "1:00:00", "throughput": "1033.00", "total_tokens": 1487904}
{"current_steps": 111, "total_steps": 385, "loss": 0.179, "learning_rate": 9.25e-07, "epoch": 1.427652733118971, "percentage": 28.83, "elapsed_time": "0:24:13", "remaining_time": "0:59:47", "throughput": "1033.15", "total_tokens": 1501664}
{"current_steps": 112, "total_steps": 385, "loss": 0.1813, "learning_rate": 9.333333333333334e-07, "epoch": 1.4405144694533762, "percentage": 29.09, "elapsed_time": "0:24:26", "remaining_time": "0:59:34", "throughput": "1033.22", "total_tokens": 1515264}
{"current_steps": 113, "total_steps": 385, "loss": 0.1955, "learning_rate": 9.416666666666667e-07, "epoch": 1.4533762057877815, "percentage": 29.35, "elapsed_time": "0:24:39", "remaining_time": "0:59:21", "throughput": "1033.14", "total_tokens": 1528640}
{"current_steps": 114, "total_steps": 385, "loss": 0.1577, "learning_rate": 9.500000000000001e-07, "epoch": 1.4662379421221865, "percentage": 29.61, "elapsed_time": "0:24:52", "remaining_time": "0:59:08", "throughput": "1032.82", "total_tokens": 1541632}
{"current_steps": 115, "total_steps": 385, "loss": 0.1509, "learning_rate": 9.583333333333334e-07, "epoch": 1.4790996784565915, "percentage": 29.87, "elapsed_time": "0:25:05", "remaining_time": "0:58:55", "throughput": "1032.48", "total_tokens": 1554592}
{"current_steps": 116, "total_steps": 385, "loss": 0.2052, "learning_rate": 9.666666666666668e-07, "epoch": 1.4919614147909968, "percentage": 30.13, "elapsed_time": "0:25:18", "remaining_time": "0:58:41", "throughput": "1031.98", "total_tokens": 1567296}
{"current_steps": 117, "total_steps": 385, "loss": 0.1576, "learning_rate": 9.750000000000002e-07, "epoch": 1.504823151125402, "percentage": 30.39, "elapsed_time": "0:25:31", "remaining_time": "0:58:28", "throughput": "1031.99", "total_tokens": 1580800}
{"current_steps": 118, "total_steps": 385, "loss": 0.1459, "learning_rate": 9.833333333333334e-07, "epoch": 1.517684887459807, "percentage": 30.65, "elapsed_time": "0:25:44", "remaining_time": "0:58:15", "throughput": "1031.67", "total_tokens": 1593792}
{"current_steps": 119, "total_steps": 385, "loss": 0.2694, "learning_rate": 9.916666666666668e-07, "epoch": 1.5305466237942122, "percentage": 30.91, "elapsed_time": "0:25:57", "remaining_time": "0:58:02", "throughput": "1031.92", "total_tokens": 1607648}
{"current_steps": 120, "total_steps": 385, "loss": 0.1891, "learning_rate": 1.0000000000000002e-06, "epoch": 1.5434083601286175, "percentage": 31.17, "elapsed_time": "0:26:10", "remaining_time": "0:57:49", "throughput": "1031.95", "total_tokens": 1621184}
{"current_steps": 121, "total_steps": 385, "loss": 0.1655, "learning_rate": 1.0083333333333333e-06, "epoch": 1.5562700964630225, "percentage": 31.43, "elapsed_time": "0:26:23", "remaining_time": "0:57:35", "throughput": "1031.72", "total_tokens": 1634240}
{"current_steps": 122, "total_steps": 385, "loss": 0.1534, "learning_rate": 1.0166666666666667e-06, "epoch": 1.5691318327974275, "percentage": 31.69, "elapsed_time": "0:26:37", "remaining_time": "0:57:22", "throughput": "1031.72", "total_tokens": 1647712}
{"current_steps": 123, "total_steps": 385, "loss": 0.1373, "learning_rate": 1.025e-06, "epoch": 1.5819935691318328, "percentage": 31.95, "elapsed_time": "0:26:50", "remaining_time": "0:57:09", "throughput": "1031.81", "total_tokens": 1661344}
{"current_steps": 124, "total_steps": 385, "loss": 0.1528, "learning_rate": 1.0333333333333333e-06, "epoch": 1.594855305466238, "percentage": 32.21, "elapsed_time": "0:27:03", "remaining_time": "0:56:56", "throughput": "1031.84", "total_tokens": 1674880}
{"current_steps": 125, "total_steps": 385, "loss": 0.2017, "learning_rate": 1.0416666666666667e-06, "epoch": 1.607717041800643, "percentage": 32.47, "elapsed_time": "0:27:16", "remaining_time": "0:56:43", "throughput": "1032.20", "total_tokens": 1688992}
{"current_steps": 126, "total_steps": 385, "loss": 0.1554, "learning_rate": 1.0500000000000001e-06, "epoch": 1.6205787781350482, "percentage": 32.73, "elapsed_time": "0:27:29", "remaining_time": "0:56:30", "throughput": "1032.47", "total_tokens": 1702944}
{"current_steps": 127, "total_steps": 385, "loss": 0.1332, "learning_rate": 1.0583333333333335e-06, "epoch": 1.6334405144694535, "percentage": 32.99, "elapsed_time": "0:27:42", "remaining_time": "0:56:17", "throughput": "1032.68", "total_tokens": 1716768}
{"current_steps": 128, "total_steps": 385, "loss": 0.115, "learning_rate": 1.066666666666667e-06, "epoch": 1.6463022508038585, "percentage": 33.25, "elapsed_time": "0:27:55", "remaining_time": "0:56:04", "throughput": "1033.04", "total_tokens": 1730848}
{"current_steps": 129, "total_steps": 385, "loss": 0.119, "learning_rate": 1.075e-06, "epoch": 1.6591639871382635, "percentage": 33.51, "elapsed_time": "0:28:08", "remaining_time": "0:55:50", "throughput": "1032.57", "total_tokens": 1743520}
{"current_steps": 130, "total_steps": 385, "loss": 0.1164, "learning_rate": 1.0833333333333335e-06, "epoch": 1.6720257234726688, "percentage": 33.77, "elapsed_time": "0:28:21", "remaining_time": "0:55:37", "throughput": "1032.98", "total_tokens": 1757696}
{"current_steps": 131, "total_steps": 385, "loss": 0.1981, "learning_rate": 1.0916666666666667e-06, "epoch": 1.684887459807074, "percentage": 34.03, "elapsed_time": "0:28:34", "remaining_time": "0:55:24", "throughput": "1033.03", "total_tokens": 1771264}
{"current_steps": 132, "total_steps": 385, "loss": 0.168, "learning_rate": 1.1e-06, "epoch": 1.697749196141479, "percentage": 34.29, "elapsed_time": "0:28:47", "remaining_time": "0:55:11", "throughput": "1032.86", "total_tokens": 1784480}
{"current_steps": 133, "total_steps": 385, "loss": 0.0741, "learning_rate": 1.1083333333333335e-06, "epoch": 1.7106109324758842, "percentage": 34.55, "elapsed_time": "0:29:00", "remaining_time": "0:54:58", "throughput": "1032.96", "total_tokens": 1798176}
{"current_steps": 134, "total_steps": 385, "loss": 0.1847, "learning_rate": 1.1166666666666666e-06, "epoch": 1.7234726688102895, "percentage": 34.81, "elapsed_time": "0:29:13", "remaining_time": "0:54:45", "throughput": "1032.74", "total_tokens": 1811264}
{"current_steps": 135, "total_steps": 385, "loss": 0.108, "learning_rate": 1.125e-06, "epoch": 1.7363344051446945, "percentage": 35.06, "elapsed_time": "0:29:26", "remaining_time": "0:54:32", "throughput": "1032.88", "total_tokens": 1825024}
{"current_steps": 136, "total_steps": 385, "loss": 0.1214, "learning_rate": 1.1333333333333334e-06, "epoch": 1.7491961414790995, "percentage": 35.32, "elapsed_time": "0:29:39", "remaining_time": "0:54:18", "throughput": "1032.96", "total_tokens": 1838624}
{"current_steps": 137, "total_steps": 385, "loss": 0.1252, "learning_rate": 1.1416666666666668e-06, "epoch": 1.762057877813505, "percentage": 35.58, "elapsed_time": "0:29:53", "remaining_time": "0:54:05", "throughput": "1033.06", "total_tokens": 1852288}
{"current_steps": 138, "total_steps": 385, "loss": 0.144, "learning_rate": 1.1500000000000002e-06, "epoch": 1.77491961414791, "percentage": 35.84, "elapsed_time": "0:30:06", "remaining_time": "0:53:52", "throughput": "1032.84", "total_tokens": 1865376}
{"current_steps": 139, "total_steps": 385, "loss": 0.1269, "learning_rate": 1.1583333333333334e-06, "epoch": 1.787781350482315, "percentage": 36.1, "elapsed_time": "0:30:19", "remaining_time": "0:53:39", "throughput": "1032.79", "total_tokens": 1878784}
{"current_steps": 140, "total_steps": 385, "loss": 0.1283, "learning_rate": 1.1666666666666668e-06, "epoch": 1.8006430868167203, "percentage": 36.36, "elapsed_time": "0:30:32", "remaining_time": "0:53:26", "throughput": "1032.81", "total_tokens": 1892320}
{"current_steps": 141, "total_steps": 385, "loss": 0.0929, "learning_rate": 1.175e-06, "epoch": 1.8135048231511255, "percentage": 36.62, "elapsed_time": "0:30:45", "remaining_time": "0:53:13", "throughput": "1032.78", "total_tokens": 1905728}
{"current_steps": 142, "total_steps": 385, "loss": 0.1349, "learning_rate": 1.1833333333333334e-06, "epoch": 1.8263665594855305, "percentage": 36.88, "elapsed_time": "0:30:58", "remaining_time": "0:52:59", "throughput": "1032.56", "total_tokens": 1918752}
{"current_steps": 143, "total_steps": 385, "loss": 0.1277, "learning_rate": 1.1916666666666668e-06, "epoch": 1.8392282958199357, "percentage": 37.14, "elapsed_time": "0:31:11", "remaining_time": "0:52:46", "throughput": "1032.33", "total_tokens": 1931808}
{"current_steps": 144, "total_steps": 385, "loss": 0.1585, "learning_rate": 1.2000000000000002e-06, "epoch": 1.852090032154341, "percentage": 37.4, "elapsed_time": "0:31:24", "remaining_time": "0:52:33", "throughput": "1032.35", "total_tokens": 1945312}
{"current_steps": 145, "total_steps": 385, "loss": 0.1468, "learning_rate": 1.2083333333333333e-06, "epoch": 1.864951768488746, "percentage": 37.66, "elapsed_time": "0:31:37", "remaining_time": "0:52:20", "throughput": "1031.96", "total_tokens": 1958016}
{"current_steps": 146, "total_steps": 385, "loss": 0.1049, "learning_rate": 1.2166666666666667e-06, "epoch": 1.877813504823151, "percentage": 37.92, "elapsed_time": "0:31:50", "remaining_time": "0:52:07", "throughput": "1031.92", "total_tokens": 1971392}
{"current_steps": 147, "total_steps": 385, "loss": 0.1297, "learning_rate": 1.2250000000000001e-06, "epoch": 1.8906752411575563, "percentage": 38.18, "elapsed_time": "0:32:03", "remaining_time": "0:51:54", "throughput": "1031.96", "total_tokens": 1984992}
{"current_steps": 148, "total_steps": 385, "loss": 0.1111, "learning_rate": 1.2333333333333335e-06, "epoch": 1.9035369774919615, "percentage": 38.44, "elapsed_time": "0:32:16", "remaining_time": "0:51:41", "throughput": "1031.79", "total_tokens": 1998144}
{"current_steps": 149, "total_steps": 385, "loss": 0.1202, "learning_rate": 1.2416666666666667e-06, "epoch": 1.9163987138263665, "percentage": 38.7, "elapsed_time": "0:32:29", "remaining_time": "0:51:28", "throughput": "1032.01", "total_tokens": 2012032}
{"current_steps": 150, "total_steps": 385, "loss": 0.0829, "learning_rate": 1.25e-06, "epoch": 1.9292604501607717, "percentage": 38.96, "elapsed_time": "0:32:42", "remaining_time": "0:51:14", "throughput": "1032.05", "total_tokens": 2025600}
{"current_steps": 151, "total_steps": 385, "loss": 0.1119, "learning_rate": 1.2583333333333333e-06, "epoch": 1.942122186495177, "percentage": 39.22, "elapsed_time": "0:32:55", "remaining_time": "0:51:01", "throughput": "1032.55", "total_tokens": 2040096}
{"current_steps": 152, "total_steps": 385, "loss": 0.1144, "learning_rate": 1.2666666666666669e-06, "epoch": 1.954983922829582, "percentage": 39.48, "elapsed_time": "0:33:08", "remaining_time": "0:50:48", "throughput": "1032.51", "total_tokens": 2053504}
{"current_steps": 153, "total_steps": 385, "loss": 0.117, "learning_rate": 1.275e-06, "epoch": 1.967845659163987, "percentage": 39.74, "elapsed_time": "0:33:21", "remaining_time": "0:50:35", "throughput": "1031.96", "total_tokens": 2065856}
{"current_steps": 154, "total_steps": 385, "loss": 0.0998, "learning_rate": 1.2833333333333335e-06, "epoch": 1.9807073954983923, "percentage": 40.0, "elapsed_time": "0:33:34", "remaining_time": "0:50:22", "throughput": "1031.63", "total_tokens": 2078656}
{"current_steps": 155, "total_steps": 385, "loss": 0.1384, "learning_rate": 1.2916666666666669e-06, "epoch": 1.9935691318327975, "percentage": 40.26, "elapsed_time": "0:33:48", "remaining_time": "0:50:09", "throughput": "1031.88", "total_tokens": 2092672}
{"current_steps": 156, "total_steps": 385, "loss": 0.1157, "learning_rate": 1.3e-06, "epoch": 2.0064308681672025, "percentage": 40.52, "elapsed_time": "0:34:01", "remaining_time": "0:49:56", "throughput": "1031.69", "total_tokens": 2105728}
{"current_steps": 157, "total_steps": 385, "loss": 0.0696, "learning_rate": 1.3083333333333334e-06, "epoch": 2.0192926045016075, "percentage": 40.78, "elapsed_time": "0:34:14", "remaining_time": "0:49:43", "throughput": "1031.58", "total_tokens": 2118976}
{"current_steps": 158, "total_steps": 385, "loss": 0.0665, "learning_rate": 1.3166666666666666e-06, "epoch": 2.032154340836013, "percentage": 41.04, "elapsed_time": "0:34:27", "remaining_time": "0:49:29", "throughput": "1031.54", "total_tokens": 2132352}
{"current_steps": 159, "total_steps": 385, "loss": 0.0783, "learning_rate": 1.3250000000000002e-06, "epoch": 2.045016077170418, "percentage": 41.3, "elapsed_time": "0:34:40", "remaining_time": "0:49:16", "throughput": "1031.64", "total_tokens": 2146048}
{"current_steps": 160, "total_steps": 385, "loss": 0.0749, "learning_rate": 1.3333333333333334e-06, "epoch": 2.057877813504823, "percentage": 41.56, "elapsed_time": "0:34:53", "remaining_time": "0:49:03", "throughput": "1031.29", "total_tokens": 2158752}
{"current_steps": 161, "total_steps": 385, "loss": 0.0731, "learning_rate": 1.3416666666666666e-06, "epoch": 2.0707395498392285, "percentage": 41.82, "elapsed_time": "0:35:06", "remaining_time": "0:48:50", "throughput": "1031.12", "total_tokens": 2171872}
{"current_steps": 162, "total_steps": 385, "loss": 0.0913, "learning_rate": 1.3500000000000002e-06, "epoch": 2.0836012861736335, "percentage": 42.08, "elapsed_time": "0:35:19", "remaining_time": "0:48:37", "throughput": "1030.87", "total_tokens": 2184800}
{"current_steps": 163, "total_steps": 385, "loss": 0.0521, "learning_rate": 1.3583333333333334e-06, "epoch": 2.0964630225080385, "percentage": 42.34, "elapsed_time": "0:35:32", "remaining_time": "0:48:24", "throughput": "1030.82", "total_tokens": 2198176}
{"current_steps": 164, "total_steps": 385, "loss": 0.068, "learning_rate": 1.3666666666666668e-06, "epoch": 2.1093247588424435, "percentage": 42.6, "elapsed_time": "0:35:45", "remaining_time": "0:48:11", "throughput": "1030.71", "total_tokens": 2211392}
{"current_steps": 165, "total_steps": 385, "loss": 0.0686, "learning_rate": 1.3750000000000002e-06, "epoch": 2.122186495176849, "percentage": 42.86, "elapsed_time": "0:35:58", "remaining_time": "0:47:58", "throughput": "1030.53", "total_tokens": 2224480}
{"current_steps": 166, "total_steps": 385, "loss": 0.0545, "learning_rate": 1.3833333333333336e-06, "epoch": 2.135048231511254, "percentage": 43.12, "elapsed_time": "0:36:11", "remaining_time": "0:47:44", "throughput": "1030.54", "total_tokens": 2237920}
{"current_steps": 167, "total_steps": 385, "loss": 0.0347, "learning_rate": 1.3916666666666668e-06, "epoch": 2.147909967845659, "percentage": 43.38, "elapsed_time": "0:36:24", "remaining_time": "0:47:31", "throughput": "1030.37", "total_tokens": 2251008}
{"current_steps": 168, "total_steps": 385, "loss": 0.0993, "learning_rate": 1.4000000000000001e-06, "epoch": 2.1607717041800645, "percentage": 43.64, "elapsed_time": "0:36:37", "remaining_time": "0:47:18", "throughput": "1030.36", "total_tokens": 2264448}
{"current_steps": 169, "total_steps": 385, "loss": 0.1059, "learning_rate": 1.4083333333333335e-06, "epoch": 2.1736334405144695, "percentage": 43.9, "elapsed_time": "0:36:50", "remaining_time": "0:47:05", "throughput": "1030.18", "total_tokens": 2277568}
{"current_steps": 170, "total_steps": 385, "loss": 0.089, "learning_rate": 1.4166666666666667e-06, "epoch": 2.1864951768488745, "percentage": 44.16, "elapsed_time": "0:37:03", "remaining_time": "0:46:52", "throughput": "1030.24", "total_tokens": 2291168}
{"current_steps": 171, "total_steps": 385, "loss": 0.0379, "learning_rate": 1.425e-06, "epoch": 2.19935691318328, "percentage": 44.42, "elapsed_time": "0:37:16", "remaining_time": "0:46:39", "throughput": "1030.07", "total_tokens": 2304192}
{"current_steps": 172, "total_steps": 385, "loss": 0.0626, "learning_rate": 1.4333333333333335e-06, "epoch": 2.212218649517685, "percentage": 44.68, "elapsed_time": "0:37:29", "remaining_time": "0:46:26", "throughput": "1030.20", "total_tokens": 2317920}
{"current_steps": 173, "total_steps": 385, "loss": 0.0957, "learning_rate": 1.4416666666666667e-06, "epoch": 2.22508038585209, "percentage": 44.94, "elapsed_time": "0:37:43", "remaining_time": "0:46:13", "throughput": "1030.64", "total_tokens": 2332416}
{"current_steps": 174, "total_steps": 385, "loss": 0.0636, "learning_rate": 1.45e-06, "epoch": 2.237942122186495, "percentage": 45.19, "elapsed_time": "0:37:56", "remaining_time": "0:46:00", "throughput": "1030.75", "total_tokens": 2346112}
{"current_steps": 175, "total_steps": 385, "loss": 0.074, "learning_rate": 1.4583333333333335e-06, "epoch": 2.2508038585209005, "percentage": 45.45, "elapsed_time": "0:38:09", "remaining_time": "0:45:47", "throughput": "1030.71", "total_tokens": 2359488}
{"current_steps": 176, "total_steps": 385, "loss": 0.0685, "learning_rate": 1.4666666666666669e-06, "epoch": 2.2636655948553055, "percentage": 45.71, "elapsed_time": "0:38:22", "remaining_time": "0:45:33", "throughput": "1030.69", "total_tokens": 2372928}
{"current_steps": 177, "total_steps": 385, "loss": 0.0574, "learning_rate": 1.475e-06, "epoch": 2.2765273311897105, "percentage": 45.97, "elapsed_time": "0:38:35", "remaining_time": "0:45:20", "throughput": "1030.53", "total_tokens": 2386016}
{"current_steps": 178, "total_steps": 385, "loss": 0.0619, "learning_rate": 1.4833333333333337e-06, "epoch": 2.289389067524116, "percentage": 46.23, "elapsed_time": "0:38:48", "remaining_time": "0:45:07", "throughput": "1030.73", "total_tokens": 2399936}
{"current_steps": 179, "total_steps": 385, "loss": 0.0683, "learning_rate": 1.4916666666666669e-06, "epoch": 2.302250803858521, "percentage": 46.49, "elapsed_time": "0:39:01", "remaining_time": "0:44:54", "throughput": "1030.36", "total_tokens": 2412480}
{"current_steps": 180, "total_steps": 385, "loss": 0.07, "learning_rate": 1.5e-06, "epoch": 2.315112540192926, "percentage": 46.75, "elapsed_time": "0:39:14", "remaining_time": "0:44:41", "throughput": "1030.33", "total_tokens": 2425888}
{"current_steps": 181, "total_steps": 385, "loss": 0.1154, "learning_rate": 1.5083333333333336e-06, "epoch": 2.327974276527331, "percentage": 47.01, "elapsed_time": "0:39:27", "remaining_time": "0:44:28", "throughput": "1030.26", "total_tokens": 2439168}
{"current_steps": 182, "total_steps": 385, "loss": 0.0923, "learning_rate": 1.5166666666666668e-06, "epoch": 2.3408360128617365, "percentage": 47.27, "elapsed_time": "0:39:40", "remaining_time": "0:44:15", "throughput": "1030.40", "total_tokens": 2452928}
{"current_steps": 183, "total_steps": 385, "loss": 0.0777, "learning_rate": 1.525e-06, "epoch": 2.3536977491961415, "percentage": 47.53, "elapsed_time": "0:39:53", "remaining_time": "0:44:02", "throughput": "1030.14", "total_tokens": 2465760}
{"current_steps": 184, "total_steps": 385, "loss": 0.0754, "learning_rate": 1.5333333333333334e-06, "epoch": 2.3665594855305465, "percentage": 47.79, "elapsed_time": "0:40:06", "remaining_time": "0:43:49", "throughput": "1030.34", "total_tokens": 2479744}
{"current_steps": 185, "total_steps": 385, "loss": 0.0704, "learning_rate": 1.5416666666666668e-06, "epoch": 2.379421221864952, "percentage": 48.05, "elapsed_time": "0:40:19", "remaining_time": "0:43:35", "throughput": "1030.27", "total_tokens": 2493024}
{"current_steps": 186, "total_steps": 385, "loss": 0.0915, "learning_rate": 1.5500000000000002e-06, "epoch": 2.392282958199357, "percentage": 48.31, "elapsed_time": "0:40:32", "remaining_time": "0:43:22", "throughput": "1030.38", "total_tokens": 2506720}
{"current_steps": 187, "total_steps": 385, "loss": 0.087, "learning_rate": 1.5583333333333334e-06, "epoch": 2.405144694533762, "percentage": 48.57, "elapsed_time": "0:40:45", "remaining_time": "0:43:09", "throughput": "1030.22", "total_tokens": 2519776}
{"current_steps": 188, "total_steps": 385, "loss": 0.0566, "learning_rate": 1.566666666666667e-06, "epoch": 2.418006430868167, "percentage": 48.83, "elapsed_time": "0:40:58", "remaining_time": "0:42:56", "throughput": "1030.35", "total_tokens": 2533536}
{"current_steps": 189, "total_steps": 385, "loss": 0.1037, "learning_rate": 1.5750000000000002e-06, "epoch": 2.4308681672025725, "percentage": 49.09, "elapsed_time": "0:41:11", "remaining_time": "0:42:43", "throughput": "1030.56", "total_tokens": 2547520}
{"current_steps": 190, "total_steps": 385, "loss": 0.1143, "learning_rate": 1.5833333333333333e-06, "epoch": 2.4437299035369775, "percentage": 49.35, "elapsed_time": "0:41:25", "remaining_time": "0:42:30", "throughput": "1030.68", "total_tokens": 2561280}
{"current_steps": 191, "total_steps": 385, "loss": 0.0829, "learning_rate": 1.591666666666667e-06, "epoch": 2.4565916398713825, "percentage": 49.61, "elapsed_time": "0:41:38", "remaining_time": "0:42:17", "throughput": "1030.82", "total_tokens": 2575136}
{"current_steps": 192, "total_steps": 385, "loss": 0.0422, "learning_rate": 1.6000000000000001e-06, "epoch": 2.469453376205788, "percentage": 49.87, "elapsed_time": "0:41:51", "remaining_time": "0:42:04", "throughput": "1030.87", "total_tokens": 2588736}
{"current_steps": 193, "total_steps": 385, "loss": 0.0727, "learning_rate": 1.6083333333333333e-06, "epoch": 2.482315112540193, "percentage": 50.13, "elapsed_time": "0:42:04", "remaining_time": "0:41:51", "throughput": "1030.95", "total_tokens": 2602400}
{"current_steps": 194, "total_steps": 385, "loss": 0.0836, "learning_rate": 1.6166666666666667e-06, "epoch": 2.495176848874598, "percentage": 50.39, "elapsed_time": "0:42:17", "remaining_time": "0:41:38", "throughput": "1030.88", "total_tokens": 2615648}
{"current_steps": 195, "total_steps": 385, "loss": 0.0803, "learning_rate": 1.6250000000000001e-06, "epoch": 2.508038585209003, "percentage": 50.65, "elapsed_time": "0:42:30", "remaining_time": "0:41:24", "throughput": "1030.85", "total_tokens": 2629024}
{"current_steps": 196, "total_steps": 385, "loss": 0.0654, "learning_rate": 1.6333333333333335e-06, "epoch": 2.5209003215434085, "percentage": 50.91, "elapsed_time": "0:42:43", "remaining_time": "0:41:11", "throughput": "1030.96", "total_tokens": 2642784}
{"current_steps": 197, "total_steps": 385, "loss": 0.0587, "learning_rate": 1.6416666666666667e-06, "epoch": 2.5337620578778135, "percentage": 51.17, "elapsed_time": "0:42:56", "remaining_time": "0:40:58", "throughput": "1030.89", "total_tokens": 2656064}
{"current_steps": 198, "total_steps": 385, "loss": 0.0848, "learning_rate": 1.6500000000000003e-06, "epoch": 2.5466237942122185, "percentage": 51.43, "elapsed_time": "0:43:09", "remaining_time": "0:40:45", "throughput": "1030.87", "total_tokens": 2669472}
{"current_steps": 199, "total_steps": 385, "loss": 0.0525, "learning_rate": 1.6583333333333335e-06, "epoch": 2.559485530546624, "percentage": 51.69, "elapsed_time": "0:43:22", "remaining_time": "0:40:32", "throughput": "1030.88", "total_tokens": 2682944}
{"current_steps": 200, "total_steps": 385, "loss": 0.0677, "learning_rate": 1.6666666666666667e-06, "epoch": 2.572347266881029, "percentage": 51.95, "elapsed_time": "0:43:35", "remaining_time": "0:40:19", "throughput": "1030.73", "total_tokens": 2695968}
{"current_steps": 201, "total_steps": 385, "loss": 0.062, "learning_rate": 1.6750000000000003e-06, "epoch": 2.585209003215434, "percentage": 52.21, "elapsed_time": "0:43:48", "remaining_time": "0:40:06", "throughput": "1030.77", "total_tokens": 2709504}
{"current_steps": 202, "total_steps": 385, "loss": 0.0674, "learning_rate": 1.6833333333333335e-06, "epoch": 2.598070739549839, "percentage": 52.47, "elapsed_time": "0:44:01", "remaining_time": "0:39:53", "throughput": "1030.89", "total_tokens": 2723264}
{"current_steps": 203, "total_steps": 385, "loss": 0.0533, "learning_rate": 1.6916666666666666e-06, "epoch": 2.6109324758842445, "percentage": 52.73, "elapsed_time": "0:44:14", "remaining_time": "0:39:40", "throughput": "1030.91", "total_tokens": 2736768}
{"current_steps": 204, "total_steps": 385, "loss": 0.0757, "learning_rate": 1.7000000000000002e-06, "epoch": 2.6237942122186495, "percentage": 52.99, "elapsed_time": "0:44:27", "remaining_time": "0:39:27", "throughput": "1031.12", "total_tokens": 2750816}
{"current_steps": 205, "total_steps": 385, "loss": 0.0777, "learning_rate": 1.7083333333333334e-06, "epoch": 2.6366559485530545, "percentage": 53.25, "elapsed_time": "0:44:40", "remaining_time": "0:39:13", "throughput": "1031.22", "total_tokens": 2764576}
{"current_steps": 206, "total_steps": 385, "loss": 0.0921, "learning_rate": 1.7166666666666668e-06, "epoch": 2.64951768488746, "percentage": 53.51, "elapsed_time": "0:44:53", "remaining_time": "0:39:00", "throughput": "1031.22", "total_tokens": 2778080}
{"current_steps": 207, "total_steps": 385, "loss": 0.0378, "learning_rate": 1.725e-06, "epoch": 2.662379421221865, "percentage": 53.77, "elapsed_time": "0:45:07", "remaining_time": "0:38:47", "throughput": "1031.02", "total_tokens": 2790976}
{"current_steps": 208, "total_steps": 385, "loss": 0.0671, "learning_rate": 1.7333333333333336e-06, "epoch": 2.67524115755627, "percentage": 54.03, "elapsed_time": "0:45:20", "remaining_time": "0:38:34", "throughput": "1031.28", "total_tokens": 2805152}
{"current_steps": 209, "total_steps": 385, "loss": 0.0664, "learning_rate": 1.7416666666666668e-06, "epoch": 2.688102893890675, "percentage": 54.29, "elapsed_time": "0:45:33", "remaining_time": "0:38:21", "throughput": "1031.30", "total_tokens": 2818688}
{"current_steps": 210, "total_steps": 385, "loss": 0.072, "learning_rate": 1.75e-06, "epoch": 2.7009646302250805, "percentage": 54.55, "elapsed_time": "0:45:46", "remaining_time": "0:38:08", "throughput": "1031.20", "total_tokens": 2831872}
{"current_steps": 211, "total_steps": 385, "loss": 0.0883, "learning_rate": 1.7583333333333336e-06, "epoch": 2.7138263665594855, "percentage": 54.81, "elapsed_time": "0:45:59", "remaining_time": "0:37:55", "throughput": "1031.43", "total_tokens": 2845984}
{"current_steps": 212, "total_steps": 385, "loss": 0.0414, "learning_rate": 1.7666666666666668e-06, "epoch": 2.7266881028938905, "percentage": 55.06, "elapsed_time": "0:46:12", "remaining_time": "0:37:42", "throughput": "1031.35", "total_tokens": 2859232}
{"current_steps": 213, "total_steps": 385, "loss": 0.031, "learning_rate": 1.7750000000000002e-06, "epoch": 2.739549839228296, "percentage": 55.32, "elapsed_time": "0:46:25", "remaining_time": "0:37:29", "throughput": "1031.16", "total_tokens": 2872192}
{"current_steps": 214, "total_steps": 385, "loss": 0.0634, "learning_rate": 1.7833333333333336e-06, "epoch": 2.752411575562701, "percentage": 55.58, "elapsed_time": "0:46:38", "remaining_time": "0:37:16", "throughput": "1031.04", "total_tokens": 2885312}
{"current_steps": 215, "total_steps": 385, "loss": 0.0837, "learning_rate": 1.7916666666666667e-06, "epoch": 2.765273311897106, "percentage": 55.84, "elapsed_time": "0:46:51", "remaining_time": "0:37:03", "throughput": "1031.15", "total_tokens": 2899072}
{"current_steps": 216, "total_steps": 385, "loss": 0.0855, "learning_rate": 1.8000000000000001e-06, "epoch": 2.778135048231511, "percentage": 56.1, "elapsed_time": "0:47:04", "remaining_time": "0:36:49", "throughput": "1031.11", "total_tokens": 2912448}
{"current_steps": 217, "total_steps": 385, "loss": 0.0945, "learning_rate": 1.8083333333333335e-06, "epoch": 2.7909967845659165, "percentage": 56.36, "elapsed_time": "0:47:17", "remaining_time": "0:36:36", "throughput": "1030.85", "total_tokens": 2925120}
{"current_steps": 218, "total_steps": 385, "loss": 0.078, "learning_rate": 1.816666666666667e-06, "epoch": 2.8038585209003215, "percentage": 56.62, "elapsed_time": "0:47:30", "remaining_time": "0:36:23", "throughput": "1030.78", "total_tokens": 2938336}
{"current_steps": 219, "total_steps": 385, "loss": 0.0573, "learning_rate": 1.825e-06, "epoch": 2.816720257234727, "percentage": 56.88, "elapsed_time": "0:47:43", "remaining_time": "0:36:10", "throughput": "1030.59", "total_tokens": 2951264}
{"current_steps": 220, "total_steps": 385, "loss": 0.0806, "learning_rate": 1.8333333333333333e-06, "epoch": 2.829581993569132, "percentage": 57.14, "elapsed_time": "0:47:56", "remaining_time": "0:35:57", "throughput": "1030.71", "total_tokens": 2965120}
{"current_steps": 221, "total_steps": 385, "loss": 0.0961, "learning_rate": 1.8416666666666669e-06, "epoch": 2.842443729903537, "percentage": 57.4, "elapsed_time": "0:48:09", "remaining_time": "0:35:44", "throughput": "1030.63", "total_tokens": 2978368}
{"current_steps": 222, "total_steps": 385, "loss": 0.0732, "learning_rate": 1.85e-06, "epoch": 2.855305466237942, "percentage": 57.66, "elapsed_time": "0:48:22", "remaining_time": "0:35:31", "throughput": "1030.70", "total_tokens": 2992000}
{"current_steps": 223, "total_steps": 385, "loss": 0.0957, "learning_rate": 1.8583333333333335e-06, "epoch": 2.868167202572347, "percentage": 57.92, "elapsed_time": "0:48:35", "remaining_time": "0:35:18", "throughput": "1030.89", "total_tokens": 3006048}
{"current_steps": 224, "total_steps": 385, "loss": 0.0774, "learning_rate": 1.8666666666666669e-06, "epoch": 2.8810289389067525, "percentage": 58.18, "elapsed_time": "0:48:49", "remaining_time": "0:35:05", "throughput": "1030.97", "total_tokens": 3019744}
{"current_steps": 225, "total_steps": 385, "loss": 0.0691, "learning_rate": 1.8750000000000003e-06, "epoch": 2.8938906752411575, "percentage": 58.44, "elapsed_time": "0:49:02", "remaining_time": "0:34:52", "throughput": "1031.09", "total_tokens": 3033568}
{"current_steps": 226, "total_steps": 385, "loss": 0.0529, "learning_rate": 1.8833333333333334e-06, "epoch": 2.906752411575563, "percentage": 58.7, "elapsed_time": "0:49:15", "remaining_time": "0:34:39", "throughput": "1031.31", "total_tokens": 3047712}
{"current_steps": 227, "total_steps": 385, "loss": 0.0811, "learning_rate": 1.8916666666666668e-06, "epoch": 2.919614147909968, "percentage": 58.96, "elapsed_time": "0:49:28", "remaining_time": "0:34:26", "throughput": "1031.18", "total_tokens": 3060800}
{"current_steps": 228, "total_steps": 385, "loss": 0.1211, "learning_rate": 1.9000000000000002e-06, "epoch": 2.932475884244373, "percentage": 59.22, "elapsed_time": "0:49:41", "remaining_time": "0:34:12", "throughput": "1031.32", "total_tokens": 3074720}
{"current_steps": 229, "total_steps": 385, "loss": 0.0489, "learning_rate": 1.9083333333333334e-06, "epoch": 2.945337620578778, "percentage": 59.48, "elapsed_time": "0:49:54", "remaining_time": "0:33:59", "throughput": "1031.40", "total_tokens": 3088448}
{"current_steps": 230, "total_steps": 385, "loss": 0.0947, "learning_rate": 1.916666666666667e-06, "epoch": 2.958199356913183, "percentage": 59.74, "elapsed_time": "0:50:07", "remaining_time": "0:33:46", "throughput": "1031.77", "total_tokens": 3103008}
{"current_steps": 231, "total_steps": 385, "loss": 0.0561, "learning_rate": 1.925e-06, "epoch": 2.9710610932475885, "percentage": 60.0, "elapsed_time": "0:50:20", "remaining_time": "0:33:33", "throughput": "1031.72", "total_tokens": 3116320}
{"current_steps": 232, "total_steps": 385, "loss": 0.0629, "learning_rate": 1.9333333333333336e-06, "epoch": 2.9839228295819935, "percentage": 60.26, "elapsed_time": "0:50:33", "remaining_time": "0:33:20", "throughput": "1031.97", "total_tokens": 3130592}
{"current_steps": 233, "total_steps": 385, "loss": 0.0579, "learning_rate": 1.9416666666666666e-06, "epoch": 2.996784565916399, "percentage": 60.52, "elapsed_time": "0:50:46", "remaining_time": "0:33:07", "throughput": "1031.94", "total_tokens": 3144000}
{"current_steps": 234, "total_steps": 385, "loss": 0.0285, "learning_rate": 1.9500000000000004e-06, "epoch": 3.009646302250804, "percentage": 60.78, "elapsed_time": "0:50:59", "remaining_time": "0:32:54", "throughput": "1031.94", "total_tokens": 3157472}
{"current_steps": 235, "total_steps": 385, "loss": 0.0256, "learning_rate": 1.9583333333333334e-06, "epoch": 3.022508038585209, "percentage": 61.04, "elapsed_time": "0:51:12", "remaining_time": "0:32:41", "throughput": "1031.83", "total_tokens": 3170656}
{"current_steps": 236, "total_steps": 385, "loss": 0.0247, "learning_rate": 1.9666666666666668e-06, "epoch": 3.035369774919614, "percentage": 61.3, "elapsed_time": "0:51:25", "remaining_time": "0:32:28", "throughput": "1031.89", "total_tokens": 3184320}
{"current_steps": 237, "total_steps": 385, "loss": 0.0325, "learning_rate": 1.975e-06, "epoch": 3.0482315112540195, "percentage": 61.56, "elapsed_time": "0:51:38", "remaining_time": "0:32:15", "throughput": "1031.71", "total_tokens": 3197216}
{"current_steps": 238, "total_steps": 385, "loss": 0.0172, "learning_rate": 1.9833333333333335e-06, "epoch": 3.0610932475884245, "percentage": 61.82, "elapsed_time": "0:51:51", "remaining_time": "0:32:02", "throughput": "1031.83", "total_tokens": 3211008}
{"current_steps": 239, "total_steps": 385, "loss": 0.05, "learning_rate": 1.991666666666667e-06, "epoch": 3.0739549839228295, "percentage": 62.08, "elapsed_time": "0:52:05", "remaining_time": "0:31:49", "throughput": "1032.03", "total_tokens": 3225152}
{"current_steps": 240, "total_steps": 385, "loss": 0.0134, "learning_rate": 2.0000000000000003e-06, "epoch": 3.0868167202572345, "percentage": 62.34, "elapsed_time": "0:52:18", "remaining_time": "0:31:35", "throughput": "1032.07", "total_tokens": 3238752}
{"current_steps": 241, "total_steps": 385, "loss": 0.0434, "learning_rate": 2.0083333333333337e-06, "epoch": 3.09967845659164, "percentage": 62.6, "elapsed_time": "0:52:31", "remaining_time": "0:31:22", "throughput": "1032.06", "total_tokens": 3252224}
{"current_steps": 242, "total_steps": 385, "loss": 0.0186, "learning_rate": 2.0166666666666667e-06, "epoch": 3.112540192926045, "percentage": 62.86, "elapsed_time": "0:52:44", "remaining_time": "0:31:09", "throughput": "1031.79", "total_tokens": 3264832}
{"current_steps": 243, "total_steps": 385, "loss": 0.0341, "learning_rate": 2.025e-06, "epoch": 3.12540192926045, "percentage": 63.12, "elapsed_time": "0:52:57", "remaining_time": "0:30:56", "throughput": "1031.70", "total_tokens": 3278048}
{"current_steps": 244, "total_steps": 385, "loss": 0.0386, "learning_rate": 2.0333333333333335e-06, "epoch": 3.1382636655948555, "percentage": 63.38, "elapsed_time": "0:53:10", "remaining_time": "0:30:43", "throughput": "1031.75", "total_tokens": 3291680}
{"current_steps": 245, "total_steps": 385, "loss": 0.0389, "learning_rate": 2.041666666666667e-06, "epoch": 3.1511254019292605, "percentage": 63.64, "elapsed_time": "0:53:23", "remaining_time": "0:30:30", "throughput": "1031.84", "total_tokens": 3305440}
{"current_steps": 246, "total_steps": 385, "loss": 0.0227, "learning_rate": 2.05e-06, "epoch": 3.1639871382636655, "percentage": 63.9, "elapsed_time": "0:53:36", "remaining_time": "0:30:17", "throughput": "1031.98", "total_tokens": 3319360}
{"current_steps": 247, "total_steps": 385, "loss": 0.0317, "learning_rate": 2.0583333333333337e-06, "epoch": 3.176848874598071, "percentage": 64.16, "elapsed_time": "0:53:49", "remaining_time": "0:30:04", "throughput": "1032.06", "total_tokens": 3333088}
{"current_steps": 248, "total_steps": 385, "loss": 0.0335, "learning_rate": 2.0666666666666666e-06, "epoch": 3.189710610932476, "percentage": 64.42, "elapsed_time": "0:54:02", "remaining_time": "0:29:51", "throughput": "1031.85", "total_tokens": 3345856}
{"current_steps": 249, "total_steps": 385, "loss": 0.0257, "learning_rate": 2.075e-06, "epoch": 3.202572347266881, "percentage": 64.68, "elapsed_time": "0:54:15", "remaining_time": "0:29:38", "throughput": "1031.71", "total_tokens": 3358880}
{"current_steps": 250, "total_steps": 385, "loss": 0.0244, "learning_rate": 2.0833333333333334e-06, "epoch": 3.215434083601286, "percentage": 64.94, "elapsed_time": "0:54:28", "remaining_time": "0:29:25", "throughput": "1031.99", "total_tokens": 3373312}
{"current_steps": 251, "total_steps": 385, "loss": 0.0285, "learning_rate": 2.091666666666667e-06, "epoch": 3.2282958199356915, "percentage": 65.19, "elapsed_time": "0:54:41", "remaining_time": "0:29:12", "throughput": "1031.85", "total_tokens": 3386336}
{"current_steps": 252, "total_steps": 385, "loss": 0.0093, "learning_rate": 2.1000000000000002e-06, "epoch": 3.2411575562700965, "percentage": 65.45, "elapsed_time": "0:54:54", "remaining_time": "0:28:58", "throughput": "1031.89", "total_tokens": 3399904}
{"current_steps": 253, "total_steps": 385, "loss": 0.0415, "learning_rate": 2.1083333333333336e-06, "epoch": 3.2540192926045015, "percentage": 65.71, "elapsed_time": "0:55:07", "remaining_time": "0:28:45", "throughput": "1031.75", "total_tokens": 3412896}
{"current_steps": 254, "total_steps": 385, "loss": 0.0239, "learning_rate": 2.116666666666667e-06, "epoch": 3.266881028938907, "percentage": 65.97, "elapsed_time": "0:55:20", "remaining_time": "0:28:32", "throughput": "1031.75", "total_tokens": 3426336}
{"current_steps": 255, "total_steps": 385, "loss": 0.0412, "learning_rate": 2.125e-06, "epoch": 3.279742765273312, "percentage": 66.23, "elapsed_time": "0:55:33", "remaining_time": "0:28:19", "throughput": "1031.71", "total_tokens": 3439680}
{"current_steps": 256, "total_steps": 385, "loss": 0.0503, "learning_rate": 2.133333333333334e-06, "epoch": 3.292604501607717, "percentage": 66.49, "elapsed_time": "0:55:47", "remaining_time": "0:28:06", "throughput": "1031.89", "total_tokens": 3453760}
{"current_steps": 257, "total_steps": 385, "loss": 0.0046, "learning_rate": 2.1416666666666668e-06, "epoch": 3.305466237942122, "percentage": 66.75, "elapsed_time": "0:56:00", "remaining_time": "0:27:53", "throughput": "1031.66", "total_tokens": 3466496}
{"current_steps": 258, "total_steps": 385, "loss": 0.041, "learning_rate": 2.15e-06, "epoch": 3.3183279742765275, "percentage": 67.01, "elapsed_time": "0:56:13", "remaining_time": "0:27:40", "throughput": "1031.77", "total_tokens": 3480352}
{"current_steps": 259, "total_steps": 385, "loss": 0.0257, "learning_rate": 2.1583333333333336e-06, "epoch": 3.3311897106109325, "percentage": 67.27, "elapsed_time": "0:56:26", "remaining_time": "0:27:27", "throughput": "1031.85", "total_tokens": 3494112}
{"current_steps": 260, "total_steps": 385, "loss": 0.0168, "learning_rate": 2.166666666666667e-06, "epoch": 3.3440514469453375, "percentage": 67.53, "elapsed_time": "0:56:39", "remaining_time": "0:27:14", "throughput": "1031.84", "total_tokens": 3507520}
{"current_steps": 261, "total_steps": 385, "loss": 0.0439, "learning_rate": 2.1750000000000004e-06, "epoch": 3.356913183279743, "percentage": 67.79, "elapsed_time": "0:56:52", "remaining_time": "0:27:01", "throughput": "1031.65", "total_tokens": 3520352}
{"current_steps": 262, "total_steps": 385, "loss": 0.0204, "learning_rate": 2.1833333333333333e-06, "epoch": 3.369774919614148, "percentage": 68.05, "elapsed_time": "0:57:05", "remaining_time": "0:26:48", "throughput": "1031.70", "total_tokens": 3533984}
{"current_steps": 263, "total_steps": 385, "loss": 0.0284, "learning_rate": 2.191666666666667e-06, "epoch": 3.382636655948553, "percentage": 68.31, "elapsed_time": "0:57:18", "remaining_time": "0:26:35", "throughput": "1031.59", "total_tokens": 3547072}
{"current_steps": 264, "total_steps": 385, "loss": 0.0684, "learning_rate": 2.2e-06, "epoch": 3.395498392282958, "percentage": 68.57, "elapsed_time": "0:57:31", "remaining_time": "0:26:21", "throughput": "1031.39", "total_tokens": 3559872}
{"current_steps": 265, "total_steps": 385, "loss": 0.0479, "learning_rate": 2.2083333333333335e-06, "epoch": 3.4083601286173635, "percentage": 68.83, "elapsed_time": "0:57:44", "remaining_time": "0:26:08", "throughput": "1031.24", "total_tokens": 3572832}
{"current_steps": 266, "total_steps": 385, "loss": 0.0434, "learning_rate": 2.216666666666667e-06, "epoch": 3.4212218649517685, "percentage": 69.09, "elapsed_time": "0:57:57", "remaining_time": "0:25:55", "throughput": "1031.11", "total_tokens": 3585792}
{"current_steps": 267, "total_steps": 385, "loss": 0.0213, "learning_rate": 2.2250000000000003e-06, "epoch": 3.4340836012861735, "percentage": 69.35, "elapsed_time": "0:58:10", "remaining_time": "0:25:42", "throughput": "1031.15", "total_tokens": 3599392}
{"current_steps": 268, "total_steps": 385, "loss": 0.0415, "learning_rate": 2.2333333333333333e-06, "epoch": 3.446945337620579, "percentage": 69.61, "elapsed_time": "0:58:23", "remaining_time": "0:25:29", "throughput": "1031.22", "total_tokens": 3613088}
{"current_steps": 269, "total_steps": 385, "loss": 0.0404, "learning_rate": 2.2416666666666667e-06, "epoch": 3.459807073954984, "percentage": 69.87, "elapsed_time": "0:58:36", "remaining_time": "0:25:16", "throughput": "1031.14", "total_tokens": 3626304}
{"current_steps": 270, "total_steps": 385, "loss": 0.0566, "learning_rate": 2.25e-06, "epoch": 3.472668810289389, "percentage": 70.13, "elapsed_time": "0:58:49", "remaining_time": "0:25:03", "throughput": "1031.20", "total_tokens": 3639968}
{"current_steps": 271, "total_steps": 385, "loss": 0.0509, "learning_rate": 2.2583333333333335e-06, "epoch": 3.485530546623794, "percentage": 70.39, "elapsed_time": "0:59:02", "remaining_time": "0:24:50", "throughput": "1031.20", "total_tokens": 3653472}
{"current_steps": 272, "total_steps": 385, "loss": 0.0385, "learning_rate": 2.266666666666667e-06, "epoch": 3.4983922829581995, "percentage": 70.65, "elapsed_time": "0:59:16", "remaining_time": "0:24:37", "throughput": "1031.23", "total_tokens": 3667104}
{"current_steps": 273, "total_steps": 385, "loss": 0.0225, "learning_rate": 2.2750000000000002e-06, "epoch": 3.5112540192926045, "percentage": 70.91, "elapsed_time": "0:59:29", "remaining_time": "0:24:24", "throughput": "1031.20", "total_tokens": 3680480}
{"current_steps": 274, "total_steps": 385, "loss": 0.0255, "learning_rate": 2.2833333333333336e-06, "epoch": 3.5241157556270095, "percentage": 71.17, "elapsed_time": "0:59:42", "remaining_time": "0:24:11", "throughput": "1031.11", "total_tokens": 3693568}
{"current_steps": 275, "total_steps": 385, "loss": 0.0531, "learning_rate": 2.2916666666666666e-06, "epoch": 3.536977491961415, "percentage": 71.43, "elapsed_time": "0:59:55", "remaining_time": "0:23:58", "throughput": "1030.96", "total_tokens": 3706496}
{"current_steps": 276, "total_steps": 385, "loss": 0.0095, "learning_rate": 2.3000000000000004e-06, "epoch": 3.54983922829582, "percentage": 71.69, "elapsed_time": "1:00:08", "remaining_time": "0:23:44", "throughput": "1031.07", "total_tokens": 3720352}
{"current_steps": 277, "total_steps": 385, "loss": 0.0229, "learning_rate": 2.3083333333333334e-06, "epoch": 3.562700964630225, "percentage": 71.95, "elapsed_time": "1:00:21", "remaining_time": "0:23:31", "throughput": "1031.17", "total_tokens": 3734176}
{"current_steps": 278, "total_steps": 385, "loss": 0.038, "learning_rate": 2.316666666666667e-06, "epoch": 3.57556270096463, "percentage": 72.21, "elapsed_time": "1:00:34", "remaining_time": "0:23:18", "throughput": "1031.41", "total_tokens": 3748544}
{"current_steps": 279, "total_steps": 385, "loss": 0.0316, "learning_rate": 2.325e-06, "epoch": 3.5884244372990355, "percentage": 72.47, "elapsed_time": "1:00:47", "remaining_time": "0:23:05", "throughput": "1031.40", "total_tokens": 3761984}
{"current_steps": 280, "total_steps": 385, "loss": 0.0861, "learning_rate": 2.3333333333333336e-06, "epoch": 3.6012861736334405, "percentage": 72.73, "elapsed_time": "1:01:00", "remaining_time": "0:22:52", "throughput": "1031.09", "total_tokens": 3774304}
{"current_steps": 281, "total_steps": 385, "loss": 0.0566, "learning_rate": 2.341666666666667e-06, "epoch": 3.6141479099678455, "percentage": 72.99, "elapsed_time": "1:01:13", "remaining_time": "0:22:39", "throughput": "1031.26", "total_tokens": 3788416}
{"current_steps": 282, "total_steps": 385, "loss": 0.0804, "learning_rate": 2.35e-06, "epoch": 3.627009646302251, "percentage": 73.25, "elapsed_time": "1:01:26", "remaining_time": "0:22:26", "throughput": "1031.33", "total_tokens": 3802112}
{"current_steps": 283, "total_steps": 385, "loss": 0.046, "learning_rate": 2.3583333333333338e-06, "epoch": 3.639871382636656, "percentage": 73.51, "elapsed_time": "1:01:39", "remaining_time": "0:22:13", "throughput": "1031.43", "total_tokens": 3815968}
{"current_steps": 284, "total_steps": 385, "loss": 0.0693, "learning_rate": 2.3666666666666667e-06, "epoch": 3.652733118971061, "percentage": 73.77, "elapsed_time": "1:01:52", "remaining_time": "0:22:00", "throughput": "1031.38", "total_tokens": 3829248}
{"current_steps": 285, "total_steps": 385, "loss": 0.0342, "learning_rate": 2.375e-06, "epoch": 3.665594855305466, "percentage": 74.03, "elapsed_time": "1:02:05", "remaining_time": "0:21:47", "throughput": "1031.47", "total_tokens": 3843072}
{"current_steps": 286, "total_steps": 385, "loss": 0.0479, "learning_rate": 2.3833333333333335e-06, "epoch": 3.6784565916398715, "percentage": 74.29, "elapsed_time": "1:02:18", "remaining_time": "0:21:34", "throughput": "1031.40", "total_tokens": 3856288}
{"current_steps": 287, "total_steps": 385, "loss": 0.0388, "learning_rate": 2.391666666666667e-06, "epoch": 3.6913183279742765, "percentage": 74.55, "elapsed_time": "1:02:32", "remaining_time": "0:21:21", "throughput": "1031.51", "total_tokens": 3870240}
{"current_steps": 288, "total_steps": 385, "loss": 0.0274, "learning_rate": 2.4000000000000003e-06, "epoch": 3.7041800643086815, "percentage": 74.81, "elapsed_time": "1:02:45", "remaining_time": "0:21:08", "throughput": "1031.62", "total_tokens": 3884096}
{"current_steps": 289, "total_steps": 385, "loss": 0.0259, "learning_rate": 2.4083333333333337e-06, "epoch": 3.717041800643087, "percentage": 75.06, "elapsed_time": "1:02:58", "remaining_time": "0:20:55", "throughput": "1031.83", "total_tokens": 3898368}
{"current_steps": 290, "total_steps": 385, "loss": 0.0367, "learning_rate": 2.4166666666666667e-06, "epoch": 3.729903536977492, "percentage": 75.32, "elapsed_time": "1:03:11", "remaining_time": "0:20:41", "throughput": "1031.66", "total_tokens": 3911200}
{"current_steps": 291, "total_steps": 385, "loss": 0.0661, "learning_rate": 2.425e-06, "epoch": 3.742765273311897, "percentage": 75.58, "elapsed_time": "1:03:24", "remaining_time": "0:20:28", "throughput": "1031.57", "total_tokens": 3924320}
{"current_steps": 292, "total_steps": 385, "loss": 0.0466, "learning_rate": 2.4333333333333335e-06, "epoch": 3.755627009646302, "percentage": 75.84, "elapsed_time": "1:03:37", "remaining_time": "0:20:15", "throughput": "1031.53", "total_tokens": 3937632}
{"current_steps": 293, "total_steps": 385, "loss": 0.0286, "learning_rate": 2.441666666666667e-06, "epoch": 3.7684887459807075, "percentage": 76.1, "elapsed_time": "1:03:50", "remaining_time": "0:20:02", "throughput": "1031.51", "total_tokens": 3951040}
{"current_steps": 294, "total_steps": 385, "loss": 0.0586, "learning_rate": 2.4500000000000003e-06, "epoch": 3.7813504823151125, "percentage": 76.36, "elapsed_time": "1:04:03", "remaining_time": "0:19:49", "throughput": "1031.36", "total_tokens": 3963936}
{"current_steps": 295, "total_steps": 385, "loss": 0.0329, "learning_rate": 2.4583333333333332e-06, "epoch": 3.7942122186495175, "percentage": 76.62, "elapsed_time": "1:04:16", "remaining_time": "0:19:36", "throughput": "1031.22", "total_tokens": 3976832}
{"current_steps": 296, "total_steps": 385, "loss": 0.0582, "learning_rate": 2.466666666666667e-06, "epoch": 3.807073954983923, "percentage": 76.88, "elapsed_time": "1:04:29", "remaining_time": "0:19:23", "throughput": "1031.30", "total_tokens": 3990592}
{"current_steps": 297, "total_steps": 385, "loss": 0.0312, "learning_rate": 2.475e-06, "epoch": 3.819935691318328, "percentage": 77.14, "elapsed_time": "1:04:42", "remaining_time": "0:19:10", "throughput": "1031.36", "total_tokens": 4004288}
{"current_steps": 298, "total_steps": 385, "loss": 0.0329, "learning_rate": 2.4833333333333334e-06, "epoch": 3.832797427652733, "percentage": 77.4, "elapsed_time": "1:04:55", "remaining_time": "0:18:57", "throughput": "1031.39", "total_tokens": 4017856}
{"current_steps": 299, "total_steps": 385, "loss": 0.0206, "learning_rate": 2.491666666666667e-06, "epoch": 3.845659163987138, "percentage": 77.66, "elapsed_time": "1:05:08", "remaining_time": "0:18:44", "throughput": "1031.35", "total_tokens": 4031168}
{"current_steps": 300, "total_steps": 385, "loss": 0.0426, "learning_rate": 2.5e-06, "epoch": 3.8585209003215435, "percentage": 77.92, "elapsed_time": "1:05:21", "remaining_time": "0:18:31", "throughput": "1031.30", "total_tokens": 4044416}
{"current_steps": 301, "total_steps": 385, "loss": 0.0179, "learning_rate": 2.5083333333333336e-06, "epoch": 3.8713826366559485, "percentage": 78.18, "elapsed_time": "1:05:34", "remaining_time": "0:18:18", "throughput": "1031.28", "total_tokens": 4057824}
{"current_steps": 302, "total_steps": 385, "loss": 0.0289, "learning_rate": 2.5166666666666666e-06, "epoch": 3.884244372990354, "percentage": 78.44, "elapsed_time": "1:05:47", "remaining_time": "0:18:04", "throughput": "1031.45", "total_tokens": 4071936}
{"current_steps": 303, "total_steps": 385, "loss": 0.0303, "learning_rate": 2.5250000000000004e-06, "epoch": 3.897106109324759, "percentage": 78.7, "elapsed_time": "1:06:00", "remaining_time": "0:17:51", "throughput": "1031.48", "total_tokens": 4085536}
{"current_steps": 304, "total_steps": 385, "loss": 0.046, "learning_rate": 2.5333333333333338e-06, "epoch": 3.909967845659164, "percentage": 78.96, "elapsed_time": "1:06:13", "remaining_time": "0:17:38", "throughput": "1031.62", "total_tokens": 4099552}
{"current_steps": 305, "total_steps": 385, "loss": 0.0523, "learning_rate": 2.5416666666666668e-06, "epoch": 3.922829581993569, "percentage": 79.22, "elapsed_time": "1:06:26", "remaining_time": "0:17:25", "throughput": "1031.55", "total_tokens": 4112736}
{"current_steps": 306, "total_steps": 385, "loss": 0.0329, "learning_rate": 2.55e-06, "epoch": 3.935691318327974, "percentage": 79.48, "elapsed_time": "1:06:39", "remaining_time": "0:17:12", "throughput": "1031.52", "total_tokens": 4126080}
{"current_steps": 307, "total_steps": 385, "loss": 0.0072, "learning_rate": 2.558333333333334e-06, "epoch": 3.9485530546623795, "percentage": 79.74, "elapsed_time": "1:06:53", "remaining_time": "0:16:59", "throughput": "1031.37", "total_tokens": 4138912}
{"current_steps": 308, "total_steps": 385, "loss": 0.0415, "learning_rate": 2.566666666666667e-06, "epoch": 3.9614147909967845, "percentage": 80.0, "elapsed_time": "1:07:06", "remaining_time": "0:16:46", "throughput": "1031.46", "total_tokens": 4152768}
{"current_steps": 309, "total_steps": 385, "loss": 0.0233, "learning_rate": 2.5750000000000003e-06, "epoch": 3.97427652733119, "percentage": 80.26, "elapsed_time": "1:07:19", "remaining_time": "0:16:33", "throughput": "1031.53", "total_tokens": 4166528}
{"current_steps": 310, "total_steps": 385, "loss": 0.0423, "learning_rate": 2.5833333333333337e-06, "epoch": 3.987138263665595, "percentage": 80.52, "elapsed_time": "1:07:32", "remaining_time": "0:16:20", "throughput": "1031.73", "total_tokens": 4180832}
{"current_steps": 311, "total_steps": 385, "loss": 0.0295, "learning_rate": 2.5916666666666667e-06, "epoch": 4.0, "percentage": 80.78, "elapsed_time": "1:07:45", "remaining_time": "0:16:07", "throughput": "1031.67", "total_tokens": 4194048}
{"current_steps": 312, "total_steps": 385, "loss": 0.0327, "learning_rate": 2.6e-06, "epoch": 4.012861736334405, "percentage": 81.04, "elapsed_time": "1:07:58", "remaining_time": "0:15:54", "throughput": "1031.56", "total_tokens": 4207040}
{"current_steps": 313, "total_steps": 385, "loss": 0.0301, "learning_rate": 2.608333333333333e-06, "epoch": 4.02572347266881, "percentage": 81.3, "elapsed_time": "1:08:11", "remaining_time": "0:15:41", "throughput": "1031.43", "total_tokens": 4220000}
{"current_steps": 314, "total_steps": 385, "loss": 0.0301, "learning_rate": 2.616666666666667e-06, "epoch": 4.038585209003215, "percentage": 81.56, "elapsed_time": "1:08:24", "remaining_time": "0:15:28", "throughput": "1031.40", "total_tokens": 4233344}
{"current_steps": 315, "total_steps": 385, "loss": 0.0281, "learning_rate": 2.6250000000000003e-06, "epoch": 4.051446945337621, "percentage": 81.82, "elapsed_time": "1:08:37", "remaining_time": "0:15:15", "throughput": "1031.24", "total_tokens": 4246144}
{"current_steps": 316, "total_steps": 385, "loss": 0.0136, "learning_rate": 2.6333333333333332e-06, "epoch": 4.064308681672026, "percentage": 82.08, "elapsed_time": "1:08:50", "remaining_time": "0:15:01", "throughput": "1031.56", "total_tokens": 4260992}
{"current_steps": 317, "total_steps": 385, "loss": 0.0219, "learning_rate": 2.6416666666666666e-06, "epoch": 4.077170418006431, "percentage": 82.34, "elapsed_time": "1:09:03", "remaining_time": "0:14:48", "throughput": "1031.48", "total_tokens": 4274112}
{"current_steps": 318, "total_steps": 385, "loss": 0.0044, "learning_rate": 2.6500000000000005e-06, "epoch": 4.090032154340836, "percentage": 82.6, "elapsed_time": "1:09:16", "remaining_time": "0:14:35", "throughput": "1031.55", "total_tokens": 4287904}
{"current_steps": 319, "total_steps": 385, "loss": 0.0335, "learning_rate": 2.6583333333333334e-06, "epoch": 4.102893890675241, "percentage": 82.86, "elapsed_time": "1:09:29", "remaining_time": "0:14:22", "throughput": "1031.59", "total_tokens": 4301568}
{"current_steps": 320, "total_steps": 385, "loss": 0.0053, "learning_rate": 2.666666666666667e-06, "epoch": 4.115755627009646, "percentage": 83.12, "elapsed_time": "1:09:42", "remaining_time": "0:14:09", "throughput": "1031.55", "total_tokens": 4314848}
{"current_steps": 321, "total_steps": 385, "loss": 0.0196, "learning_rate": 2.6750000000000002e-06, "epoch": 4.128617363344051, "percentage": 83.38, "elapsed_time": "1:09:55", "remaining_time": "0:13:56", "throughput": "1031.35", "total_tokens": 4327456}
{"current_steps": 322, "total_steps": 385, "loss": 0.0309, "learning_rate": 2.683333333333333e-06, "epoch": 4.141479099678457, "percentage": 83.64, "elapsed_time": "1:10:09", "remaining_time": "0:13:43", "throughput": "1031.36", "total_tokens": 4341024}
{"current_steps": 323, "total_steps": 385, "loss": 0.0382, "learning_rate": 2.691666666666667e-06, "epoch": 4.154340836012862, "percentage": 83.9, "elapsed_time": "1:10:22", "remaining_time": "0:13:30", "throughput": "1031.20", "total_tokens": 4353824}
{"current_steps": 324, "total_steps": 385, "loss": 0.046, "learning_rate": 2.7000000000000004e-06, "epoch": 4.167202572347267, "percentage": 84.16, "elapsed_time": "1:10:35", "remaining_time": "0:13:17", "throughput": "1031.28", "total_tokens": 4367616}
{"current_steps": 325, "total_steps": 385, "loss": 0.0133, "learning_rate": 2.7083333333333334e-06, "epoch": 4.180064308681672, "percentage": 84.42, "elapsed_time": "1:10:48", "remaining_time": "0:13:04", "throughput": "1031.13", "total_tokens": 4380448}
{"current_steps": 326, "total_steps": 385, "loss": 0.0265, "learning_rate": 2.7166666666666668e-06, "epoch": 4.192926045016077, "percentage": 84.68, "elapsed_time": "1:11:01", "remaining_time": "0:12:51", "throughput": "1031.13", "total_tokens": 4393920}
{"current_steps": 327, "total_steps": 385, "loss": 0.0084, "learning_rate": 2.7250000000000006e-06, "epoch": 4.205787781350482, "percentage": 84.94, "elapsed_time": "1:11:14", "remaining_time": "0:12:38", "throughput": "1031.11", "total_tokens": 4407264}
{"current_steps": 328, "total_steps": 385, "loss": 0.0382, "learning_rate": 2.7333333333333336e-06, "epoch": 4.218649517684887, "percentage": 85.19, "elapsed_time": "1:11:27", "remaining_time": "0:12:25", "throughput": "1031.02", "total_tokens": 4420352}
{"current_steps": 329, "total_steps": 385, "loss": 0.0101, "learning_rate": 2.741666666666667e-06, "epoch": 4.231511254019293, "percentage": 85.45, "elapsed_time": "1:11:40", "remaining_time": "0:12:11", "throughput": "1031.21", "total_tokens": 4434688}
{"current_steps": 330, "total_steps": 385, "loss": 0.0174, "learning_rate": 2.7500000000000004e-06, "epoch": 4.244372990353698, "percentage": 85.71, "elapsed_time": "1:11:53", "remaining_time": "0:11:58", "throughput": "1031.21", "total_tokens": 4448160}
{"current_steps": 331, "total_steps": 385, "loss": 0.023, "learning_rate": 2.7583333333333333e-06, "epoch": 4.257234726688103, "percentage": 85.97, "elapsed_time": "1:12:06", "remaining_time": "0:11:45", "throughput": "1031.11", "total_tokens": 4461184}
{"current_steps": 332, "total_steps": 385, "loss": 0.0162, "learning_rate": 2.766666666666667e-06, "epoch": 4.270096463022508, "percentage": 86.23, "elapsed_time": "1:12:19", "remaining_time": "0:11:32", "throughput": "1031.23", "total_tokens": 4475168}
{"current_steps": 333, "total_steps": 385, "loss": 0.0261, "learning_rate": 2.7750000000000005e-06, "epoch": 4.282958199356913, "percentage": 86.49, "elapsed_time": "1:12:32", "remaining_time": "0:11:19", "throughput": "1031.06", "total_tokens": 4487872}
{"current_steps": 334, "total_steps": 385, "loss": 0.0266, "learning_rate": 2.7833333333333335e-06, "epoch": 4.295819935691318, "percentage": 86.75, "elapsed_time": "1:12:45", "remaining_time": "0:11:06", "throughput": "1031.04", "total_tokens": 4501216}
{"current_steps": 335, "total_steps": 385, "loss": 0.0194, "learning_rate": 2.791666666666667e-06, "epoch": 4.308681672025724, "percentage": 87.01, "elapsed_time": "1:12:58", "remaining_time": "0:10:53", "throughput": "1030.96", "total_tokens": 4514336}
{"current_steps": 336, "total_steps": 385, "loss": 0.0058, "learning_rate": 2.8000000000000003e-06, "epoch": 4.321543408360129, "percentage": 87.27, "elapsed_time": "1:13:11", "remaining_time": "0:10:40", "throughput": "1031.08", "total_tokens": 4528288}
{"current_steps": 337, "total_steps": 385, "loss": 0.0065, "learning_rate": 2.8083333333333333e-06, "epoch": 4.334405144694534, "percentage": 87.53, "elapsed_time": "1:13:24", "remaining_time": "0:10:27", "throughput": "1031.06", "total_tokens": 4541728}
{"current_steps": 338, "total_steps": 385, "loss": 0.0202, "learning_rate": 2.816666666666667e-06, "epoch": 4.347266881028939, "percentage": 87.79, "elapsed_time": "1:13:38", "remaining_time": "0:10:14", "throughput": "1031.13", "total_tokens": 4555552}
{"current_steps": 339, "total_steps": 385, "loss": 0.0135, "learning_rate": 2.825e-06, "epoch": 4.360128617363344, "percentage": 88.05, "elapsed_time": "1:13:51", "remaining_time": "0:10:01", "throughput": "1031.08", "total_tokens": 4568768}
{"current_steps": 340, "total_steps": 385, "loss": 0.01, "learning_rate": 2.8333333333333335e-06, "epoch": 4.372990353697749, "percentage": 88.31, "elapsed_time": "1:14:04", "remaining_time": "0:09:48", "throughput": "1031.12", "total_tokens": 4582400}
{"current_steps": 341, "total_steps": 385, "loss": 0.0051, "learning_rate": 2.841666666666667e-06, "epoch": 4.385852090032154, "percentage": 88.57, "elapsed_time": "1:14:17", "remaining_time": "0:09:35", "throughput": "1031.01", "total_tokens": 4595360}
{"current_steps": 342, "total_steps": 385, "loss": 0.0293, "learning_rate": 2.85e-06, "epoch": 4.39871382636656, "percentage": 88.83, "elapsed_time": "1:14:30", "remaining_time": "0:09:22", "throughput": "1031.19", "total_tokens": 4609632}
{"current_steps": 343, "total_steps": 385, "loss": 0.046, "learning_rate": 2.8583333333333336e-06, "epoch": 4.411575562700965, "percentage": 89.09, "elapsed_time": "1:14:43", "remaining_time": "0:09:08", "throughput": "1031.23", "total_tokens": 4623232}
{"current_steps": 344, "total_steps": 385, "loss": 0.0024, "learning_rate": 2.866666666666667e-06, "epoch": 4.42443729903537, "percentage": 89.35, "elapsed_time": "1:14:56", "remaining_time": "0:08:55", "throughput": "1031.26", "total_tokens": 4636864}
{"current_steps": 345, "total_steps": 385, "loss": 0.0211, "learning_rate": 2.875e-06, "epoch": 4.437299035369775, "percentage": 89.61, "elapsed_time": "1:15:09", "remaining_time": "0:08:42", "throughput": "1031.10", "total_tokens": 4649600}
{"current_steps": 346, "total_steps": 385, "loss": 0.0229, "learning_rate": 2.8833333333333334e-06, "epoch": 4.45016077170418, "percentage": 89.87, "elapsed_time": "1:15:22", "remaining_time": "0:08:29", "throughput": "1031.11", "total_tokens": 4663072}
{"current_steps": 347, "total_steps": 385, "loss": 0.0103, "learning_rate": 2.8916666666666672e-06, "epoch": 4.463022508038585, "percentage": 90.13, "elapsed_time": "1:15:35", "remaining_time": "0:08:16", "throughput": "1031.19", "total_tokens": 4676960}
{"current_steps": 348, "total_steps": 385, "loss": 0.0262, "learning_rate": 2.9e-06, "epoch": 4.47588424437299, "percentage": 90.39, "elapsed_time": "1:15:48", "remaining_time": "0:08:03", "throughput": "1031.12", "total_tokens": 4690080}
{"current_steps": 349, "total_steps": 385, "loss": 0.0295, "learning_rate": 2.9083333333333336e-06, "epoch": 4.488745980707396, "percentage": 90.65, "elapsed_time": "1:16:01", "remaining_time": "0:07:50", "throughput": "1031.24", "total_tokens": 4704064}
{"current_steps": 350, "total_steps": 385, "loss": 0.0149, "learning_rate": 2.916666666666667e-06, "epoch": 4.501607717041801, "percentage": 90.91, "elapsed_time": "1:16:14", "remaining_time": "0:07:37", "throughput": "1031.25", "total_tokens": 4717536}
{"current_steps": 351, "total_steps": 385, "loss": 0.0337, "learning_rate": 2.925e-06, "epoch": 4.514469453376206, "percentage": 91.17, "elapsed_time": "1:16:27", "remaining_time": "0:07:24", "throughput": "1031.25", "total_tokens": 4731040}
{"current_steps": 352, "total_steps": 385, "loss": 0.0318, "learning_rate": 2.9333333333333338e-06, "epoch": 4.527331189710611, "percentage": 91.43, "elapsed_time": "1:16:40", "remaining_time": "0:07:11", "throughput": "1031.32", "total_tokens": 4744864}
{"current_steps": 353, "total_steps": 385, "loss": 0.0213, "learning_rate": 2.941666666666667e-06, "epoch": 4.540192926045016, "percentage": 91.69, "elapsed_time": "1:16:53", "remaining_time": "0:06:58", "throughput": "1031.30", "total_tokens": 4758240}
{"current_steps": 354, "total_steps": 385, "loss": 0.0048, "learning_rate": 2.95e-06, "epoch": 4.553054662379421, "percentage": 91.95, "elapsed_time": "1:17:06", "remaining_time": "0:06:45", "throughput": "1031.03", "total_tokens": 4770368}
{"current_steps": 355, "total_steps": 385, "loss": 0.0326, "learning_rate": 2.9583333333333335e-06, "epoch": 4.565916398713826, "percentage": 92.21, "elapsed_time": "1:17:19", "remaining_time": "0:06:32", "throughput": "1030.92", "total_tokens": 4783328}
{"current_steps": 356, "total_steps": 385, "loss": 0.013, "learning_rate": 2.9666666666666673e-06, "epoch": 4.578778135048232, "percentage": 92.47, "elapsed_time": "1:17:32", "remaining_time": "0:06:19", "throughput": "1030.99", "total_tokens": 4797088}
{"current_steps": 357, "total_steps": 385, "loss": 0.0293, "learning_rate": 2.9750000000000003e-06, "epoch": 4.591639871382637, "percentage": 92.73, "elapsed_time": "1:17:45", "remaining_time": "0:06:05", "throughput": "1030.97", "total_tokens": 4810464}
{"current_steps": 358, "total_steps": 385, "loss": 0.0411, "learning_rate": 2.9833333333333337e-06, "epoch": 4.604501607717042, "percentage": 92.99, "elapsed_time": "1:17:59", "remaining_time": "0:05:52", "throughput": "1030.93", "total_tokens": 4823744}
{"current_steps": 359, "total_steps": 385, "loss": 0.0389, "learning_rate": 2.991666666666667e-06, "epoch": 4.617363344051447, "percentage": 93.25, "elapsed_time": "1:18:12", "remaining_time": "0:05:39", "throughput": "1031.06", "total_tokens": 4837824}
{"current_steps": 360, "total_steps": 385, "loss": 0.0395, "learning_rate": 3e-06, "epoch": 4.630225080385852, "percentage": 93.51, "elapsed_time": "1:18:25", "remaining_time": "0:05:26", "throughput": "1031.22", "total_tokens": 4852096}
{"current_steps": 361, "total_steps": 385, "loss": 0.0065, "learning_rate": 3.0083333333333335e-06, "epoch": 4.643086816720257, "percentage": 93.77, "elapsed_time": "1:18:38", "remaining_time": "0:05:13", "throughput": "1031.24", "total_tokens": 4865600}
{"current_steps": 362, "total_steps": 385, "loss": 0.0294, "learning_rate": 3.0166666666666673e-06, "epoch": 4.655948553054662, "percentage": 94.03, "elapsed_time": "1:18:51", "remaining_time": "0:05:00", "throughput": "1031.15", "total_tokens": 4878592}
{"current_steps": 363, "total_steps": 385, "loss": 0.0192, "learning_rate": 3.0250000000000003e-06, "epoch": 4.668810289389068, "percentage": 94.29, "elapsed_time": "1:19:04", "remaining_time": "0:04:47", "throughput": "1031.15", "total_tokens": 4892064}
{"current_steps": 364, "total_steps": 385, "loss": 0.0179, "learning_rate": 3.0333333333333337e-06, "epoch": 4.681672025723473, "percentage": 94.55, "elapsed_time": "1:19:17", "remaining_time": "0:04:34", "throughput": "1031.31", "total_tokens": 4906304}
{"current_steps": 365, "total_steps": 385, "loss": 0.0131, "learning_rate": 3.0416666666666666e-06, "epoch": 4.694533762057878, "percentage": 94.81, "elapsed_time": "1:19:30", "remaining_time": "0:04:21", "throughput": "1031.45", "total_tokens": 4920480}
{"current_steps": 366, "total_steps": 385, "loss": 0.0216, "learning_rate": 3.05e-06, "epoch": 4.707395498392283, "percentage": 95.06, "elapsed_time": "1:19:43", "remaining_time": "0:04:08", "throughput": "1031.33", "total_tokens": 4933376}
{"current_steps": 367, "total_steps": 385, "loss": 0.0171, "learning_rate": 3.058333333333334e-06, "epoch": 4.720257234726688, "percentage": 95.32, "elapsed_time": "1:19:56", "remaining_time": "0:03:55", "throughput": "1031.54", "total_tokens": 4947872}
{"current_steps": 368, "total_steps": 385, "loss": 0.0129, "learning_rate": 3.066666666666667e-06, "epoch": 4.733118971061093, "percentage": 95.58, "elapsed_time": "1:20:09", "remaining_time": "0:03:42", "throughput": "1031.53", "total_tokens": 4961280}
{"current_steps": 369, "total_steps": 385, "loss": 0.0268, "learning_rate": 3.075e-06, "epoch": 4.745980707395498, "percentage": 95.84, "elapsed_time": "1:20:22", "remaining_time": "0:03:29", "throughput": "1031.60", "total_tokens": 4975072}
{"current_steps": 370, "total_steps": 385, "loss": 0.0313, "learning_rate": 3.0833333333333336e-06, "epoch": 4.758842443729904, "percentage": 96.1, "elapsed_time": "1:20:35", "remaining_time": "0:03:16", "throughput": "1031.67", "total_tokens": 4988928}
{"current_steps": 371, "total_steps": 385, "loss": 0.0197, "learning_rate": 3.0916666666666666e-06, "epoch": 4.771704180064309, "percentage": 96.36, "elapsed_time": "1:20:48", "remaining_time": "0:03:02", "throughput": "1031.73", "total_tokens": 5002656}
{"current_steps": 372, "total_steps": 385, "loss": 0.0051, "learning_rate": 3.1000000000000004e-06, "epoch": 4.784565916398714, "percentage": 96.62, "elapsed_time": "1:21:01", "remaining_time": "0:02:49", "throughput": "1031.89", "total_tokens": 5016928}
{"current_steps": 373, "total_steps": 385, "loss": 0.0107, "learning_rate": 3.1083333333333338e-06, "epoch": 4.797427652733119, "percentage": 96.88, "elapsed_time": "1:21:15", "remaining_time": "0:02:36", "throughput": "1031.88", "total_tokens": 5030432}
{"current_steps": 374, "total_steps": 385, "loss": 0.0299, "learning_rate": 3.1166666666666668e-06, "epoch": 4.810289389067524, "percentage": 97.14, "elapsed_time": "1:21:28", "remaining_time": "0:02:23", "throughput": "1031.92", "total_tokens": 5044128}
{"current_steps": 375, "total_steps": 385, "loss": 0.0549, "learning_rate": 3.125e-06, "epoch": 4.823151125401929, "percentage": 97.4, "elapsed_time": "1:21:41", "remaining_time": "0:02:10", "throughput": "1032.12", "total_tokens": 5058592}
{"current_steps": 376, "total_steps": 385, "loss": 0.0163, "learning_rate": 3.133333333333334e-06, "epoch": 4.836012861736334, "percentage": 97.66, "elapsed_time": "1:21:54", "remaining_time": "0:01:57", "throughput": "1032.05", "total_tokens": 5071680}
{"current_steps": 377, "total_steps": 385, "loss": 0.0133, "learning_rate": 3.141666666666667e-06, "epoch": 4.84887459807074, "percentage": 97.92, "elapsed_time": "1:22:07", "remaining_time": "0:01:44", "throughput": "1032.16", "total_tokens": 5085760}
{"current_steps": 378, "total_steps": 385, "loss": 0.0338, "learning_rate": 3.1500000000000003e-06, "epoch": 4.861736334405145, "percentage": 98.18, "elapsed_time": "1:22:20", "remaining_time": "0:01:31", "throughput": "1032.15", "total_tokens": 5099168}
{"current_steps": 379, "total_steps": 385, "loss": 0.0219, "learning_rate": 3.1583333333333337e-06, "epoch": 4.87459807073955, "percentage": 98.44, "elapsed_time": "1:22:33", "remaining_time": "0:01:18", "throughput": "1032.12", "total_tokens": 5112512}
{"current_steps": 380, "total_steps": 385, "loss": 0.0113, "learning_rate": 3.1666666666666667e-06, "epoch": 4.887459807073955, "percentage": 98.7, "elapsed_time": "1:22:46", "remaining_time": "0:01:05", "throughput": "1032.06", "total_tokens": 5125696}
{"current_steps": 381, "total_steps": 385, "loss": 0.0297, "learning_rate": 3.175e-06, "epoch": 4.90032154340836, "percentage": 98.96, "elapsed_time": "1:22:59", "remaining_time": "0:00:52", "throughput": "1031.99", "total_tokens": 5138816}
{"current_steps": 382, "total_steps": 385, "loss": 0.0417, "learning_rate": 3.183333333333334e-06, "epoch": 4.913183279742765, "percentage": 99.22, "elapsed_time": "1:23:12", "remaining_time": "0:00:39", "throughput": "1031.99", "total_tokens": 5152320}
{"current_steps": 383, "total_steps": 385, "loss": 0.027, "learning_rate": 3.191666666666667e-06, "epoch": 4.92604501607717, "percentage": 99.48, "elapsed_time": "1:23:25", "remaining_time": "0:00:26", "throughput": "1031.98", "total_tokens": 5165696}
{"current_steps": 384, "total_steps": 385, "loss": 0.0271, "learning_rate": 3.2000000000000003e-06, "epoch": 4.938906752411576, "percentage": 99.74, "elapsed_time": "1:23:38", "remaining_time": "0:00:13", "throughput": "1031.83", "total_tokens": 5178368}
{"current_steps": 385, "total_steps": 385, "loss": 0.0207, "learning_rate": 3.2083333333333337e-06, "epoch": 4.951768488745981, "percentage": 100.0, "elapsed_time": "1:23:51", "remaining_time": "0:00:00", "throughput": "1032.00", "total_tokens": 5192736}
{"current_steps": 385, "total_steps": 385, "epoch": 4.951768488745981, "percentage": 100.0, "elapsed_time": "1:24:52", "remaining_time": "0:00:00", "throughput": "1019.69", "total_tokens": 5192736}