ben81828's picture
Training in progress, step 300
7d96018 verified
raw
history blame
15.9 kB
{"current_steps": 5, "total_steps": 3882, "loss": 3.0388, "lr": 2.564102564102564e-06, "epoch": 0.0025753283543651817, "percentage": 0.13, "elapsed_time": "0:02:49", "remaining_time": "1 day, 12:31:05", "throughput": 345.02, "total_tokens": 58496}
{"current_steps": 10, "total_steps": 3882, "loss": 2.9831, "lr": 5.128205128205128e-06, "epoch": 0.0051506567087303634, "percentage": 0.26, "elapsed_time": "0:04:19", "remaining_time": "1 day, 3:52:53", "throughput": 451.18, "total_tokens": 116960}
{"current_steps": 15, "total_steps": 3882, "loss": 2.8696, "lr": 7.692307692307694e-06, "epoch": 0.007725985063095545, "percentage": 0.39, "elapsed_time": "0:05:47", "remaining_time": "1 day, 0:54:38", "throughput": 504.36, "total_tokens": 175448}
{"current_steps": 20, "total_steps": 3882, "loss": 2.6316, "lr": 1.0256410256410256e-05, "epoch": 0.010301313417460727, "percentage": 0.52, "elapsed_time": "0:07:17", "remaining_time": "23:26:52", "throughput": 535.17, "total_tokens": 233944}
{"current_steps": 25, "total_steps": 3882, "loss": 1.9707, "lr": 1.282051282051282e-05, "epoch": 0.012876641771825908, "percentage": 0.64, "elapsed_time": "0:08:45", "remaining_time": "22:30:52", "throughput": 556.6, "total_tokens": 292416}
{"current_steps": 30, "total_steps": 3882, "loss": 1.3782, "lr": 1.5384615384615387e-05, "epoch": 0.01545197012619109, "percentage": 0.77, "elapsed_time": "0:10:14", "remaining_time": "21:55:33", "throughput": 570.81, "total_tokens": 350904}
{"current_steps": 35, "total_steps": 3882, "loss": 1.0628, "lr": 1.794871794871795e-05, "epoch": 0.018027298480556272, "percentage": 0.9, "elapsed_time": "0:11:42", "remaining_time": "21:27:25", "throughput": 582.52, "total_tokens": 409384}
{"current_steps": 40, "total_steps": 3882, "loss": 0.9789, "lr": 2.0512820512820512e-05, "epoch": 0.020602626834921454, "percentage": 1.03, "elapsed_time": "0:13:11", "remaining_time": "21:07:29", "throughput": 590.91, "total_tokens": 467864}
{"current_steps": 45, "total_steps": 3882, "loss": 0.9327, "lr": 2.307692307692308e-05, "epoch": 0.023177955189286635, "percentage": 1.16, "elapsed_time": "0:14:39", "remaining_time": "20:50:06", "throughput": 598.39, "total_tokens": 526384}
{"current_steps": 50, "total_steps": 3882, "loss": 0.9233, "lr": 2.564102564102564e-05, "epoch": 0.025753283543651816, "percentage": 1.29, "elapsed_time": "0:16:09", "remaining_time": "20:38:40", "throughput": 603.11, "total_tokens": 584856}
{"current_steps": 50, "total_steps": 3882, "eval_loss": 0.9281821846961975, "epoch": 0.025753283543651816, "percentage": 1.29, "elapsed_time": "0:16:57", "remaining_time": "21:40:17", "throughput": 574.53, "total_tokens": 584856}
{"current_steps": 55, "total_steps": 3882, "loss": 0.897, "lr": 2.8205128205128207e-05, "epoch": 0.028328611898016998, "percentage": 1.42, "elapsed_time": "0:18:34", "remaining_time": "21:32:15", "throughput": 577.35, "total_tokens": 643344}
{"current_steps": 60, "total_steps": 3882, "loss": 0.9169, "lr": 3.0769230769230774e-05, "epoch": 0.03090394025238218, "percentage": 1.55, "elapsed_time": "0:20:03", "remaining_time": "21:17:49", "throughput": 583.09, "total_tokens": 701808}
{"current_steps": 65, "total_steps": 3882, "loss": 0.9019, "lr": 3.3333333333333335e-05, "epoch": 0.03347926860674736, "percentage": 1.67, "elapsed_time": "0:21:32", "remaining_time": "21:04:47", "throughput": 588.34, "total_tokens": 760304}
{"current_steps": 70, "total_steps": 3882, "loss": 0.8996, "lr": 3.58974358974359e-05, "epoch": 0.036054596961112545, "percentage": 1.8, "elapsed_time": "0:23:02", "remaining_time": "20:55:11", "throughput": 592.04, "total_tokens": 818760}
{"current_steps": 75, "total_steps": 3882, "loss": 0.9073, "lr": 3.846153846153846e-05, "epoch": 0.03862992531547772, "percentage": 1.93, "elapsed_time": "0:24:32", "remaining_time": "20:46:08", "throughput": 595.56, "total_tokens": 877256}
{"current_steps": 80, "total_steps": 3882, "loss": 0.9081, "lr": 4.1025641025641023e-05, "epoch": 0.04120525366984291, "percentage": 2.06, "elapsed_time": "0:26:03", "remaining_time": "20:38:41", "throughput": 598.36, "total_tokens": 935752}
{"current_steps": 85, "total_steps": 3882, "loss": 0.906, "lr": 4.358974358974359e-05, "epoch": 0.043780582024208085, "percentage": 2.19, "elapsed_time": "0:27:32", "remaining_time": "20:30:19", "throughput": 601.63, "total_tokens": 994216}
{"current_steps": 90, "total_steps": 3882, "loss": 0.8952, "lr": 4.615384615384616e-05, "epoch": 0.04635591037857327, "percentage": 2.32, "elapsed_time": "0:29:02", "remaining_time": "20:23:48", "throughput": 604.04, "total_tokens": 1052704}
{"current_steps": 95, "total_steps": 3882, "loss": 0.8996, "lr": 4.871794871794872e-05, "epoch": 0.04893123873293845, "percentage": 2.45, "elapsed_time": "0:30:31", "remaining_time": "20:17:00", "throughput": 606.61, "total_tokens": 1111176}
{"current_steps": 100, "total_steps": 3882, "loss": 0.9024, "lr": 5.128205128205128e-05, "epoch": 0.05150656708730363, "percentage": 2.58, "elapsed_time": "0:32:01", "remaining_time": "20:10:52", "throughput": 608.88, "total_tokens": 1169664}
{"current_steps": 100, "total_steps": 3882, "eval_loss": 0.911374032497406, "epoch": 0.05150656708730363, "percentage": 2.58, "elapsed_time": "0:32:20", "remaining_time": "20:23:12", "throughput": 602.74, "total_tokens": 1169664}
{"current_steps": 105, "total_steps": 3882, "loss": 0.9142, "lr": 5.384615384615385e-05, "epoch": 0.05408189544166881, "percentage": 2.7, "elapsed_time": "0:33:57", "remaining_time": "20:21:47", "throughput": 602.63, "total_tokens": 1228112}
{"current_steps": 110, "total_steps": 3882, "loss": 0.9054, "lr": 5.6410256410256414e-05, "epoch": 0.056657223796033995, "percentage": 2.83, "elapsed_time": "0:35:26", "remaining_time": "20:15:28", "throughput": 604.96, "total_tokens": 1286608}
{"current_steps": 115, "total_steps": 3882, "loss": 0.8997, "lr": 5.897435897435898e-05, "epoch": 0.05923255215039917, "percentage": 2.96, "elapsed_time": "0:36:56", "remaining_time": "20:10:10", "throughput": 606.8, "total_tokens": 1345072}
{"current_steps": 120, "total_steps": 3882, "loss": 0.8988, "lr": 6.153846153846155e-05, "epoch": 0.06180788050476436, "percentage": 3.09, "elapsed_time": "0:38:25", "remaining_time": "20:04:43", "throughput": 608.73, "total_tokens": 1403544}
{"current_steps": 125, "total_steps": 3882, "loss": 0.9087, "lr": 6.410256410256412e-05, "epoch": 0.06438320885912954, "percentage": 3.22, "elapsed_time": "0:39:56", "remaining_time": "20:00:26", "throughput": 610.09, "total_tokens": 1462024}
{"current_steps": 130, "total_steps": 3882, "loss": 0.8961, "lr": 6.666666666666667e-05, "epoch": 0.06695853721349472, "percentage": 3.35, "elapsed_time": "0:41:25", "remaining_time": "19:55:28", "throughput": 611.82, "total_tokens": 1520528}
{"current_steps": 135, "total_steps": 3882, "loss": 0.903, "lr": 6.923076923076924e-05, "epoch": 0.0695338655678599, "percentage": 3.48, "elapsed_time": "0:42:56", "remaining_time": "19:51:42", "throughput": 612.94, "total_tokens": 1579024}
{"current_steps": 140, "total_steps": 3882, "loss": 0.899, "lr": 7.17948717948718e-05, "epoch": 0.07210919392222509, "percentage": 3.61, "elapsed_time": "0:44:25", "remaining_time": "19:47:14", "throughput": 614.42, "total_tokens": 1637504}
{"current_steps": 145, "total_steps": 3882, "loss": 0.9071, "lr": 7.435897435897436e-05, "epoch": 0.07468452227659027, "percentage": 3.74, "elapsed_time": "0:45:55", "remaining_time": "19:43:28", "throughput": 615.57, "total_tokens": 1696024}
{"current_steps": 150, "total_steps": 3882, "loss": 0.9045, "lr": 7.692307692307693e-05, "epoch": 0.07725985063095545, "percentage": 3.86, "elapsed_time": "0:47:24", "remaining_time": "19:39:19", "throughput": 616.91, "total_tokens": 1754512}
{"current_steps": 150, "total_steps": 3882, "eval_loss": 0.8934853076934814, "epoch": 0.07725985063095545, "percentage": 3.86, "elapsed_time": "0:47:43", "remaining_time": "19:47:33", "throughput": 612.63, "total_tokens": 1754512}
{"current_steps": 155, "total_steps": 3882, "loss": 0.8992, "lr": 7.948717948717948e-05, "epoch": 0.07983517898532062, "percentage": 3.99, "elapsed_time": "0:49:20", "remaining_time": "19:46:21", "throughput": 612.43, "total_tokens": 1812976}
{"current_steps": 160, "total_steps": 3882, "loss": 0.8965, "lr": 8.205128205128205e-05, "epoch": 0.08241050733968582, "percentage": 4.12, "elapsed_time": "0:50:48", "remaining_time": "19:42:01", "throughput": 613.85, "total_tokens": 1871464}
{"current_steps": 165, "total_steps": 3882, "loss": 0.9094, "lr": 8.461538461538461e-05, "epoch": 0.08498583569405099, "percentage": 4.25, "elapsed_time": "0:52:18", "remaining_time": "19:38:32", "throughput": 614.83, "total_tokens": 1929928}
{"current_steps": 170, "total_steps": 3882, "loss": 0.903, "lr": 8.717948717948718e-05, "epoch": 0.08756116404841617, "percentage": 4.38, "elapsed_time": "0:53:47", "remaining_time": "19:34:27", "throughput": 616.14, "total_tokens": 1988432}
{"current_steps": 175, "total_steps": 3882, "loss": 0.902, "lr": 8.974358974358975e-05, "epoch": 0.09013649240278135, "percentage": 4.51, "elapsed_time": "0:55:16", "remaining_time": "19:30:56", "throughput": 617.16, "total_tokens": 2046920}
{"current_steps": 180, "total_steps": 3882, "loss": 0.9006, "lr": 9.230769230769232e-05, "epoch": 0.09271182075714654, "percentage": 4.64, "elapsed_time": "0:56:45", "remaining_time": "19:27:28", "throughput": 618.16, "total_tokens": 2105392}
{"current_steps": 185, "total_steps": 3882, "loss": 0.9042, "lr": 9.487179487179487e-05, "epoch": 0.09528714911151172, "percentage": 4.77, "elapsed_time": "0:58:15", "remaining_time": "19:24:13", "throughput": 619.04, "total_tokens": 2163872}
{"current_steps": 190, "total_steps": 3882, "loss": 0.9096, "lr": 9.743589743589744e-05, "epoch": 0.0978624774658769, "percentage": 4.89, "elapsed_time": "0:59:43", "remaining_time": "19:20:39", "throughput": 620.11, "total_tokens": 2222352}
{"current_steps": 195, "total_steps": 3882, "loss": 0.9037, "lr": 0.0001, "epoch": 0.10043780582024209, "percentage": 5.02, "elapsed_time": "1:01:11", "remaining_time": "19:17:04", "throughput": 621.18, "total_tokens": 2280800}
{"current_steps": 200, "total_steps": 3882, "loss": 0.904, "lr": 9.999954623308172e-05, "epoch": 0.10301313417460727, "percentage": 5.15, "elapsed_time": "1:02:41", "remaining_time": "19:14:00", "throughput": 621.98, "total_tokens": 2339304}
{"current_steps": 200, "total_steps": 3882, "eval_loss": 0.8980139493942261, "epoch": 0.10301313417460727, "percentage": 5.15, "elapsed_time": "1:03:00", "remaining_time": "19:19:56", "throughput": 618.8, "total_tokens": 2339304}
{"current_steps": 205, "total_steps": 3882, "loss": 0.9029, "lr": 9.999818494056303e-05, "epoch": 0.10558846252897244, "percentage": 5.28, "elapsed_time": "1:04:33", "remaining_time": "19:17:54", "throughput": 619.05, "total_tokens": 2397808}
{"current_steps": 210, "total_steps": 3882, "loss": 0.9005, "lr": 9.99959161471523e-05, "epoch": 0.10816379088333762, "percentage": 5.41, "elapsed_time": "1:06:02", "remaining_time": "19:14:52", "throughput": 619.83, "total_tokens": 2456288}
{"current_steps": 215, "total_steps": 3882, "loss": 0.9096, "lr": 9.99927398940297e-05, "epoch": 0.11073911923770281, "percentage": 5.54, "elapsed_time": "1:07:30", "remaining_time": "19:11:25", "throughput": 620.85, "total_tokens": 2514784}
{"current_steps": 220, "total_steps": 3882, "loss": 0.9036, "lr": 9.998865623884635e-05, "epoch": 0.11331444759206799, "percentage": 5.67, "elapsed_time": "1:08:59", "remaining_time": "19:08:31", "throughput": 621.56, "total_tokens": 2573240}
{"current_steps": 225, "total_steps": 3882, "loss": 0.901, "lr": 9.998366525572336e-05, "epoch": 0.11588977594643317, "percentage": 5.8, "elapsed_time": "1:10:27", "remaining_time": "19:05:14", "throughput": 622.48, "total_tokens": 2631672}
{"current_steps": 230, "total_steps": 3882, "loss": 0.9018, "lr": 9.997776703525046e-05, "epoch": 0.11846510430079835, "percentage": 5.92, "elapsed_time": "1:11:56", "remaining_time": "19:02:26", "throughput": 623.14, "total_tokens": 2690112}
{"current_steps": 235, "total_steps": 3882, "loss": 0.8934, "lr": 9.997096168448432e-05, "epoch": 0.12104043265516354, "percentage": 6.05, "elapsed_time": "1:13:24", "remaining_time": "18:59:11", "throughput": 624.07, "total_tokens": 2748608}
{"current_steps": 240, "total_steps": 3882, "loss": 0.8876, "lr": 9.996324932694668e-05, "epoch": 0.12361576100952872, "percentage": 6.18, "elapsed_time": "1:14:53", "remaining_time": "18:56:31", "throughput": 624.67, "total_tokens": 2807080}
{"current_steps": 245, "total_steps": 3882, "loss": 0.9084, "lr": 9.995463010262206e-05, "epoch": 0.1261910893638939, "percentage": 6.31, "elapsed_time": "1:16:21", "remaining_time": "18:53:29", "throughput": 625.48, "total_tokens": 2865520}
{"current_steps": 250, "total_steps": 3882, "loss": 0.9106, "lr": 9.994510416795519e-05, "epoch": 0.12876641771825909, "percentage": 6.44, "elapsed_time": "1:17:50", "remaining_time": "18:50:56", "throughput": 626.03, "total_tokens": 2924016}
{"current_steps": 250, "total_steps": 3882, "eval_loss": 0.8958488702774048, "epoch": 0.12876641771825909, "percentage": 6.44, "elapsed_time": "1:18:10", "remaining_time": "18:55:39", "throughput": 623.43, "total_tokens": 2924016}
{"current_steps": 255, "total_steps": 3882, "loss": 0.9012, "lr": 9.993467169584824e-05, "epoch": 0.13134174607262425, "percentage": 6.57, "elapsed_time": "1:19:44", "remaining_time": "18:54:06", "throughput": 623.43, "total_tokens": 2982520}
{"current_steps": 260, "total_steps": 3882, "loss": 0.9069, "lr": 9.992333287565765e-05, "epoch": 0.13391707442698944, "percentage": 6.7, "elapsed_time": "1:21:14", "remaining_time": "18:51:41", "throughput": 623.9, "total_tokens": 3041008}
{"current_steps": 265, "total_steps": 3882, "loss": 0.8918, "lr": 9.991108791319066e-05, "epoch": 0.13649240278135463, "percentage": 6.83, "elapsed_time": "1:22:42", "remaining_time": "18:48:57", "throughput": 624.54, "total_tokens": 3099464}
{"current_steps": 270, "total_steps": 3882, "loss": 0.8996, "lr": 9.989793703070163e-05, "epoch": 0.1390677311357198, "percentage": 6.96, "elapsed_time": "1:24:13", "remaining_time": "18:46:38", "throughput": 624.96, "total_tokens": 3157944}
{"current_steps": 275, "total_steps": 3882, "loss": 0.9009, "lr": 9.988388046688799e-05, "epoch": 0.141643059490085, "percentage": 7.08, "elapsed_time": "1:25:41", "remaining_time": "18:44:03", "throughput": 625.53, "total_tokens": 3216448}
{"current_steps": 280, "total_steps": 3882, "loss": 0.9059, "lr": 9.986891847688587e-05, "epoch": 0.14421838784445018, "percentage": 7.21, "elapsed_time": "1:27:11", "remaining_time": "18:41:42", "throughput": 625.97, "total_tokens": 3274928}
{"current_steps": 285, "total_steps": 3882, "loss": 0.8939, "lr": 9.985305133226553e-05, "epoch": 0.14679371619881534, "percentage": 7.34, "elapsed_time": "1:28:41", "remaining_time": "18:39:25", "throughput": 626.38, "total_tokens": 3333408}
{"current_steps": 290, "total_steps": 3882, "loss": 0.8899, "lr": 9.983627932102638e-05, "epoch": 0.14936904455318054, "percentage": 7.47, "elapsed_time": "1:30:10", "remaining_time": "18:37:01", "throughput": 626.86, "total_tokens": 3391896}
{"current_steps": 295, "total_steps": 3882, "loss": 0.9092, "lr": 9.981860274759173e-05, "epoch": 0.1519443729075457, "percentage": 7.6, "elapsed_time": "1:31:41", "remaining_time": "18:34:51", "throughput": 627.2, "total_tokens": 3450392}
{"current_steps": 300, "total_steps": 3882, "loss": 0.8901, "lr": 9.980002193280342e-05, "epoch": 0.1545197012619109, "percentage": 7.73, "elapsed_time": "1:33:10", "remaining_time": "18:32:25", "throughput": 627.7, "total_tokens": 3508888}
{"current_steps": 300, "total_steps": 3882, "eval_loss": 0.8932263255119324, "epoch": 0.1545197012619109, "percentage": 7.73, "elapsed_time": "1:33:29", "remaining_time": "18:36:21", "throughput": 625.48, "total_tokens": 3508888}