|
{"current_steps": 5, "total_steps": 3882, "loss": 3.0388, "lr": 2.564102564102564e-06, "epoch": 0.0025753283543651817, "percentage": 0.13, "elapsed_time": "0:02:49", "remaining_time": "1 day, 12:31:05", "throughput": 345.02, "total_tokens": 58496} |
|
{"current_steps": 10, "total_steps": 3882, "loss": 2.9831, "lr": 5.128205128205128e-06, "epoch": 0.0051506567087303634, "percentage": 0.26, "elapsed_time": "0:04:19", "remaining_time": "1 day, 3:52:53", "throughput": 451.18, "total_tokens": 116960} |
|
{"current_steps": 15, "total_steps": 3882, "loss": 2.8696, "lr": 7.692307692307694e-06, "epoch": 0.007725985063095545, "percentage": 0.39, "elapsed_time": "0:05:47", "remaining_time": "1 day, 0:54:38", "throughput": 504.36, "total_tokens": 175448} |
|
{"current_steps": 20, "total_steps": 3882, "loss": 2.6316, "lr": 1.0256410256410256e-05, "epoch": 0.010301313417460727, "percentage": 0.52, "elapsed_time": "0:07:17", "remaining_time": "23:26:52", "throughput": 535.17, "total_tokens": 233944} |
|
{"current_steps": 25, "total_steps": 3882, "loss": 1.9707, "lr": 1.282051282051282e-05, "epoch": 0.012876641771825908, "percentage": 0.64, "elapsed_time": "0:08:45", "remaining_time": "22:30:52", "throughput": 556.6, "total_tokens": 292416} |
|
{"current_steps": 30, "total_steps": 3882, "loss": 1.3782, "lr": 1.5384615384615387e-05, "epoch": 0.01545197012619109, "percentage": 0.77, "elapsed_time": "0:10:14", "remaining_time": "21:55:33", "throughput": 570.81, "total_tokens": 350904} |
|
{"current_steps": 35, "total_steps": 3882, "loss": 1.0628, "lr": 1.794871794871795e-05, "epoch": 0.018027298480556272, "percentage": 0.9, "elapsed_time": "0:11:42", "remaining_time": "21:27:25", "throughput": 582.52, "total_tokens": 409384} |
|
{"current_steps": 40, "total_steps": 3882, "loss": 0.9789, "lr": 2.0512820512820512e-05, "epoch": 0.020602626834921454, "percentage": 1.03, "elapsed_time": "0:13:11", "remaining_time": "21:07:29", "throughput": 590.91, "total_tokens": 467864} |
|
{"current_steps": 45, "total_steps": 3882, "loss": 0.9327, "lr": 2.307692307692308e-05, "epoch": 0.023177955189286635, "percentage": 1.16, "elapsed_time": "0:14:39", "remaining_time": "20:50:06", "throughput": 598.39, "total_tokens": 526384} |
|
{"current_steps": 50, "total_steps": 3882, "loss": 0.9233, "lr": 2.564102564102564e-05, "epoch": 0.025753283543651816, "percentage": 1.29, "elapsed_time": "0:16:09", "remaining_time": "20:38:40", "throughput": 603.11, "total_tokens": 584856} |
|
{"current_steps": 50, "total_steps": 3882, "eval_loss": 0.9281821846961975, "epoch": 0.025753283543651816, "percentage": 1.29, "elapsed_time": "0:16:57", "remaining_time": "21:40:17", "throughput": 574.53, "total_tokens": 584856} |
|
{"current_steps": 55, "total_steps": 3882, "loss": 0.897, "lr": 2.8205128205128207e-05, "epoch": 0.028328611898016998, "percentage": 1.42, "elapsed_time": "0:18:34", "remaining_time": "21:32:15", "throughput": 577.35, "total_tokens": 643344} |
|
{"current_steps": 60, "total_steps": 3882, "loss": 0.9169, "lr": 3.0769230769230774e-05, "epoch": 0.03090394025238218, "percentage": 1.55, "elapsed_time": "0:20:03", "remaining_time": "21:17:49", "throughput": 583.09, "total_tokens": 701808} |
|
{"current_steps": 65, "total_steps": 3882, "loss": 0.9019, "lr": 3.3333333333333335e-05, "epoch": 0.03347926860674736, "percentage": 1.67, "elapsed_time": "0:21:32", "remaining_time": "21:04:47", "throughput": 588.34, "total_tokens": 760304} |
|
{"current_steps": 70, "total_steps": 3882, "loss": 0.8996, "lr": 3.58974358974359e-05, "epoch": 0.036054596961112545, "percentage": 1.8, "elapsed_time": "0:23:02", "remaining_time": "20:55:11", "throughput": 592.04, "total_tokens": 818760} |
|
{"current_steps": 75, "total_steps": 3882, "loss": 0.9073, "lr": 3.846153846153846e-05, "epoch": 0.03862992531547772, "percentage": 1.93, "elapsed_time": "0:24:32", "remaining_time": "20:46:08", "throughput": 595.56, "total_tokens": 877256} |
|
{"current_steps": 80, "total_steps": 3882, "loss": 0.9081, "lr": 4.1025641025641023e-05, "epoch": 0.04120525366984291, "percentage": 2.06, "elapsed_time": "0:26:03", "remaining_time": "20:38:41", "throughput": 598.36, "total_tokens": 935752} |
|
{"current_steps": 85, "total_steps": 3882, "loss": 0.906, "lr": 4.358974358974359e-05, "epoch": 0.043780582024208085, "percentage": 2.19, "elapsed_time": "0:27:32", "remaining_time": "20:30:19", "throughput": 601.63, "total_tokens": 994216} |
|
{"current_steps": 90, "total_steps": 3882, "loss": 0.8952, "lr": 4.615384615384616e-05, "epoch": 0.04635591037857327, "percentage": 2.32, "elapsed_time": "0:29:02", "remaining_time": "20:23:48", "throughput": 604.04, "total_tokens": 1052704} |
|
{"current_steps": 95, "total_steps": 3882, "loss": 0.8996, "lr": 4.871794871794872e-05, "epoch": 0.04893123873293845, "percentage": 2.45, "elapsed_time": "0:30:31", "remaining_time": "20:17:00", "throughput": 606.61, "total_tokens": 1111176} |
|
{"current_steps": 100, "total_steps": 3882, "loss": 0.9024, "lr": 5.128205128205128e-05, "epoch": 0.05150656708730363, "percentage": 2.58, "elapsed_time": "0:32:01", "remaining_time": "20:10:52", "throughput": 608.88, "total_tokens": 1169664} |
|
{"current_steps": 100, "total_steps": 3882, "eval_loss": 0.911374032497406, "epoch": 0.05150656708730363, "percentage": 2.58, "elapsed_time": "0:32:20", "remaining_time": "20:23:12", "throughput": 602.74, "total_tokens": 1169664} |
|
{"current_steps": 105, "total_steps": 3882, "loss": 0.9142, "lr": 5.384615384615385e-05, "epoch": 0.05408189544166881, "percentage": 2.7, "elapsed_time": "0:33:57", "remaining_time": "20:21:47", "throughput": 602.63, "total_tokens": 1228112} |
|
{"current_steps": 110, "total_steps": 3882, "loss": 0.9054, "lr": 5.6410256410256414e-05, "epoch": 0.056657223796033995, "percentage": 2.83, "elapsed_time": "0:35:26", "remaining_time": "20:15:28", "throughput": 604.96, "total_tokens": 1286608} |
|
{"current_steps": 115, "total_steps": 3882, "loss": 0.8997, "lr": 5.897435897435898e-05, "epoch": 0.05923255215039917, "percentage": 2.96, "elapsed_time": "0:36:56", "remaining_time": "20:10:10", "throughput": 606.8, "total_tokens": 1345072} |
|
{"current_steps": 120, "total_steps": 3882, "loss": 0.8988, "lr": 6.153846153846155e-05, "epoch": 0.06180788050476436, "percentage": 3.09, "elapsed_time": "0:38:25", "remaining_time": "20:04:43", "throughput": 608.73, "total_tokens": 1403544} |
|
{"current_steps": 125, "total_steps": 3882, "loss": 0.9087, "lr": 6.410256410256412e-05, "epoch": 0.06438320885912954, "percentage": 3.22, "elapsed_time": "0:39:56", "remaining_time": "20:00:26", "throughput": 610.09, "total_tokens": 1462024} |
|
{"current_steps": 130, "total_steps": 3882, "loss": 0.8961, "lr": 6.666666666666667e-05, "epoch": 0.06695853721349472, "percentage": 3.35, "elapsed_time": "0:41:25", "remaining_time": "19:55:28", "throughput": 611.82, "total_tokens": 1520528} |
|
{"current_steps": 135, "total_steps": 3882, "loss": 0.903, "lr": 6.923076923076924e-05, "epoch": 0.0695338655678599, "percentage": 3.48, "elapsed_time": "0:42:56", "remaining_time": "19:51:42", "throughput": 612.94, "total_tokens": 1579024} |
|
{"current_steps": 140, "total_steps": 3882, "loss": 0.899, "lr": 7.17948717948718e-05, "epoch": 0.07210919392222509, "percentage": 3.61, "elapsed_time": "0:44:25", "remaining_time": "19:47:14", "throughput": 614.42, "total_tokens": 1637504} |
|
{"current_steps": 145, "total_steps": 3882, "loss": 0.9071, "lr": 7.435897435897436e-05, "epoch": 0.07468452227659027, "percentage": 3.74, "elapsed_time": "0:45:55", "remaining_time": "19:43:28", "throughput": 615.57, "total_tokens": 1696024} |
|
{"current_steps": 150, "total_steps": 3882, "loss": 0.9045, "lr": 7.692307692307693e-05, "epoch": 0.07725985063095545, "percentage": 3.86, "elapsed_time": "0:47:24", "remaining_time": "19:39:19", "throughput": 616.91, "total_tokens": 1754512} |
|
{"current_steps": 150, "total_steps": 3882, "eval_loss": 0.8934853076934814, "epoch": 0.07725985063095545, "percentage": 3.86, "elapsed_time": "0:47:43", "remaining_time": "19:47:33", "throughput": 612.63, "total_tokens": 1754512} |
|
{"current_steps": 155, "total_steps": 3882, "loss": 0.8992, "lr": 7.948717948717948e-05, "epoch": 0.07983517898532062, "percentage": 3.99, "elapsed_time": "0:49:20", "remaining_time": "19:46:21", "throughput": 612.43, "total_tokens": 1812976} |
|
{"current_steps": 160, "total_steps": 3882, "loss": 0.8965, "lr": 8.205128205128205e-05, "epoch": 0.08241050733968582, "percentage": 4.12, "elapsed_time": "0:50:48", "remaining_time": "19:42:01", "throughput": 613.85, "total_tokens": 1871464} |
|
{"current_steps": 165, "total_steps": 3882, "loss": 0.9094, "lr": 8.461538461538461e-05, "epoch": 0.08498583569405099, "percentage": 4.25, "elapsed_time": "0:52:18", "remaining_time": "19:38:32", "throughput": 614.83, "total_tokens": 1929928} |
|
{"current_steps": 170, "total_steps": 3882, "loss": 0.903, "lr": 8.717948717948718e-05, "epoch": 0.08756116404841617, "percentage": 4.38, "elapsed_time": "0:53:47", "remaining_time": "19:34:27", "throughput": 616.14, "total_tokens": 1988432} |
|
{"current_steps": 175, "total_steps": 3882, "loss": 0.902, "lr": 8.974358974358975e-05, "epoch": 0.09013649240278135, "percentage": 4.51, "elapsed_time": "0:55:16", "remaining_time": "19:30:56", "throughput": 617.16, "total_tokens": 2046920} |
|
{"current_steps": 180, "total_steps": 3882, "loss": 0.9006, "lr": 9.230769230769232e-05, "epoch": 0.09271182075714654, "percentage": 4.64, "elapsed_time": "0:56:45", "remaining_time": "19:27:28", "throughput": 618.16, "total_tokens": 2105392} |
|
{"current_steps": 185, "total_steps": 3882, "loss": 0.9042, "lr": 9.487179487179487e-05, "epoch": 0.09528714911151172, "percentage": 4.77, "elapsed_time": "0:58:15", "remaining_time": "19:24:13", "throughput": 619.04, "total_tokens": 2163872} |
|
{"current_steps": 190, "total_steps": 3882, "loss": 0.9096, "lr": 9.743589743589744e-05, "epoch": 0.0978624774658769, "percentage": 4.89, "elapsed_time": "0:59:43", "remaining_time": "19:20:39", "throughput": 620.11, "total_tokens": 2222352} |
|
{"current_steps": 195, "total_steps": 3882, "loss": 0.9037, "lr": 0.0001, "epoch": 0.10043780582024209, "percentage": 5.02, "elapsed_time": "1:01:11", "remaining_time": "19:17:04", "throughput": 621.18, "total_tokens": 2280800} |
|
{"current_steps": 200, "total_steps": 3882, "loss": 0.904, "lr": 9.999954623308172e-05, "epoch": 0.10301313417460727, "percentage": 5.15, "elapsed_time": "1:02:41", "remaining_time": "19:14:00", "throughput": 621.98, "total_tokens": 2339304} |
|
{"current_steps": 200, "total_steps": 3882, "eval_loss": 0.8980139493942261, "epoch": 0.10301313417460727, "percentage": 5.15, "elapsed_time": "1:03:00", "remaining_time": "19:19:56", "throughput": 618.8, "total_tokens": 2339304} |
|
{"current_steps": 205, "total_steps": 3882, "loss": 0.9029, "lr": 9.999818494056303e-05, "epoch": 0.10558846252897244, "percentage": 5.28, "elapsed_time": "1:04:33", "remaining_time": "19:17:54", "throughput": 619.05, "total_tokens": 2397808} |
|
{"current_steps": 210, "total_steps": 3882, "loss": 0.9005, "lr": 9.99959161471523e-05, "epoch": 0.10816379088333762, "percentage": 5.41, "elapsed_time": "1:06:02", "remaining_time": "19:14:52", "throughput": 619.83, "total_tokens": 2456288} |
|
{"current_steps": 215, "total_steps": 3882, "loss": 0.9096, "lr": 9.99927398940297e-05, "epoch": 0.11073911923770281, "percentage": 5.54, "elapsed_time": "1:07:30", "remaining_time": "19:11:25", "throughput": 620.85, "total_tokens": 2514784} |
|
{"current_steps": 220, "total_steps": 3882, "loss": 0.9036, "lr": 9.998865623884635e-05, "epoch": 0.11331444759206799, "percentage": 5.67, "elapsed_time": "1:08:59", "remaining_time": "19:08:31", "throughput": 621.56, "total_tokens": 2573240} |
|
{"current_steps": 225, "total_steps": 3882, "loss": 0.901, "lr": 9.998366525572336e-05, "epoch": 0.11588977594643317, "percentage": 5.8, "elapsed_time": "1:10:27", "remaining_time": "19:05:14", "throughput": 622.48, "total_tokens": 2631672} |
|
{"current_steps": 230, "total_steps": 3882, "loss": 0.9018, "lr": 9.997776703525046e-05, "epoch": 0.11846510430079835, "percentage": 5.92, "elapsed_time": "1:11:56", "remaining_time": "19:02:26", "throughput": 623.14, "total_tokens": 2690112} |
|
{"current_steps": 235, "total_steps": 3882, "loss": 0.8934, "lr": 9.997096168448432e-05, "epoch": 0.12104043265516354, "percentage": 6.05, "elapsed_time": "1:13:24", "remaining_time": "18:59:11", "throughput": 624.07, "total_tokens": 2748608} |
|
{"current_steps": 240, "total_steps": 3882, "loss": 0.8876, "lr": 9.996324932694668e-05, "epoch": 0.12361576100952872, "percentage": 6.18, "elapsed_time": "1:14:53", "remaining_time": "18:56:31", "throughput": 624.67, "total_tokens": 2807080} |
|
{"current_steps": 245, "total_steps": 3882, "loss": 0.9084, "lr": 9.995463010262206e-05, "epoch": 0.1261910893638939, "percentage": 6.31, "elapsed_time": "1:16:21", "remaining_time": "18:53:29", "throughput": 625.48, "total_tokens": 2865520} |
|
{"current_steps": 250, "total_steps": 3882, "loss": 0.9106, "lr": 9.994510416795519e-05, "epoch": 0.12876641771825909, "percentage": 6.44, "elapsed_time": "1:17:50", "remaining_time": "18:50:56", "throughput": 626.03, "total_tokens": 2924016} |
|
{"current_steps": 250, "total_steps": 3882, "eval_loss": 0.8958488702774048, "epoch": 0.12876641771825909, "percentage": 6.44, "elapsed_time": "1:18:10", "remaining_time": "18:55:39", "throughput": 623.43, "total_tokens": 2924016} |
|
{"current_steps": 255, "total_steps": 3882, "loss": 0.9012, "lr": 9.993467169584824e-05, "epoch": 0.13134174607262425, "percentage": 6.57, "elapsed_time": "1:19:44", "remaining_time": "18:54:06", "throughput": 623.43, "total_tokens": 2982520} |
|
{"current_steps": 260, "total_steps": 3882, "loss": 0.9069, "lr": 9.992333287565765e-05, "epoch": 0.13391707442698944, "percentage": 6.7, "elapsed_time": "1:21:14", "remaining_time": "18:51:41", "throughput": 623.9, "total_tokens": 3041008} |
|
{"current_steps": 265, "total_steps": 3882, "loss": 0.8918, "lr": 9.991108791319066e-05, "epoch": 0.13649240278135463, "percentage": 6.83, "elapsed_time": "1:22:42", "remaining_time": "18:48:57", "throughput": 624.54, "total_tokens": 3099464} |
|
{"current_steps": 270, "total_steps": 3882, "loss": 0.8996, "lr": 9.989793703070163e-05, "epoch": 0.1390677311357198, "percentage": 6.96, "elapsed_time": "1:24:13", "remaining_time": "18:46:38", "throughput": 624.96, "total_tokens": 3157944} |
|
{"current_steps": 275, "total_steps": 3882, "loss": 0.9009, "lr": 9.988388046688799e-05, "epoch": 0.141643059490085, "percentage": 7.08, "elapsed_time": "1:25:41", "remaining_time": "18:44:03", "throughput": 625.53, "total_tokens": 3216448} |
|
{"current_steps": 280, "total_steps": 3882, "loss": 0.9059, "lr": 9.986891847688587e-05, "epoch": 0.14421838784445018, "percentage": 7.21, "elapsed_time": "1:27:11", "remaining_time": "18:41:42", "throughput": 625.97, "total_tokens": 3274928} |
|
{"current_steps": 285, "total_steps": 3882, "loss": 0.8939, "lr": 9.985305133226553e-05, "epoch": 0.14679371619881534, "percentage": 7.34, "elapsed_time": "1:28:41", "remaining_time": "18:39:25", "throughput": 626.38, "total_tokens": 3333408} |
|
{"current_steps": 290, "total_steps": 3882, "loss": 0.8899, "lr": 9.983627932102638e-05, "epoch": 0.14936904455318054, "percentage": 7.47, "elapsed_time": "1:30:10", "remaining_time": "18:37:01", "throughput": 626.86, "total_tokens": 3391896} |
|
{"current_steps": 295, "total_steps": 3882, "loss": 0.9092, "lr": 9.981860274759173e-05, "epoch": 0.1519443729075457, "percentage": 7.6, "elapsed_time": "1:31:41", "remaining_time": "18:34:51", "throughput": 627.2, "total_tokens": 3450392} |
|
{"current_steps": 300, "total_steps": 3882, "loss": 0.8901, "lr": 9.980002193280342e-05, "epoch": 0.1545197012619109, "percentage": 7.73, "elapsed_time": "1:33:10", "remaining_time": "18:32:25", "throughput": 627.7, "total_tokens": 3508888} |
|
{"current_steps": 300, "total_steps": 3882, "eval_loss": 0.8932263255119324, "epoch": 0.1545197012619109, "percentage": 7.73, "elapsed_time": "1:33:29", "remaining_time": "18:36:21", "throughput": 625.48, "total_tokens": 3508888} |
|
{"current_steps": 305, "total_steps": 3882, "loss": 0.9042, "lr": 9.978053721391578e-05, "epoch": 0.15709502961627608, "percentage": 7.86, "elapsed_time": "1:35:05", "remaining_time": "18:35:12", "throughput": 625.26, "total_tokens": 3567368} |
|
{"current_steps": 310, "total_steps": 3882, "loss": 0.9007, "lr": 9.976014894458963e-05, "epoch": 0.15967035797064125, "percentage": 7.99, "elapsed_time": "1:36:33", "remaining_time": "18:32:40", "throughput": 625.81, "total_tokens": 3625848} |
|
{"current_steps": 315, "total_steps": 3882, "loss": 0.9036, "lr": 9.973885749488589e-05, "epoch": 0.16224568632500644, "percentage": 8.11, "elapsed_time": "1:38:03", "remaining_time": "18:30:22", "throughput": 626.22, "total_tokens": 3684336} |
|
{"current_steps": 320, "total_steps": 3882, "loss": 0.8936, "lr": 9.971666325125874e-05, "epoch": 0.16482101467937163, "percentage": 8.24, "elapsed_time": "1:39:30", "remaining_time": "18:27:44", "throughput": 626.83, "total_tokens": 3742800} |
|
{"current_steps": 325, "total_steps": 3882, "loss": 0.8989, "lr": 9.969356661654876e-05, "epoch": 0.1673963430337368, "percentage": 8.37, "elapsed_time": "1:41:00", "remaining_time": "18:25:29", "throughput": 627.23, "total_tokens": 3801280} |
|
{"current_steps": 330, "total_steps": 3882, "loss": 0.8983, "lr": 9.966956800997546e-05, "epoch": 0.16997167138810199, "percentage": 8.5, "elapsed_time": "1:42:28", "remaining_time": "18:22:55", "throughput": 627.81, "total_tokens": 3859792} |
|
{"current_steps": 335, "total_steps": 3882, "loss": 0.9038, "lr": 9.964466786712984e-05, "epoch": 0.17254699974246718, "percentage": 8.63, "elapsed_time": "1:43:57", "remaining_time": "18:20:45", "throughput": 628.16, "total_tokens": 3918272} |
|
{"current_steps": 340, "total_steps": 3882, "loss": 0.8947, "lr": 9.961886663996629e-05, "epoch": 0.17512232809683234, "percentage": 8.76, "elapsed_time": "1:45:26", "remaining_time": "18:18:25", "throughput": 628.61, "total_tokens": 3976760} |
|
{"current_steps": 345, "total_steps": 3882, "loss": 0.9179, "lr": 9.959216479679458e-05, "epoch": 0.17769765645119753, "percentage": 8.89, "elapsed_time": "1:46:56", "remaining_time": "18:16:25", "throughput": 628.86, "total_tokens": 4035240} |
|
{"current_steps": 350, "total_steps": 3882, "loss": 0.9059, "lr": 9.956456282227122e-05, "epoch": 0.1802729848055627, "percentage": 9.02, "elapsed_time": "1:48:25", "remaining_time": "18:14:06", "throughput": 629.29, "total_tokens": 4093688} |
|
{"current_steps": 350, "total_steps": 3882, "eval_loss": 0.8960411548614502, "epoch": 0.1802729848055627, "percentage": 9.02, "elapsed_time": "1:48:45", "remaining_time": "18:17:29", "throughput": 627.36, "total_tokens": 4093688} |
|
{"current_steps": 355, "total_steps": 3882, "loss": 0.8795, "lr": 9.953606121739074e-05, "epoch": 0.1828483131599279, "percentage": 9.14, "elapsed_time": "1:50:19", "remaining_time": "18:16:10", "throughput": 627.22, "total_tokens": 4152160} |
|
{"current_steps": 360, "total_steps": 3882, "loss": 0.9143, "lr": 9.950666049947653e-05, "epoch": 0.18542364151429308, "percentage": 9.27, "elapsed_time": "1:51:48", "remaining_time": "18:13:48", "throughput": 627.69, "total_tokens": 4210648} |
|
{"current_steps": 365, "total_steps": 3882, "loss": 0.9164, "lr": 9.947636120217155e-05, "epoch": 0.18799896986865824, "percentage": 9.4, "elapsed_time": "1:53:17", "remaining_time": "18:11:38", "throughput": 628.04, "total_tokens": 4269136} |
|
{"current_steps": 370, "total_steps": 3882, "loss": 0.9061, "lr": 9.944516387542852e-05, "epoch": 0.19057429822302344, "percentage": 9.53, "elapsed_time": "1:54:46", "remaining_time": "18:09:27", "throughput": 628.41, "total_tokens": 4327664} |
|
{"current_steps": 375, "total_steps": 3882, "loss": 0.8873, "lr": 9.941306908550005e-05, "epoch": 0.19314962657738863, "percentage": 9.66, "elapsed_time": "1:56:15", "remaining_time": "18:07:13", "throughput": 628.81, "total_tokens": 4386120} |
|
{"current_steps": 380, "total_steps": 3882, "loss": 0.9038, "lr": 9.938007741492828e-05, "epoch": 0.1957249549317538, "percentage": 9.79, "elapsed_time": "1:57:45", "remaining_time": "18:05:10", "throughput": 629.08, "total_tokens": 4444560} |
|
{"current_steps": 385, "total_steps": 3882, "loss": 0.9116, "lr": 9.934618946253437e-05, "epoch": 0.19830028328611898, "percentage": 9.92, "elapsed_time": "1:59:14", "remaining_time": "18:03:01", "throughput": 629.43, "total_tokens": 4503016} |
|
{"current_steps": 390, "total_steps": 3882, "loss": 0.9023, "lr": 9.931140584340761e-05, "epoch": 0.20087561164048418, "percentage": 10.05, "elapsed_time": "2:00:43", "remaining_time": "18:00:58", "throughput": 629.72, "total_tokens": 4561496} |
|
{"current_steps": 395, "total_steps": 3882, "loss": 0.8901, "lr": 9.92757271888942e-05, "epoch": 0.20345093999484934, "percentage": 10.18, "elapsed_time": "2:02:11", "remaining_time": "17:58:43", "throughput": 630.12, "total_tokens": 4619944} |
|
{"current_steps": 400, "total_steps": 3882, "loss": 0.9033, "lr": 9.923915414658587e-05, "epoch": 0.20602626834921453, "percentage": 10.3, "elapsed_time": "2:03:41", "remaining_time": "17:56:44", "throughput": 630.38, "total_tokens": 4678384} |
|
{"current_steps": 400, "total_steps": 3882, "eval_loss": 0.906301736831665, "epoch": 0.20602626834921453, "percentage": 10.3, "elapsed_time": "2:04:01", "remaining_time": "17:59:36", "throughput": 628.7, "total_tokens": 4678384} |
|
|