mistral_openhermes_v3 / trainer_log.jsonl
sedrickkeh's picture
Training in progress, epoch 2
928fd5a verified
{"current_steps": 10, "total_steps": 738, "loss": 0.7514, "learning_rate": 5e-06, "epoch": 0.040526849037487336, "percentage": 1.36, "elapsed_time": "0:16:45", "remaining_time": "20:19:55"}
{"current_steps": 20, "total_steps": 738, "loss": 0.6416, "learning_rate": 5e-06, "epoch": 0.08105369807497467, "percentage": 2.71, "elapsed_time": "0:33:23", "remaining_time": "19:58:48"}
{"current_steps": 30, "total_steps": 738, "loss": 0.6144, "learning_rate": 5e-06, "epoch": 0.12158054711246201, "percentage": 4.07, "elapsed_time": "0:50:02", "remaining_time": "19:40:51"}
{"current_steps": 40, "total_steps": 738, "loss": 0.5929, "learning_rate": 5e-06, "epoch": 0.16210739614994935, "percentage": 5.42, "elapsed_time": "1:06:39", "remaining_time": "19:23:08"}
{"current_steps": 50, "total_steps": 738, "loss": 0.5887, "learning_rate": 5e-06, "epoch": 0.20263424518743667, "percentage": 6.78, "elapsed_time": "1:23:17", "remaining_time": "19:06:04"}
{"current_steps": 60, "total_steps": 738, "loss": 0.5846, "learning_rate": 5e-06, "epoch": 0.24316109422492402, "percentage": 8.13, "elapsed_time": "1:39:55", "remaining_time": "18:49:10"}
{"current_steps": 70, "total_steps": 738, "loss": 0.5815, "learning_rate": 5e-06, "epoch": 0.28368794326241137, "percentage": 9.49, "elapsed_time": "1:56:33", "remaining_time": "18:32:22"}
{"current_steps": 80, "total_steps": 738, "loss": 0.5761, "learning_rate": 5e-06, "epoch": 0.3242147922998987, "percentage": 10.84, "elapsed_time": "2:13:14", "remaining_time": "18:15:56"}
{"current_steps": 90, "total_steps": 738, "loss": 0.5768, "learning_rate": 5e-06, "epoch": 0.364741641337386, "percentage": 12.2, "elapsed_time": "2:29:54", "remaining_time": "17:59:17"}
{"current_steps": 100, "total_steps": 738, "loss": 0.5747, "learning_rate": 5e-06, "epoch": 0.40526849037487334, "percentage": 13.55, "elapsed_time": "2:46:35", "remaining_time": "17:42:48"}
{"current_steps": 110, "total_steps": 738, "loss": 0.5707, "learning_rate": 5e-06, "epoch": 0.4457953394123607, "percentage": 14.91, "elapsed_time": "3:03:14", "remaining_time": "17:26:10"}
{"current_steps": 120, "total_steps": 738, "loss": 0.5643, "learning_rate": 5e-06, "epoch": 0.48632218844984804, "percentage": 16.26, "elapsed_time": "3:19:56", "remaining_time": "17:09:40"}
{"current_steps": 130, "total_steps": 738, "loss": 0.5636, "learning_rate": 5e-06, "epoch": 0.5268490374873354, "percentage": 17.62, "elapsed_time": "3:36:36", "remaining_time": "16:53:02"}
{"current_steps": 140, "total_steps": 738, "loss": 0.5637, "learning_rate": 5e-06, "epoch": 0.5673758865248227, "percentage": 18.97, "elapsed_time": "3:53:15", "remaining_time": "16:36:20"}
{"current_steps": 150, "total_steps": 738, "loss": 0.5613, "learning_rate": 5e-06, "epoch": 0.60790273556231, "percentage": 20.33, "elapsed_time": "4:09:53", "remaining_time": "16:19:35"}
{"current_steps": 160, "total_steps": 738, "loss": 0.5588, "learning_rate": 5e-06, "epoch": 0.6484295845997974, "percentage": 21.68, "elapsed_time": "4:26:34", "remaining_time": "16:03:00"}
{"current_steps": 170, "total_steps": 738, "loss": 0.5558, "learning_rate": 5e-06, "epoch": 0.6889564336372846, "percentage": 23.04, "elapsed_time": "4:43:13", "remaining_time": "15:46:17"}
{"current_steps": 180, "total_steps": 738, "loss": 0.5576, "learning_rate": 5e-06, "epoch": 0.729483282674772, "percentage": 24.39, "elapsed_time": "4:59:51", "remaining_time": "15:29:35"}
{"current_steps": 190, "total_steps": 738, "loss": 0.557, "learning_rate": 5e-06, "epoch": 0.7700101317122594, "percentage": 25.75, "elapsed_time": "5:16:33", "remaining_time": "15:12:59"}
{"current_steps": 200, "total_steps": 738, "loss": 0.5555, "learning_rate": 5e-06, "epoch": 0.8105369807497467, "percentage": 27.1, "elapsed_time": "5:33:12", "remaining_time": "14:56:18"}
{"current_steps": 210, "total_steps": 738, "loss": 0.5569, "learning_rate": 5e-06, "epoch": 0.851063829787234, "percentage": 28.46, "elapsed_time": "5:49:53", "remaining_time": "14:39:42"}
{"current_steps": 220, "total_steps": 738, "loss": 0.5515, "learning_rate": 5e-06, "epoch": 0.8915906788247214, "percentage": 29.81, "elapsed_time": "6:06:33", "remaining_time": "14:23:04"}
{"current_steps": 230, "total_steps": 738, "loss": 0.5584, "learning_rate": 5e-06, "epoch": 0.9321175278622087, "percentage": 31.17, "elapsed_time": "6:23:14", "remaining_time": "14:06:27"}
{"current_steps": 240, "total_steps": 738, "loss": 0.5563, "learning_rate": 5e-06, "epoch": 0.9726443768996961, "percentage": 32.52, "elapsed_time": "6:39:54", "remaining_time": "13:49:49"}
{"current_steps": 246, "total_steps": 738, "eval_loss": 0.5481154918670654, "epoch": 0.9969604863221885, "percentage": 33.33, "elapsed_time": "6:59:11", "remaining_time": "13:58:23"}
{"current_steps": 250, "total_steps": 738, "loss": 0.5364, "learning_rate": 5e-06, "epoch": 1.0131712259371835, "percentage": 33.88, "elapsed_time": "7:05:48", "remaining_time": "13:51:11"}
{"current_steps": 260, "total_steps": 738, "loss": 0.4897, "learning_rate": 5e-06, "epoch": 1.0536980749746707, "percentage": 35.23, "elapsed_time": "7:22:38", "remaining_time": "13:33:46"}
{"current_steps": 270, "total_steps": 738, "loss": 0.487, "learning_rate": 5e-06, "epoch": 1.094224924012158, "percentage": 36.59, "elapsed_time": "7:39:28", "remaining_time": "13:16:26"}
{"current_steps": 280, "total_steps": 738, "loss": 0.4854, "learning_rate": 5e-06, "epoch": 1.1347517730496455, "percentage": 37.94, "elapsed_time": "7:56:21", "remaining_time": "12:59:10"}
{"current_steps": 290, "total_steps": 738, "loss": 0.4873, "learning_rate": 5e-06, "epoch": 1.1752786220871327, "percentage": 39.3, "elapsed_time": "8:13:12", "remaining_time": "12:41:54"}
{"current_steps": 300, "total_steps": 738, "loss": 0.4891, "learning_rate": 5e-06, "epoch": 1.21580547112462, "percentage": 40.65, "elapsed_time": "8:30:04", "remaining_time": "12:24:43"}
{"current_steps": 310, "total_steps": 738, "loss": 0.4851, "learning_rate": 5e-06, "epoch": 1.2563323201621075, "percentage": 42.01, "elapsed_time": "8:46:55", "remaining_time": "12:07:29"}
{"current_steps": 320, "total_steps": 738, "loss": 0.4865, "learning_rate": 5e-06, "epoch": 1.2968591691995948, "percentage": 43.36, "elapsed_time": "9:03:45", "remaining_time": "11:50:17"}
{"current_steps": 330, "total_steps": 738, "loss": 0.4866, "learning_rate": 5e-06, "epoch": 1.337386018237082, "percentage": 44.72, "elapsed_time": "9:20:36", "remaining_time": "11:33:07"}
{"current_steps": 340, "total_steps": 738, "loss": 0.4938, "learning_rate": 5e-06, "epoch": 1.3779128672745693, "percentage": 46.07, "elapsed_time": "9:37:28", "remaining_time": "11:15:59"}
{"current_steps": 350, "total_steps": 738, "loss": 0.4925, "learning_rate": 5e-06, "epoch": 1.4184397163120568, "percentage": 47.43, "elapsed_time": "9:54:20", "remaining_time": "10:58:52"}
{"current_steps": 360, "total_steps": 738, "loss": 0.4948, "learning_rate": 5e-06, "epoch": 1.458966565349544, "percentage": 48.78, "elapsed_time": "10:11:13", "remaining_time": "10:41:47"}
{"current_steps": 370, "total_steps": 738, "loss": 0.4933, "learning_rate": 5e-06, "epoch": 1.4994934143870315, "percentage": 50.14, "elapsed_time": "10:28:05", "remaining_time": "10:24:41"}
{"current_steps": 380, "total_steps": 738, "loss": 0.4892, "learning_rate": 5e-06, "epoch": 1.5400202634245188, "percentage": 51.49, "elapsed_time": "10:44:56", "remaining_time": "10:07:36"}
{"current_steps": 390, "total_steps": 738, "loss": 0.4913, "learning_rate": 5e-06, "epoch": 1.580547112462006, "percentage": 52.85, "elapsed_time": "11:01:47", "remaining_time": "9:50:31"}
{"current_steps": 400, "total_steps": 738, "loss": 0.4921, "learning_rate": 5e-06, "epoch": 1.6210739614994933, "percentage": 54.2, "elapsed_time": "11:18:37", "remaining_time": "9:33:26"}
{"current_steps": 410, "total_steps": 738, "loss": 0.4865, "learning_rate": 5e-06, "epoch": 1.6616008105369806, "percentage": 55.56, "elapsed_time": "11:35:27", "remaining_time": "9:16:21"}
{"current_steps": 420, "total_steps": 738, "loss": 0.4886, "learning_rate": 5e-06, "epoch": 1.702127659574468, "percentage": 56.91, "elapsed_time": "11:52:15", "remaining_time": "8:59:17"}
{"current_steps": 430, "total_steps": 738, "loss": 0.4873, "learning_rate": 5e-06, "epoch": 1.7426545086119554, "percentage": 58.27, "elapsed_time": "12:09:06", "remaining_time": "8:42:14"}
{"current_steps": 440, "total_steps": 738, "loss": 0.488, "learning_rate": 5e-06, "epoch": 1.7831813576494429, "percentage": 59.62, "elapsed_time": "12:25:53", "remaining_time": "8:25:10"}
{"current_steps": 450, "total_steps": 738, "loss": 0.4885, "learning_rate": 5e-06, "epoch": 1.8237082066869301, "percentage": 60.98, "elapsed_time": "12:42:41", "remaining_time": "8:08:07"}
{"current_steps": 460, "total_steps": 738, "loss": 0.4916, "learning_rate": 5e-06, "epoch": 1.8642350557244174, "percentage": 62.33, "elapsed_time": "12:59:30", "remaining_time": "7:51:05"}
{"current_steps": 470, "total_steps": 738, "loss": 0.4883, "learning_rate": 5e-06, "epoch": 1.9047619047619047, "percentage": 63.69, "elapsed_time": "13:16:20", "remaining_time": "7:34:05"}
{"current_steps": 480, "total_steps": 738, "loss": 0.4871, "learning_rate": 5e-06, "epoch": 1.9452887537993921, "percentage": 65.04, "elapsed_time": "13:33:11", "remaining_time": "7:17:05"}
{"current_steps": 490, "total_steps": 738, "loss": 0.4893, "learning_rate": 5e-06, "epoch": 1.9858156028368794, "percentage": 66.4, "elapsed_time": "13:50:02", "remaining_time": "7:00:06"}
{"current_steps": 493, "total_steps": 738, "eval_loss": 0.5415348410606384, "epoch": 1.9979736575481257, "percentage": 66.8, "elapsed_time": "14:03:57", "remaining_time": "6:59:24"}
{"current_steps": 500, "total_steps": 738, "loss": 0.4478, "learning_rate": 5e-06, "epoch": 2.026342451874367, "percentage": 67.75, "elapsed_time": "14:19:34", "remaining_time": "6:49:09"}
{"current_steps": 510, "total_steps": 738, "loss": 0.4216, "learning_rate": 5e-06, "epoch": 2.066869300911854, "percentage": 69.11, "elapsed_time": "14:36:18", "remaining_time": "6:31:45"}
{"current_steps": 520, "total_steps": 738, "loss": 0.4209, "learning_rate": 5e-06, "epoch": 2.1073961499493414, "percentage": 70.46, "elapsed_time": "14:53:00", "remaining_time": "6:14:22"}
{"current_steps": 530, "total_steps": 738, "loss": 0.4216, "learning_rate": 5e-06, "epoch": 2.1479229989868287, "percentage": 71.82, "elapsed_time": "15:09:44", "remaining_time": "5:57:01"}
{"current_steps": 540, "total_steps": 738, "loss": 0.4285, "learning_rate": 5e-06, "epoch": 2.188449848024316, "percentage": 73.17, "elapsed_time": "15:26:25", "remaining_time": "5:39:41"}
{"current_steps": 550, "total_steps": 738, "loss": 0.4275, "learning_rate": 5e-06, "epoch": 2.2289766970618032, "percentage": 74.53, "elapsed_time": "15:43:07", "remaining_time": "5:22:22"}
{"current_steps": 560, "total_steps": 738, "loss": 0.4315, "learning_rate": 5e-06, "epoch": 2.269503546099291, "percentage": 75.88, "elapsed_time": "15:59:50", "remaining_time": "5:05:05"}
{"current_steps": 570, "total_steps": 738, "loss": 0.4317, "learning_rate": 5e-06, "epoch": 2.310030395136778, "percentage": 77.24, "elapsed_time": "16:16:33", "remaining_time": "4:47:49"}
{"current_steps": 580, "total_steps": 738, "loss": 0.4312, "learning_rate": 5e-06, "epoch": 2.3505572441742655, "percentage": 78.59, "elapsed_time": "16:33:15", "remaining_time": "4:30:34"}
{"current_steps": 590, "total_steps": 738, "loss": 0.4329, "learning_rate": 5e-06, "epoch": 2.3910840932117527, "percentage": 79.95, "elapsed_time": "16:49:59", "remaining_time": "4:13:21"}
{"current_steps": 600, "total_steps": 738, "loss": 0.4317, "learning_rate": 5e-06, "epoch": 2.43161094224924, "percentage": 81.3, "elapsed_time": "17:06:41", "remaining_time": "3:56:08"}
{"current_steps": 610, "total_steps": 738, "loss": 0.4332, "learning_rate": 5e-06, "epoch": 2.4721377912867273, "percentage": 82.66, "elapsed_time": "17:23:23", "remaining_time": "3:38:56"}
{"current_steps": 620, "total_steps": 738, "loss": 0.4277, "learning_rate": 5e-06, "epoch": 2.512664640324215, "percentage": 84.01, "elapsed_time": "17:40:04", "remaining_time": "3:21:45"}
{"current_steps": 630, "total_steps": 738, "loss": 0.4283, "learning_rate": 5e-06, "epoch": 2.5531914893617023, "percentage": 85.37, "elapsed_time": "17:56:46", "remaining_time": "3:04:35"}
{"current_steps": 640, "total_steps": 738, "loss": 0.4336, "learning_rate": 5e-06, "epoch": 2.5937183383991895, "percentage": 86.72, "elapsed_time": "18:13:29", "remaining_time": "2:47:26"}
{"current_steps": 650, "total_steps": 738, "loss": 0.4292, "learning_rate": 5e-06, "epoch": 2.634245187436677, "percentage": 88.08, "elapsed_time": "18:30:13", "remaining_time": "2:30:18"}
{"current_steps": 660, "total_steps": 738, "loss": 0.4343, "learning_rate": 5e-06, "epoch": 2.674772036474164, "percentage": 89.43, "elapsed_time": "18:46:56", "remaining_time": "2:13:11"}
{"current_steps": 670, "total_steps": 738, "loss": 0.4313, "learning_rate": 5e-06, "epoch": 2.7152988855116513, "percentage": 90.79, "elapsed_time": "19:03:39", "remaining_time": "1:56:04"}
{"current_steps": 680, "total_steps": 738, "loss": 0.4332, "learning_rate": 5e-06, "epoch": 2.7558257345491386, "percentage": 92.14, "elapsed_time": "19:20:22", "remaining_time": "1:38:58"}
{"current_steps": 690, "total_steps": 738, "loss": 0.432, "learning_rate": 5e-06, "epoch": 2.7963525835866263, "percentage": 93.5, "elapsed_time": "19:37:06", "remaining_time": "1:21:53"}
{"current_steps": 700, "total_steps": 738, "loss": 0.436, "learning_rate": 5e-06, "epoch": 2.8368794326241136, "percentage": 94.85, "elapsed_time": "19:53:49", "remaining_time": "1:04:48"}
{"current_steps": 710, "total_steps": 738, "loss": 0.4349, "learning_rate": 5e-06, "epoch": 2.877406281661601, "percentage": 96.21, "elapsed_time": "20:10:34", "remaining_time": "0:47:44"}
{"current_steps": 720, "total_steps": 738, "loss": 0.4365, "learning_rate": 5e-06, "epoch": 2.917933130699088, "percentage": 97.56, "elapsed_time": "20:27:19", "remaining_time": "0:30:40"}
{"current_steps": 730, "total_steps": 738, "loss": 0.4355, "learning_rate": 5e-06, "epoch": 2.9584599797365754, "percentage": 98.92, "elapsed_time": "20:44:01", "remaining_time": "0:13:37"}
{"current_steps": 738, "total_steps": 738, "eval_loss": 0.5579164028167725, "epoch": 2.990881458966565, "percentage": 100.0, "elapsed_time": "21:10:21", "remaining_time": "0:00:00"}
{"current_steps": 738, "total_steps": 738, "epoch": 2.990881458966565, "percentage": 100.0, "elapsed_time": "21:27:46", "remaining_time": "0:00:00"}