qlora-llama3b-all / trainer_log.jsonl
pakphum's picture
Training in progress, step 500
64e170d verified
{"current_steps": 10, "total_steps": 500, "loss": 2.2556, "lr": 4e-05, "epoch": 0.08888888888888889, "percentage": 2.0, "elapsed_time": "0:00:26", "remaining_time": "0:21:41"}
{"current_steps": 10, "total_steps": 500, "eval_loss": 1.899280071258545, "epoch": 0.08888888888888889, "percentage": 2.0, "elapsed_time": "0:00:34", "remaining_time": "0:28:27"}
{"current_steps": 20, "total_steps": 500, "loss": 1.5026, "lr": 8e-05, "epoch": 0.17777777777777778, "percentage": 4.0, "elapsed_time": "0:00:58", "remaining_time": "0:23:29"}
{"current_steps": 20, "total_steps": 500, "eval_loss": 0.8436458706855774, "epoch": 0.17777777777777778, "percentage": 4.0, "elapsed_time": "0:01:07", "remaining_time": "0:26:48"}
{"current_steps": 30, "total_steps": 500, "loss": 0.8291, "lr": 0.00012, "epoch": 0.26666666666666666, "percentage": 6.0, "elapsed_time": "0:01:30", "remaining_time": "0:23:43"}
{"current_steps": 30, "total_steps": 500, "eval_loss": 0.6633767485618591, "epoch": 0.26666666666666666, "percentage": 6.0, "elapsed_time": "0:01:39", "remaining_time": "0:25:53"}
{"current_steps": 40, "total_steps": 500, "loss": 0.6688, "lr": 0.00016, "epoch": 0.35555555555555557, "percentage": 8.0, "elapsed_time": "0:02:02", "remaining_time": "0:23:34"}
{"current_steps": 40, "total_steps": 500, "eval_loss": 0.5723012685775757, "epoch": 0.35555555555555557, "percentage": 8.0, "elapsed_time": "0:02:11", "remaining_time": "0:25:09"}
{"current_steps": 50, "total_steps": 500, "loss": 0.6339, "lr": 0.0002, "epoch": 0.4444444444444444, "percentage": 10.0, "elapsed_time": "0:02:35", "remaining_time": "0:23:15"}
{"current_steps": 50, "total_steps": 500, "eval_loss": 0.5510777831077576, "epoch": 0.4444444444444444, "percentage": 10.0, "elapsed_time": "0:02:43", "remaining_time": "0:24:30"}
{"current_steps": 60, "total_steps": 500, "loss": 0.5258, "lr": 0.00019975640502598244, "epoch": 0.5333333333333333, "percentage": 12.0, "elapsed_time": "0:03:07", "remaining_time": "0:22:52"}
{"current_steps": 60, "total_steps": 500, "eval_loss": 0.4759778082370758, "epoch": 0.5333333333333333, "percentage": 12.0, "elapsed_time": "0:03:15", "remaining_time": "0:23:52"}
{"current_steps": 70, "total_steps": 500, "loss": 0.4825, "lr": 0.00019902680687415705, "epoch": 0.6222222222222222, "percentage": 14.0, "elapsed_time": "0:03:39", "remaining_time": "0:22:28"}
{"current_steps": 70, "total_steps": 500, "eval_loss": 0.4696222245693207, "epoch": 0.6222222222222222, "percentage": 14.0, "elapsed_time": "0:03:47", "remaining_time": "0:23:17"}
{"current_steps": 80, "total_steps": 500, "loss": 0.5488, "lr": 0.00019781476007338058, "epoch": 0.7111111111111111, "percentage": 16.0, "elapsed_time": "0:04:11", "remaining_time": "0:22:00"}
{"current_steps": 80, "total_steps": 500, "eval_loss": 0.4698783755302429, "epoch": 0.7111111111111111, "percentage": 16.0, "elapsed_time": "0:04:19", "remaining_time": "0:22:42"}
{"current_steps": 90, "total_steps": 500, "loss": 0.4231, "lr": 0.0001961261695938319, "epoch": 0.8, "percentage": 18.0, "elapsed_time": "0:04:43", "remaining_time": "0:21:31"}
{"current_steps": 90, "total_steps": 500, "eval_loss": 0.45969489216804504, "epoch": 0.8, "percentage": 18.0, "elapsed_time": "0:04:51", "remaining_time": "0:22:08"}
{"current_steps": 100, "total_steps": 500, "loss": 0.4558, "lr": 0.00019396926207859084, "epoch": 0.8888888888888888, "percentage": 20.0, "elapsed_time": "0:05:15", "remaining_time": "0:21:01"}
{"current_steps": 100, "total_steps": 500, "eval_loss": 0.4219018220901489, "epoch": 0.8888888888888888, "percentage": 20.0, "elapsed_time": "0:05:23", "remaining_time": "0:21:33"}
{"current_steps": 110, "total_steps": 500, "loss": 0.4588, "lr": 0.0001913545457642601, "epoch": 0.9777777777777777, "percentage": 22.0, "elapsed_time": "0:05:47", "remaining_time": "0:20:30"}
{"current_steps": 110, "total_steps": 500, "eval_loss": 0.41754281520843506, "epoch": 0.9777777777777777, "percentage": 22.0, "elapsed_time": "0:05:55", "remaining_time": "0:20:58"}
{"current_steps": 120, "total_steps": 500, "loss": 0.4592, "lr": 0.00018829475928589271, "epoch": 1.0666666666666667, "percentage": 24.0, "elapsed_time": "0:06:19", "remaining_time": "0:20:00"}
{"current_steps": 120, "total_steps": 500, "eval_loss": 0.4288429617881775, "epoch": 1.0666666666666667, "percentage": 24.0, "elapsed_time": "0:06:27", "remaining_time": "0:20:25"}
{"current_steps": 130, "total_steps": 500, "loss": 0.2996, "lr": 0.0001848048096156426, "epoch": 1.1555555555555554, "percentage": 26.0, "elapsed_time": "0:06:51", "remaining_time": "0:19:30"}
{"current_steps": 130, "total_steps": 500, "eval_loss": 0.3918496072292328, "epoch": 1.1555555555555554, "percentage": 26.0, "elapsed_time": "0:06:59", "remaining_time": "0:19:53"}
{"current_steps": 140, "total_steps": 500, "loss": 0.3269, "lr": 0.00018090169943749476, "epoch": 1.2444444444444445, "percentage": 28.0, "elapsed_time": "0:07:23", "remaining_time": "0:18:59"}
{"current_steps": 140, "total_steps": 500, "eval_loss": 0.41831883788108826, "epoch": 1.2444444444444445, "percentage": 28.0, "elapsed_time": "0:07:31", "remaining_time": "0:19:20"}
{"current_steps": 150, "total_steps": 500, "loss": 0.347, "lr": 0.0001766044443118978, "epoch": 1.3333333333333333, "percentage": 30.0, "elapsed_time": "0:07:55", "remaining_time": "0:18:29"}
{"current_steps": 150, "total_steps": 500, "eval_loss": 0.43138808012008667, "epoch": 1.3333333333333333, "percentage": 30.0, "elapsed_time": "0:08:03", "remaining_time": "0:18:47"}
{"current_steps": 160, "total_steps": 500, "loss": 0.3251, "lr": 0.0001719339800338651, "epoch": 1.4222222222222223, "percentage": 32.0, "elapsed_time": "0:08:27", "remaining_time": "0:17:58"}
{"current_steps": 160, "total_steps": 500, "eval_loss": 0.3889056444168091, "epoch": 1.4222222222222223, "percentage": 32.0, "elapsed_time": "0:08:35", "remaining_time": "0:18:15"}
{"current_steps": 170, "total_steps": 500, "loss": 0.3035, "lr": 0.00016691306063588583, "epoch": 1.511111111111111, "percentage": 34.0, "elapsed_time": "0:08:59", "remaining_time": "0:17:27"}
{"current_steps": 170, "total_steps": 500, "eval_loss": 0.37887275218963623, "epoch": 1.511111111111111, "percentage": 34.0, "elapsed_time": "0:09:07", "remaining_time": "0:17:42"}
{"current_steps": 180, "total_steps": 500, "loss": 0.3141, "lr": 0.0001615661475325658, "epoch": 1.6, "percentage": 36.0, "elapsed_time": "0:09:31", "remaining_time": "0:16:56"}
{"current_steps": 180, "total_steps": 500, "eval_loss": 0.38691258430480957, "epoch": 1.6, "percentage": 36.0, "elapsed_time": "0:09:39", "remaining_time": "0:17:10"}
{"current_steps": 190, "total_steps": 500, "loss": 0.2878, "lr": 0.0001559192903470747, "epoch": 1.6888888888888889, "percentage": 38.0, "elapsed_time": "0:10:03", "remaining_time": "0:16:24"}
{"current_steps": 190, "total_steps": 500, "eval_loss": 0.3909819722175598, "epoch": 1.6888888888888889, "percentage": 38.0, "elapsed_time": "0:10:11", "remaining_time": "0:16:37"}
{"current_steps": 200, "total_steps": 500, "loss": 0.3063, "lr": 0.00015000000000000001, "epoch": 1.7777777777777777, "percentage": 40.0, "elapsed_time": "0:10:35", "remaining_time": "0:15:53"}
{"current_steps": 200, "total_steps": 500, "eval_loss": 0.39576366543769836, "epoch": 1.7777777777777777, "percentage": 40.0, "elapsed_time": "0:10:43", "remaining_time": "0:16:05"}
{"current_steps": 210, "total_steps": 500, "loss": 0.2748, "lr": 0.00014383711467890774, "epoch": 1.8666666666666667, "percentage": 42.0, "elapsed_time": "0:11:07", "remaining_time": "0:15:22"}
{"current_steps": 210, "total_steps": 500, "eval_loss": 0.3819361627101898, "epoch": 1.8666666666666667, "percentage": 42.0, "elapsed_time": "0:11:15", "remaining_time": "0:15:33"}
{"current_steps": 220, "total_steps": 500, "loss": 0.2725, "lr": 0.00013746065934159123, "epoch": 1.9555555555555557, "percentage": 44.0, "elapsed_time": "0:11:39", "remaining_time": "0:14:50"}
{"current_steps": 220, "total_steps": 500, "eval_loss": 0.4039897620677948, "epoch": 1.9555555555555557, "percentage": 44.0, "elapsed_time": "0:11:47", "remaining_time": "0:15:00"}
{"current_steps": 230, "total_steps": 500, "loss": 0.2897, "lr": 0.00013090169943749476, "epoch": 2.0444444444444443, "percentage": 46.0, "elapsed_time": "0:12:11", "remaining_time": "0:14:19"}
{"current_steps": 230, "total_steps": 500, "eval_loss": 0.3928260803222656, "epoch": 2.0444444444444443, "percentage": 46.0, "elapsed_time": "0:12:19", "remaining_time": "0:14:28"}
{"current_steps": 240, "total_steps": 500, "loss": 0.1813, "lr": 0.00012419218955996676, "epoch": 2.1333333333333333, "percentage": 48.0, "elapsed_time": "0:12:43", "remaining_time": "0:13:47"}
{"current_steps": 240, "total_steps": 500, "eval_loss": 0.4048071801662445, "epoch": 2.1333333333333333, "percentage": 48.0, "elapsed_time": "0:12:51", "remaining_time": "0:13:56"}
{"current_steps": 250, "total_steps": 500, "loss": 0.1965, "lr": 0.00011736481776669306, "epoch": 2.2222222222222223, "percentage": 50.0, "elapsed_time": "0:13:16", "remaining_time": "0:13:16"}
{"current_steps": 250, "total_steps": 500, "eval_loss": 0.4035675525665283, "epoch": 2.2222222222222223, "percentage": 50.0, "elapsed_time": "0:13:24", "remaining_time": "0:13:24"}
{"current_steps": 260, "total_steps": 500, "loss": 0.1751, "lr": 0.00011045284632676536, "epoch": 2.311111111111111, "percentage": 52.0, "elapsed_time": "0:13:48", "remaining_time": "0:12:44"}
{"current_steps": 260, "total_steps": 500, "eval_loss": 0.4221409857273102, "epoch": 2.311111111111111, "percentage": 52.0, "elapsed_time": "0:13:56", "remaining_time": "0:12:51"}
{"current_steps": 270, "total_steps": 500, "loss": 0.1739, "lr": 0.00010348994967025012, "epoch": 2.4, "percentage": 54.0, "elapsed_time": "0:14:20", "remaining_time": "0:12:12"}
{"current_steps": 270, "total_steps": 500, "eval_loss": 0.40371406078338623, "epoch": 2.4, "percentage": 54.0, "elapsed_time": "0:14:28", "remaining_time": "0:12:19"}
{"current_steps": 280, "total_steps": 500, "loss": 0.1629, "lr": 9.651005032974994e-05, "epoch": 2.488888888888889, "percentage": 56.0, "elapsed_time": "0:14:52", "remaining_time": "0:11:40"}
{"current_steps": 280, "total_steps": 500, "eval_loss": 0.41767382621765137, "epoch": 2.488888888888889, "percentage": 56.0, "elapsed_time": "0:15:00", "remaining_time": "0:11:47"}
{"current_steps": 290, "total_steps": 500, "loss": 0.1919, "lr": 8.954715367323468e-05, "epoch": 2.5777777777777775, "percentage": 58.0, "elapsed_time": "0:15:24", "remaining_time": "0:11:09"}
{"current_steps": 290, "total_steps": 500, "eval_loss": 0.4002458155155182, "epoch": 2.5777777777777775, "percentage": 58.0, "elapsed_time": "0:15:32", "remaining_time": "0:11:15"}
{"current_steps": 300, "total_steps": 500, "loss": 0.1804, "lr": 8.263518223330697e-05, "epoch": 2.6666666666666665, "percentage": 60.0, "elapsed_time": "0:15:55", "remaining_time": "0:10:37"}
{"current_steps": 300, "total_steps": 500, "eval_loss": 0.4098145365715027, "epoch": 2.6666666666666665, "percentage": 60.0, "elapsed_time": "0:16:03", "remaining_time": "0:10:42"}
{"current_steps": 310, "total_steps": 500, "loss": 0.1569, "lr": 7.580781044003324e-05, "epoch": 2.7555555555555555, "percentage": 62.0, "elapsed_time": "0:16:27", "remaining_time": "0:10:05"}
{"current_steps": 310, "total_steps": 500, "eval_loss": 0.41249945759773254, "epoch": 2.7555555555555555, "percentage": 62.0, "elapsed_time": "0:16:35", "remaining_time": "0:10:10"}
{"current_steps": 320, "total_steps": 500, "loss": 0.1914, "lr": 6.909830056250527e-05, "epoch": 2.8444444444444446, "percentage": 64.0, "elapsed_time": "0:16:59", "remaining_time": "0:09:33"}
{"current_steps": 320, "total_steps": 500, "eval_loss": 0.4051912724971771, "epoch": 2.8444444444444446, "percentage": 64.0, "elapsed_time": "0:17:07", "remaining_time": "0:09:37"}
{"current_steps": 330, "total_steps": 500, "loss": 0.144, "lr": 6.25393406584088e-05, "epoch": 2.9333333333333336, "percentage": 66.0, "elapsed_time": "0:17:30", "remaining_time": "0:09:01"}
{"current_steps": 330, "total_steps": 500, "eval_loss": 0.4041104018688202, "epoch": 2.9333333333333336, "percentage": 66.0, "elapsed_time": "0:17:38", "remaining_time": "0:09:05"}
{"current_steps": 340, "total_steps": 500, "loss": 0.1738, "lr": 5.616288532109225e-05, "epoch": 3.022222222222222, "percentage": 68.0, "elapsed_time": "0:18:02", "remaining_time": "0:08:29"}
{"current_steps": 340, "total_steps": 500, "eval_loss": 0.42209112644195557, "epoch": 3.022222222222222, "percentage": 68.0, "elapsed_time": "0:18:10", "remaining_time": "0:08:33"}
{"current_steps": 350, "total_steps": 500, "loss": 0.1087, "lr": 5.000000000000002e-05, "epoch": 3.111111111111111, "percentage": 70.0, "elapsed_time": "0:18:34", "remaining_time": "0:07:57"}
{"current_steps": 350, "total_steps": 500, "eval_loss": 0.42136698961257935, "epoch": 3.111111111111111, "percentage": 70.0, "elapsed_time": "0:18:42", "remaining_time": "0:08:00"}
{"current_steps": 360, "total_steps": 500, "loss": 0.0876, "lr": 4.4080709652925336e-05, "epoch": 3.2, "percentage": 72.0, "elapsed_time": "0:19:05", "remaining_time": "0:07:25"}
{"current_steps": 360, "total_steps": 500, "eval_loss": 0.4379313588142395, "epoch": 3.2, "percentage": 72.0, "elapsed_time": "0:19:13", "remaining_time": "0:07:28"}
{"current_steps": 370, "total_steps": 500, "loss": 0.0857, "lr": 3.843385246743417e-05, "epoch": 3.2888888888888888, "percentage": 74.0, "elapsed_time": "0:19:37", "remaining_time": "0:06:53"}
{"current_steps": 370, "total_steps": 500, "eval_loss": 0.46546536684036255, "epoch": 3.2888888888888888, "percentage": 74.0, "elapsed_time": "0:19:45", "remaining_time": "0:06:56"}
{"current_steps": 380, "total_steps": 500, "loss": 0.0978, "lr": 3.308693936411421e-05, "epoch": 3.3777777777777778, "percentage": 76.0, "elapsed_time": "0:20:08", "remaining_time": "0:06:21"}
{"current_steps": 380, "total_steps": 500, "eval_loss": 0.4744359850883484, "epoch": 3.3777777777777778, "percentage": 76.0, "elapsed_time": "0:20:16", "remaining_time": "0:06:24"}
{"current_steps": 390, "total_steps": 500, "loss": 0.0746, "lr": 2.8066019966134904e-05, "epoch": 3.466666666666667, "percentage": 78.0, "elapsed_time": "0:20:40", "remaining_time": "0:05:49"}
{"current_steps": 390, "total_steps": 500, "eval_loss": 0.4815245568752289, "epoch": 3.466666666666667, "percentage": 78.0, "elapsed_time": "0:20:48", "remaining_time": "0:05:52"}
{"current_steps": 400, "total_steps": 500, "loss": 0.0897, "lr": 2.339555568810221e-05, "epoch": 3.5555555555555554, "percentage": 80.0, "elapsed_time": "0:21:12", "remaining_time": "0:05:18"}
{"current_steps": 400, "total_steps": 500, "eval_loss": 0.4889250695705414, "epoch": 3.5555555555555554, "percentage": 80.0, "elapsed_time": "0:21:20", "remaining_time": "0:05:20"}
{"current_steps": 410, "total_steps": 500, "loss": 0.0645, "lr": 1.9098300562505266e-05, "epoch": 3.6444444444444444, "percentage": 82.0, "elapsed_time": "0:21:43", "remaining_time": "0:04:46"}
{"current_steps": 410, "total_steps": 500, "eval_loss": 0.4995201826095581, "epoch": 3.6444444444444444, "percentage": 82.0, "elapsed_time": "0:21:51", "remaining_time": "0:04:47"}
{"current_steps": 420, "total_steps": 500, "loss": 0.0649, "lr": 1.5195190384357404e-05, "epoch": 3.7333333333333334, "percentage": 84.0, "elapsed_time": "0:22:15", "remaining_time": "0:04:14"}
{"current_steps": 420, "total_steps": 500, "eval_loss": 0.5078675746917725, "epoch": 3.7333333333333334, "percentage": 84.0, "elapsed_time": "0:22:23", "remaining_time": "0:04:15"}
{"current_steps": 430, "total_steps": 500, "loss": 0.0896, "lr": 1.1705240714107302e-05, "epoch": 3.822222222222222, "percentage": 86.0, "elapsed_time": "0:22:47", "remaining_time": "0:03:42"}
{"current_steps": 430, "total_steps": 500, "eval_loss": 0.5097964406013489, "epoch": 3.822222222222222, "percentage": 86.0, "elapsed_time": "0:22:55", "remaining_time": "0:03:43"}
{"current_steps": 440, "total_steps": 500, "loss": 0.0788, "lr": 8.645454235739903e-06, "epoch": 3.911111111111111, "percentage": 88.0, "elapsed_time": "0:23:18", "remaining_time": "0:03:10"}
{"current_steps": 440, "total_steps": 500, "eval_loss": 0.5094956755638123, "epoch": 3.911111111111111, "percentage": 88.0, "elapsed_time": "0:23:26", "remaining_time": "0:03:11"}
{"current_steps": 450, "total_steps": 500, "loss": 0.0886, "lr": 6.030737921409169e-06, "epoch": 4.0, "percentage": 90.0, "elapsed_time": "0:23:50", "remaining_time": "0:02:38"}
{"current_steps": 450, "total_steps": 500, "eval_loss": 0.51046222448349, "epoch": 4.0, "percentage": 90.0, "elapsed_time": "0:23:58", "remaining_time": "0:02:39"}
{"current_steps": 460, "total_steps": 500, "loss": 0.0471, "lr": 3.873830406168111e-06, "epoch": 4.088888888888889, "percentage": 92.0, "elapsed_time": "0:24:22", "remaining_time": "0:02:07"}
{"current_steps": 460, "total_steps": 500, "eval_loss": 0.511073887348175, "epoch": 4.088888888888889, "percentage": 92.0, "elapsed_time": "0:24:30", "remaining_time": "0:02:07"}
{"current_steps": 470, "total_steps": 500, "loss": 0.0461, "lr": 2.1852399266194314e-06, "epoch": 4.177777777777778, "percentage": 94.0, "elapsed_time": "0:24:54", "remaining_time": "0:01:35"}
{"current_steps": 470, "total_steps": 500, "eval_loss": 0.515235185623169, "epoch": 4.177777777777778, "percentage": 94.0, "elapsed_time": "0:25:02", "remaining_time": "0:01:35"}
{"current_steps": 480, "total_steps": 500, "loss": 0.0607, "lr": 9.731931258429638e-07, "epoch": 4.266666666666667, "percentage": 96.0, "elapsed_time": "0:25:26", "remaining_time": "0:01:03"}
{"current_steps": 480, "total_steps": 500, "eval_loss": 0.5151567459106445, "epoch": 4.266666666666667, "percentage": 96.0, "elapsed_time": "0:25:34", "remaining_time": "0:01:03"}
{"current_steps": 490, "total_steps": 500, "loss": 0.0473, "lr": 2.4359497401758024e-07, "epoch": 4.355555555555555, "percentage": 98.0, "elapsed_time": "0:25:58", "remaining_time": "0:00:31"}
{"current_steps": 490, "total_steps": 500, "eval_loss": 0.519190788269043, "epoch": 4.355555555555555, "percentage": 98.0, "elapsed_time": "0:26:06", "remaining_time": "0:00:31"}
{"current_steps": 500, "total_steps": 500, "loss": 0.052, "lr": 0.0, "epoch": 4.444444444444445, "percentage": 100.0, "elapsed_time": "0:26:30", "remaining_time": "0:00:00"}
{"current_steps": 500, "total_steps": 500, "eval_loss": 0.5163235664367676, "epoch": 4.444444444444445, "percentage": 100.0, "elapsed_time": "0:26:39", "remaining_time": "0:00:00"}
{"current_steps": 500, "total_steps": 500, "epoch": 4.444444444444445, "percentage": 100.0, "elapsed_time": "0:26:40", "remaining_time": "0:00:00"}