Llama-3.1-8B-Instruct-ppo-500 / trainer_log.jsonl
chchen's picture
Upload 13 files
ede28fe verified
{"current_steps": 10, "total_steps": 150, "loss": 1.0211, "reward": 7.9638, "lr": 5.625e-06, "epoch": 0.6, "percentage": 6.67, "elapsed_time": "0:20:37", "remaining_time": "4:48:50"}
{"current_steps": 20, "total_steps": 150, "loss": 0.9096, "reward": 7.3826, "lr": 9.989294616193018e-06, "epoch": 1.27, "percentage": 13.33, "elapsed_time": "0:39:02", "remaining_time": "4:13:49"}
{"current_steps": 30, "total_steps": 150, "loss": 0.4739, "reward": 8.4875, "lr": 9.800249271929645e-06, "epoch": 1.93, "percentage": 20.0, "elapsed_time": "1:00:18", "remaining_time": "4:01:13"}
{"current_steps": 40, "total_steps": 150, "loss": 0.3126, "reward": 9.1497, "lr": 9.38363377853754e-06, "epoch": 2.6, "percentage": 26.67, "elapsed_time": "1:21:03", "remaining_time": "3:42:53"}
{"current_steps": 50, "total_steps": 150, "loss": 0.2466, "reward": 9.1595, "lr": 8.759199037394888e-06, "epoch": 3.27, "percentage": 33.33, "elapsed_time": "1:42:09", "remaining_time": "3:24:19"}
{"current_steps": 60, "total_steps": 150, "loss": 0.2322, "reward": 9.4808, "lr": 7.956548241817914e-06, "epoch": 3.93, "percentage": 40.0, "elapsed_time": "2:02:55", "remaining_time": "3:04:23"}
{"current_steps": 70, "total_steps": 150, "loss": 0.1405, "reward": 9.9301, "lr": 7.0137334492936875e-06, "epoch": 4.6, "percentage": 46.67, "elapsed_time": "2:22:45", "remaining_time": "2:43:08"}
{"current_steps": 80, "total_steps": 150, "loss": 0.2078, "reward": 9.595, "lr": 5.975451610080643e-06, "epoch": 5.27, "percentage": 53.33, "elapsed_time": "2:44:04", "remaining_time": "2:23:34"}
{"current_steps": 90, "total_steps": 150, "loss": 0.2104, "reward": 9.7705, "lr": 4.890925574827195e-06, "epoch": 5.93, "percentage": 60.0, "elapsed_time": "3:04:26", "remaining_time": "2:02:57"}
{"current_steps": 100, "total_steps": 150, "loss": 0.1774, "reward": 9.9975, "lr": 3.8115705383691354e-06, "epoch": 6.6, "percentage": 66.67, "elapsed_time": "3:22:01", "remaining_time": "1:41:00"}
{"current_steps": 110, "total_steps": 150, "loss": 0.1646, "reward": 9.8489, "lr": 2.7885565489049948e-06, "epoch": 7.27, "percentage": 73.33, "elapsed_time": "3:39:38", "remaining_time": "1:19:52"}
{"current_steps": 120, "total_steps": 150, "loss": 0.1785, "reward": 10.4678, "lr": 1.8703826390797047e-06, "epoch": 7.93, "percentage": 80.0, "elapsed_time": "3:57:18", "remaining_time": "0:59:19"}
{"current_steps": 130, "total_steps": 150, "loss": 0.0917, "reward": 10.1096, "lr": 1.100577584535592e-06, "epoch": 8.6, "percentage": 86.67, "elapsed_time": "4:18:07", "remaining_time": "0:39:42"}
{"current_steps": 140, "total_steps": 150, "loss": 0.1104, "reward": 10.1309, "lr": 5.156362923365587e-07, "epoch": 9.27, "percentage": 93.33, "elapsed_time": "4:35:32", "remaining_time": "0:19:40"}
{"current_steps": 150, "total_steps": 150, "loss": 0.0614, "reward": 10.4662, "lr": 1.4328965093369284e-07, "epoch": 9.93, "percentage": 100.0, "elapsed_time": "4:54:09", "remaining_time": "0:00:00"}