{ "epoch": 2.986666666666667, "eval_kl": 28.178438186645508, "eval_logps/chosen": -142.316337890625, "eval_loss": 0.5287721157073975, "eval_rewards/chosen": 2.6833560180664064, "eval_runtime": 25.1552, "eval_samples_per_second": 3.975, "eval_steps_per_second": 3.975 }