{ "epoch": 0.9996824388694824, "eval_logits/chosen": -0.7089246511459351, "eval_logits/rejected": -0.7224333882331848, "eval_logps/chosen": -261.0423889160156, "eval_logps/rejected": -333.52276611328125, "eval_loss": 0.4344652593135834, "eval_rewards/accuracies": 0.8185185194015503, "eval_rewards/chosen": 1.3033398389816284, "eval_rewards/margins": 1.0257779359817505, "eval_rewards/rejected": 0.27756187319755554, "eval_runtime": 185.0205, "eval_samples": 2155, "eval_samples_per_second": 11.647, "eval_steps_per_second": 1.459 }