{ "epoch": 1.0, "eval_logits/chosen": 0.4666408896446228, "eval_logits/rejected": -0.32667988538742065, "eval_logps/chosen": -426.2104187011719, "eval_logps/rejected": -941.0735473632812, "eval_loss": 0.6054050922393799, "eval_rewards/accuracies": 0.7867646813392639, "eval_rewards/chosen": 0.03273159638047218, "eval_rewards/margins": 0.20916865766048431, "eval_rewards/rejected": -0.17643706500530243, "eval_runtime": 62.3876, "eval_samples": 520, "eval_samples_per_second": 8.335, "eval_steps_per_second": 0.272 }