{ "epoch": 0.9994767137624281, "eval_logits/chosen": 0.23715664446353912, "eval_logits/rejected": 1.1001231670379639, "eval_logps/chosen": -418.1593017578125, "eval_logps/rejected": -499.1413879394531, "eval_loss": 0.4963093400001526, "eval_rewards/accuracies": 0.761904776096344, "eval_rewards/chosen": -1.3619133234024048, "eval_rewards/margins": 1.0274325609207153, "eval_rewards/rejected": -2.38934588432312, "eval_runtime": 185.5011, "eval_samples": 2000, "eval_samples_per_second": 10.782, "eval_steps_per_second": 0.34 }