{ "epoch": 0.9997172745264349, "eval_logits/chosen": -1.0220762491226196, "eval_logits/rejected": -1.0401626825332642, "eval_logps/chosen": -4.1724467277526855, "eval_logps/rejected": -4.85194206237793, "eval_loss": 2.5764122009277344, "eval_rewards/accuracies": 0.7931150197982788, "eval_rewards/chosen": -41.724464416503906, "eval_rewards/margins": 6.794952392578125, "eval_rewards/rejected": -48.51942443847656, "eval_runtime": 50.6713, "eval_samples_per_second": 58.791, "eval_steps_per_second": 3.69 }