{ "epoch": 0.9974025974025974, "eval_logits/chosen": -69679219.2, "eval_logits/rejected": -34451756.137931034, "eval_logps/chosen": -343.90514322916664, "eval_logps/rejected": -233.7683694773707, "eval_loss": 0.30314239859580994, "eval_rewards/chosen": 1.5421129862467449, "eval_rewards/margins": 8.846664735640603, "eval_rewards/rejected": -7.304551749393858, "eval_runtime": 374.42, "eval_samples_per_second": 1.247, "eval_steps_per_second": 0.158, "kl": 1080.3172607421875, "total_flos": 8.196772297546138e+16, "train_loss": 0.31702631492581634, "train_runtime": 54644.2785, "train_samples_per_second": 0.845, "train_steps_per_second": 0.007 }