PEFT
Safetensors
qwen2
alignment-handbook
trl
dpo
Generated from Trainer
khongtrunght's picture
End of training
017105e verified
raw
history blame
588 Bytes
{
"epoch": 0.9996824388694824,
"eval_logits/chosen": -0.7089246511459351,
"eval_logits/rejected": -0.7224333882331848,
"eval_logps/chosen": -261.0423889160156,
"eval_logps/rejected": -333.52276611328125,
"eval_loss": 0.4344652593135834,
"eval_rewards/accuracies": 0.8185185194015503,
"eval_rewards/chosen": 1.3033398389816284,
"eval_rewards/margins": 1.0257779359817505,
"eval_rewards/rejected": 0.27756187319755554,
"eval_runtime": 185.0205,
"eval_samples": 2155,
"eval_samples_per_second": 11.647,
"eval_steps_per_second": 1.459
}