llama-CPO / trainer_state.json
botways's picture
botways/llama_cpo_finetune
34b0cbc verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 39,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.7692307692307693,
"grad_norm": 1.8173785209655762,
"learning_rate": 7.435897435897435e-07,
"logits/chosen": 0.13414913415908813,
"logits/rejected": 0.12645891308784485,
"logps/chosen": -298.76983642578125,
"logps/rejected": -261.02435302734375,
"loss": 9.7803,
"nll_loss": 1.0939728021621704,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -29.87698745727539,
"rewards/margins": -3.774548292160034,
"rewards/rejected": -26.102436065673828,
"step": 10
},
{
"epoch": 1.5384615384615383,
"grad_norm": 1.8889318704605103,
"learning_rate": 4.871794871794871e-07,
"logits/chosen": 0.077309250831604,
"logits/rejected": 0.12109130620956421,
"logps/chosen": -277.27899169921875,
"logps/rejected": -260.14141845703125,
"loss": 8.6446,
"nll_loss": 1.0915288925170898,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -27.7278995513916,
"rewards/margins": -1.7137558460235596,
"rewards/rejected": -26.014141082763672,
"step": 20
},
{
"epoch": 2.3076923076923075,
"grad_norm": 1.609092354774475,
"learning_rate": 2.3076923076923078e-07,
"logits/chosen": 0.09378460794687271,
"logits/rejected": 0.0867479220032692,
"logps/chosen": -280.64697265625,
"logps/rejected": -273.1399841308594,
"loss": 7.8556,
"nll_loss": 1.1089527606964111,
"rewards/accuracies": 0.5,
"rewards/chosen": -28.064701080322266,
"rewards/margins": -0.7507012486457825,
"rewards/rejected": -27.31399917602539,
"step": 30
},
{
"epoch": 3.0,
"step": 39,
"total_flos": 0.0,
"train_loss": 8.851261627979767,
"train_runtime": 435.0584,
"train_samples_per_second": 0.69,
"train_steps_per_second": 0.09
}
],
"logging_steps": 10,
"max_steps": 39,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}