{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9980582524271845, "eval_steps": 100, "global_step": 257, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003883495145631068, "grad_norm": 0.06435743719339371, "learning_rate": 3.846153846153846e-08, "logits/chosen": -0.8751495480537415, "logits/rejected": -0.7611837387084961, "logps/chosen": -216.45455932617188, "logps/rejected": -194.20330810546875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.3883495145631068, "grad_norm": 0.08497226983308792, "learning_rate": 7.674539256930362e-07, "logits/chosen": -1.1164360046386719, "logits/rejected": -1.1212449073791504, "logps/chosen": -261.63470458984375, "logps/rejected": -252.7709197998047, "loss": 0.6933, "rewards/accuracies": 0.43686869740486145, "rewards/chosen": -0.0006324161658994853, "rewards/margins": -0.0002744389639701694, "rewards/rejected": -0.0003579772310331464, "step": 100 }, { "epoch": 0.3883495145631068, "eval_logits/chosen": -1.6433279514312744, "eval_logits/rejected": -1.6940118074417114, "eval_logps/chosen": -245.18409729003906, "eval_logps/rejected": -228.72549438476562, "eval_loss": 0.6930427551269531, "eval_rewards/accuracies": 0.4357594847679138, "eval_rewards/chosen": -0.0013237559469416738, "eval_rewards/margins": 0.00021979793382342905, "eval_rewards/rejected": -0.001543553895317018, "eval_runtime": 978.8839, "eval_samples_per_second": 3.224, "eval_steps_per_second": 0.404, "step": 100 }, { "epoch": 0.7766990291262136, "grad_norm": 0.06872538477182388, "learning_rate": 1.428588273890482e-07, "logits/chosen": -1.0920249223709106, "logits/rejected": -1.1872063875198364, "logps/chosen": -267.94097900390625, "logps/rejected": -240.1817169189453, "loss": 0.6931, "rewards/accuracies": 0.453125, "rewards/chosen": -0.0006506562349386513, "rewards/margins": 0.00018236432515550405, "rewards/rejected": -0.0008330204873345792, "step": 200 }, { "epoch": 0.7766990291262136, "eval_logits/chosen": -1.2404372692108154, "eval_logits/rejected": -1.2768234014511108, "eval_logps/chosen": -245.1230010986328, "eval_logps/rejected": -228.6853790283203, "eval_loss": 0.6929394602775574, "eval_rewards/accuracies": 0.4487341642379761, "eval_rewards/chosen": -0.0007124166586436331, "eval_rewards/margins": 0.0004301773733459413, "eval_rewards/rejected": -0.0011425940319895744, "eval_runtime": 981.2056, "eval_samples_per_second": 3.216, "eval_steps_per_second": 0.403, "step": 200 }, { "epoch": 0.9980582524271845, "step": 257, "total_flos": 0.0, "train_loss": 0.6931419428220519, "train_runtime": 4635.9681, "train_samples_per_second": 0.888, "train_steps_per_second": 0.055 } ], "logging_steps": 100, "max_steps": 257, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }