{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 274, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18248175182481752, "grad_norm": 0.0007704155868850648, "learning_rate": 9.803954791481238e-07, "logits/chosen": -3.126544237136841, "logits/rejected": -3.302138566970825, "logps/chosen": -39.869632720947266, "logps/rejected": -157.12303161621094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 8.736296653747559, "rewards/margins": 19.26922607421875, "rewards/rejected": -10.532929420471191, "step": 50 }, { "epoch": 0.36496350364963503, "grad_norm": 0.00025232642656192183, "learning_rate": 8.03112705483319e-07, "logits/chosen": -3.0920073986053467, "logits/rejected": -3.2855396270751953, "logps/chosen": -37.723506927490234, "logps/rejected": -163.46827697753906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 8.903458595275879, "rewards/margins": 20.080211639404297, "rewards/rejected": -11.176753044128418, "step": 100 }, { "epoch": 0.5474452554744526, "grad_norm": 1.50539008245687e-05, "learning_rate": 5.063851773579869e-07, "logits/chosen": -3.077618360519409, "logits/rejected": -3.280364751815796, "logps/chosen": -40.20799255371094, "logps/rejected": -170.47584533691406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 8.536523818969727, "rewards/margins": 20.434810638427734, "rewards/rejected": -11.898284912109375, "step": 150 }, { "epoch": 0.7299270072992701, "grad_norm": 1.4836322407063562e-05, "learning_rate": 2.071415028359026e-07, "logits/chosen": -3.073031425476074, "logits/rejected": -3.256861925125122, "logps/chosen": -41.553260803222656, "logps/rejected": -174.45729064941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 8.274081230163574, "rewards/margins": 20.350595474243164, "rewards/rejected": -12.076513290405273, "step": 200 }, { "epoch": 0.9124087591240876, "grad_norm": 2.432690564546647e-07, "learning_rate": 2.3301803972534728e-08, "logits/chosen": -3.053009033203125, "logits/rejected": -3.253528356552124, "logps/chosen": -38.02189636230469, "logps/rejected": -169.2959747314453, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 8.201343536376953, "rewards/margins": 20.2254581451416, "rewards/rejected": -12.024112701416016, "step": 250 } ], "logging_steps": 50, "max_steps": 274, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }