{ "best_metric": 0.35499998927116394, "best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.10-KTO_Hyperparameter search, altering desired and undesired weights for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100", "epoch": 0.684931506849315, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "grad_norm": 0.0, "kl": 0.6947981119155884, "learning_rate": 0.00018, "logps/chosen": -534.4633178710938, "logps/rejected": -551.6043090820312, "loss": 0.3763, "rewards/chosen": -24.06700325012207, "rewards/margins": 3.339742660522461, "rewards/rejected": -27.222640991210938, "step": 20 }, { "epoch": 0.27, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 0.00015142857142857143, "logps/chosen": -1181.132568359375, "logps/rejected": -1268.249755859375, "loss": 0.3703, "rewards/chosen": -89.00174713134766, "rewards/margins": 8.45933723449707, "rewards/rejected": -97.4486083984375, "step": 40 }, { "epoch": 0.34, "eval_kl": 0.0, "eval_logps/chosen": -1383.1231689453125, "eval_logps/rejected": -1366.6656494140625, "eval_loss": 0.35499998927116394, "eval_rewards/chosen": -110.769287109375, "eval_rewards/margins": 0.34473782777786255, "eval_rewards/rejected": -110.63328552246094, "eval_runtime": 139.1234, "eval_samples_per_second": 2.156, "eval_steps_per_second": 0.539, "step": 50 }, { "epoch": 0.41, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 0.00012285714285714287, "logps/chosen": -1373.5347900390625, "logps/rejected": -1470.8179931640625, "loss": 0.3422, "rewards/chosen": -108.9572525024414, "rewards/margins": 11.698455810546875, "rewards/rejected": -120.54151916503906, "step": 60 }, { "epoch": 0.55, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 9.428571428571429e-05, "logps/chosen": -1384.841796875, "logps/rejected": -1508.3975830078125, "loss": 0.3703, "rewards/chosen": -111.45362854003906, "rewards/margins": 9.210695266723633, "rewards/rejected": -121.47576904296875, "step": 80 }, { "epoch": 0.68, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 6.571428571428571e-05, "logps/chosen": -1428.02587890625, "logps/rejected": -1513.79248046875, "loss": 0.3352, "rewards/chosen": -113.59603118896484, "rewards/margins": 8.474186897277832, "rewards/rejected": -121.59716796875, "step": 100 }, { "epoch": 0.68, "eval_kl": 0.0, "eval_logps/chosen": -1404.312255859375, "eval_logps/rejected": -1386.0108642578125, "eval_loss": 0.35499998927116394, "eval_rewards/chosen": -112.88818359375, "eval_rewards/margins": 0.1723746657371521, "eval_rewards/rejected": -112.56780242919922, "eval_runtime": 139.1821, "eval_samples_per_second": 2.155, "eval_steps_per_second": 0.539, "step": 100 } ], "logging_steps": 20, "max_steps": 145, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }