{ "best_metric": 0.7562676668167114, "best_model_checkpoint": "./output/checkpoints/2024-05-27_09-00-27/checkpoint-100", "epoch": 0.5050505050505051, "eval_steps": 100, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.025252525252525252, "grad_norm": 31.540218353271484, "learning_rate": 6e-06, "logits/chosen": 0.14386241137981415, "logits/rejected": -0.5877799391746521, "logps/chosen": -220.9837646484375, "logps/rejected": -171.7880096435547, "loss": 0.6932, "rewards/accuracies": 0.11249999701976776, "rewards/chosen": 0.004091186448931694, "rewards/margins": -7.568336877739057e-05, "rewards/rejected": 0.004166870377957821, "step": 5 }, { "epoch": 0.050505050505050504, "grad_norm": 23.380859375, "learning_rate": 1.6000000000000003e-05, "logits/chosen": -0.10832454264163971, "logits/rejected": -0.4757871627807617, "logps/chosen": -196.83120727539062, "logps/rejected": -171.42141723632812, "loss": 0.6684, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.33129221200942993, "rewards/margins": 0.08790162205696106, "rewards/rejected": 0.24339056015014648, "step": 10 }, { "epoch": 0.07575757575757576, "grad_norm": 23.615068435668945, "learning_rate": 2.6000000000000002e-05, "logits/chosen": -0.2714936137199402, "logits/rejected": -0.6976348161697388, "logps/chosen": -209.19827270507812, "logps/rejected": -173.0024871826172, "loss": 0.6943, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 1.1716283559799194, "rewards/margins": 0.2701141834259033, "rewards/rejected": 0.9015142321586609, "step": 15 }, { "epoch": 0.10101010101010101, "grad_norm": 23.117158889770508, "learning_rate": 3.6e-05, "logits/chosen": -0.4857279658317566, "logits/rejected": -0.9330118298530579, "logps/chosen": -202.5274200439453, "logps/rejected": -176.1457977294922, "loss": 0.6632, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 1.3160903453826904, "rewards/margins": 0.552257239818573, "rewards/rejected": 0.7638329863548279, "step": 20 }, { "epoch": 0.12626262626262627, "grad_norm": 31.966520309448242, "learning_rate": 3.997197144003557e-05, "logits/chosen": 0.038466982543468475, "logits/rejected": -0.503484845161438, "logps/chosen": -225.3045196533203, "logps/rejected": -201.3688201904297, "loss": 0.6399, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.8800640106201172, "rewards/margins": 0.41275835037231445, "rewards/rejected": 0.46730566024780273, "step": 25 }, { "epoch": 0.15151515151515152, "grad_norm": 33.95348358154297, "learning_rate": 3.980097021028909e-05, "logits/chosen": -0.036555200815200806, "logits/rejected": -0.4494614005088806, "logps/chosen": -230.3584747314453, "logps/rejected": -197.8057098388672, "loss": 0.7822, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.15881267189979553, "rewards/margins": 0.23805825412273407, "rewards/rejected": -0.07924561202526093, "step": 30 }, { "epoch": 0.17676767676767677, "grad_norm": 30.72279930114746, "learning_rate": 3.947586836927601e-05, "logits/chosen": -0.24093489348888397, "logits/rejected": -0.8071072697639465, "logps/chosen": -206.206787109375, "logps/rejected": -173.3004608154297, "loss": 0.6694, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.3433918356895447, "rewards/margins": 0.5247436761856079, "rewards/rejected": -0.18135181069374084, "step": 35 }, { "epoch": 0.20202020202020202, "grad_norm": 29.44301986694336, "learning_rate": 3.899919601485982e-05, "logits/chosen": -0.5006700754165649, "logits/rejected": -1.0141886472702026, "logps/chosen": -211.9564666748047, "logps/rejected": -185.8589630126953, "loss": 0.6946, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.9416486620903015, "rewards/margins": 0.5160013437271118, "rewards/rejected": 0.4256472587585449, "step": 40 }, { "epoch": 0.22727272727272727, "grad_norm": 21.33418083190918, "learning_rate": 3.837466283906112e-05, "logits/chosen": -0.37220650911331177, "logits/rejected": -1.1134871244430542, "logps/chosen": -203.7546844482422, "logps/rejected": -160.15695190429688, "loss": 0.7258, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 1.9558101892471313, "rewards/margins": 0.6899305582046509, "rewards/rejected": 1.2658796310424805, "step": 45 }, { "epoch": 0.25252525252525254, "grad_norm": 29.768083572387695, "learning_rate": 3.760712925746183e-05, "logits/chosen": -0.40102189779281616, "logits/rejected": -0.8553081750869751, "logps/chosen": -213.06222534179688, "logps/rejected": -184.9990997314453, "loss": 0.7829, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 2.8542065620422363, "rewards/margins": 1.1129443645477295, "rewards/rejected": 1.7412618398666382, "step": 50 }, { "epoch": 0.2777777777777778, "grad_norm": 21.650400161743164, "learning_rate": 3.6702568583128715e-05, "logits/chosen": -0.5856252908706665, "logits/rejected": -1.2530784606933594, "logps/chosen": -204.8536834716797, "logps/rejected": -151.60122680664062, "loss": 0.6457, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 1.6728694438934326, "rewards/margins": 1.0119760036468506, "rewards/rejected": 0.6608934998512268, "step": 55 }, { "epoch": 0.30303030303030304, "grad_norm": 25.246517181396484, "learning_rate": 3.566802053943705e-05, "logits/chosen": -0.56941157579422, "logits/rejected": -1.02021062374115, "logps/chosen": -209.837158203125, "logps/rejected": -172.53732299804688, "loss": 0.763, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 1.4966418743133545, "rewards/margins": 0.739094078540802, "rewards/rejected": 0.7575478553771973, "step": 60 }, { "epoch": 0.3282828282828283, "grad_norm": 17.691490173339844, "learning_rate": 3.451153647357965e-05, "logits/chosen": -0.05042291432619095, "logits/rejected": -0.6024894118309021, "logps/chosen": -234.263427734375, "logps/rejected": -189.74607849121094, "loss": 0.7844, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 1.7601999044418335, "rewards/margins": 0.520693302154541, "rewards/rejected": 1.2395066022872925, "step": 65 }, { "epoch": 0.35353535353535354, "grad_norm": 26.0878849029541, "learning_rate": 3.3242116697136015e-05, "logits/chosen": -0.10399909317493439, "logits/rejected": -0.5000227093696594, "logps/chosen": -210.8389434814453, "logps/rejected": -188.54824829101562, "loss": 0.7101, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 1.1393743753433228, "rewards/margins": 0.3506864905357361, "rewards/rejected": 0.7886878848075867, "step": 70 }, { "epoch": 0.3787878787878788, "grad_norm": 24.767913818359375, "learning_rate": 3.186964044134713e-05, "logits/chosen": -0.6071327328681946, "logits/rejected": -0.8622013926506042, "logps/chosen": -188.81149291992188, "logps/rejected": -173.66650390625, "loss": 0.769, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.9471501111984253, "rewards/margins": 0.3631848394870758, "rewards/rejected": 0.5839653611183167, "step": 75 }, { "epoch": 0.40404040404040403, "grad_norm": 27.299219131469727, "learning_rate": 3.0404788972217645e-05, "logits/chosen": -0.47333812713623047, "logits/rejected": -0.855948805809021, "logps/chosen": -232.19448852539062, "logps/rejected": -185.18539428710938, "loss": 0.7493, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 1.0035685300827026, "rewards/margins": 0.4641413688659668, "rewards/rejected": 0.5394272208213806, "step": 80 }, { "epoch": 0.4292929292929293, "grad_norm": 11.157441139221191, "learning_rate": 2.8858962463800163e-05, "logits/chosen": -0.3463514745235443, "logits/rejected": -1.0284178256988525, "logps/chosen": -227.76937866210938, "logps/rejected": -173.3926239013672, "loss": 0.7012, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 1.2513014078140259, "rewards/margins": 0.724143922328949, "rewards/rejected": 0.5271574854850769, "step": 85 }, { "epoch": 0.45454545454545453, "grad_norm": 21.36394500732422, "learning_rate": 2.7244191276593653e-05, "logits/chosen": -0.396036297082901, "logits/rejected": -0.6990352272987366, "logps/chosen": -198.6284942626953, "logps/rejected": -180.7591094970703, "loss": 0.7398, "rewards/accuracies": 0.5625, "rewards/chosen": 1.1550531387329102, "rewards/margins": 0.42906150221824646, "rewards/rejected": 0.7259916067123413, "step": 90 }, { "epoch": 0.4797979797979798, "grad_norm": 21.15500831604004, "learning_rate": 2.5573042331529846e-05, "logits/chosen": -0.5320285558700562, "logits/rejected": -0.9789053797721863, "logps/chosen": -216.59359741210938, "logps/rejected": -175.47976684570312, "loss": 0.7284, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 1.1451869010925293, "rewards/margins": 0.508368968963623, "rewards/rejected": 0.6368179321289062, "step": 95 }, { "epoch": 0.5050505050505051, "grad_norm": 25.441944122314453, "learning_rate": 2.385852130818994e-05, "logits/chosen": -0.38838592171669006, "logits/rejected": -0.7203149795532227, "logps/chosen": -212.9621124267578, "logps/rejected": -187.50942993164062, "loss": 0.7677, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 1.5237963199615479, "rewards/margins": 0.5711702108383179, "rewards/rejected": 0.95262610912323, "step": 100 }, { "epoch": 0.5050505050505051, "eval_logits/chosen": 1.4398491382598877, "eval_logits/rejected": 1.2089641094207764, "eval_logps/chosen": -209.8059844970703, "eval_logps/rejected": -178.4928741455078, "eval_loss": 0.7562676668167114, "eval_rewards/accuracies": 0.59375, "eval_rewards/chosen": 1.1455074548721313, "eval_rewards/margins": 0.41021832823753357, "eval_rewards/rejected": 0.7352891564369202, "eval_runtime": 274.7063, "eval_samples_per_second": 2.33, "eval_steps_per_second": 0.146, "step": 100 } ], "logging_steps": 5, "max_steps": 198, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }