|
{ |
|
"best_metric": 0.7562676668167114, |
|
"best_model_checkpoint": "./output/checkpoints/2024-05-27_09-00-27/checkpoint-100", |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 198, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.025252525252525252, |
|
"grad_norm": 31.540218353271484, |
|
"learning_rate": 6e-06, |
|
"logits/chosen": 0.14386241137981415, |
|
"logits/rejected": -0.5877799391746521, |
|
"logps/chosen": -220.9837646484375, |
|
"logps/rejected": -171.7880096435547, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.11249999701976776, |
|
"rewards/chosen": 0.004091186448931694, |
|
"rewards/margins": -7.568336877739057e-05, |
|
"rewards/rejected": 0.004166870377957821, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.050505050505050504, |
|
"grad_norm": 23.380859375, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"logits/chosen": -0.10832454264163971, |
|
"logits/rejected": -0.4757871627807617, |
|
"logps/chosen": -196.83120727539062, |
|
"logps/rejected": -171.42141723632812, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.33129221200942993, |
|
"rewards/margins": 0.08790162205696106, |
|
"rewards/rejected": 0.24339056015014648, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07575757575757576, |
|
"grad_norm": 23.615068435668945, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"logits/chosen": -0.2714936137199402, |
|
"logits/rejected": -0.6976348161697388, |
|
"logps/chosen": -209.19827270507812, |
|
"logps/rejected": -173.0024871826172, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.1716283559799194, |
|
"rewards/margins": 0.2701141834259033, |
|
"rewards/rejected": 0.9015142321586609, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.10101010101010101, |
|
"grad_norm": 23.117158889770508, |
|
"learning_rate": 3.6e-05, |
|
"logits/chosen": -0.4857279658317566, |
|
"logits/rejected": -0.9330118298530579, |
|
"logps/chosen": -202.5274200439453, |
|
"logps/rejected": -176.1457977294922, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.3160903453826904, |
|
"rewards/margins": 0.552257239818573, |
|
"rewards/rejected": 0.7638329863548279, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12626262626262627, |
|
"grad_norm": 31.966520309448242, |
|
"learning_rate": 3.997197144003557e-05, |
|
"logits/chosen": 0.038466982543468475, |
|
"logits/rejected": -0.503484845161438, |
|
"logps/chosen": -225.3045196533203, |
|
"logps/rejected": -201.3688201904297, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.8800640106201172, |
|
"rewards/margins": 0.41275835037231445, |
|
"rewards/rejected": 0.46730566024780273, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.15151515151515152, |
|
"grad_norm": 33.95348358154297, |
|
"learning_rate": 3.980097021028909e-05, |
|
"logits/chosen": -0.036555200815200806, |
|
"logits/rejected": -0.4494614005088806, |
|
"logps/chosen": -230.3584747314453, |
|
"logps/rejected": -197.8057098388672, |
|
"loss": 0.7822, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.15881267189979553, |
|
"rewards/margins": 0.23805825412273407, |
|
"rewards/rejected": -0.07924561202526093, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.17676767676767677, |
|
"grad_norm": 30.72279930114746, |
|
"learning_rate": 3.947586836927601e-05, |
|
"logits/chosen": -0.24093489348888397, |
|
"logits/rejected": -0.8071072697639465, |
|
"logps/chosen": -206.206787109375, |
|
"logps/rejected": -173.3004608154297, |
|
"loss": 0.6694, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3433918356895447, |
|
"rewards/margins": 0.5247436761856079, |
|
"rewards/rejected": -0.18135181069374084, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.20202020202020202, |
|
"grad_norm": 29.44301986694336, |
|
"learning_rate": 3.899919601485982e-05, |
|
"logits/chosen": -0.5006700754165649, |
|
"logits/rejected": -1.0141886472702026, |
|
"logps/chosen": -211.9564666748047, |
|
"logps/rejected": -185.8589630126953, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.9416486620903015, |
|
"rewards/margins": 0.5160013437271118, |
|
"rewards/rejected": 0.4256472587585449, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.22727272727272727, |
|
"grad_norm": 21.33418083190918, |
|
"learning_rate": 3.837466283906112e-05, |
|
"logits/chosen": -0.37220650911331177, |
|
"logits/rejected": -1.1134871244430542, |
|
"logps/chosen": -203.7546844482422, |
|
"logps/rejected": -160.15695190429688, |
|
"loss": 0.7258, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.9558101892471313, |
|
"rewards/margins": 0.6899305582046509, |
|
"rewards/rejected": 1.2658796310424805, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.25252525252525254, |
|
"grad_norm": 29.768083572387695, |
|
"learning_rate": 3.760712925746183e-05, |
|
"logits/chosen": -0.40102189779281616, |
|
"logits/rejected": -0.8553081750869751, |
|
"logps/chosen": -213.06222534179688, |
|
"logps/rejected": -184.9990997314453, |
|
"loss": 0.7829, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 2.8542065620422363, |
|
"rewards/margins": 1.1129443645477295, |
|
"rewards/rejected": 1.7412618398666382, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 21.650400161743164, |
|
"learning_rate": 3.6702568583128715e-05, |
|
"logits/chosen": -0.5856252908706665, |
|
"logits/rejected": -1.2530784606933594, |
|
"logps/chosen": -204.8536834716797, |
|
"logps/rejected": -151.60122680664062, |
|
"loss": 0.6457, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.6728694438934326, |
|
"rewards/margins": 1.0119760036468506, |
|
"rewards/rejected": 0.6608934998512268, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.30303030303030304, |
|
"grad_norm": 25.246517181396484, |
|
"learning_rate": 3.566802053943705e-05, |
|
"logits/chosen": -0.56941157579422, |
|
"logits/rejected": -1.02021062374115, |
|
"logps/chosen": -209.837158203125, |
|
"logps/rejected": -172.53732299804688, |
|
"loss": 0.763, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.4966418743133545, |
|
"rewards/margins": 0.739094078540802, |
|
"rewards/rejected": 0.7575478553771973, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3282828282828283, |
|
"grad_norm": 17.691490173339844, |
|
"learning_rate": 3.451153647357965e-05, |
|
"logits/chosen": -0.05042291432619095, |
|
"logits/rejected": -0.6024894118309021, |
|
"logps/chosen": -234.263427734375, |
|
"logps/rejected": -189.74607849121094, |
|
"loss": 0.7844, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 1.7601999044418335, |
|
"rewards/margins": 0.520693302154541, |
|
"rewards/rejected": 1.2395066022872925, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.35353535353535354, |
|
"grad_norm": 26.0878849029541, |
|
"learning_rate": 3.3242116697136015e-05, |
|
"logits/chosen": -0.10399909317493439, |
|
"logits/rejected": -0.5000227093696594, |
|
"logps/chosen": -210.8389434814453, |
|
"logps/rejected": -188.54824829101562, |
|
"loss": 0.7101, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 1.1393743753433228, |
|
"rewards/margins": 0.3506864905357361, |
|
"rewards/rejected": 0.7886878848075867, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3787878787878788, |
|
"grad_norm": 24.767913818359375, |
|
"learning_rate": 3.186964044134713e-05, |
|
"logits/chosen": -0.6071327328681946, |
|
"logits/rejected": -0.8622013926506042, |
|
"logps/chosen": -188.81149291992188, |
|
"logps/rejected": -173.66650390625, |
|
"loss": 0.769, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.9471501111984253, |
|
"rewards/margins": 0.3631848394870758, |
|
"rewards/rejected": 0.5839653611183167, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.40404040404040403, |
|
"grad_norm": 27.299219131469727, |
|
"learning_rate": 3.0404788972217645e-05, |
|
"logits/chosen": -0.47333812713623047, |
|
"logits/rejected": -0.855948805809021, |
|
"logps/chosen": -232.19448852539062, |
|
"logps/rejected": -185.18539428710938, |
|
"loss": 0.7493, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.0035685300827026, |
|
"rewards/margins": 0.4641413688659668, |
|
"rewards/rejected": 0.5394272208213806, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.4292929292929293, |
|
"grad_norm": 11.157441139221191, |
|
"learning_rate": 2.8858962463800163e-05, |
|
"logits/chosen": -0.3463514745235443, |
|
"logits/rejected": -1.0284178256988525, |
|
"logps/chosen": -227.76937866210938, |
|
"logps/rejected": -173.3926239013672, |
|
"loss": 0.7012, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.2513014078140259, |
|
"rewards/margins": 0.724143922328949, |
|
"rewards/rejected": 0.5271574854850769, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.45454545454545453, |
|
"grad_norm": 21.36394500732422, |
|
"learning_rate": 2.7244191276593653e-05, |
|
"logits/chosen": -0.396036297082901, |
|
"logits/rejected": -0.6990352272987366, |
|
"logps/chosen": -198.6284942626953, |
|
"logps/rejected": -180.7591094970703, |
|
"loss": 0.7398, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 1.1550531387329102, |
|
"rewards/margins": 0.42906150221824646, |
|
"rewards/rejected": 0.7259916067123413, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4797979797979798, |
|
"grad_norm": 21.15500831604004, |
|
"learning_rate": 2.5573042331529846e-05, |
|
"logits/chosen": -0.5320285558700562, |
|
"logits/rejected": -0.9789053797721863, |
|
"logps/chosen": -216.59359741210938, |
|
"logps/rejected": -175.47976684570312, |
|
"loss": 0.7284, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 1.1451869010925293, |
|
"rewards/margins": 0.508368968963623, |
|
"rewards/rejected": 0.6368179321289062, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5050505050505051, |
|
"grad_norm": 25.441944122314453, |
|
"learning_rate": 2.385852130818994e-05, |
|
"logits/chosen": -0.38838592171669006, |
|
"logits/rejected": -0.7203149795532227, |
|
"logps/chosen": -212.9621124267578, |
|
"logps/rejected": -187.50942993164062, |
|
"loss": 0.7677, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 1.5237963199615479, |
|
"rewards/margins": 0.5711702108383179, |
|
"rewards/rejected": 0.95262610912323, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5050505050505051, |
|
"eval_logits/chosen": 1.4398491382598877, |
|
"eval_logits/rejected": 1.2089641094207764, |
|
"eval_logps/chosen": -209.8059844970703, |
|
"eval_logps/rejected": -178.4928741455078, |
|
"eval_loss": 0.7562676668167114, |
|
"eval_rewards/accuracies": 0.59375, |
|
"eval_rewards/chosen": 1.1455074548721313, |
|
"eval_rewards/margins": 0.41021832823753357, |
|
"eval_rewards/rejected": 0.7352891564369202, |
|
"eval_runtime": 274.7063, |
|
"eval_samples_per_second": 2.33, |
|
"eval_steps_per_second": 0.146, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5303030303030303, |
|
"grad_norm": 18.52869415283203, |
|
"learning_rate": 2.2113971428391862e-05, |
|
"logits/chosen": -0.5178086757659912, |
|
"logits/rejected": -1.0226086378097534, |
|
"logps/chosen": -208.1871795654297, |
|
"logps/rejected": -171.2388458251953, |
|
"loss": 0.7824, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.9819921255111694, |
|
"rewards/margins": 0.44732803106307983, |
|
"rewards/rejected": 0.5346641540527344, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 18.219402313232422, |
|
"learning_rate": 2.0352969612862576e-05, |
|
"logits/chosen": -0.5200484395027161, |
|
"logits/rejected": -0.8673607707023621, |
|
"logps/chosen": -207.2660369873047, |
|
"logps/rejected": -179.528564453125, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.8134071230888367, |
|
"rewards/margins": 0.5864471197128296, |
|
"rewards/rejected": 0.2269599884748459, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5808080808080808, |
|
"grad_norm": 22.682283401489258, |
|
"learning_rate": 1.858922081915378e-05, |
|
"logits/chosen": -0.3367950916290283, |
|
"logits/rejected": -0.7870457768440247, |
|
"logps/chosen": -187.28225708007812, |
|
"logps/rejected": -167.66563415527344, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.0122456550598145, |
|
"rewards/margins": 0.42765671014785767, |
|
"rewards/rejected": 0.5845889449119568, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.6060606060606061, |
|
"grad_norm": 21.8991756439209, |
|
"learning_rate": 1.6836451383113923e-05, |
|
"logits/chosen": -0.6206024885177612, |
|
"logits/rejected": -1.1339770555496216, |
|
"logps/chosen": -216.4201202392578, |
|
"logps/rejected": -181.8843536376953, |
|
"loss": 0.7218, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 1.1707611083984375, |
|
"rewards/margins": 0.4338921010494232, |
|
"rewards/rejected": 0.7368690371513367, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6313131313131313, |
|
"grad_norm": 26.37303352355957, |
|
"learning_rate": 1.5108302193984004e-05, |
|
"logits/chosen": -0.3522421717643738, |
|
"logits/rejected": -1.013496994972229, |
|
"logps/chosen": -231.60025024414062, |
|
"logps/rejected": -174.62521362304688, |
|
"loss": 0.631, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.5266729593276978, |
|
"rewards/margins": 0.6238378286361694, |
|
"rewards/rejected": 0.9028350710868835, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.6565656565656566, |
|
"grad_norm": 19.788497924804688, |
|
"learning_rate": 1.3752556459724117e-05, |
|
"logits/chosen": -0.44988712668418884, |
|
"logits/rejected": -0.899565577507019, |
|
"logps/chosen": -206.1852264404297, |
|
"logps/rejected": -188.783447265625, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 1.4883193969726562, |
|
"rewards/margins": 0.6105667352676392, |
|
"rewards/rejected": 0.8777526021003723, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 17.837421417236328, |
|
"learning_rate": 1.2102420272588178e-05, |
|
"logits/chosen": -0.269491583108902, |
|
"logits/rejected": -0.7996856570243835, |
|
"logps/chosen": -219.24240112304688, |
|
"logps/rejected": -178.4714813232422, |
|
"loss": 0.6356, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 1.4494140148162842, |
|
"rewards/margins": 0.8258872032165527, |
|
"rewards/rejected": 0.6235266923904419, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7070707070707071, |
|
"grad_norm": 30.42537498474121, |
|
"learning_rate": 1.0513746824428951e-05, |
|
"logits/chosen": -0.6660299897193909, |
|
"logits/rejected": -1.161827564239502, |
|
"logps/chosen": -225.38668823242188, |
|
"logps/rejected": -188.2053985595703, |
|
"loss": 0.7771, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.9760153889656067, |
|
"rewards/margins": 0.37082913517951965, |
|
"rewards/rejected": 0.6051862239837646, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.7323232323232324, |
|
"grad_norm": 21.641803741455078, |
|
"learning_rate": 8.998899931103173e-06, |
|
"logits/chosen": -0.6154942512512207, |
|
"logits/rejected": -1.150761365890503, |
|
"logps/chosen": -205.3412628173828, |
|
"logps/rejected": -176.29917907714844, |
|
"loss": 0.6959, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.9418373107910156, |
|
"rewards/margins": 0.5389925241470337, |
|
"rewards/rejected": 0.40284472703933716, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.7575757575757576, |
|
"grad_norm": 17.89181137084961, |
|
"learning_rate": 7.569668854942815e-06, |
|
"logits/chosen": -0.2999069094657898, |
|
"logits/rejected": -0.8618285059928894, |
|
"logps/chosen": -208.3065948486328, |
|
"logps/rejected": -163.25619506835938, |
|
"loss": 0.6386, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.9588847160339355, |
|
"rewards/margins": 0.6155614256858826, |
|
"rewards/rejected": 0.343323290348053, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7828282828282829, |
|
"grad_norm": 20.726341247558594, |
|
"learning_rate": 6.237176555082625e-06, |
|
"logits/chosen": -0.09209189563989639, |
|
"logits/rejected": -0.40321072936058044, |
|
"logps/chosen": -221.6671905517578, |
|
"logps/rejected": -206.04989624023438, |
|
"loss": 0.7131, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.885414719581604, |
|
"rewards/margins": 0.5608201622962952, |
|
"rewards/rejected": 0.32459449768066406, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.8080808080808081, |
|
"grad_norm": 16.55711555480957, |
|
"learning_rate": 5.0117931232775e-06, |
|
"logits/chosen": -0.19318969547748566, |
|
"logits/rejected": -0.7406075596809387, |
|
"logps/chosen": -207.5948944091797, |
|
"logps/rejected": -176.14010620117188, |
|
"loss": 0.6493, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.8107932209968567, |
|
"rewards/margins": 0.5306531190872192, |
|
"rewards/rejected": 0.28014007210731506, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 17.550378799438477, |
|
"learning_rate": 3.903055078893489e-06, |
|
"logits/chosen": -0.05737120658159256, |
|
"logits/rejected": -0.6579563617706299, |
|
"logps/chosen": -220.5160675048828, |
|
"logps/rejected": -174.6505584716797, |
|
"loss": 0.7329, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.9277563095092773, |
|
"rewards/margins": 0.7453610301017761, |
|
"rewards/rejected": 0.18239526450634003, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.8585858585858586, |
|
"grad_norm": 33.4338493347168, |
|
"learning_rate": 2.919591151157475e-06, |
|
"logits/chosen": -0.6612521409988403, |
|
"logits/rejected": -1.0171642303466797, |
|
"logps/chosen": -191.87083435058594, |
|
"logps/rejected": -168.1193084716797, |
|
"loss": 0.7216, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.65160071849823, |
|
"rewards/margins": 0.4179254472255707, |
|
"rewards/rejected": 0.2336752861738205, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.8838383838383839, |
|
"grad_norm": 18.22390365600586, |
|
"learning_rate": 2.069055126263433e-06, |
|
"logits/chosen": -0.7078531384468079, |
|
"logits/rejected": -0.9975414276123047, |
|
"logps/chosen": -177.2061004638672, |
|
"logps/rejected": -164.01715087890625, |
|
"loss": 0.7113, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7737497091293335, |
|
"rewards/margins": 0.5676254034042358, |
|
"rewards/rejected": 0.20612427592277527, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 17.45758056640625, |
|
"learning_rate": 1.3580662819512093e-06, |
|
"logits/chosen": -0.49047690629959106, |
|
"logits/rejected": -0.9685350656509399, |
|
"logps/chosen": -211.9286346435547, |
|
"logps/rejected": -177.24661254882812, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8009859323501587, |
|
"rewards/margins": 0.5601747632026672, |
|
"rewards/rejected": 0.24081113934516907, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.9343434343434344, |
|
"grad_norm": 24.04217529296875, |
|
"learning_rate": 7.92157873124415e-07, |
|
"logits/chosen": -0.42594170570373535, |
|
"logits/rejected": -1.1140912771224976, |
|
"logps/chosen": -225.57559204101562, |
|
"logps/rejected": -178.7427520751953, |
|
"loss": 0.645, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.0804827213287354, |
|
"rewards/margins": 0.6804853081703186, |
|
"rewards/rejected": 0.39999738335609436, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.9595959595959596, |
|
"grad_norm": 30.535690307617188, |
|
"learning_rate": 3.757340694169109e-07, |
|
"logits/chosen": -0.4965648651123047, |
|
"logits/rejected": -0.7092806696891785, |
|
"logps/chosen": -183.77874755859375, |
|
"logps/rejected": -176.67739868164062, |
|
"loss": 0.7775, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.7033542394638062, |
|
"rewards/margins": 0.2690446078777313, |
|
"rewards/rejected": 0.4343096613883972, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.9848484848484849, |
|
"grad_norm": 17.108673095703125, |
|
"learning_rate": 1.1203567984036101e-07, |
|
"logits/chosen": -0.38576817512512207, |
|
"logits/rejected": -1.0843629837036133, |
|
"logps/chosen": -222.65896606445312, |
|
"logps/rejected": -185.77786254882812, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.9926906824111938, |
|
"rewards/margins": 0.9567824602127075, |
|
"rewards/rejected": 0.03590827062726021, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 198, |
|
"total_flos": 0.0, |
|
"train_loss": 0.7032270744593456, |
|
"train_runtime": 3353.7827, |
|
"train_samples_per_second": 0.943, |
|
"train_steps_per_second": 0.059 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 198, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|