|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9904153354632586, |
|
"eval_steps": 500, |
|
"global_step": 468, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006389776357827476, |
|
"grad_norm": 29.368945511033267, |
|
"learning_rate": 1.0638297872340425e-08, |
|
"logits/chosen": -1.53125, |
|
"logits/rejected": -1.28125, |
|
"logps/chosen": -145.0, |
|
"logps/rejected": -130.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06389776357827476, |
|
"grad_norm": 28.719594921405, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -1.546875, |
|
"logits/rejected": -1.34375, |
|
"logps/chosen": -145.0, |
|
"logps/rejected": -129.0, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.3055555522441864, |
|
"rewards/chosen": 0.002960205078125, |
|
"rewards/margins": 0.0189208984375, |
|
"rewards/rejected": -0.0159912109375, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12779552715654952, |
|
"grad_norm": 22.494308989969756, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -1.390625, |
|
"logps/chosen": -146.0, |
|
"logps/rejected": -134.0, |
|
"loss": 0.5984, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.00970458984375, |
|
"rewards/margins": 0.203125, |
|
"rewards/rejected": -0.193359375, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19169329073482427, |
|
"grad_norm": 9.449890990720535, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -1.546875, |
|
"logits/rejected": -1.296875, |
|
"logps/chosen": -140.0, |
|
"logps/rejected": -136.0, |
|
"loss": 0.3229, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.025634765625, |
|
"rewards/margins": 1.0625, |
|
"rewards/rejected": -1.0390625, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25559105431309903, |
|
"grad_norm": 0.38707031057010677, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -1.515625, |
|
"logits/rejected": -1.375, |
|
"logps/chosen": -149.0, |
|
"logps/rejected": -169.0, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2578125, |
|
"rewards/margins": 3.671875, |
|
"rewards/rejected": -3.9375, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3194888178913738, |
|
"grad_norm": 0.20241899573369684, |
|
"learning_rate": 4.96437054631829e-07, |
|
"logits/chosen": -1.515625, |
|
"logits/rejected": -1.359375, |
|
"logps/chosen": -147.0, |
|
"logps/rejected": -203.0, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.66015625, |
|
"rewards/margins": 6.875, |
|
"rewards/rejected": -7.53125, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38338658146964855, |
|
"grad_norm": 0.3039285596197402, |
|
"learning_rate": 4.845605700712589e-07, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -1.3984375, |
|
"logps/chosen": -153.0, |
|
"logps/rejected": -221.0, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7265625, |
|
"rewards/margins": 8.0, |
|
"rewards/rejected": -8.75, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4472843450479233, |
|
"grad_norm": 0.362275931660039, |
|
"learning_rate": 4.7268408551068883e-07, |
|
"logits/chosen": -1.53125, |
|
"logits/rejected": -1.4296875, |
|
"logps/chosen": -151.0, |
|
"logps/rejected": -222.0, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8515625, |
|
"rewards/margins": 8.375, |
|
"rewards/rejected": -9.25, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5111821086261981, |
|
"grad_norm": 0.2868857164394034, |
|
"learning_rate": 4.6080760095011875e-07, |
|
"logits/chosen": -1.5625, |
|
"logits/rejected": -1.4375, |
|
"logps/chosen": -154.0, |
|
"logps/rejected": -231.0, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.734375, |
|
"rewards/margins": 9.3125, |
|
"rewards/rejected": -10.0625, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5750798722044729, |
|
"grad_norm": 0.15240372298676685, |
|
"learning_rate": 4.4893111638954866e-07, |
|
"logits/chosen": -1.546875, |
|
"logits/rejected": -1.421875, |
|
"logps/chosen": -148.0, |
|
"logps/rejected": -230.0, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.73046875, |
|
"rewards/margins": 9.5, |
|
"rewards/rejected": -10.25, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6389776357827476, |
|
"grad_norm": 0.02392673493900165, |
|
"learning_rate": 4.3705463182897863e-07, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -1.3828125, |
|
"logps/chosen": -154.0, |
|
"logps/rejected": -235.0, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7578125, |
|
"rewards/margins": 9.75, |
|
"rewards/rejected": -10.5625, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7028753993610224, |
|
"grad_norm": 0.09175324835066521, |
|
"learning_rate": 4.251781472684085e-07, |
|
"logits/chosen": -1.546875, |
|
"logits/rejected": -1.421875, |
|
"logps/chosen": -150.0, |
|
"logps/rejected": -234.0, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.83203125, |
|
"rewards/margins": 10.0625, |
|
"rewards/rejected": -10.875, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7667731629392971, |
|
"grad_norm": 0.06184587725303002, |
|
"learning_rate": 4.1330166270783846e-07, |
|
"logits/chosen": -1.546875, |
|
"logits/rejected": -1.40625, |
|
"logps/chosen": -145.0, |
|
"logps/rejected": -240.0, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.89453125, |
|
"rewards/margins": 10.375, |
|
"rewards/rejected": -11.25, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8306709265175719, |
|
"grad_norm": 0.024649453162613934, |
|
"learning_rate": 4.0142517814726837e-07, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -1.4296875, |
|
"logps/chosen": -150.0, |
|
"logps/rejected": -242.0, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.859375, |
|
"rewards/margins": 10.625, |
|
"rewards/rejected": -11.5, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8945686900958466, |
|
"grad_norm": 0.022941440244503526, |
|
"learning_rate": 3.8954869358669834e-07, |
|
"logits/chosen": -1.5703125, |
|
"logits/rejected": -1.4453125, |
|
"logps/chosen": -171.0, |
|
"logps/rejected": -242.0, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.046875, |
|
"rewards/margins": 10.375, |
|
"rewards/rejected": -11.4375, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9584664536741214, |
|
"grad_norm": 0.2502811167205942, |
|
"learning_rate": 3.7767220902612825e-07, |
|
"logits/chosen": -1.546875, |
|
"logits/rejected": -1.4765625, |
|
"logps/chosen": -160.0, |
|
"logps/rejected": -250.0, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.984375, |
|
"rewards/margins": 10.8125, |
|
"rewards/rejected": -11.75, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9968051118210862, |
|
"eval_logits/chosen": -1.4921875, |
|
"eval_logits/rejected": -1.46875, |
|
"eval_logps/chosen": -148.0, |
|
"eval_logps/rejected": -250.0, |
|
"eval_loss": 7.716297841398045e-05, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.90625, |
|
"eval_rewards/margins": 10.9375, |
|
"eval_rewards/rejected": -11.8125, |
|
"eval_runtime": 11.6151, |
|
"eval_samples_per_second": 17.219, |
|
"eval_steps_per_second": 0.603, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.0223642172523961, |
|
"grad_norm": 0.05686403061946404, |
|
"learning_rate": 3.6579572446555817e-07, |
|
"logits/chosen": -1.5859375, |
|
"logits/rejected": -1.453125, |
|
"logps/chosen": -174.0, |
|
"logps/rejected": -249.0, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.95703125, |
|
"rewards/margins": 10.875, |
|
"rewards/rejected": -11.8125, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0862619808306708, |
|
"grad_norm": 0.025647512662174864, |
|
"learning_rate": 3.5391923990498813e-07, |
|
"logits/chosen": -1.53125, |
|
"logits/rejected": -1.4765625, |
|
"logps/chosen": -151.0, |
|
"logps/rejected": -253.0, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.890625, |
|
"rewards/margins": 11.4375, |
|
"rewards/rejected": -12.3125, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.1501597444089458, |
|
"grad_norm": 0.10600901838990953, |
|
"learning_rate": 3.42042755344418e-07, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -1.46875, |
|
"logps/chosen": -160.0, |
|
"logps/rejected": -249.0, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.94140625, |
|
"rewards/margins": 11.25, |
|
"rewards/rejected": -12.25, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.2140575079872205, |
|
"grad_norm": 0.13119830498432972, |
|
"learning_rate": 3.3016627078384796e-07, |
|
"logits/chosen": -1.53125, |
|
"logits/rejected": -1.484375, |
|
"logps/chosen": -146.0, |
|
"logps/rejected": -251.0, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.88671875, |
|
"rewards/margins": 11.4375, |
|
"rewards/rejected": -12.3125, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.2779552715654952, |
|
"grad_norm": 0.030239526412913494, |
|
"learning_rate": 3.182897862232779e-07, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -1.4609375, |
|
"logps/chosen": -134.0, |
|
"logps/rejected": -252.0, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9453125, |
|
"rewards/margins": 11.6875, |
|
"rewards/rejected": -12.625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.34185303514377, |
|
"grad_norm": 0.0080485091857848, |
|
"learning_rate": 3.0641330166270784e-07, |
|
"logits/chosen": -1.5859375, |
|
"logits/rejected": -1.5078125, |
|
"logps/chosen": -154.0, |
|
"logps/rejected": -260.0, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.97265625, |
|
"rewards/margins": 11.8125, |
|
"rewards/rejected": -12.8125, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.4057507987220448, |
|
"grad_norm": 0.01503760669759659, |
|
"learning_rate": 2.9453681710213776e-07, |
|
"logits/chosen": -1.59375, |
|
"logits/rejected": -1.5234375, |
|
"logps/chosen": -154.0, |
|
"logps/rejected": -256.0, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.890625, |
|
"rewards/margins": 11.8125, |
|
"rewards/rejected": -12.6875, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.4696485623003195, |
|
"grad_norm": 0.01745595116822664, |
|
"learning_rate": 2.8266033254156767e-07, |
|
"logits/chosen": -1.5703125, |
|
"logits/rejected": -1.5078125, |
|
"logps/chosen": -152.0, |
|
"logps/rejected": -256.0, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.73828125, |
|
"rewards/margins": 12.0, |
|
"rewards/rejected": -12.75, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.5335463258785942, |
|
"grad_norm": 0.010639490905132864, |
|
"learning_rate": 2.7078384798099764e-07, |
|
"logits/chosen": -1.6015625, |
|
"logits/rejected": -1.5, |
|
"logps/chosen": -152.0, |
|
"logps/rejected": -262.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8359375, |
|
"rewards/margins": 12.125, |
|
"rewards/rejected": -13.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.5974440894568689, |
|
"grad_norm": 0.021325021450464145, |
|
"learning_rate": 2.589073634204275e-07, |
|
"logits/chosen": -1.53125, |
|
"logits/rejected": -1.5, |
|
"logps/chosen": -153.0, |
|
"logps/rejected": -256.0, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.80859375, |
|
"rewards/margins": 11.6875, |
|
"rewards/rejected": -12.5, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.6613418530351438, |
|
"grad_norm": 0.03402727752531963, |
|
"learning_rate": 2.4703087885985747e-07, |
|
"logits/chosen": -1.546875, |
|
"logits/rejected": -1.5234375, |
|
"logps/chosen": -143.0, |
|
"logps/rejected": -255.0, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7265625, |
|
"rewards/margins": 11.6875, |
|
"rewards/rejected": -12.4375, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.7252396166134185, |
|
"grad_norm": 0.01735332750787422, |
|
"learning_rate": 2.351543942992874e-07, |
|
"logits/chosen": -1.6171875, |
|
"logits/rejected": -1.5078125, |
|
"logps/chosen": -165.0, |
|
"logps/rejected": -260.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.80859375, |
|
"rewards/margins": 12.1875, |
|
"rewards/rejected": -13.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.7891373801916934, |
|
"grad_norm": 0.02817904426646656, |
|
"learning_rate": 2.2327790973871732e-07, |
|
"logits/chosen": -1.5703125, |
|
"logits/rejected": -1.5078125, |
|
"logps/chosen": -158.0, |
|
"logps/rejected": -266.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7734375, |
|
"rewards/margins": 12.375, |
|
"rewards/rejected": -13.125, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.8530351437699681, |
|
"grad_norm": 0.026646622883463183, |
|
"learning_rate": 2.1140142517814726e-07, |
|
"logits/chosen": -1.5859375, |
|
"logits/rejected": -1.46875, |
|
"logps/chosen": -154.0, |
|
"logps/rejected": -258.0, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.90234375, |
|
"rewards/margins": 11.9375, |
|
"rewards/rejected": -12.875, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.9169329073482428, |
|
"grad_norm": 0.005464018856443713, |
|
"learning_rate": 1.9952494061757718e-07, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -1.4765625, |
|
"logps/chosen": -155.0, |
|
"logps/rejected": -268.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.296875, |
|
"rewards/margins": 12.3125, |
|
"rewards/rejected": -13.625, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.9808306709265175, |
|
"grad_norm": 0.01607250092110866, |
|
"learning_rate": 1.876484560570071e-07, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -1.4765625, |
|
"logps/chosen": -152.0, |
|
"logps/rejected": -268.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.34375, |
|
"rewards/margins": 12.5625, |
|
"rewards/rejected": -13.875, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -1.5078125, |
|
"eval_logits/rejected": -1.515625, |
|
"eval_logps/chosen": -151.0, |
|
"eval_logps/rejected": -266.0, |
|
"eval_loss": 1.9190907551092096e-05, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -1.1640625, |
|
"eval_rewards/margins": 12.3125, |
|
"eval_rewards/rejected": -13.4375, |
|
"eval_runtime": 11.5575, |
|
"eval_samples_per_second": 17.305, |
|
"eval_steps_per_second": 0.606, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 2.0447284345047922, |
|
"grad_norm": 0.01361688937176726, |
|
"learning_rate": 1.7577197149643706e-07, |
|
"logits/chosen": -1.578125, |
|
"logits/rejected": -1.53125, |
|
"logps/chosen": -157.0, |
|
"logps/rejected": -270.0, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.203125, |
|
"rewards/margins": 12.6875, |
|
"rewards/rejected": -13.9375, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.108626198083067, |
|
"grad_norm": 0.05503143614977478, |
|
"learning_rate": 1.6389548693586697e-07, |
|
"logits/chosen": -1.5859375, |
|
"logits/rejected": -1.5, |
|
"logps/chosen": -162.0, |
|
"logps/rejected": -266.0, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1796875, |
|
"rewards/margins": 12.375, |
|
"rewards/rejected": -13.5625, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.1725239616613417, |
|
"grad_norm": 0.010269985783571838, |
|
"learning_rate": 1.520190023752969e-07, |
|
"logits/chosen": -1.53125, |
|
"logits/rejected": -1.5234375, |
|
"logps/chosen": -152.0, |
|
"logps/rejected": -268.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.078125, |
|
"rewards/margins": 12.75, |
|
"rewards/rejected": -13.8125, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.236421725239617, |
|
"grad_norm": 0.0683919347457657, |
|
"learning_rate": 1.4014251781472683e-07, |
|
"logits/chosen": -1.5625, |
|
"logits/rejected": -1.4921875, |
|
"logps/chosen": -156.0, |
|
"logps/rejected": -266.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.09375, |
|
"rewards/margins": 12.75, |
|
"rewards/rejected": -13.8125, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.3003194888178915, |
|
"grad_norm": 0.015461126488895941, |
|
"learning_rate": 1.2826603325415677e-07, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -1.546875, |
|
"logps/chosen": -146.0, |
|
"logps/rejected": -266.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1015625, |
|
"rewards/margins": 12.625, |
|
"rewards/rejected": -13.75, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.364217252396166, |
|
"grad_norm": 0.004666909345169454, |
|
"learning_rate": 1.163895486935867e-07, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -1.4921875, |
|
"logps/chosen": -160.0, |
|
"logps/rejected": -270.0, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1328125, |
|
"rewards/margins": 12.8125, |
|
"rewards/rejected": -13.9375, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.428115015974441, |
|
"grad_norm": 0.021768817881518163, |
|
"learning_rate": 1.0451306413301662e-07, |
|
"logits/chosen": -1.5859375, |
|
"logits/rejected": -1.5078125, |
|
"logps/chosen": -157.0, |
|
"logps/rejected": -266.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1484375, |
|
"rewards/margins": 12.8125, |
|
"rewards/rejected": -14.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.4920127795527156, |
|
"grad_norm": 0.006712968496189408, |
|
"learning_rate": 9.263657957244655e-08, |
|
"logits/chosen": -1.578125, |
|
"logits/rejected": -1.4765625, |
|
"logps/chosen": -150.0, |
|
"logps/rejected": -272.0, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.046875, |
|
"rewards/margins": 13.125, |
|
"rewards/rejected": -14.125, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.5559105431309903, |
|
"grad_norm": 0.03157363956222121, |
|
"learning_rate": 8.076009501187649e-08, |
|
"logits/chosen": -1.578125, |
|
"logits/rejected": -1.515625, |
|
"logps/chosen": -160.0, |
|
"logps/rejected": -268.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0625, |
|
"rewards/margins": 13.0625, |
|
"rewards/rejected": -14.125, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.619808306709265, |
|
"grad_norm": 0.012508580641767728, |
|
"learning_rate": 6.88836104513064e-08, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -1.5, |
|
"logps/chosen": -151.0, |
|
"logps/rejected": -268.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.03125, |
|
"rewards/margins": 12.9375, |
|
"rewards/rejected": -13.9375, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.68370607028754, |
|
"grad_norm": 0.014858628369684351, |
|
"learning_rate": 5.700712589073634e-08, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -1.515625, |
|
"logps/chosen": -159.0, |
|
"logps/rejected": -268.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.171875, |
|
"rewards/margins": 12.8125, |
|
"rewards/rejected": -14.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.747603833865815, |
|
"grad_norm": 0.00119033851952325, |
|
"learning_rate": 4.5130641330166267e-08, |
|
"logits/chosen": -1.59375, |
|
"logits/rejected": -1.46875, |
|
"logps/chosen": -165.0, |
|
"logps/rejected": -270.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0703125, |
|
"rewards/margins": 13.25, |
|
"rewards/rejected": -14.3125, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.8115015974440896, |
|
"grad_norm": 0.01000781639917085, |
|
"learning_rate": 3.32541567695962e-08, |
|
"logits/chosen": -1.5390625, |
|
"logits/rejected": -1.5234375, |
|
"logps/chosen": -142.0, |
|
"logps/rejected": -270.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0234375, |
|
"rewards/margins": 13.375, |
|
"rewards/rejected": -14.375, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.8753993610223643, |
|
"grad_norm": 0.032333252949445446, |
|
"learning_rate": 2.1377672209026125e-08, |
|
"logits/chosen": -1.5546875, |
|
"logits/rejected": -1.5078125, |
|
"logps/chosen": -159.0, |
|
"logps/rejected": -270.0, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.078125, |
|
"rewards/margins": 13.3125, |
|
"rewards/rejected": -14.375, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.939297124600639, |
|
"grad_norm": 0.0060308968638626, |
|
"learning_rate": 9.501187648456057e-09, |
|
"logits/chosen": -1.578125, |
|
"logits/rejected": -1.4921875, |
|
"logps/chosen": -153.0, |
|
"logps/rejected": -270.0, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0390625, |
|
"rewards/margins": 13.3125, |
|
"rewards/rejected": -14.375, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.9904153354632586, |
|
"eval_logits/chosen": -1.5078125, |
|
"eval_logits/rejected": -1.5234375, |
|
"eval_logps/chosen": -150.0, |
|
"eval_logps/rejected": -270.0, |
|
"eval_loss": 1.3302266779646743e-05, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -1.09375, |
|
"eval_rewards/margins": 12.8125, |
|
"eval_rewards/rejected": -13.9375, |
|
"eval_runtime": 14.4196, |
|
"eval_samples_per_second": 13.87, |
|
"eval_steps_per_second": 0.485, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 2.9904153354632586, |
|
"step": 468, |
|
"total_flos": 0.0, |
|
"train_loss": 0.036010837255037814, |
|
"train_runtime": 4360.4368, |
|
"train_samples_per_second": 6.88, |
|
"train_steps_per_second": 0.107 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 468, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|