{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1019, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0009813542688910696, |
|
"grad_norm": 12.871247750249635, |
|
"learning_rate": 4.9019607843137254e-09, |
|
"logits/chosen": 5327.5185546875, |
|
"logits/rejected": 3678.846435546875, |
|
"logps/chosen": -222.31866455078125, |
|
"logps/rejected": -157.3788299560547, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.009813542688910697, |
|
"grad_norm": 14.081428099593134, |
|
"learning_rate": 4.901960784313725e-08, |
|
"logits/chosen": 5387.51123046875, |
|
"logits/rejected": 4719.13525390625, |
|
"logps/chosen": -280.157958984375, |
|
"logps/rejected": -244.06271362304688, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.40740740299224854, |
|
"rewards/chosen": -0.014360553584992886, |
|
"rewards/margins": -0.05316641554236412, |
|
"rewards/rejected": 0.03880586475133896, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.019627085377821395, |
|
"grad_norm": 10.990615121540667, |
|
"learning_rate": 9.80392156862745e-08, |
|
"logits/chosen": 4691.1123046875, |
|
"logits/rejected": 4289.6572265625, |
|
"logps/chosen": -243.6353302001953, |
|
"logps/rejected": -236.8662872314453, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.46666669845581055, |
|
"rewards/chosen": -0.03324734792113304, |
|
"rewards/margins": -0.0356144905090332, |
|
"rewards/rejected": 0.0023671439848840237, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.029440628066732092, |
|
"grad_norm": 12.484022522013351, |
|
"learning_rate": 1.4705882352941175e-07, |
|
"logits/chosen": 5969.29296875, |
|
"logits/rejected": 5405.775390625, |
|
"logps/chosen": -284.97119140625, |
|
"logps/rejected": -282.4980163574219, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.6250000596046448, |
|
"rewards/chosen": 0.2023317515850067, |
|
"rewards/margins": 0.21659104526042938, |
|
"rewards/rejected": -0.014259283430874348, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03925417075564279, |
|
"grad_norm": 13.51105908880634, |
|
"learning_rate": 1.96078431372549e-07, |
|
"logits/chosen": 5424.30859375, |
|
"logits/rejected": 4093.165283203125, |
|
"logps/chosen": -278.38232421875, |
|
"logps/rejected": -219.98922729492188, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5105813145637512, |
|
"rewards/margins": 0.6657305955886841, |
|
"rewards/rejected": -0.15514932572841644, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04906771344455348, |
|
"grad_norm": 12.244316305452477, |
|
"learning_rate": 2.4509803921568627e-07, |
|
"logits/chosen": 5819.39111328125, |
|
"logits/rejected": 4993.8203125, |
|
"logps/chosen": -267.16241455078125, |
|
"logps/rejected": -275.3472595214844, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.6833333969116211, |
|
"rewards/chosen": 1.6877946853637695, |
|
"rewards/margins": 1.0000646114349365, |
|
"rewards/rejected": 0.6877301931381226, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.058881256133464184, |
|
"grad_norm": 11.729075229552288, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": 6246.43115234375, |
|
"logits/rejected": 5279.3232421875, |
|
"logps/chosen": -293.96044921875, |
|
"logps/rejected": -250.30880737304688, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 4.434187889099121, |
|
"rewards/margins": 2.814074993133545, |
|
"rewards/rejected": 1.6201130151748657, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06869479882237488, |
|
"grad_norm": 11.58284928755517, |
|
"learning_rate": 3.431372549019608e-07, |
|
"logits/chosen": 5581.76318359375, |
|
"logits/rejected": 5016.42333984375, |
|
"logps/chosen": -273.4932556152344, |
|
"logps/rejected": -272.8643493652344, |
|
"loss": 0.6728, |
|
"rewards/accuracies": 0.7083333730697632, |
|
"rewards/chosen": 4.158146858215332, |
|
"rewards/margins": 5.391061782836914, |
|
"rewards/rejected": -1.2329151630401611, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07850834151128558, |
|
"grad_norm": 12.989777400848494, |
|
"learning_rate": 3.92156862745098e-07, |
|
"logits/chosen": 5730.53759765625, |
|
"logits/rejected": 4633.5458984375, |
|
"logps/chosen": -269.62908935546875, |
|
"logps/rejected": -244.82156372070312, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.3705421686172485, |
|
"rewards/margins": 8.697429656982422, |
|
"rewards/rejected": -7.326887607574463, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08832188420019627, |
|
"grad_norm": 14.220448041653073, |
|
"learning_rate": 4.4117647058823526e-07, |
|
"logits/chosen": 5785.2666015625, |
|
"logits/rejected": 5267.29931640625, |
|
"logps/chosen": -262.34014892578125, |
|
"logps/rejected": -285.23370361328125, |
|
"loss": 0.6375, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": -1.5268057584762573, |
|
"rewards/margins": 10.330537796020508, |
|
"rewards/rejected": -11.857342720031738, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09813542688910697, |
|
"grad_norm": 15.81206433163477, |
|
"learning_rate": 4.901960784313725e-07, |
|
"logits/chosen": 5834.7822265625, |
|
"logits/rejected": 4743.5556640625, |
|
"logps/chosen": -311.53265380859375, |
|
"logps/rejected": -305.3698425292969, |
|
"loss": 0.6278, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -14.513681411743164, |
|
"rewards/margins": 19.83308219909668, |
|
"rewards/rejected": -34.346763610839844, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10794896957801767, |
|
"grad_norm": 18.563361241995352, |
|
"learning_rate": 4.999061090193831e-07, |
|
"logits/chosen": 5575.4599609375, |
|
"logits/rejected": 5340.49658203125, |
|
"logps/chosen": -277.6549987792969, |
|
"logps/rejected": -278.158447265625, |
|
"loss": 0.6341, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -7.583652496337891, |
|
"rewards/margins": 10.811137199401855, |
|
"rewards/rejected": -18.394786834716797, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.11776251226692837, |
|
"grad_norm": 22.063065551890407, |
|
"learning_rate": 4.995247977764035e-07, |
|
"logits/chosen": 5714.29443359375, |
|
"logits/rejected": 5232.7041015625, |
|
"logps/chosen": -276.466552734375, |
|
"logps/rejected": -295.88800048828125, |
|
"loss": 0.6269, |
|
"rewards/accuracies": 0.6583333015441895, |
|
"rewards/chosen": -20.445241928100586, |
|
"rewards/margins": 17.259180068969727, |
|
"rewards/rejected": -37.70441818237305, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.12757605495583907, |
|
"grad_norm": 21.227897979315813, |
|
"learning_rate": 4.988506452457066e-07, |
|
"logits/chosen": 5282.2646484375, |
|
"logits/rejected": 4814.9853515625, |
|
"logps/chosen": -284.6465759277344, |
|
"logps/rejected": -329.804931640625, |
|
"loss": 0.6032, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -43.58851623535156, |
|
"rewards/margins": 24.183361053466797, |
|
"rewards/rejected": -67.7718734741211, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.13738959764474976, |
|
"grad_norm": 19.667832090255832, |
|
"learning_rate": 4.9788444260996e-07, |
|
"logits/chosen": 5482.5751953125, |
|
"logits/rejected": 5381.85107421875, |
|
"logps/chosen": -307.1512451171875, |
|
"logps/rejected": -342.03619384765625, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.6666667461395264, |
|
"rewards/chosen": -47.92987823486328, |
|
"rewards/margins": 22.427753448486328, |
|
"rewards/rejected": -70.3576431274414, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.14720314033366044, |
|
"grad_norm": 25.463823735637064, |
|
"learning_rate": 4.96627323800647e-07, |
|
"logits/chosen": 5556.36572265625, |
|
"logits/rejected": 4525.91796875, |
|
"logps/chosen": -339.99114990234375, |
|
"logps/rejected": -357.9053649902344, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": -65.89563751220703, |
|
"rewards/margins": 47.135643005371094, |
|
"rewards/rejected": -113.0312728881836, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.15701668302257116, |
|
"grad_norm": 26.781231387453232, |
|
"learning_rate": 4.95080764167289e-07, |
|
"logits/chosen": 6055.6474609375, |
|
"logits/rejected": 5491.48046875, |
|
"logps/chosen": -350.4269104003906, |
|
"logps/rejected": -381.8998107910156, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.6083333492279053, |
|
"rewards/chosen": -62.65166473388672, |
|
"rewards/margins": 36.19008255004883, |
|
"rewards/rejected": -98.84175109863281, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.16683022571148184, |
|
"grad_norm": 28.113973023052374, |
|
"learning_rate": 4.932465787459808e-07, |
|
"logits/chosen": 5991.466796875, |
|
"logits/rejected": 5234.6416015625, |
|
"logps/chosen": -302.66656494140625, |
|
"logps/rejected": -343.98358154296875, |
|
"loss": 0.5567, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -39.85801315307617, |
|
"rewards/margins": 54.232994079589844, |
|
"rewards/rejected": -94.09100341796875, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.17664376840039253, |
|
"grad_norm": 33.999159471041786, |
|
"learning_rate": 4.911269201292724e-07, |
|
"logits/chosen": 5687.16943359375, |
|
"logits/rejected": 5025.896484375, |
|
"logps/chosen": -303.44134521484375, |
|
"logps/rejected": -364.39190673828125, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -62.167022705078125, |
|
"rewards/margins": 51.05744552612305, |
|
"rewards/rejected": -113.2244644165039, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.18645731108930325, |
|
"grad_norm": 29.641088692190937, |
|
"learning_rate": 4.887242759398945e-07, |
|
"logits/chosen": 6036.60205078125, |
|
"logits/rejected": 5355.47216796875, |
|
"logps/chosen": -337.2464294433594, |
|
"logps/rejected": -388.3368835449219, |
|
"loss": 0.5383, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -57.11214065551758, |
|
"rewards/margins": 51.385711669921875, |
|
"rewards/rejected": -108.49784851074219, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.19627085377821393, |
|
"grad_norm": 44.68902740164567, |
|
"learning_rate": 4.860414659112948e-07, |
|
"logits/chosen": 6272.4951171875, |
|
"logits/rejected": 5538.49609375, |
|
"logps/chosen": -370.70849609375, |
|
"logps/rejected": -407.4710998535156, |
|
"loss": 0.5638, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -84.62608337402344, |
|
"rewards/margins": 62.12910079956055, |
|
"rewards/rejected": -146.75518798828125, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.20608439646712462, |
|
"grad_norm": 25.411968927521745, |
|
"learning_rate": 4.830816385784104e-07, |
|
"logits/chosen": 4968.16015625, |
|
"logits/rejected": 4779.7099609375, |
|
"logps/chosen": -331.57757568359375, |
|
"logps/rejected": -343.7427062988281, |
|
"loss": 0.5589, |
|
"rewards/accuracies": 0.6583333611488342, |
|
"rewards/chosen": -78.5858383178711, |
|
"rewards/margins": 39.72159957885742, |
|
"rewards/rejected": -118.30744934082031, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.21589793915603533, |
|
"grad_norm": 37.405063992584424, |
|
"learning_rate": 4.798482675825602e-07, |
|
"logits/chosen": 5361.2626953125, |
|
"logits/rejected": 5484.0341796875, |
|
"logps/chosen": -311.9710388183594, |
|
"logps/rejected": -405.7643127441406, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -71.02371215820312, |
|
"rewards/margins": 85.1280746459961, |
|
"rewards/rejected": -156.1517791748047, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.22571148184494602, |
|
"grad_norm": 50.62400555207534, |
|
"learning_rate": 4.7634514759479275e-07, |
|
"logits/chosen": 6291.7314453125, |
|
"logits/rejected": 4984.1982421875, |
|
"logps/chosen": -361.0018615722656, |
|
"logps/rejected": -410.3404846191406, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -79.9288330078125, |
|
"rewards/margins": 78.11891174316406, |
|
"rewards/rejected": -158.04774475097656, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.23552502453385674, |
|
"grad_norm": 27.660916558165752, |
|
"learning_rate": 4.7257638986247684e-07, |
|
"logits/chosen": 6535.8984375, |
|
"logits/rejected": 5374.02294921875, |
|
"logps/chosen": -426.83148193359375, |
|
"logps/rejected": -457.632080078125, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.7083333730697632, |
|
"rewards/chosen": -109.15034484863281, |
|
"rewards/margins": 92.73652648925781, |
|
"rewards/rejected": -201.88687133789062, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.24533856722276742, |
|
"grad_norm": 31.148002019742822, |
|
"learning_rate": 4.685464173843574e-07, |
|
"logits/chosen": 5497.865234375, |
|
"logits/rejected": 4737.041015625, |
|
"logps/chosen": -371.4256591796875, |
|
"logps/rejected": -383.71661376953125, |
|
"loss": 0.5543, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -120.2467269897461, |
|
"rewards/margins": 42.96506881713867, |
|
"rewards/rejected": -163.21180725097656, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.25515210991167814, |
|
"grad_norm": 31.571604145354506, |
|
"learning_rate": 4.6425995971974265e-07, |
|
"logits/chosen": 5646.7626953125, |
|
"logits/rejected": 5109.78369140625, |
|
"logps/chosen": -389.2139587402344, |
|
"logps/rejected": -417.52374267578125, |
|
"loss": 0.5557, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -108.80330657958984, |
|
"rewards/margins": 49.87716293334961, |
|
"rewards/rejected": -158.6804656982422, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2649656526005888, |
|
"grad_norm": 35.26099277826172, |
|
"learning_rate": 4.597220474379125e-07, |
|
"logits/chosen": 5891.14990234375, |
|
"logits/rejected": 4710.44384765625, |
|
"logps/chosen": -349.8431701660156, |
|
"logps/rejected": -394.3140869140625, |
|
"loss": 0.5564, |
|
"rewards/accuracies": 0.6416667103767395, |
|
"rewards/chosen": -90.45411682128906, |
|
"rewards/margins": 59.028533935546875, |
|
"rewards/rejected": -149.48263549804688, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2747791952894995, |
|
"grad_norm": 48.24359930236473, |
|
"learning_rate": 4.549380062142627e-07, |
|
"logits/chosen": 5449.0, |
|
"logits/rejected": 4662.09521484375, |
|
"logps/chosen": -345.41461181640625, |
|
"logps/rejected": -420.5967712402344, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -90.24136352539062, |
|
"rewards/margins": 88.40531158447266, |
|
"rewards/rejected": -178.64669799804688, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2845927379784102, |
|
"grad_norm": 29.807942673069554, |
|
"learning_rate": 4.499134505801141e-07, |
|
"logits/chosen": 6478.8251953125, |
|
"logits/rejected": 5145.69580078125, |
|
"logps/chosen": -425.2914123535156, |
|
"logps/rejected": -475.61224365234375, |
|
"loss": 0.5069, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -114.30363464355469, |
|
"rewards/margins": 90.24967193603516, |
|
"rewards/rejected": -204.55331420898438, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2944062806673209, |
|
"grad_norm": 35.28572084013644, |
|
"learning_rate": 4.4465427733352124e-07, |
|
"logits/chosen": 5390.82275390625, |
|
"logits/rejected": 5010.67919921875, |
|
"logps/chosen": -404.703125, |
|
"logps/rejected": -445.12921142578125, |
|
"loss": 0.5624, |
|
"rewards/accuracies": 0.6416667103767395, |
|
"rewards/chosen": -142.99755859375, |
|
"rewards/margins": 53.75908279418945, |
|
"rewards/rejected": -196.75662231445312, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3042198233562316, |
|
"grad_norm": 40.084769146081335, |
|
"learning_rate": 4.391666586188145e-07, |
|
"logits/chosen": 5972.5166015625, |
|
"logits/rejected": 5158.81103515625, |
|
"logps/chosen": -387.8962707519531, |
|
"logps/rejected": -440.2579040527344, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.6916667222976685, |
|
"rewards/chosen": -106.87181091308594, |
|
"rewards/margins": 80.19728088378906, |
|
"rewards/rejected": -187.06912231445312, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3140333660451423, |
|
"grad_norm": 28.62016537121461, |
|
"learning_rate": 4.3345703468299634e-07, |
|
"logits/chosen": 5544.9384765625, |
|
"logits/rejected": 4833.5224609375, |
|
"logps/chosen": -360.482421875, |
|
"logps/rejected": -389.13385009765625, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -99.99230194091797, |
|
"rewards/margins": 53.11186599731445, |
|
"rewards/rejected": -153.10415649414062, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.323846908734053, |
|
"grad_norm": 59.032426150017336, |
|
"learning_rate": 4.275321063174936e-07, |
|
"logits/chosen": 5484.0458984375, |
|
"logits/rejected": 4950.25537109375, |
|
"logps/chosen": -403.98785400390625, |
|
"logps/rejected": -519.8248291015625, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -141.48928833007812, |
|
"rewards/margins": 106.94425964355469, |
|
"rewards/rejected": -248.4335479736328, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3336604514229637, |
|
"grad_norm": 31.879122250786, |
|
"learning_rate": 4.2139882699413613e-07, |
|
"logits/chosen": 5405.72265625, |
|
"logits/rejected": 4280.78857421875, |
|
"logps/chosen": -441.2384338378906, |
|
"logps/rejected": -502.58209228515625, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -169.95379638671875, |
|
"rewards/margins": 94.90937805175781, |
|
"rewards/rejected": -264.8631286621094, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3434739941118744, |
|
"grad_norm": 33.81913218379805, |
|
"learning_rate": 4.1506439470459056e-07, |
|
"logits/chosen": 6440.4052734375, |
|
"logits/rejected": 4974.0732421875, |
|
"logps/chosen": -476.2796325683594, |
|
"logps/rejected": -503.81915283203125, |
|
"loss": 0.4891, |
|
"rewards/accuracies": 0.783333420753479, |
|
"rewards/chosen": -159.8102569580078, |
|
"rewards/margins": 94.52180480957031, |
|
"rewards/rejected": -254.3320770263672, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.35328753680078506, |
|
"grad_norm": 27.826546606872803, |
|
"learning_rate": 4.085362435128262e-07, |
|
"logits/chosen": 5557.1865234375, |
|
"logits/rejected": 5118.06640625, |
|
"logps/chosen": -378.259033203125, |
|
"logps/rejected": -449.55145263671875, |
|
"loss": 0.5236, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -135.6257781982422, |
|
"rewards/margins": 67.34868621826172, |
|
"rewards/rejected": -202.97447204589844, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3631010794896958, |
|
"grad_norm": 35.17289058393036, |
|
"learning_rate": 4.0182203483052825e-07, |
|
"logits/chosen": 6366.83056640625, |
|
"logits/rejected": 5257.0703125, |
|
"logps/chosen": -399.1999206542969, |
|
"logps/rejected": -466.55255126953125, |
|
"loss": 0.4778, |
|
"rewards/accuracies": 0.8083333969116211, |
|
"rewards/chosen": -125.0527572631836, |
|
"rewards/margins": 93.51255798339844, |
|
"rewards/rejected": -218.56527709960938, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3729146221786065, |
|
"grad_norm": 38.44052471860425, |
|
"learning_rate": 3.949296484256959e-07, |
|
"logits/chosen": 5621.7138671875, |
|
"logits/rejected": 5390.65478515625, |
|
"logps/chosen": -457.83837890625, |
|
"logps/rejected": -548.638427734375, |
|
"loss": 0.5489, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -193.8985137939453, |
|
"rewards/margins": 95.32550048828125, |
|
"rewards/rejected": -289.2240295410156, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.38272816486751715, |
|
"grad_norm": 36.66931449629898, |
|
"learning_rate": 3.8786717317497875e-07, |
|
"logits/chosen": 5111.90576171875, |
|
"logits/rejected": 4626.9228515625, |
|
"logps/chosen": -434.62835693359375, |
|
"logps/rejected": -526.7476806640625, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -197.80410766601562, |
|
"rewards/margins": 103.44625091552734, |
|
"rewards/rejected": -301.2503662109375, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.39254170755642787, |
|
"grad_norm": 57.59980893341699, |
|
"learning_rate": 3.806428975706042e-07, |
|
"logits/chosen": 6388.87158203125, |
|
"logits/rejected": 4657.2216796875, |
|
"logps/chosen": -454.86175537109375, |
|
"logps/rejected": -485.8328552246094, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -156.0497283935547, |
|
"rewards/margins": 103.42845153808594, |
|
"rewards/rejected": -259.4781494140625, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4023552502453386, |
|
"grad_norm": 39.54551097407698, |
|
"learning_rate": 3.7326529999303633e-07, |
|
"logits/chosen": 6277.59228515625, |
|
"logits/rejected": 5186.5234375, |
|
"logps/chosen": -436.134521484375, |
|
"logps/rejected": -489.72021484375, |
|
"loss": 0.5039, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -157.28512573242188, |
|
"rewards/margins": 91.26525115966797, |
|
"rewards/rejected": -248.55038452148438, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.41216879293424924, |
|
"grad_norm": 28.9828512823159, |
|
"learning_rate": 3.6574303876078366e-07, |
|
"logits/chosen": 6166.8349609375, |
|
"logits/rejected": 5749.53759765625, |
|
"logps/chosen": -429.44842529296875, |
|
"logps/rejected": -501.61553955078125, |
|
"loss": 0.5346, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -146.19595336914062, |
|
"rewards/margins": 76.49540710449219, |
|
"rewards/rejected": -222.6913604736328, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.42198233562315995, |
|
"grad_norm": 38.109059334795454, |
|
"learning_rate": 3.5808494196903117e-07, |
|
"logits/chosen": 5872.1611328125, |
|
"logits/rejected": 5257.36962890625, |
|
"logps/chosen": -426.01629638671875, |
|
"logps/rejected": -524.32470703125, |
|
"loss": 0.4893, |
|
"rewards/accuracies": 0.7249999642372131, |
|
"rewards/chosen": -147.16848754882812, |
|
"rewards/margins": 97.02165222167969, |
|
"rewards/rejected": -244.1901397705078, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.43179587831207067, |
|
"grad_norm": 36.48516188691919, |
|
"learning_rate": 3.5029999712902387e-07, |
|
"logits/chosen": 5825.1708984375, |
|
"logits/rejected": 5375.9892578125, |
|
"logps/chosen": -412.44866943359375, |
|
"logps/rejected": -492.4132385253906, |
|
"loss": 0.5089, |
|
"rewards/accuracies": 0.7583333849906921, |
|
"rewards/chosen": -137.3380584716797, |
|
"rewards/margins": 78.8681640625, |
|
"rewards/rejected": -216.2062225341797, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.44160942100098133, |
|
"grad_norm": 32.455988556697044, |
|
"learning_rate": 3.4239734062036067e-07, |
|
"logits/chosen": 5395.5947265625, |
|
"logits/rejected": 4995.49267578125, |
|
"logps/chosen": -377.55206298828125, |
|
"logps/rejected": -480.9095764160156, |
|
"loss": 0.504, |
|
"rewards/accuracies": 0.8166666030883789, |
|
"rewards/chosen": -116.18603515625, |
|
"rewards/margins": 91.22578430175781, |
|
"rewards/rejected": -207.4118194580078, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.45142296368989204, |
|
"grad_norm": 40.54519554942862, |
|
"learning_rate": 3.343862469685755e-07, |
|
"logits/chosen": 5598.1201171875, |
|
"logits/rejected": 5239.931640625, |
|
"logps/chosen": -418.7256774902344, |
|
"logps/rejected": -504.2228088378906, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -155.0460662841797, |
|
"rewards/margins": 88.58525848388672, |
|
"rewards/rejected": -243.63131713867188, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.46123650637880276, |
|
"grad_norm": 63.388353478656484, |
|
"learning_rate": 3.2627611796059283e-07, |
|
"logits/chosen": 6118.2041015625, |
|
"logits/rejected": 4867.0166015625, |
|
"logps/chosen": -513.0584716796875, |
|
"logps/rejected": -580.1785278320312, |
|
"loss": 0.4852, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": -215.5017547607422, |
|
"rewards/margins": 102.20874786376953, |
|
"rewards/rejected": -317.71051025390625, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.47105004906771347, |
|
"grad_norm": 53.006728665147484, |
|
"learning_rate": 3.1807647161082797e-07, |
|
"logits/chosen": 6796.0439453125, |
|
"logits/rejected": 4991.43505859375, |
|
"logps/chosen": -505.2491760253906, |
|
"logps/rejected": -591.8897094726562, |
|
"loss": 0.467, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -224.52243041992188, |
|
"rewards/margins": 122.7418441772461, |
|
"rewards/rejected": -347.2642822265625, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.48086359175662413, |
|
"grad_norm": 45.433668803945835, |
|
"learning_rate": 3.097969309908847e-07, |
|
"logits/chosen": 6198.6357421875, |
|
"logits/rejected": 4938.93701171875, |
|
"logps/chosen": -537.33154296875, |
|
"logps/rejected": -604.7870483398438, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -252.5094451904297, |
|
"rewards/margins": 102.22807312011719, |
|
"rewards/rejected": -354.7375183105469, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.49067713444553485, |
|
"grad_norm": 40.30222762902004, |
|
"learning_rate": 3.01447212935957e-07, |
|
"logits/chosen": 5542.91015625, |
|
"logits/rejected": 4886.0283203125, |
|
"logps/chosen": -515.2832641601562, |
|
"logps/rejected": -611.3238525390625, |
|
"loss": 0.4764, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -230.98831176757812, |
|
"rewards/margins": 120.3188247680664, |
|
"rewards/rejected": -351.30718994140625, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5004906771344455, |
|
"grad_norm": 37.25943332077513, |
|
"learning_rate": 2.930371166411915e-07, |
|
"logits/chosen": 6290.35107421875, |
|
"logits/rejected": 5406.603515625, |
|
"logps/chosen": -496.71923828125, |
|
"logps/rejected": -568.7760009765625, |
|
"loss": 0.5204, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -171.8844451904297, |
|
"rewards/margins": 106.14483642578125, |
|
"rewards/rejected": -278.02923583984375, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5103042198233563, |
|
"grad_norm": 34.219197608369974, |
|
"learning_rate": 2.845765121613912e-07, |
|
"logits/chosen": 5363.45361328125, |
|
"logits/rejected": 4926.47705078125, |
|
"logps/chosen": -400.9844665527344, |
|
"logps/rejected": -468.4186096191406, |
|
"loss": 0.4843, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -137.99566650390625, |
|
"rewards/margins": 86.77244567871094, |
|
"rewards/rejected": -224.7681121826172, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5201177625122669, |
|
"grad_norm": 51.08115197166243, |
|
"learning_rate": 2.760753288275598e-07, |
|
"logits/chosen": 6380.15380859375, |
|
"logits/rejected": 5523.56103515625, |
|
"logps/chosen": -411.37030029296875, |
|
"logps/rejected": -479.7333984375, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -120.66414642333984, |
|
"rewards/margins": 82.25830841064453, |
|
"rewards/rejected": -202.92245483398438, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5299313052011776, |
|
"grad_norm": 32.10452041832907, |
|
"learning_rate": 2.675435435938788e-07, |
|
"logits/chosen": 5805.7861328125, |
|
"logits/rejected": 4628.6015625, |
|
"logps/chosen": -400.0195617675781, |
|
"logps/rejected": -493.15631103515625, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.7916666269302368, |
|
"rewards/chosen": -143.0798797607422, |
|
"rewards/margins": 111.36392974853516, |
|
"rewards/rejected": -254.4438018798828, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5397448478900884, |
|
"grad_norm": 35.50341902811831, |
|
"learning_rate": 2.5899116932879534e-07, |
|
"logits/chosen": 5951.2255859375, |
|
"logits/rejected": 5129.73291015625, |
|
"logps/chosen": -436.9695739746094, |
|
"logps/rejected": -521.4527587890625, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -170.31277465820312, |
|
"rewards/margins": 103.4649658203125, |
|
"rewards/rejected": -273.77777099609375, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.549558390578999, |
|
"grad_norm": 38.923123039929806, |
|
"learning_rate": 2.504282430639594e-07, |
|
"logits/chosen": 5168.88427734375, |
|
"logits/rejected": 4690.22412109375, |
|
"logps/chosen": -454.4593811035156, |
|
"logps/rejected": -523.0364990234375, |
|
"loss": 0.5234, |
|
"rewards/accuracies": 0.6500000357627869, |
|
"rewards/chosen": -206.7684783935547, |
|
"rewards/margins": 83.95912170410156, |
|
"rewards/rejected": -290.72760009765625, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5593719332679097, |
|
"grad_norm": 35.9015213923173, |
|
"learning_rate": 2.418648142148056e-07, |
|
"logits/chosen": 5650.38818359375, |
|
"logits/rejected": 4686.87158203125, |
|
"logps/chosen": -421.58416748046875, |
|
"logps/rejected": -519.3839721679688, |
|
"loss": 0.4912, |
|
"rewards/accuracies": 0.783333420753479, |
|
"rewards/chosen": -161.98081970214844, |
|
"rewards/margins": 115.42814636230469, |
|
"rewards/rejected": -277.4089660644531, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5691854759568205, |
|
"grad_norm": 35.377631368601875, |
|
"learning_rate": 2.3331093278659906e-07, |
|
"logits/chosen": 6001.3486328125, |
|
"logits/rejected": 5075.9619140625, |
|
"logps/chosen": -444.90069580078125, |
|
"logps/rejected": -534.0222778320312, |
|
"loss": 0.4834, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": -150.86428833007812, |
|
"rewards/margins": 100.94398498535156, |
|
"rewards/rejected": -251.80825805664062, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5789990186457311, |
|
"grad_norm": 33.72847930978894, |
|
"learning_rate": 2.247766375797906e-07, |
|
"logits/chosen": 6150.4951171875, |
|
"logits/rejected": 5650.3603515625, |
|
"logps/chosen": -447.9390563964844, |
|
"logps/rejected": -580.2978515625, |
|
"loss": 0.459, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -173.01307678222656, |
|
"rewards/margins": 128.7034454345703, |
|
"rewards/rejected": -301.71649169921875, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5888125613346418, |
|
"grad_norm": 38.272687769078246, |
|
"learning_rate": 2.1627194440852142e-07, |
|
"logits/chosen": 5934.83935546875, |
|
"logits/rejected": 5138.47705078125, |
|
"logps/chosen": -510.39532470703125, |
|
"logps/rejected": -600.4871826171875, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.7583333849906921, |
|
"rewards/chosen": -226.30844116210938, |
|
"rewards/margins": 95.1800765991211, |
|
"rewards/rejected": -321.4884948730469, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5986261040235525, |
|
"grad_norm": 42.72545572301978, |
|
"learning_rate": 2.0780683434610413e-07, |
|
"logits/chosen": 5760.5244140625, |
|
"logits/rejected": 4755.18798828125, |
|
"logps/chosen": -520.7589721679688, |
|
"logps/rejected": -605.10546875, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -230.55068969726562, |
|
"rewards/margins": 103.2475357055664, |
|
"rewards/rejected": -333.7981872558594, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6084396467124632, |
|
"grad_norm": 30.800890611965162, |
|
"learning_rate": 1.993912420112756e-07, |
|
"logits/chosen": 6323.02978515625, |
|
"logits/rejected": 5290.75927734375, |
|
"logps/chosen": -529.4403686523438, |
|
"logps/rejected": -628.4583129882812, |
|
"loss": 0.5034, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -250.78018188476562, |
|
"rewards/margins": 104.14213562011719, |
|
"rewards/rejected": -354.92236328125, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6182531894013739, |
|
"grad_norm": 33.05143266331657, |
|
"learning_rate": 1.9103504390896944e-07, |
|
"logits/chosen": 6340.01025390625, |
|
"logits/rejected": 5427.24755859375, |
|
"logps/chosen": -559.9760131835938, |
|
"logps/rejected": -633.686767578125, |
|
"loss": 0.4884, |
|
"rewards/accuracies": 0.7583334445953369, |
|
"rewards/chosen": -264.83856201171875, |
|
"rewards/margins": 85.63264465332031, |
|
"rewards/rejected": -350.47125244140625, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6280667320902846, |
|
"grad_norm": 39.56952674823438, |
|
"learning_rate": 1.8274804683928913e-07, |
|
"logits/chosen": 5424.0146484375, |
|
"logits/rejected": 4903.7958984375, |
|
"logps/chosen": -535.6927490234375, |
|
"logps/rejected": -647.5748901367188, |
|
"loss": 0.4892, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -269.99102783203125, |
|
"rewards/margins": 118.67204284667969, |
|
"rewards/rejected": -388.6630554199219, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6378802747791953, |
|
"grad_norm": 55.248010597812424, |
|
"learning_rate": 1.745399763882881e-07, |
|
"logits/chosen": 5793.76953125, |
|
"logits/rejected": 4353.64794921875, |
|
"logps/chosen": -535.369140625, |
|
"logps/rejected": -589.8530883789062, |
|
"loss": 0.4828, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -241.2775421142578, |
|
"rewards/margins": 122.86383056640625, |
|
"rewards/rejected": -364.1413879394531, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.647693817468106, |
|
"grad_norm": 38.643520028392174, |
|
"learning_rate": 1.664204655140607e-07, |
|
"logits/chosen": 6159.14306640625, |
|
"logits/rejected": 4976.43994140625, |
|
"logps/chosen": -499.28851318359375, |
|
"logps/rejected": -561.6052856445312, |
|
"loss": 0.495, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -212.77145385742188, |
|
"rewards/margins": 93.96333312988281, |
|
"rewards/rejected": -306.73480224609375, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6575073601570167, |
|
"grad_norm": 35.07622366892728, |
|
"learning_rate": 1.5839904324154273e-07, |
|
"logits/chosen": 5574.2802734375, |
|
"logits/rejected": 4987.9404296875, |
|
"logps/chosen": -466.86346435546875, |
|
"logps/rejected": -580.9351196289062, |
|
"loss": 0.4938, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -179.73626708984375, |
|
"rewards/margins": 136.65802001953125, |
|
"rewards/rejected": -316.3943176269531, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.6673209028459274, |
|
"grad_norm": 33.542518567077636, |
|
"learning_rate": 1.5048512347928564e-07, |
|
"logits/chosen": 6700.78515625, |
|
"logits/rejected": 5496.53662109375, |
|
"logps/chosen": -503.79290771484375, |
|
"logps/rejected": -590.5035400390625, |
|
"loss": 0.4429, |
|
"rewards/accuracies": 0.7499999403953552, |
|
"rewards/chosen": -193.58926391601562, |
|
"rewards/margins": 135.96713256835938, |
|
"rewards/rejected": -329.556396484375, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.677134445534838, |
|
"grad_norm": 34.78474391764019, |
|
"learning_rate": 1.426879939713322e-07, |
|
"logits/chosen": 5514.447265625, |
|
"logits/rejected": 4842.81640625, |
|
"logps/chosen": -472.7972717285156, |
|
"logps/rejected": -572.2882690429688, |
|
"loss": 0.5124, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -220.92953491210938, |
|
"rewards/margins": 110.0331039428711, |
|
"rewards/rejected": -330.9626159667969, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6869479882237488, |
|
"grad_norm": 39.067983682803174, |
|
"learning_rate": 1.350168053971577e-07, |
|
"logits/chosen": 5970.7685546875, |
|
"logits/rejected": 5311.5283203125, |
|
"logps/chosen": -452.698974609375, |
|
"logps/rejected": -518.3038330078125, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -172.0897979736328, |
|
"rewards/margins": 97.96910095214844, |
|
"rewards/rejected": -270.05889892578125, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6967615309126595, |
|
"grad_norm": 40.38105094076238, |
|
"learning_rate": 1.2748056063246994e-07, |
|
"logits/chosen": 5575.70458984375, |
|
"logits/rejected": 5063.31884765625, |
|
"logps/chosen": -460.80413818359375, |
|
"logps/rejected": -541.817138671875, |
|
"loss": 0.5068, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -161.07498168945312, |
|
"rewards/margins": 101.1529541015625, |
|
"rewards/rejected": -262.2279357910156, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7065750736015701, |
|
"grad_norm": 34.54330420809715, |
|
"learning_rate": 1.2008810418347093e-07, |
|
"logits/chosen": 5857.0908203125, |
|
"logits/rejected": 5070.1689453125, |
|
"logps/chosen": -448.393798828125, |
|
"logps/rejected": -511.27130126953125, |
|
"loss": 0.4955, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -160.17967224121094, |
|
"rewards/margins": 94.85597229003906, |
|
"rewards/rejected": -255.03564453125, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7163886162904809, |
|
"grad_norm": 36.199421295115016, |
|
"learning_rate": 1.128481118069799e-07, |
|
"logits/chosen": 5848.61279296875, |
|
"logits/rejected": 4546.04296875, |
|
"logps/chosen": -461.7185974121094, |
|
"logps/rejected": -540.6113891601562, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.6916666626930237, |
|
"rewards/chosen": -193.16610717773438, |
|
"rewards/margins": 106.0004653930664, |
|
"rewards/rejected": -299.16656494140625, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7262021589793916, |
|
"grad_norm": 58.298464182056584, |
|
"learning_rate": 1.0576908032860088e-07, |
|
"logits/chosen": 5177.734375, |
|
"logits/rejected": 4254.4931640625, |
|
"logps/chosen": -439.21923828125, |
|
"logps/rejected": -490.22210693359375, |
|
"loss": 0.4902, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": -179.5984344482422, |
|
"rewards/margins": 106.27181243896484, |
|
"rewards/rejected": -285.8702697753906, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7360157016683022, |
|
"grad_norm": 47.06791612169973, |
|
"learning_rate": 9.88593176708827e-08, |
|
"logits/chosen": 5833.16748046875, |
|
"logits/rejected": 4599.1416015625, |
|
"logps/chosen": -447.70770263671875, |
|
"logps/rejected": -503.156005859375, |
|
"loss": 0.4893, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -168.36907958984375, |
|
"rewards/margins": 97.07537078857422, |
|
"rewards/rejected": -265.4444580078125, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.745829244357213, |
|
"grad_norm": 37.1160086085217, |
|
"learning_rate": 9.212693310317479e-08, |
|
"logits/chosen": 5141.75390625, |
|
"logits/rejected": 4296.54833984375, |
|
"logps/chosen": -440.88067626953125, |
|
"logps/rejected": -532.016845703125, |
|
"loss": 0.509, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -181.9151153564453, |
|
"rewards/margins": 105.42694091796875, |
|
"rewards/rejected": -287.34210205078125, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.7556427870461236, |
|
"grad_norm": 43.097373457610466, |
|
"learning_rate": 8.557982772462138e-08, |
|
"logits/chosen": 5532.06689453125, |
|
"logits/rejected": 4944.3828125, |
|
"logps/chosen": -424.0889587402344, |
|
"logps/rejected": -537.22802734375, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.8083333969116211, |
|
"rewards/chosen": -165.0626983642578, |
|
"rewards/margins": 116.6352767944336, |
|
"rewards/rejected": -281.6979675292969, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.7654563297350343, |
|
"grad_norm": 51.31695327547084, |
|
"learning_rate": 7.922568519146425e-08, |
|
"logits/chosen": 5383.9931640625, |
|
"logits/rejected": 4821.4970703125, |
|
"logps/chosen": -442.91583251953125, |
|
"logps/rejected": -547.6976928710938, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.6833333969116211, |
|
"rewards/chosen": -190.0575714111328, |
|
"rewards/margins": 98.0093002319336, |
|
"rewards/rejected": -288.06683349609375, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.7752698724239451, |
|
"grad_norm": 40.87283215033227, |
|
"learning_rate": 7.307196269953444e-08, |
|
"logits/chosen": 5953.62646484375, |
|
"logits/rejected": 4360.71435546875, |
|
"logps/chosen": -468.15301513671875, |
|
"logps/rejected": -554.8399658203125, |
|
"loss": 0.4513, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -173.2440185546875, |
|
"rewards/margins": 138.4357147216797, |
|
"rewards/rejected": -311.67974853515625, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7850834151128557, |
|
"grad_norm": 53.01816227936955, |
|
"learning_rate": 6.712588223251809e-08, |
|
"logits/chosen": 5890.1064453125, |
|
"logits/rejected": 5068.29052734375, |
|
"logps/chosen": -507.1546936035156, |
|
"logps/rejected": -587.9667358398438, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -197.91702270507812, |
|
"rewards/margins": 104.29020690917969, |
|
"rewards/rejected": -302.20721435546875, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7948969578017664, |
|
"grad_norm": 39.36718486541899, |
|
"learning_rate": 6.139442208626517e-08, |
|
"logits/chosen": 5642.1572265625, |
|
"logits/rejected": 5064.44140625, |
|
"logps/chosen": -466.017822265625, |
|
"logps/rejected": -542.1941528320312, |
|
"loss": 0.5086, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -191.46595764160156, |
|
"rewards/margins": 93.6746597290039, |
|
"rewards/rejected": -285.140625, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.8047105004906772, |
|
"grad_norm": 32.25150411325172, |
|
"learning_rate": 5.5884308679090525e-08, |
|
"logits/chosen": 6617.20166015625, |
|
"logits/rejected": 5841.89990234375, |
|
"logps/chosen": -489.13140869140625, |
|
"logps/rejected": -556.3676147460938, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -174.83181762695312, |
|
"rewards/margins": 96.47708129882812, |
|
"rewards/rejected": -271.30889892578125, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8145240431795878, |
|
"grad_norm": 42.66355926716236, |
|
"learning_rate": 5.060200865767605e-08, |
|
"logits/chosen": 5482.3115234375, |
|
"logits/rejected": 4349.36181640625, |
|
"logps/chosen": -489.5411071777344, |
|
"logps/rejected": -519.50439453125, |
|
"loss": 0.4808, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -183.46530151367188, |
|
"rewards/margins": 100.88998413085938, |
|
"rewards/rejected": -284.3552551269531, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8243375858684985, |
|
"grad_norm": 32.60506546982286, |
|
"learning_rate": 4.555372130784102e-08, |
|
"logits/chosen": 6099.6806640625, |
|
"logits/rejected": 5423.52294921875, |
|
"logps/chosen": -430.18377685546875, |
|
"logps/rejected": -559.9306640625, |
|
"loss": 0.4656, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -174.9810028076172, |
|
"rewards/margins": 115.120361328125, |
|
"rewards/rejected": -290.10137939453125, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8341511285574092, |
|
"grad_norm": 28.976991791091827, |
|
"learning_rate": 4.0745371279084976e-08, |
|
"logits/chosen": 6144.4287109375, |
|
"logits/rejected": 5391.69189453125, |
|
"logps/chosen": -456.33270263671875, |
|
"logps/rejected": -532.3670654296875, |
|
"loss": 0.5206, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -177.6209716796875, |
|
"rewards/margins": 89.14666748046875, |
|
"rewards/rejected": -266.76763916015625, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8439646712463199, |
|
"grad_norm": 35.3584148587086, |
|
"learning_rate": 3.6182601631443596e-08, |
|
"logits/chosen": 6054.46142578125, |
|
"logits/rejected": 5496.1396484375, |
|
"logps/chosen": -492.4789123535156, |
|
"logps/rejected": -586.8856811523438, |
|
"loss": 0.462, |
|
"rewards/accuracies": 0.7916666269302368, |
|
"rewards/chosen": -178.38107299804688, |
|
"rewards/margins": 115.93116760253906, |
|
"rewards/rejected": -294.312255859375, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.8537782139352306, |
|
"grad_norm": 34.93609119738404, |
|
"learning_rate": 3.187076721281595e-08, |
|
"logits/chosen": 5244.7314453125, |
|
"logits/rejected": 4227.8193359375, |
|
"logps/chosen": -435.866943359375, |
|
"logps/rejected": -531.3182983398438, |
|
"loss": 0.4827, |
|
"rewards/accuracies": 0.7416667342185974, |
|
"rewards/chosen": -182.42454528808594, |
|
"rewards/margins": 120.93087005615234, |
|
"rewards/rejected": -303.35540771484375, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.8635917566241413, |
|
"grad_norm": 42.21210418756789, |
|
"learning_rate": 2.7814928374537334e-08, |
|
"logits/chosen": 6968.44384765625, |
|
"logits/rejected": 5644.8955078125, |
|
"logps/chosen": -539.173828125, |
|
"logps/rejected": -613.7080078125, |
|
"loss": 0.4725, |
|
"rewards/accuracies": 0.7916666865348816, |
|
"rewards/chosen": -196.6251220703125, |
|
"rewards/margins": 135.3355255126953, |
|
"rewards/rejected": -331.96063232421875, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.873405299313052, |
|
"grad_norm": 48.62475030995162, |
|
"learning_rate": 2.4019845032570875e-08, |
|
"logits/chosen": 6289.82763671875, |
|
"logits/rejected": 4878.1728515625, |
|
"logps/chosen": -469.8004455566406, |
|
"logps/rejected": -565.7530517578125, |
|
"loss": 0.4788, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -185.22061157226562, |
|
"rewards/margins": 133.4977264404297, |
|
"rewards/rejected": -318.7183532714844, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.8832188420019627, |
|
"grad_norm": 35.719224006833315, |
|
"learning_rate": 2.0489971081290193e-08, |
|
"logits/chosen": 5738.51318359375, |
|
"logits/rejected": 4603.50439453125, |
|
"logps/chosen": -483.54791259765625, |
|
"logps/rejected": -549.1290283203125, |
|
"loss": 0.4808, |
|
"rewards/accuracies": 0.7249999642372131, |
|
"rewards/chosen": -206.58328247070312, |
|
"rewards/margins": 97.61729431152344, |
|
"rewards/rejected": -304.2005920410156, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.8930323846908734, |
|
"grad_norm": 43.132524531606194, |
|
"learning_rate": 1.7229449166406477e-08, |
|
"logits/chosen": 5693.8486328125, |
|
"logits/rejected": 4534.4052734375, |
|
"logps/chosen": -469.5682067871094, |
|
"logps/rejected": -569.1848754882812, |
|
"loss": 0.4531, |
|
"rewards/accuracies": 0.8083332777023315, |
|
"rewards/chosen": -204.99484252929688, |
|
"rewards/margins": 132.3409881591797, |
|
"rewards/rejected": -337.3358154296875, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9028459273797841, |
|
"grad_norm": 34.73048158998948, |
|
"learning_rate": 1.4242105823176837e-08, |
|
"logits/chosen": 6962.6904296875, |
|
"logits/rejected": 5748.6943359375, |
|
"logps/chosen": -525.45068359375, |
|
"logps/rejected": -564.1856689453125, |
|
"loss": 0.4516, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -192.100830078125, |
|
"rewards/margins": 99.60045623779297, |
|
"rewards/rejected": -291.7012939453125, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9126594700686947, |
|
"grad_norm": 34.47893895251098, |
|
"learning_rate": 1.1531446985597604e-08, |
|
"logits/chosen": 5990.88525390625, |
|
"logits/rejected": 5583.560546875, |
|
"logps/chosen": -485.2509765625, |
|
"logps/rejected": -572.419921875, |
|
"loss": 0.4586, |
|
"rewards/accuracies": 0.8250001072883606, |
|
"rewards/chosen": -192.66342163085938, |
|
"rewards/margins": 110.31678771972656, |
|
"rewards/rejected": -302.9801940917969, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9224730127576055, |
|
"grad_norm": 38.65102653124819, |
|
"learning_rate": 9.100653871854963e-09, |
|
"logits/chosen": 5348.1103515625, |
|
"logits/rejected": 4875.837890625, |
|
"logps/chosen": -461.71697998046875, |
|
"logps/rejected": -564.880126953125, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.7583333253860474, |
|
"rewards/chosen": -193.52760314941406, |
|
"rewards/margins": 111.4560546875, |
|
"rewards/rejected": -304.98358154296875, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9322865554465162, |
|
"grad_norm": 35.36010410132843, |
|
"learning_rate": 6.9525792508597634e-09, |
|
"logits/chosen": 5099.234375, |
|
"logits/rejected": 4961.53466796875, |
|
"logps/chosen": -456.7210388183594, |
|
"logps/rejected": -571.4191284179688, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -211.7642364501953, |
|
"rewards/margins": 97.53651428222656, |
|
"rewards/rejected": -309.30072021484375, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9421000981354269, |
|
"grad_norm": 60.3061901160388, |
|
"learning_rate": 5.089744094249837e-09, |
|
"logits/chosen": 6198.19091796875, |
|
"logits/rejected": 5164.39013671875, |
|
"logps/chosen": -477.7798767089844, |
|
"logps/rejected": -606.0765991210938, |
|
"loss": 0.4522, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -207.751953125, |
|
"rewards/margins": 137.81539916992188, |
|
"rewards/rejected": -345.56732177734375, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.9519136408243376, |
|
"grad_norm": 38.92788774449534, |
|
"learning_rate": 3.5143346177878565e-09, |
|
"logits/chosen": 6070.90673828125, |
|
"logits/rejected": 5626.92578125, |
|
"logps/chosen": -508.4833068847656, |
|
"logps/rejected": -613.7086791992188, |
|
"loss": 0.489, |
|
"rewards/accuracies": 0.7750000357627869, |
|
"rewards/chosen": -202.98733520507812, |
|
"rewards/margins": 117.0799560546875, |
|
"rewards/rejected": -320.0672912597656, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.9617271835132483, |
|
"grad_norm": 54.09136710237634, |
|
"learning_rate": 2.2281997156273213e-09, |
|
"logits/chosen": 6383.44775390625, |
|
"logits/rejected": 5800.46484375, |
|
"logps/chosen": -531.06884765625, |
|
"logps/rejected": -637.2273559570312, |
|
"loss": 0.5199, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -214.9575958251953, |
|
"rewards/margins": 97.85444641113281, |
|
"rewards/rejected": -312.81207275390625, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.971540726202159, |
|
"grad_norm": 37.58754664399141, |
|
"learning_rate": 1.2328487904580131e-09, |
|
"logits/chosen": 5965.31982421875, |
|
"logits/rejected": 4487.17431640625, |
|
"logps/chosen": -527.6492919921875, |
|
"logps/rejected": -595.473876953125, |
|
"loss": 0.4605, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -219.84896850585938, |
|
"rewards/margins": 120.89280700683594, |
|
"rewards/rejected": -340.7417907714844, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.9813542688910697, |
|
"grad_norm": 29.779875569619673, |
|
"learning_rate": 5.29449982077046e-10, |
|
"logits/chosen": 5757.50439453125, |
|
"logits/rejected": 5476.619140625, |
|
"logps/chosen": -460.2972106933594, |
|
"logps/rejected": -551.8410034179688, |
|
"loss": 0.4694, |
|
"rewards/accuracies": 0.7249999642372131, |
|
"rewards/chosen": -186.51734924316406, |
|
"rewards/margins": 90.38455963134766, |
|
"rewards/rejected": -276.90191650390625, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9911678115799804, |
|
"grad_norm": 27.069636754429258, |
|
"learning_rate": 1.1882879646485379e-10, |
|
"logits/chosen": 6565.1044921875, |
|
"logits/rejected": 5212.9794921875, |
|
"logps/chosen": -543.57421875, |
|
"logps/rejected": -614.15185546875, |
|
"loss": 0.4409, |
|
"rewards/accuracies": 0.7416666746139526, |
|
"rewards/chosen": -200.32835388183594, |
|
"rewards/margins": 119.9702377319336, |
|
"rewards/rejected": -320.298583984375, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1019, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5212765811586988, |
|
"train_runtime": 13234.9919, |
|
"train_samples_per_second": 4.619, |
|
"train_steps_per_second": 0.077 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1019, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |