|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997172745264349, |
|
"eval_steps": 500, |
|
"global_step": 442, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0022618037885213456, |
|
"grad_norm": 73.88447935284657, |
|
"learning_rate": 1.7777777777777777e-08, |
|
"logits/chosen": -0.970178484916687, |
|
"logits/rejected": -0.9579144716262817, |
|
"logps/chosen": -1.656538963317871, |
|
"logps/rejected": -1.776021957397461, |
|
"loss": 5.7103, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -16.56538963317871, |
|
"rewards/margins": 1.1948291063308716, |
|
"rewards/rejected": -17.76021957397461, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004523607577042691, |
|
"grad_norm": 86.9125420208072, |
|
"learning_rate": 3.5555555555555554e-08, |
|
"logits/chosen": -1.0327715873718262, |
|
"logits/rejected": -1.023756980895996, |
|
"logps/chosen": -1.7529594898223877, |
|
"logps/rejected": -1.700197458267212, |
|
"loss": 6.1559, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -17.52959442138672, |
|
"rewards/margins": -0.527620792388916, |
|
"rewards/rejected": -17.00197410583496, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006785411365564037, |
|
"grad_norm": 81.69530155846427, |
|
"learning_rate": 5.333333333333333e-08, |
|
"logits/chosen": -0.9820892810821533, |
|
"logits/rejected": -0.998577892780304, |
|
"logps/chosen": -1.7510223388671875, |
|
"logps/rejected": -1.9477208852767944, |
|
"loss": 6.4107, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -17.510223388671875, |
|
"rewards/margins": 1.966986060142517, |
|
"rewards/rejected": -19.47720718383789, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.009047215154085382, |
|
"grad_norm": 102.14679455248601, |
|
"learning_rate": 7.111111111111111e-08, |
|
"logits/chosen": -1.0096410512924194, |
|
"logits/rejected": -0.9931849241256714, |
|
"logps/chosen": -1.7210272550582886, |
|
"logps/rejected": -1.6721601486206055, |
|
"loss": 6.2871, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -17.21027183532715, |
|
"rewards/margins": -0.4886711835861206, |
|
"rewards/rejected": -16.721601486206055, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01130901894260673, |
|
"grad_norm": 80.06704613306383, |
|
"learning_rate": 8.888888888888888e-08, |
|
"logits/chosen": -1.0100579261779785, |
|
"logits/rejected": -0.998342752456665, |
|
"logps/chosen": -1.6671767234802246, |
|
"logps/rejected": -1.6990655660629272, |
|
"loss": 5.7732, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -16.671768188476562, |
|
"rewards/margins": 0.3188871145248413, |
|
"rewards/rejected": -16.99065399169922, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.013570822731128074, |
|
"grad_norm": 74.95530274523003, |
|
"learning_rate": 1.0666666666666666e-07, |
|
"logits/chosen": -1.0750232934951782, |
|
"logits/rejected": -1.054511547088623, |
|
"logps/chosen": -1.7332895994186401, |
|
"logps/rejected": -1.6780965328216553, |
|
"loss": 6.2788, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -17.332895278930664, |
|
"rewards/margins": -0.5519311428070068, |
|
"rewards/rejected": -16.780963897705078, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01583262651964942, |
|
"grad_norm": 39.99505319144089, |
|
"learning_rate": 1.2444444444444443e-07, |
|
"logits/chosen": -0.9987316131591797, |
|
"logits/rejected": -0.9925400614738464, |
|
"logps/chosen": -1.56162691116333, |
|
"logps/rejected": -1.926164150238037, |
|
"loss": 4.1206, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -15.616266250610352, |
|
"rewards/margins": 3.6453747749328613, |
|
"rewards/rejected": -19.261642456054688, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.018094430308170765, |
|
"grad_norm": 106.66642676745393, |
|
"learning_rate": 1.4222222222222222e-07, |
|
"logits/chosen": -0.9619507789611816, |
|
"logits/rejected": -0.9788931608200073, |
|
"logps/chosen": -1.6865566968917847, |
|
"logps/rejected": -1.7454805374145508, |
|
"loss": 5.9544, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -16.86556625366211, |
|
"rewards/margins": 0.5892367959022522, |
|
"rewards/rejected": -17.454803466796875, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.020356234096692113, |
|
"grad_norm": 57.905987384590865, |
|
"learning_rate": 1.6e-07, |
|
"logits/chosen": -1.0222256183624268, |
|
"logits/rejected": -1.0178791284561157, |
|
"logps/chosen": -1.5835094451904297, |
|
"logps/rejected": -1.796851634979248, |
|
"loss": 5.3604, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -15.835092544555664, |
|
"rewards/margins": 2.1334214210510254, |
|
"rewards/rejected": -17.968515396118164, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02261803788521346, |
|
"grad_norm": 82.69531006046971, |
|
"learning_rate": 1.7777777777777776e-07, |
|
"logits/chosen": -1.0171338319778442, |
|
"logits/rejected": -1.0163377523422241, |
|
"logps/chosen": -1.7584372758865356, |
|
"logps/rejected": -1.6933101415634155, |
|
"loss": 6.0802, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -17.58437156677246, |
|
"rewards/margins": -0.6512712240219116, |
|
"rewards/rejected": -16.933101654052734, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.024879841673734804, |
|
"grad_norm": 97.05523972814746, |
|
"learning_rate": 1.9555555555555555e-07, |
|
"logits/chosen": -1.0786405801773071, |
|
"logits/rejected": -1.0799434185028076, |
|
"logps/chosen": -1.732240915298462, |
|
"logps/rejected": -1.967947006225586, |
|
"loss": 5.3828, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -17.322410583496094, |
|
"rewards/margins": 2.357060432434082, |
|
"rewards/rejected": -19.679471969604492, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02714164546225615, |
|
"grad_norm": 94.06438233273298, |
|
"learning_rate": 2.133333333333333e-07, |
|
"logits/chosen": -0.9874241352081299, |
|
"logits/rejected": -0.969804584980011, |
|
"logps/chosen": -1.8421882390975952, |
|
"logps/rejected": -1.9192876815795898, |
|
"loss": 6.3133, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -18.42188262939453, |
|
"rewards/margins": 0.7709953188896179, |
|
"rewards/rejected": -19.1928768157959, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.029403449250777494, |
|
"grad_norm": 94.45847571939335, |
|
"learning_rate": 2.3111111111111107e-07, |
|
"logits/chosen": -0.9826835989952087, |
|
"logits/rejected": -0.960513710975647, |
|
"logps/chosen": -1.771365761756897, |
|
"logps/rejected": -1.6903609037399292, |
|
"loss": 6.1689, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -17.71365737915039, |
|
"rewards/margins": -0.8100481629371643, |
|
"rewards/rejected": -16.903610229492188, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03166525303929884, |
|
"grad_norm": 59.99069333682814, |
|
"learning_rate": 2.4888888888888886e-07, |
|
"logits/chosen": -0.9839141964912415, |
|
"logits/rejected": -0.9832520484924316, |
|
"logps/chosen": -1.4408010244369507, |
|
"logps/rejected": -1.6560081243515015, |
|
"loss": 4.5476, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -14.40800952911377, |
|
"rewards/margins": 2.152069568634033, |
|
"rewards/rejected": -16.56007957458496, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.033927056827820185, |
|
"grad_norm": 96.41950248980098, |
|
"learning_rate": 2.666666666666666e-07, |
|
"logits/chosen": -0.9852787852287292, |
|
"logits/rejected": -0.9823875427246094, |
|
"logps/chosen": -1.5907388925552368, |
|
"logps/rejected": -1.6886473894119263, |
|
"loss": 5.1372, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -15.907387733459473, |
|
"rewards/margins": 0.9790847301483154, |
|
"rewards/rejected": -16.886472702026367, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03618886061634153, |
|
"grad_norm": 76.96404809338092, |
|
"learning_rate": 2.8444444444444443e-07, |
|
"logits/chosen": -1.022695541381836, |
|
"logits/rejected": -1.0211068391799927, |
|
"logps/chosen": -1.741645097732544, |
|
"logps/rejected": -1.709848403930664, |
|
"loss": 6.4049, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -17.416452407836914, |
|
"rewards/margins": -0.3179686665534973, |
|
"rewards/rejected": -17.09848403930664, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.038450664404862875, |
|
"grad_norm": 93.85331038926209, |
|
"learning_rate": 3.022222222222222e-07, |
|
"logits/chosen": -1.0325472354888916, |
|
"logits/rejected": -1.0200681686401367, |
|
"logps/chosen": -1.5360885858535767, |
|
"logps/rejected": -1.4957702159881592, |
|
"loss": 6.2812, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -15.360885620117188, |
|
"rewards/margins": -0.4031827449798584, |
|
"rewards/rejected": -14.957704544067383, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04071246819338423, |
|
"grad_norm": 98.20855483649616, |
|
"learning_rate": 3.2e-07, |
|
"logits/chosen": -1.0454903841018677, |
|
"logits/rejected": -1.0343623161315918, |
|
"logps/chosen": -1.9974663257598877, |
|
"logps/rejected": -2.022808074951172, |
|
"loss": 6.0793, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -19.97466278076172, |
|
"rewards/margins": 0.2534167766571045, |
|
"rewards/rejected": -20.22808265686035, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04297427198190557, |
|
"grad_norm": 86.85908299567286, |
|
"learning_rate": 3.3777777777777777e-07, |
|
"logits/chosen": -1.0504282712936401, |
|
"logits/rejected": -1.042330265045166, |
|
"logps/chosen": -1.7138850688934326, |
|
"logps/rejected": -1.7518320083618164, |
|
"loss": 5.2537, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -17.13884925842285, |
|
"rewards/margins": 0.37946897745132446, |
|
"rewards/rejected": -17.51831817626953, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04523607577042692, |
|
"grad_norm": 40.41103844657731, |
|
"learning_rate": 3.5555555555555553e-07, |
|
"logits/chosen": -1.0436055660247803, |
|
"logits/rejected": -1.0474931001663208, |
|
"logps/chosen": -1.5377863645553589, |
|
"logps/rejected": -1.766308069229126, |
|
"loss": 4.3313, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -15.377862930297852, |
|
"rewards/margins": 2.2852182388305664, |
|
"rewards/rejected": -17.6630802154541, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04749787955894826, |
|
"grad_norm": 65.41047373950924, |
|
"learning_rate": 3.7333333333333334e-07, |
|
"logits/chosen": -0.9501165151596069, |
|
"logits/rejected": -0.953487753868103, |
|
"logps/chosen": -1.5590920448303223, |
|
"logps/rejected": -1.5690488815307617, |
|
"loss": 5.4623, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -15.590919494628906, |
|
"rewards/margins": 0.09956827759742737, |
|
"rewards/rejected": -15.690486907958984, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.04975968334746961, |
|
"grad_norm": 52.44214623062925, |
|
"learning_rate": 3.911111111111111e-07, |
|
"logits/chosen": -0.9958354830741882, |
|
"logits/rejected": -0.9834215641021729, |
|
"logps/chosen": -1.6224457025527954, |
|
"logps/rejected": -1.6867178678512573, |
|
"loss": 4.7122, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -16.224456787109375, |
|
"rewards/margins": 0.6427204608917236, |
|
"rewards/rejected": -16.867177963256836, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05202148713599095, |
|
"grad_norm": 70.21591683383166, |
|
"learning_rate": 4.0888888888888886e-07, |
|
"logits/chosen": -1.0022647380828857, |
|
"logits/rejected": -0.972631573677063, |
|
"logps/chosen": -1.6058869361877441, |
|
"logps/rejected": -1.6072971820831299, |
|
"loss": 5.8867, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -16.058868408203125, |
|
"rewards/margins": 0.014102667570114136, |
|
"rewards/rejected": -16.07297134399414, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0542832909245123, |
|
"grad_norm": 51.78707493520344, |
|
"learning_rate": 4.266666666666666e-07, |
|
"logits/chosen": -1.0004748106002808, |
|
"logits/rejected": -1.0073304176330566, |
|
"logps/chosen": -1.4363174438476562, |
|
"logps/rejected": -1.5035033226013184, |
|
"loss": 5.1217, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -14.363174438476562, |
|
"rewards/margins": 0.671859622001648, |
|
"rewards/rejected": -15.035033226013184, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05654509471303364, |
|
"grad_norm": 83.93302314997005, |
|
"learning_rate": 4.4444444444444444e-07, |
|
"logits/chosen": -0.9994463920593262, |
|
"logits/rejected": -0.9950385093688965, |
|
"logps/chosen": -1.7453137636184692, |
|
"logps/rejected": -1.7574050426483154, |
|
"loss": 5.5417, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -17.453136444091797, |
|
"rewards/margins": 0.12091405689716339, |
|
"rewards/rejected": -17.574050903320312, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05880689850155499, |
|
"grad_norm": 78.39435935083853, |
|
"learning_rate": 4.6222222222222214e-07, |
|
"logits/chosen": -1.0201776027679443, |
|
"logits/rejected": -1.0055274963378906, |
|
"logps/chosen": -1.7500752210617065, |
|
"logps/rejected": -1.752750039100647, |
|
"loss": 5.4918, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -17.500751495361328, |
|
"rewards/margins": 0.02674958109855652, |
|
"rewards/rejected": -17.52750015258789, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.061068702290076333, |
|
"grad_norm": 68.24007748722875, |
|
"learning_rate": 4.8e-07, |
|
"logits/chosen": -1.0300168991088867, |
|
"logits/rejected": -1.032236933708191, |
|
"logps/chosen": -1.7269227504730225, |
|
"logps/rejected": -1.6468346118927002, |
|
"loss": 6.2062, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -17.269227981567383, |
|
"rewards/margins": -0.800881028175354, |
|
"rewards/rejected": -16.468345642089844, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.06333050607859768, |
|
"grad_norm": 51.325729522476735, |
|
"learning_rate": 4.977777777777777e-07, |
|
"logits/chosen": -1.020127534866333, |
|
"logits/rejected": -1.00740647315979, |
|
"logps/chosen": -1.6155153512954712, |
|
"logps/rejected": -1.7134020328521729, |
|
"loss": 5.6067, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -16.1551513671875, |
|
"rewards/margins": 0.9788689613342285, |
|
"rewards/rejected": -17.134021759033203, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.06559230986711903, |
|
"grad_norm": 67.70299840427867, |
|
"learning_rate": 5.155555555555556e-07, |
|
"logits/chosen": -1.0417428016662598, |
|
"logits/rejected": -1.0501348972320557, |
|
"logps/chosen": -1.625864028930664, |
|
"logps/rejected": -1.672263264656067, |
|
"loss": 5.307, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -16.25864028930664, |
|
"rewards/margins": 0.4639921486377716, |
|
"rewards/rejected": -16.722633361816406, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.06785411365564037, |
|
"grad_norm": 88.41842890120783, |
|
"learning_rate": 5.333333333333332e-07, |
|
"logits/chosen": -1.012568473815918, |
|
"logits/rejected": -0.9970849752426147, |
|
"logps/chosen": -1.673133134841919, |
|
"logps/rejected": -1.7118630409240723, |
|
"loss": 6.1408, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -16.73133087158203, |
|
"rewards/margins": 0.38729995489120483, |
|
"rewards/rejected": -17.11863136291504, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07011591744416172, |
|
"grad_norm": 79.26925757564649, |
|
"learning_rate": 5.511111111111111e-07, |
|
"logits/chosen": -1.0454754829406738, |
|
"logits/rejected": -1.031201720237732, |
|
"logps/chosen": -1.654517412185669, |
|
"logps/rejected": -1.6231170892715454, |
|
"loss": 5.7537, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -16.54517364501953, |
|
"rewards/margins": -0.314003586769104, |
|
"rewards/rejected": -16.231170654296875, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.07237772123268306, |
|
"grad_norm": 62.138847724263975, |
|
"learning_rate": 5.688888888888889e-07, |
|
"logits/chosen": -1.0572967529296875, |
|
"logits/rejected": -1.0592782497406006, |
|
"logps/chosen": -1.5859274864196777, |
|
"logps/rejected": -1.5584020614624023, |
|
"loss": 5.6448, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -15.859275817871094, |
|
"rewards/margins": -0.275254487991333, |
|
"rewards/rejected": -15.584020614624023, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.07463952502120441, |
|
"grad_norm": 51.730594123658335, |
|
"learning_rate": 5.866666666666666e-07, |
|
"logits/chosen": -1.001430869102478, |
|
"logits/rejected": -0.9772145748138428, |
|
"logps/chosen": -1.556628704071045, |
|
"logps/rejected": -1.740290880203247, |
|
"loss": 4.6798, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -15.56628704071045, |
|
"rewards/margins": 1.836620569229126, |
|
"rewards/rejected": -17.402908325195312, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.07690132880972575, |
|
"grad_norm": 52.933499868170415, |
|
"learning_rate": 6.044444444444444e-07, |
|
"logits/chosen": -1.0453863143920898, |
|
"logits/rejected": -0.9934295415878296, |
|
"logps/chosen": -1.4905204772949219, |
|
"logps/rejected": -1.6541056632995605, |
|
"loss": 4.4742, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -14.905204772949219, |
|
"rewards/margins": 1.6358494758605957, |
|
"rewards/rejected": -16.541053771972656, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0791631325982471, |
|
"grad_norm": 74.39925640560193, |
|
"learning_rate": 6.222222222222223e-07, |
|
"logits/chosen": -1.0696301460266113, |
|
"logits/rejected": -1.028558611869812, |
|
"logps/chosen": -1.660436987876892, |
|
"logps/rejected": -1.8180859088897705, |
|
"loss": 5.0488, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -16.6043701171875, |
|
"rewards/margins": 1.576488733291626, |
|
"rewards/rejected": -18.18086051940918, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08142493638676845, |
|
"grad_norm": 60.67801960429597, |
|
"learning_rate": 6.4e-07, |
|
"logits/chosen": -0.9725692272186279, |
|
"logits/rejected": -0.9748388528823853, |
|
"logps/chosen": -1.5920709371566772, |
|
"logps/rejected": -1.7690041065216064, |
|
"loss": 4.7704, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -15.920707702636719, |
|
"rewards/margins": 1.7693322896957397, |
|
"rewards/rejected": -17.690040588378906, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.08368674017528979, |
|
"grad_norm": 55.8652194329523, |
|
"learning_rate": 6.577777777777777e-07, |
|
"logits/chosen": -0.9925840497016907, |
|
"logits/rejected": -0.9903581142425537, |
|
"logps/chosen": -1.442215085029602, |
|
"logps/rejected": -1.468677282333374, |
|
"loss": 5.1667, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -14.422150611877441, |
|
"rewards/margins": 0.26462242007255554, |
|
"rewards/rejected": -14.686773300170898, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.08594854396381114, |
|
"grad_norm": 66.48789350704135, |
|
"learning_rate": 6.755555555555555e-07, |
|
"logits/chosen": -1.081889271736145, |
|
"logits/rejected": -1.0824934244155884, |
|
"logps/chosen": -1.6371712684631348, |
|
"logps/rejected": -1.6554210186004639, |
|
"loss": 5.278, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -16.371713638305664, |
|
"rewards/margins": 0.182498037815094, |
|
"rewards/rejected": -16.554210662841797, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.08821034775233248, |
|
"grad_norm": 60.77774068293974, |
|
"learning_rate": 6.933333333333333e-07, |
|
"logits/chosen": -1.0623948574066162, |
|
"logits/rejected": -1.074435830116272, |
|
"logps/chosen": -1.5994151830673218, |
|
"logps/rejected": -1.6381984949111938, |
|
"loss": 5.0061, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -15.99415111541748, |
|
"rewards/margins": 0.38783279061317444, |
|
"rewards/rejected": -16.38198471069336, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.09047215154085383, |
|
"grad_norm": 38.115575176568385, |
|
"learning_rate": 7.111111111111111e-07, |
|
"logits/chosen": -1.0157631635665894, |
|
"logits/rejected": -0.9986149668693542, |
|
"logps/chosen": -1.4888092279434204, |
|
"logps/rejected": -1.7236121892929077, |
|
"loss": 3.922, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -14.888091087341309, |
|
"rewards/margins": 2.348029613494873, |
|
"rewards/rejected": -17.236122131347656, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09273395532937517, |
|
"grad_norm": 38.36474699448423, |
|
"learning_rate": 7.288888888888888e-07, |
|
"logits/chosen": -1.060340404510498, |
|
"logits/rejected": -1.0315660238265991, |
|
"logps/chosen": -1.456924319267273, |
|
"logps/rejected": -1.6312799453735352, |
|
"loss": 4.4318, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -14.569242477416992, |
|
"rewards/margins": 1.7435554265975952, |
|
"rewards/rejected": -16.31279945373535, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.09499575911789652, |
|
"grad_norm": 68.97554618193577, |
|
"learning_rate": 7.466666666666667e-07, |
|
"logits/chosen": -0.9593913555145264, |
|
"logits/rejected": -0.9225877523422241, |
|
"logps/chosen": -1.5931707620620728, |
|
"logps/rejected": -1.6982492208480835, |
|
"loss": 5.0405, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -15.931707382202148, |
|
"rewards/margins": 1.0507832765579224, |
|
"rewards/rejected": -16.982492446899414, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.09725756290641786, |
|
"grad_norm": 58.86006592276919, |
|
"learning_rate": 7.644444444444444e-07, |
|
"logits/chosen": -1.0317012071609497, |
|
"logits/rejected": -1.0032808780670166, |
|
"logps/chosen": -1.561445951461792, |
|
"logps/rejected": -1.6140797138214111, |
|
"loss": 5.0187, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -15.614459037780762, |
|
"rewards/margins": 0.5263394117355347, |
|
"rewards/rejected": -16.140798568725586, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.09951936669493922, |
|
"grad_norm": 40.694422648420655, |
|
"learning_rate": 7.822222222222222e-07, |
|
"logits/chosen": -0.9743322730064392, |
|
"logits/rejected": -0.9567911624908447, |
|
"logps/chosen": -1.4267774820327759, |
|
"logps/rejected": -1.591205358505249, |
|
"loss": 4.1227, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -14.26777458190918, |
|
"rewards/margins": 1.644278883934021, |
|
"rewards/rejected": -15.912054061889648, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.10178117048346055, |
|
"grad_norm": 40.8715446540072, |
|
"learning_rate": 8e-07, |
|
"logits/chosen": -1.0036640167236328, |
|
"logits/rejected": -0.9986906051635742, |
|
"logps/chosen": -1.4704385995864868, |
|
"logps/rejected": -1.6148722171783447, |
|
"loss": 4.5268, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -14.704385757446289, |
|
"rewards/margins": 1.4443360567092896, |
|
"rewards/rejected": -16.14872169494629, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.1040429742719819, |
|
"grad_norm": 64.76142197420904, |
|
"learning_rate": 7.999874759018868e-07, |
|
"logits/chosen": -1.0634511709213257, |
|
"logits/rejected": -1.0455323457717896, |
|
"logps/chosen": -1.7534921169281006, |
|
"logps/rejected": -1.9212398529052734, |
|
"loss": 4.5489, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -17.534921646118164, |
|
"rewards/margins": 1.6774773597717285, |
|
"rewards/rejected": -19.212400436401367, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.10630477806050326, |
|
"grad_norm": 43.36406398315485, |
|
"learning_rate": 7.999499043918123e-07, |
|
"logits/chosen": -1.0629500150680542, |
|
"logits/rejected": -1.0646634101867676, |
|
"logps/chosen": -1.575823187828064, |
|
"logps/rejected": -1.6532174348831177, |
|
"loss": 4.9532, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -15.758233070373535, |
|
"rewards/margins": 0.7739435434341431, |
|
"rewards/rejected": -16.532176971435547, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.1085665818490246, |
|
"grad_norm": 44.32249342002411, |
|
"learning_rate": 7.998872878225228e-07, |
|
"logits/chosen": -0.9744206666946411, |
|
"logits/rejected": -0.9684075117111206, |
|
"logps/chosen": -1.603161334991455, |
|
"logps/rejected": -1.7590126991271973, |
|
"loss": 4.35, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -16.0316104888916, |
|
"rewards/margins": 1.5585150718688965, |
|
"rewards/rejected": -17.590126037597656, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.11082838563754595, |
|
"grad_norm": 45.52706437580373, |
|
"learning_rate": 7.997996301150987e-07, |
|
"logits/chosen": -0.9781917929649353, |
|
"logits/rejected": -0.9805136919021606, |
|
"logps/chosen": -1.5446103811264038, |
|
"logps/rejected": -1.670082926750183, |
|
"loss": 4.6801, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -15.446105003356934, |
|
"rewards/margins": 1.2547240257263184, |
|
"rewards/rejected": -16.70082664489746, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.11309018942606729, |
|
"grad_norm": 48.545636464126815, |
|
"learning_rate": 7.996869367587088e-07, |
|
"logits/chosen": -0.9543410539627075, |
|
"logits/rejected": -0.9468324780464172, |
|
"logps/chosen": -1.639030933380127, |
|
"logps/rejected": -1.7508221864700317, |
|
"loss": 4.7132, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -16.390310287475586, |
|
"rewards/margins": 1.1179133653640747, |
|
"rewards/rejected": -17.508222579956055, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11535199321458864, |
|
"grad_norm": 43.1716994169158, |
|
"learning_rate": 7.99549214810266e-07, |
|
"logits/chosen": -0.957094669342041, |
|
"logits/rejected": -0.9495903849601746, |
|
"logps/chosen": -1.6492178440093994, |
|
"logps/rejected": -1.7528547048568726, |
|
"loss": 4.624, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -16.49217987060547, |
|
"rewards/margins": 1.0363675355911255, |
|
"rewards/rejected": -17.528545379638672, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.11761379700310998, |
|
"grad_norm": 44.767623959784785, |
|
"learning_rate": 7.993864728939867e-07, |
|
"logits/chosen": -0.9784611463546753, |
|
"logits/rejected": -0.9418229460716248, |
|
"logps/chosen": -1.6040714979171753, |
|
"logps/rejected": -1.7161141633987427, |
|
"loss": 4.8234, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -16.040714263916016, |
|
"rewards/margins": 1.1204277276992798, |
|
"rewards/rejected": -17.16114044189453, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.11987560079163133, |
|
"grad_norm": 49.50003960371549, |
|
"learning_rate": 7.991987212008491e-07, |
|
"logits/chosen": -0.9920171499252319, |
|
"logits/rejected": -0.983026921749115, |
|
"logps/chosen": -1.6678504943847656, |
|
"logps/rejected": -1.9023908376693726, |
|
"loss": 4.2403, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -16.678503036499023, |
|
"rewards/margins": 2.345404624938965, |
|
"rewards/rejected": -19.023910522460938, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.12213740458015267, |
|
"grad_norm": 53.488107569112316, |
|
"learning_rate": 7.989859714879565e-07, |
|
"logits/chosen": -1.0169801712036133, |
|
"logits/rejected": -0.9922108054161072, |
|
"logps/chosen": -1.6226848363876343, |
|
"logps/rejected": -1.7421677112579346, |
|
"loss": 4.7775, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -16.226848602294922, |
|
"rewards/margins": 1.1948294639587402, |
|
"rewards/rejected": -17.42167854309082, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.12439920836867402, |
|
"grad_norm": 47.80071251117602, |
|
"learning_rate": 7.987482370778005e-07, |
|
"logits/chosen": -0.9851608276367188, |
|
"logits/rejected": -0.9729430675506592, |
|
"logps/chosen": -1.675248622894287, |
|
"logps/rejected": -1.8124985694885254, |
|
"loss": 4.6785, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -16.752485275268555, |
|
"rewards/margins": 1.372498869895935, |
|
"rewards/rejected": -18.124984741210938, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12666101215719536, |
|
"grad_norm": 54.191735218964155, |
|
"learning_rate": 7.984855328574262e-07, |
|
"logits/chosen": -0.8548079133033752, |
|
"logits/rejected": -0.8540121912956238, |
|
"logps/chosen": -1.6108894348144531, |
|
"logps/rejected": -1.752925992012024, |
|
"loss": 4.4245, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -16.10889434814453, |
|
"rewards/margins": 1.4203639030456543, |
|
"rewards/rejected": -17.529258728027344, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.1289228159457167, |
|
"grad_norm": 67.34737943017025, |
|
"learning_rate": 7.981978752775009e-07, |
|
"logits/chosen": -0.9092062711715698, |
|
"logits/rejected": -0.9146596789360046, |
|
"logps/chosen": -1.70232093334198, |
|
"logps/rejected": -1.8453538417816162, |
|
"loss": 4.6952, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -17.023208618164062, |
|
"rewards/margins": 1.4303276538848877, |
|
"rewards/rejected": -18.45353889465332, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.13118461973423806, |
|
"grad_norm": 80.10334750091522, |
|
"learning_rate": 7.978852823512833e-07, |
|
"logits/chosen": -0.9658604264259338, |
|
"logits/rejected": -0.9477964639663696, |
|
"logps/chosen": -1.832795262336731, |
|
"logps/rejected": -1.9546037912368774, |
|
"loss": 5.0523, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -18.327951431274414, |
|
"rewards/margins": 1.2180850505828857, |
|
"rewards/rejected": -19.546037673950195, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.1334464235227594, |
|
"grad_norm": 63.905912489444795, |
|
"learning_rate": 7.975477736534957e-07, |
|
"logits/chosen": -0.9663300514221191, |
|
"logits/rejected": -0.9616862535476685, |
|
"logps/chosen": -1.7907471656799316, |
|
"logps/rejected": -2.0022921562194824, |
|
"loss": 4.4589, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -17.907470703125, |
|
"rewards/margins": 2.115452766418457, |
|
"rewards/rejected": -20.022924423217773, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.13570822731128074, |
|
"grad_norm": 73.71974681164818, |
|
"learning_rate": 7.971853703190986e-07, |
|
"logits/chosen": -0.9496431350708008, |
|
"logits/rejected": -0.9395902752876282, |
|
"logps/chosen": -1.7495907545089722, |
|
"logps/rejected": -1.9543805122375488, |
|
"loss": 4.2887, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -17.495908737182617, |
|
"rewards/margins": 2.047896146774292, |
|
"rewards/rejected": -19.543804168701172, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1379700310998021, |
|
"grad_norm": 55.31678343050937, |
|
"learning_rate": 7.967980950419664e-07, |
|
"logits/chosen": -0.8977174758911133, |
|
"logits/rejected": -0.9004334807395935, |
|
"logps/chosen": -1.683959722518921, |
|
"logps/rejected": -1.8546793460845947, |
|
"loss": 4.4567, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -16.839595794677734, |
|
"rewards/margins": 1.7071975469589233, |
|
"rewards/rejected": -18.546794891357422, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.14023183488832344, |
|
"grad_norm": 40.408238034098886, |
|
"learning_rate": 7.963859720734669e-07, |
|
"logits/chosen": -0.9545056223869324, |
|
"logits/rejected": -0.9535243511199951, |
|
"logps/chosen": -1.6104179620742798, |
|
"logps/rejected": -1.8246254920959473, |
|
"loss": 4.2358, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -16.10417938232422, |
|
"rewards/margins": 2.142076253890991, |
|
"rewards/rejected": -18.24625587463379, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.14249363867684478, |
|
"grad_norm": 39.63066442014652, |
|
"learning_rate": 7.959490272209427e-07, |
|
"logits/chosen": -0.9525138735771179, |
|
"logits/rejected": -0.9237796664237976, |
|
"logps/chosen": -1.6529213190078735, |
|
"logps/rejected": -1.9204621315002441, |
|
"loss": 3.7751, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -16.52921485900879, |
|
"rewards/margins": 2.675405740737915, |
|
"rewards/rejected": -19.204618453979492, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.14475544246536612, |
|
"grad_norm": 49.2257151600756, |
|
"learning_rate": 7.954872878460946e-07, |
|
"logits/chosen": -0.9712534546852112, |
|
"logits/rejected": -0.9322056770324707, |
|
"logps/chosen": -1.7236018180847168, |
|
"logps/rejected": -1.9554036855697632, |
|
"loss": 3.9722, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -17.23601722717285, |
|
"rewards/margins": 2.3180198669433594, |
|
"rewards/rejected": -19.554035186767578, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.14701724625388748, |
|
"grad_norm": 56.40694891058032, |
|
"learning_rate": 7.950007828632691e-07, |
|
"logits/chosen": -0.9170076847076416, |
|
"logits/rejected": -0.9099224805831909, |
|
"logps/chosen": -1.8218014240264893, |
|
"logps/rejected": -2.114569902420044, |
|
"loss": 4.0631, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -18.218013763427734, |
|
"rewards/margins": 2.9276864528656006, |
|
"rewards/rejected": -21.145700454711914, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14927905004240882, |
|
"grad_norm": 54.86077177732332, |
|
"learning_rate": 7.944895427376465e-07, |
|
"logits/chosen": -0.9314538240432739, |
|
"logits/rejected": -0.9174329042434692, |
|
"logps/chosen": -1.799817681312561, |
|
"logps/rejected": -2.0671584606170654, |
|
"loss": 4.1606, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -17.99817657470703, |
|
"rewards/margins": 2.6734066009521484, |
|
"rewards/rejected": -20.67158317565918, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.15154085383093016, |
|
"grad_norm": 40.53992007371899, |
|
"learning_rate": 7.939535994833345e-07, |
|
"logits/chosen": -0.8827767372131348, |
|
"logits/rejected": -0.8872506618499756, |
|
"logps/chosen": -1.6804907321929932, |
|
"logps/rejected": -1.9774214029312134, |
|
"loss": 3.9425, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -16.804906845092773, |
|
"rewards/margins": 2.969305992126465, |
|
"rewards/rejected": -19.774213790893555, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.1538026576194515, |
|
"grad_norm": 51.36974889634882, |
|
"learning_rate": 7.933929866613628e-07, |
|
"logits/chosen": -0.9131999611854553, |
|
"logits/rejected": -0.9120370149612427, |
|
"logps/chosen": -1.746276617050171, |
|
"logps/rejected": -1.8965258598327637, |
|
"loss": 4.5149, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -17.462764739990234, |
|
"rewards/margins": 1.5024943351745605, |
|
"rewards/rejected": -18.965259552001953, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.15606446140797287, |
|
"grad_norm": 50.79237381214548, |
|
"learning_rate": 7.928077393775808e-07, |
|
"logits/chosen": -0.887502133846283, |
|
"logits/rejected": -0.9122737050056458, |
|
"logps/chosen": -1.7796714305877686, |
|
"logps/rejected": -2.1390035152435303, |
|
"loss": 3.5203, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -17.796714782714844, |
|
"rewards/margins": 3.593320369720459, |
|
"rewards/rejected": -21.39003562927246, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.1583262651964942, |
|
"grad_norm": 77.32537487087262, |
|
"learning_rate": 7.921978942804609e-07, |
|
"logits/chosen": -0.8942463397979736, |
|
"logits/rejected": -0.8875532150268555, |
|
"logps/chosen": -1.7755475044250488, |
|
"logps/rejected": -2.0664196014404297, |
|
"loss": 3.8556, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -17.755474090576172, |
|
"rewards/margins": 2.908721446990967, |
|
"rewards/rejected": -20.664196014404297, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16058806898501554, |
|
"grad_norm": 62.6030871972903, |
|
"learning_rate": 7.915634895588021e-07, |
|
"logits/chosen": -0.9042387008666992, |
|
"logits/rejected": -0.8948806524276733, |
|
"logps/chosen": -1.8970236778259277, |
|
"logps/rejected": -2.0581259727478027, |
|
"loss": 4.513, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -18.970237731933594, |
|
"rewards/margins": 1.6110222339630127, |
|
"rewards/rejected": -20.581260681152344, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.1628498727735369, |
|
"grad_norm": 61.94984380236303, |
|
"learning_rate": 7.909045649393394e-07, |
|
"logits/chosen": -0.9422912001609802, |
|
"logits/rejected": -0.9519181251525879, |
|
"logps/chosen": -1.7758185863494873, |
|
"logps/rejected": -1.9057157039642334, |
|
"loss": 4.5088, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -17.7581844329834, |
|
"rewards/margins": 1.2989716529846191, |
|
"rewards/rejected": -19.05715560913086, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.16511167656205825, |
|
"grad_norm": 48.64018161030811, |
|
"learning_rate": 7.902211616842556e-07, |
|
"logits/chosen": -0.9227447509765625, |
|
"logits/rejected": -0.9130942821502686, |
|
"logps/chosen": -1.815474033355713, |
|
"logps/rejected": -2.099963426589966, |
|
"loss": 4.078, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -18.154739379882812, |
|
"rewards/margins": 2.84489369392395, |
|
"rewards/rejected": -20.9996337890625, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.16737348035057958, |
|
"grad_norm": 58.56813481029154, |
|
"learning_rate": 7.89513322588598e-07, |
|
"logits/chosen": -0.8936547636985779, |
|
"logits/rejected": -0.8881016373634338, |
|
"logps/chosen": -1.8046737909317017, |
|
"logps/rejected": -2.0184521675109863, |
|
"loss": 3.8547, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -18.046737670898438, |
|
"rewards/margins": 2.1377835273742676, |
|
"rewards/rejected": -20.18452262878418, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.16963528413910092, |
|
"grad_norm": 53.69434207416314, |
|
"learning_rate": 7.887810919775976e-07, |
|
"logits/chosen": -0.8241357207298279, |
|
"logits/rejected": -0.8239607810974121, |
|
"logps/chosen": -1.8949229717254639, |
|
"logps/rejected": -2.1345691680908203, |
|
"loss": 4.0357, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -18.949230194091797, |
|
"rewards/margins": 2.396462917327881, |
|
"rewards/rejected": -21.345691680908203, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1718970879276223, |
|
"grad_norm": 45.03380806243152, |
|
"learning_rate": 7.880245157038949e-07, |
|
"logits/chosen": -0.9108306169509888, |
|
"logits/rejected": -0.8901318907737732, |
|
"logps/chosen": -1.8972225189208984, |
|
"logps/rejected": -2.1310153007507324, |
|
"loss": 4.1242, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -18.972225189208984, |
|
"rewards/margins": 2.3379273414611816, |
|
"rewards/rejected": -21.310152053833008, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.17415889171614363, |
|
"grad_norm": 77.27156743255722, |
|
"learning_rate": 7.872436411446671e-07, |
|
"logits/chosen": -0.9079168438911438, |
|
"logits/rejected": -0.9322432279586792, |
|
"logps/chosen": -1.9609839916229248, |
|
"logps/rejected": -2.1486620903015137, |
|
"loss": 4.4641, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -19.609838485717773, |
|
"rewards/margins": 1.876783013343811, |
|
"rewards/rejected": -21.486618041992188, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.17642069550466496, |
|
"grad_norm": 48.02529617691416, |
|
"learning_rate": 7.86438517198662e-07, |
|
"logits/chosen": -0.8568049073219299, |
|
"logits/rejected": -0.8431757688522339, |
|
"logps/chosen": -1.8570719957351685, |
|
"logps/rejected": -2.0501086711883545, |
|
"loss": 4.3324, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -18.570720672607422, |
|
"rewards/margins": 1.9303661584854126, |
|
"rewards/rejected": -20.501087188720703, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1786824992931863, |
|
"grad_norm": 47.59975572650292, |
|
"learning_rate": 7.856091942831366e-07, |
|
"logits/chosen": -0.829656183719635, |
|
"logits/rejected": -0.8414878249168396, |
|
"logps/chosen": -1.829184651374817, |
|
"logps/rejected": -2.0620758533477783, |
|
"loss": 4.3785, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -18.291847229003906, |
|
"rewards/margins": 2.3289124965667725, |
|
"rewards/rejected": -20.620759963989258, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.18094430308170767, |
|
"grad_norm": 50.91304057566776, |
|
"learning_rate": 7.847557243306982e-07, |
|
"logits/chosen": -0.9277985095977783, |
|
"logits/rejected": -0.9156295657157898, |
|
"logps/chosen": -1.8894391059875488, |
|
"logps/rejected": -2.130729913711548, |
|
"loss": 3.9842, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -18.894390106201172, |
|
"rewards/margins": 2.412907361984253, |
|
"rewards/rejected": -21.30729866027832, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.183206106870229, |
|
"grad_norm": 54.950758859746244, |
|
"learning_rate": 7.838781607860541e-07, |
|
"logits/chosen": -0.8941960334777832, |
|
"logits/rejected": -0.8841151595115662, |
|
"logps/chosen": -1.9732506275177002, |
|
"logps/rejected": -2.187934637069702, |
|
"loss": 3.7454, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -19.732505798339844, |
|
"rewards/margins": 2.146841526031494, |
|
"rewards/rejected": -21.87934684753418, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.18546791065875035, |
|
"grad_norm": 49.11264696752389, |
|
"learning_rate": 7.82976558602664e-07, |
|
"logits/chosen": -0.9305046796798706, |
|
"logits/rejected": -0.9404510855674744, |
|
"logps/chosen": -1.898559808731079, |
|
"logps/rejected": -2.1002445220947266, |
|
"loss": 4.1062, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -18.985597610473633, |
|
"rewards/margins": 2.016847848892212, |
|
"rewards/rejected": -21.0024471282959, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.1877297144472717, |
|
"grad_norm": 76.5927273539599, |
|
"learning_rate": 7.820509742392988e-07, |
|
"logits/chosen": -0.919454038143158, |
|
"logits/rejected": -0.914941668510437, |
|
"logps/chosen": -2.0516703128814697, |
|
"logps/rejected": -2.226200819015503, |
|
"loss": 4.1924, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -20.51670265197754, |
|
"rewards/margins": 1.745304822921753, |
|
"rewards/rejected": -22.262008666992188, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.18999151823579305, |
|
"grad_norm": 89.96851686792246, |
|
"learning_rate": 7.811014656565054e-07, |
|
"logits/chosen": -0.9175388813018799, |
|
"logits/rejected": -0.9047884345054626, |
|
"logps/chosen": -1.9007177352905273, |
|
"logps/rejected": -2.289841890335083, |
|
"loss": 3.5515, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -19.007177352905273, |
|
"rewards/margins": 3.891240119934082, |
|
"rewards/rejected": -22.898418426513672, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.1922533220243144, |
|
"grad_norm": 68.354941784998, |
|
"learning_rate": 7.801280923129773e-07, |
|
"logits/chosen": -0.9134962558746338, |
|
"logits/rejected": -0.915255069732666, |
|
"logps/chosen": -1.979697823524475, |
|
"logps/rejected": -2.148502826690674, |
|
"loss": 4.4942, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -19.79697608947754, |
|
"rewards/margins": 1.6880521774291992, |
|
"rewards/rejected": -21.485031127929688, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19451512581283573, |
|
"grad_norm": 70.68741427366325, |
|
"learning_rate": 7.791309151618305e-07, |
|
"logits/chosen": -0.904114842414856, |
|
"logits/rejected": -0.9040198922157288, |
|
"logps/chosen": -2.137181282043457, |
|
"logps/rejected": -2.3369667530059814, |
|
"loss": 4.2346, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -21.37181282043457, |
|
"rewards/margins": 1.9978532791137695, |
|
"rewards/rejected": -23.36966323852539, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.1967769296013571, |
|
"grad_norm": 49.78117435262967, |
|
"learning_rate": 7.781099966467874e-07, |
|
"logits/chosen": -0.9545247554779053, |
|
"logits/rejected": -0.9480579495429993, |
|
"logps/chosen": -1.931373119354248, |
|
"logps/rejected": -2.1317574977874756, |
|
"loss": 3.7192, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -19.313732147216797, |
|
"rewards/margins": 2.0038418769836426, |
|
"rewards/rejected": -21.317575454711914, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.19903873338987843, |
|
"grad_norm": 87.19584039986934, |
|
"learning_rate": 7.770654006982664e-07, |
|
"logits/chosen": -0.9274868965148926, |
|
"logits/rejected": -0.907908022403717, |
|
"logps/chosen": -2.2163946628570557, |
|
"logps/rejected": -2.405592918395996, |
|
"loss": 4.4877, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -22.16394805908203, |
|
"rewards/margins": 1.8919786214828491, |
|
"rewards/rejected": -24.055925369262695, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.20130053717839977, |
|
"grad_norm": 67.3834157436242, |
|
"learning_rate": 7.759971927293781e-07, |
|
"logits/chosen": -0.9331269264221191, |
|
"logits/rejected": -0.9273476600646973, |
|
"logps/chosen": -2.0026259422302246, |
|
"logps/rejected": -2.2201406955718994, |
|
"loss": 3.9806, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -20.02625846862793, |
|
"rewards/margins": 2.175147533416748, |
|
"rewards/rejected": -22.201404571533203, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.2035623409669211, |
|
"grad_norm": 67.10853700406643, |
|
"learning_rate": 7.749054396318297e-07, |
|
"logits/chosen": -0.9318501353263855, |
|
"logits/rejected": -0.9156869649887085, |
|
"logps/chosen": -2.1561312675476074, |
|
"logps/rejected": -2.351287364959717, |
|
"loss": 4.0478, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -21.56131362915039, |
|
"rewards/margins": 1.9515619277954102, |
|
"rewards/rejected": -23.512874603271484, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20582414475544247, |
|
"grad_norm": 80.66942714303244, |
|
"learning_rate": 7.737902097717356e-07, |
|
"logits/chosen": -0.9083584547042847, |
|
"logits/rejected": -0.9206920266151428, |
|
"logps/chosen": -2.1059789657592773, |
|
"logps/rejected": -2.380828857421875, |
|
"loss": 4.1678, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -21.059789657592773, |
|
"rewards/margins": 2.748499870300293, |
|
"rewards/rejected": -23.80828857421875, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.2080859485439638, |
|
"grad_norm": 69.60271028625591, |
|
"learning_rate": 7.726515729853367e-07, |
|
"logits/chosen": -0.9099538922309875, |
|
"logits/rejected": -0.9193957448005676, |
|
"logps/chosen": -2.098288059234619, |
|
"logps/rejected": -2.2589056491851807, |
|
"loss": 4.3746, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -20.98288345336914, |
|
"rewards/margins": 1.6061725616455078, |
|
"rewards/rejected": -22.58905601501465, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.21034775233248515, |
|
"grad_norm": 73.95526859841975, |
|
"learning_rate": 7.714896005746272e-07, |
|
"logits/chosen": -0.9390743970870972, |
|
"logits/rejected": -0.9398510456085205, |
|
"logps/chosen": -2.198225975036621, |
|
"logps/rejected": -2.485823631286621, |
|
"loss": 3.6005, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -21.982261657714844, |
|
"rewards/margins": 2.875973701477051, |
|
"rewards/rejected": -24.858234405517578, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.21260955612100652, |
|
"grad_norm": 115.43649279446808, |
|
"learning_rate": 7.703043653028896e-07, |
|
"logits/chosen": -0.9868816137313843, |
|
"logits/rejected": -0.9943164587020874, |
|
"logps/chosen": -2.428180694580078, |
|
"logps/rejected": -2.6686596870422363, |
|
"loss": 4.1693, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -24.281808853149414, |
|
"rewards/margins": 2.4047892093658447, |
|
"rewards/rejected": -26.68659782409668, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.21487135990952785, |
|
"grad_norm": 84.91161124024543, |
|
"learning_rate": 7.690959413901379e-07, |
|
"logits/chosen": -0.9326856732368469, |
|
"logits/rejected": -0.9249070882797241, |
|
"logps/chosen": -2.2586255073547363, |
|
"logps/rejected": -2.5268056392669678, |
|
"loss": 4.1, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -22.586254119873047, |
|
"rewards/margins": 2.6818017959594727, |
|
"rewards/rejected": -25.268056869506836, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2171331636980492, |
|
"grad_norm": 75.95382221779049, |
|
"learning_rate": 7.678644045084704e-07, |
|
"logits/chosen": -0.8716304302215576, |
|
"logits/rejected": -0.9014454483985901, |
|
"logps/chosen": -2.1552388668060303, |
|
"logps/rejected": -2.4034106731414795, |
|
"loss": 4.2664, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -21.55238914489746, |
|
"rewards/margins": 2.4817161560058594, |
|
"rewards/rejected": -24.034103393554688, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.21939496748657053, |
|
"grad_norm": 71.70588534717525, |
|
"learning_rate": 7.666098317773308e-07, |
|
"logits/chosen": -0.9344345331192017, |
|
"logits/rejected": -0.9383649826049805, |
|
"logps/chosen": -2.3225467205047607, |
|
"logps/rejected": -2.5641982555389404, |
|
"loss": 3.6566, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -23.2254638671875, |
|
"rewards/margins": 2.416515827178955, |
|
"rewards/rejected": -25.641983032226562, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.2216567712750919, |
|
"grad_norm": 63.70209243196811, |
|
"learning_rate": 7.653323017586789e-07, |
|
"logits/chosen": -0.9357935786247253, |
|
"logits/rejected": -0.9202609062194824, |
|
"logps/chosen": -2.091855525970459, |
|
"logps/rejected": -2.3324010372161865, |
|
"loss": 3.7794, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -20.91855239868164, |
|
"rewards/margins": 2.4054555892944336, |
|
"rewards/rejected": -23.32400894165039, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.22391857506361323, |
|
"grad_norm": 85.72067726547178, |
|
"learning_rate": 7.640318944520711e-07, |
|
"logits/chosen": -0.9170993566513062, |
|
"logits/rejected": -0.9224846363067627, |
|
"logps/chosen": -2.2841103076934814, |
|
"logps/rejected": -2.5552611351013184, |
|
"loss": 3.5253, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -22.841102600097656, |
|
"rewards/margins": 2.7115087509155273, |
|
"rewards/rejected": -25.552610397338867, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.22618037885213457, |
|
"grad_norm": 496.5312096722674, |
|
"learning_rate": 7.627086912896511e-07, |
|
"logits/chosen": -0.8243163824081421, |
|
"logits/rejected": -0.8487036824226379, |
|
"logps/chosen": -2.1957106590270996, |
|
"logps/rejected": -2.4948227405548096, |
|
"loss": 3.5183, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -21.95710563659668, |
|
"rewards/margins": 2.9911231994628906, |
|
"rewards/rejected": -24.948226928710938, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2284421826406559, |
|
"grad_norm": 70.0697669735273, |
|
"learning_rate": 7.613627751310499e-07, |
|
"logits/chosen": -0.95480877161026, |
|
"logits/rejected": -0.9551037549972534, |
|
"logps/chosen": -2.3536200523376465, |
|
"logps/rejected": -2.6055948734283447, |
|
"loss": 3.5656, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -23.53619956970215, |
|
"rewards/margins": 2.5197482109069824, |
|
"rewards/rejected": -26.055946350097656, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.23070398642917728, |
|
"grad_norm": 88.93036785903197, |
|
"learning_rate": 7.599942302581977e-07, |
|
"logits/chosen": -0.9218271374702454, |
|
"logits/rejected": -0.9325077533721924, |
|
"logps/chosen": -2.3475117683410645, |
|
"logps/rejected": -2.683833122253418, |
|
"loss": 3.4855, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -23.47511863708496, |
|
"rewards/margins": 3.363212823867798, |
|
"rewards/rejected": -26.83833122253418, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.23296579021769862, |
|
"grad_norm": 90.0166632810772, |
|
"learning_rate": 7.586031423700457e-07, |
|
"logits/chosen": -0.9283229112625122, |
|
"logits/rejected": -0.9252837896347046, |
|
"logps/chosen": -2.3153982162475586, |
|
"logps/rejected": -2.582225799560547, |
|
"loss": 3.9243, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -23.153982162475586, |
|
"rewards/margins": 2.6682748794555664, |
|
"rewards/rejected": -25.822254180908203, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.23522759400621995, |
|
"grad_norm": 82.46023897895928, |
|
"learning_rate": 7.571895985772e-07, |
|
"logits/chosen": -0.8776436448097229, |
|
"logits/rejected": -0.897273063659668, |
|
"logps/chosen": -2.323317050933838, |
|
"logps/rejected": -2.699765205383301, |
|
"loss": 3.1857, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -23.233171463012695, |
|
"rewards/margins": 3.7644832134246826, |
|
"rewards/rejected": -26.99765396118164, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.23748939779474132, |
|
"grad_norm": 82.19454890744656, |
|
"learning_rate": 7.557536873964661e-07, |
|
"logits/chosen": -0.9673488736152649, |
|
"logits/rejected": -0.9688251614570618, |
|
"logps/chosen": -2.662144660949707, |
|
"logps/rejected": -2.884061098098755, |
|
"loss": 4.0931, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -26.62144660949707, |
|
"rewards/margins": 2.219163417816162, |
|
"rewards/rejected": -28.84061050415039, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23975120158326266, |
|
"grad_norm": 69.82415627225754, |
|
"learning_rate": 7.542954987453069e-07, |
|
"logits/chosen": -0.9264833927154541, |
|
"logits/rejected": -0.9302193522453308, |
|
"logps/chosen": -2.4886820316314697, |
|
"logps/rejected": -2.736279010772705, |
|
"loss": 3.7142, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -24.88681983947754, |
|
"rewards/margins": 2.475968837738037, |
|
"rewards/rejected": -27.362789154052734, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.242013005371784, |
|
"grad_norm": 97.8985386292832, |
|
"learning_rate": 7.528151239362108e-07, |
|
"logits/chosen": -0.9315553903579712, |
|
"logits/rejected": -0.9390649199485779, |
|
"logps/chosen": -2.59812068939209, |
|
"logps/rejected": -2.882211685180664, |
|
"loss": 3.6407, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -25.981210708618164, |
|
"rewards/margins": 2.840907096862793, |
|
"rewards/rejected": -28.82211685180664, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.24427480916030533, |
|
"grad_norm": 92.40475152000312, |
|
"learning_rate": 7.513126556709748e-07, |
|
"logits/chosen": -0.8942272067070007, |
|
"logits/rejected": -0.8834237456321716, |
|
"logps/chosen": -2.5529963970184326, |
|
"logps/rejected": -2.9440810680389404, |
|
"loss": 3.287, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -25.529958724975586, |
|
"rewards/margins": 3.9108479022979736, |
|
"rewards/rejected": -29.440811157226562, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2465366129488267, |
|
"grad_norm": 84.45808750482125, |
|
"learning_rate": 7.497881880348984e-07, |
|
"logits/chosen": -0.853496789932251, |
|
"logits/rejected": -0.8538418412208557, |
|
"logps/chosen": -2.599656581878662, |
|
"logps/rejected": -2.9151229858398438, |
|
"loss": 3.72, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -25.996566772460938, |
|
"rewards/margins": 3.154664993286133, |
|
"rewards/rejected": -29.151229858398438, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.24879841673734804, |
|
"grad_norm": 108.36449765897002, |
|
"learning_rate": 7.482418164908931e-07, |
|
"logits/chosen": -0.8664608001708984, |
|
"logits/rejected": -0.8705554604530334, |
|
"logps/chosen": -2.702484607696533, |
|
"logps/rejected": -2.9504551887512207, |
|
"loss": 3.7899, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -27.024845123291016, |
|
"rewards/margins": 2.479705572128296, |
|
"rewards/rejected": -29.504549026489258, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2510602205258694, |
|
"grad_norm": 97.25049221586009, |
|
"learning_rate": 7.466736378735035e-07, |
|
"logits/chosen": -0.8632520437240601, |
|
"logits/rejected": -0.8773136138916016, |
|
"logps/chosen": -2.784451484680176, |
|
"logps/rejected": -3.137439250946045, |
|
"loss": 3.5651, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -27.84451675415039, |
|
"rewards/margins": 3.529878616333008, |
|
"rewards/rejected": -31.374393463134766, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.2533220243143907, |
|
"grad_norm": 89.03725814368798, |
|
"learning_rate": 7.450837503828439e-07, |
|
"logits/chosen": -0.8495292663574219, |
|
"logits/rejected": -0.8434633612632751, |
|
"logps/chosen": -2.8906612396240234, |
|
"logps/rejected": -3.3082571029663086, |
|
"loss": 3.3517, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -28.90660858154297, |
|
"rewards/margins": 4.175960063934326, |
|
"rewards/rejected": -33.08256912231445, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.2555838281029121, |
|
"grad_norm": 83.1197248364895, |
|
"learning_rate": 7.43472253578449e-07, |
|
"logits/chosen": -0.8363775610923767, |
|
"logits/rejected": -0.8391093611717224, |
|
"logps/chosen": -2.605562925338745, |
|
"logps/rejected": -2.962904930114746, |
|
"loss": 3.435, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -26.055631637573242, |
|
"rewards/margins": 3.5734176635742188, |
|
"rewards/rejected": -29.629047393798828, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.2578456318914334, |
|
"grad_norm": 96.33426382345725, |
|
"learning_rate": 7.418392483730389e-07, |
|
"logits/chosen": -0.8500136137008667, |
|
"logits/rejected": -0.8644757270812988, |
|
"logps/chosen": -2.8558506965637207, |
|
"logps/rejected": -3.2193422317504883, |
|
"loss": 3.3585, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -28.558509826660156, |
|
"rewards/margins": 3.634913444519043, |
|
"rewards/rejected": -32.19342041015625, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.26010743567995476, |
|
"grad_norm": 87.46521771371307, |
|
"learning_rate": 7.401848370262012e-07, |
|
"logits/chosen": -0.8887827396392822, |
|
"logits/rejected": -0.8773460984230042, |
|
"logps/chosen": -2.8665177822113037, |
|
"logps/rejected": -3.1353914737701416, |
|
"loss": 3.7008, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -28.665176391601562, |
|
"rewards/margins": 2.688736915588379, |
|
"rewards/rejected": -31.353914260864258, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2623692394684761, |
|
"grad_norm": 104.59325034684498, |
|
"learning_rate": 7.385091231379856e-07, |
|
"logits/chosen": -0.8525288701057434, |
|
"logits/rejected": -0.854878842830658, |
|
"logps/chosen": -3.076798439025879, |
|
"logps/rejected": -3.424906015396118, |
|
"loss": 3.7519, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -30.767982482910156, |
|
"rewards/margins": 3.481076717376709, |
|
"rewards/rejected": -34.249061584472656, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.26463104325699743, |
|
"grad_norm": 86.85796272452473, |
|
"learning_rate": 7.368122116424182e-07, |
|
"logits/chosen": -0.8492611646652222, |
|
"logits/rejected": -0.8522269129753113, |
|
"logps/chosen": -3.021807909011841, |
|
"logps/rejected": -3.3991785049438477, |
|
"loss": 3.4941, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -30.21807861328125, |
|
"rewards/margins": 3.773710250854492, |
|
"rewards/rejected": -33.991783142089844, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.2668928470455188, |
|
"grad_norm": 111.1000685978278, |
|
"learning_rate": 7.350942088009289e-07, |
|
"logits/chosen": -0.8642144203186035, |
|
"logits/rejected": -0.8728247284889221, |
|
"logps/chosen": -3.0954854488372803, |
|
"logps/rejected": -3.433528423309326, |
|
"loss": 3.2773, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -30.954856872558594, |
|
"rewards/margins": 3.3804259300231934, |
|
"rewards/rejected": -34.33528137207031, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.26915465083404017, |
|
"grad_norm": 142.32137319032904, |
|
"learning_rate": 7.333552221956986e-07, |
|
"logits/chosen": -0.9806150197982788, |
|
"logits/rejected": -0.9809490442276001, |
|
"logps/chosen": -3.2465367317199707, |
|
"logps/rejected": -3.631196975708008, |
|
"loss": 3.7568, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -32.46536636352539, |
|
"rewards/margins": 3.8466014862060547, |
|
"rewards/rejected": -36.31196975708008, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.2714164546225615, |
|
"grad_norm": 106.21999349893953, |
|
"learning_rate": 7.315953607229217e-07, |
|
"logits/chosen": -0.8744773268699646, |
|
"logits/rejected": -0.8868200778961182, |
|
"logps/chosen": -3.3663530349731445, |
|
"logps/rejected": -3.7398698329925537, |
|
"loss": 3.3962, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -33.66353225708008, |
|
"rewards/margins": 3.73516845703125, |
|
"rewards/rejected": -37.39870071411133, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27367825841108284, |
|
"grad_norm": 136.21663214062764, |
|
"learning_rate": 7.298147345859869e-07, |
|
"logits/chosen": -0.8941931128501892, |
|
"logits/rejected": -0.9127393364906311, |
|
"logps/chosen": -3.2252919673919678, |
|
"logps/rejected": -3.617177963256836, |
|
"loss": 3.541, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -32.2529182434082, |
|
"rewards/margins": 3.918860912322998, |
|
"rewards/rejected": -36.171783447265625, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.2759400621996042, |
|
"grad_norm": 110.82874244685783, |
|
"learning_rate": 7.280134552885762e-07, |
|
"logits/chosen": -0.8409429788589478, |
|
"logits/rejected": -0.8534374237060547, |
|
"logps/chosen": -3.2517788410186768, |
|
"logps/rejected": -3.6886167526245117, |
|
"loss": 3.2942, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -32.517791748046875, |
|
"rewards/margins": 4.368377685546875, |
|
"rewards/rejected": -36.88616943359375, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.2782018659881255, |
|
"grad_norm": 104.89134559764693, |
|
"learning_rate": 7.261916356276831e-07, |
|
"logits/chosen": -0.8516025543212891, |
|
"logits/rejected": -0.8547568917274475, |
|
"logps/chosen": -3.388638734817505, |
|
"logps/rejected": -3.9394216537475586, |
|
"loss": 2.8993, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -33.886390686035156, |
|
"rewards/margins": 5.507827281951904, |
|
"rewards/rejected": -39.39421463012695, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.2804636697766469, |
|
"grad_norm": 97.54336648263664, |
|
"learning_rate": 7.243493896865486e-07, |
|
"logits/chosen": -0.8488821387290955, |
|
"logits/rejected": -0.8565881848335266, |
|
"logps/chosen": -3.166471481323242, |
|
"logps/rejected": -3.4654765129089355, |
|
"loss": 3.6222, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -31.664714813232422, |
|
"rewards/margins": 2.9900498390197754, |
|
"rewards/rejected": -34.65476608276367, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.2827254735651682, |
|
"grad_norm": 172.00687462942722, |
|
"learning_rate": 7.224868328275169e-07, |
|
"logits/chosen": -0.8468244075775146, |
|
"logits/rejected": -0.8559240102767944, |
|
"logps/chosen": -3.2884538173675537, |
|
"logps/rejected": -3.7175629138946533, |
|
"loss": 3.5839, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -32.88453674316406, |
|
"rewards/margins": 4.291090965270996, |
|
"rewards/rejected": -37.175628662109375, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.28498727735368956, |
|
"grad_norm": 174.35341145683532, |
|
"learning_rate": 7.206040816848126e-07, |
|
"logits/chosen": -0.852262020111084, |
|
"logits/rejected": -0.8540931940078735, |
|
"logps/chosen": -3.4291653633117676, |
|
"logps/rejected": -3.6988754272460938, |
|
"loss": 4.5346, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -34.29165267944336, |
|
"rewards/margins": 2.6971054077148438, |
|
"rewards/rejected": -36.9887580871582, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.2872490811422109, |
|
"grad_norm": 138.87306344888844, |
|
"learning_rate": 7.187012541572356e-07, |
|
"logits/chosen": -0.9179739952087402, |
|
"logits/rejected": -0.9147102236747742, |
|
"logps/chosen": -3.4903879165649414, |
|
"logps/rejected": -3.916646718978882, |
|
"loss": 3.4458, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -34.90387725830078, |
|
"rewards/margins": 4.2625837326049805, |
|
"rewards/rejected": -39.166465759277344, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.28951088493073224, |
|
"grad_norm": 143.69937293237177, |
|
"learning_rate": 7.167784694007791e-07, |
|
"logits/chosen": -0.8328741192817688, |
|
"logits/rejected": -0.8579819798469543, |
|
"logps/chosen": -3.286675453186035, |
|
"logps/rejected": -3.76692533493042, |
|
"loss": 3.2917, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -32.866756439208984, |
|
"rewards/margins": 4.802499771118164, |
|
"rewards/rejected": -37.669254302978516, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.2917726887192536, |
|
"grad_norm": 99.31418978127101, |
|
"learning_rate": 7.148358478211682e-07, |
|
"logits/chosen": -0.8990265130996704, |
|
"logits/rejected": -0.888577401638031, |
|
"logps/chosen": -3.4041242599487305, |
|
"logps/rejected": -3.8776817321777344, |
|
"loss": 2.8207, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -34.04124450683594, |
|
"rewards/margins": 4.735580921173096, |
|
"rewards/rejected": -38.77682113647461, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.29403449250777497, |
|
"grad_norm": 102.38604318491103, |
|
"learning_rate": 7.128735110663187e-07, |
|
"logits/chosen": -0.8688659071922302, |
|
"logits/rejected": -0.8469004034996033, |
|
"logps/chosen": -3.0947279930114746, |
|
"logps/rejected": -3.5679235458374023, |
|
"loss": 3.3385, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -30.947277069091797, |
|
"rewards/margins": 4.731960773468018, |
|
"rewards/rejected": -35.679237365722656, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2962962962962963, |
|
"grad_norm": 113.17524735613614, |
|
"learning_rate": 7.108915820187211e-07, |
|
"logits/chosen": -0.8502359986305237, |
|
"logits/rejected": -0.8502506017684937, |
|
"logps/chosen": -3.5472612380981445, |
|
"logps/rejected": -3.9762022495269775, |
|
"loss": 3.7675, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -35.47261047363281, |
|
"rewards/margins": 4.289410591125488, |
|
"rewards/rejected": -39.762020111083984, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.29855810008481765, |
|
"grad_norm": 128.69199015701764, |
|
"learning_rate": 7.088901847877447e-07, |
|
"logits/chosen": -0.8379947543144226, |
|
"logits/rejected": -0.8463510274887085, |
|
"logps/chosen": -3.320732831954956, |
|
"logps/rejected": -3.5967845916748047, |
|
"loss": 4.558, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -33.20732498168945, |
|
"rewards/margins": 2.76051926612854, |
|
"rewards/rejected": -35.96784591674805, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.300819903873339, |
|
"grad_norm": 122.95029199912098, |
|
"learning_rate": 7.068694447018658e-07, |
|
"logits/chosen": -0.8884780406951904, |
|
"logits/rejected": -0.8965522050857544, |
|
"logps/chosen": -3.27457857131958, |
|
"logps/rejected": -3.729426383972168, |
|
"loss": 3.4103, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -32.745792388916016, |
|
"rewards/margins": 4.5484747886657715, |
|
"rewards/rejected": -37.29426574707031, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.3030817076618603, |
|
"grad_norm": 102.55252063015635, |
|
"learning_rate": 7.048294883008199e-07, |
|
"logits/chosen": -0.8615848422050476, |
|
"logits/rejected": -0.8659648895263672, |
|
"logps/chosen": -3.20782208442688, |
|
"logps/rejected": -3.573031425476074, |
|
"loss": 3.4383, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -32.07822036743164, |
|
"rewards/margins": 3.652094841003418, |
|
"rewards/rejected": -35.730316162109375, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.3053435114503817, |
|
"grad_norm": 82.3321624317021, |
|
"learning_rate": 7.027704433276776e-07, |
|
"logits/chosen": -0.8441089391708374, |
|
"logits/rejected": -0.8458942174911499, |
|
"logps/chosen": -3.187887191772461, |
|
"logps/rejected": -3.677952289581299, |
|
"loss": 3.0593, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -31.878875732421875, |
|
"rewards/margins": 4.900643825531006, |
|
"rewards/rejected": -36.779518127441406, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.307605315238903, |
|
"grad_norm": 188.5700545530925, |
|
"learning_rate": 7.006924387208452e-07, |
|
"logits/chosen": -0.8357016444206238, |
|
"logits/rejected": -0.8290910720825195, |
|
"logps/chosen": -3.086805820465088, |
|
"logps/rejected": -3.4287164211273193, |
|
"loss": 3.5074, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -30.868057250976562, |
|
"rewards/margins": 3.419107437133789, |
|
"rewards/rejected": -34.28716278076172, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.30986711902742436, |
|
"grad_norm": 108.37290517834423, |
|
"learning_rate": 6.985956046059904e-07, |
|
"logits/chosen": -0.8443328142166138, |
|
"logits/rejected": -0.8402392268180847, |
|
"logps/chosen": -2.943553924560547, |
|
"logps/rejected": -3.3497257232666016, |
|
"loss": 3.691, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -29.43553924560547, |
|
"rewards/margins": 4.061717987060547, |
|
"rewards/rejected": -33.497257232666016, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.31212892281594573, |
|
"grad_norm": 77.64006113050937, |
|
"learning_rate": 6.964800722878945e-07, |
|
"logits/chosen": -0.7976541519165039, |
|
"logits/rejected": -0.8123403787612915, |
|
"logps/chosen": -3.1234209537506104, |
|
"logps/rejected": -3.665281057357788, |
|
"loss": 3.0089, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -31.234210968017578, |
|
"rewards/margins": 5.418602466583252, |
|
"rewards/rejected": -36.65281295776367, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.31439072660446704, |
|
"grad_norm": 84.56545363105515, |
|
"learning_rate": 6.943459742422287e-07, |
|
"logits/chosen": -0.8222934603691101, |
|
"logits/rejected": -0.8047088384628296, |
|
"logps/chosen": -3.0740787982940674, |
|
"logps/rejected": -3.537842035293579, |
|
"loss": 3.5096, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -30.74079132080078, |
|
"rewards/margins": 4.63762903213501, |
|
"rewards/rejected": -35.37841796875, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3166525303929884, |
|
"grad_norm": 89.71503011819237, |
|
"learning_rate": 6.921934441072597e-07, |
|
"logits/chosen": -0.8978405594825745, |
|
"logits/rejected": -0.9020492434501648, |
|
"logps/chosen": -3.2600619792938232, |
|
"logps/rejected": -3.6400341987609863, |
|
"loss": 3.6164, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -32.60062026977539, |
|
"rewards/margins": 3.7997186183929443, |
|
"rewards/rejected": -36.40034484863281, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3189143341815098, |
|
"grad_norm": 105.19674463664295, |
|
"learning_rate": 6.900226166754807e-07, |
|
"logits/chosen": -0.8642122149467468, |
|
"logits/rejected": -0.8808465003967285, |
|
"logps/chosen": -3.301727533340454, |
|
"logps/rejected": -3.619366407394409, |
|
"loss": 3.953, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -33.017276763916016, |
|
"rewards/margins": 3.1763877868652344, |
|
"rewards/rejected": -36.19366455078125, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.3211761379700311, |
|
"grad_norm": 107.67624941944278, |
|
"learning_rate": 6.8783362788517e-07, |
|
"logits/chosen": -0.8666509985923767, |
|
"logits/rejected": -0.8702815175056458, |
|
"logps/chosen": -3.2676572799682617, |
|
"logps/rejected": -3.6641600131988525, |
|
"loss": 4.0009, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -32.676578521728516, |
|
"rewards/margins": 3.9650261402130127, |
|
"rewards/rejected": -36.6416015625, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.32343794175855245, |
|
"grad_norm": 99.32873615736939, |
|
"learning_rate": 6.856266148118796e-07, |
|
"logits/chosen": -0.8618003726005554, |
|
"logits/rejected": -0.8743412494659424, |
|
"logps/chosen": -3.03981614112854, |
|
"logps/rejected": -3.484009027481079, |
|
"loss": 3.265, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -30.398160934448242, |
|
"rewards/margins": 4.441934585571289, |
|
"rewards/rejected": -34.84009552001953, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.3256997455470738, |
|
"grad_norm": 102.8416386415525, |
|
"learning_rate": 6.834017156598512e-07, |
|
"logits/chosen": -0.8412870764732361, |
|
"logits/rejected": -0.840274453163147, |
|
"logps/chosen": -3.2066195011138916, |
|
"logps/rejected": -3.617882490158081, |
|
"loss": 3.5015, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -32.06619644165039, |
|
"rewards/margins": 4.112632751464844, |
|
"rewards/rejected": -36.178829193115234, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.3279615493355951, |
|
"grad_norm": 81.13201808680371, |
|
"learning_rate": 6.811590697533607e-07, |
|
"logits/chosen": -0.9234153628349304, |
|
"logits/rejected": -0.9473743438720703, |
|
"logps/chosen": -3.108992576599121, |
|
"logps/rejected": -3.5079336166381836, |
|
"loss": 3.3848, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -31.089927673339844, |
|
"rewards/margins": 3.9894118309020996, |
|
"rewards/rejected": -35.07933807373047, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.3302233531241165, |
|
"grad_norm": 126.90983253589141, |
|
"learning_rate": 6.788988175279951e-07, |
|
"logits/chosen": -0.8728285431861877, |
|
"logits/rejected": -0.8665140271186829, |
|
"logps/chosen": -3.129547357559204, |
|
"logps/rejected": -3.460236072540283, |
|
"loss": 3.8778, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -31.295473098754883, |
|
"rewards/margins": 3.3068888187408447, |
|
"rewards/rejected": -34.602359771728516, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3324851569126378, |
|
"grad_norm": 91.06075264383888, |
|
"learning_rate": 6.766211005218577e-07, |
|
"logits/chosen": -0.8505449295043945, |
|
"logits/rejected": -0.8593109250068665, |
|
"logps/chosen": -3.05971622467041, |
|
"logps/rejected": -3.5910305976867676, |
|
"loss": 2.9708, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -30.59716033935547, |
|
"rewards/margins": 5.313144683837891, |
|
"rewards/rejected": -35.91030502319336, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.33474696070115917, |
|
"grad_norm": 85.9903517839081, |
|
"learning_rate": 6.743260613667047e-07, |
|
"logits/chosen": -0.9269782900810242, |
|
"logits/rejected": -0.9214802980422974, |
|
"logps/chosen": -3.011857748031616, |
|
"logps/rejected": -3.465926170349121, |
|
"loss": 3.2708, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -30.11857795715332, |
|
"rewards/margins": 4.540684223175049, |
|
"rewards/rejected": -34.659263610839844, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.33700876448968053, |
|
"grad_norm": 71.98194941268687, |
|
"learning_rate": 6.720138437790139e-07, |
|
"logits/chosen": -0.8807485103607178, |
|
"logits/rejected": -0.8728206157684326, |
|
"logps/chosen": -2.951597213745117, |
|
"logps/rejected": -3.371277332305908, |
|
"loss": 3.1961, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -29.515972137451172, |
|
"rewards/margins": 4.196802139282227, |
|
"rewards/rejected": -33.71277618408203, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.33927056827820185, |
|
"grad_norm": 115.41856316684272, |
|
"learning_rate": 6.696845925509848e-07, |
|
"logits/chosen": -0.9250473976135254, |
|
"logits/rejected": -0.9204010963439941, |
|
"logps/chosen": -3.063525676727295, |
|
"logps/rejected": -3.4022319316864014, |
|
"loss": 3.6883, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -30.635255813598633, |
|
"rewards/margins": 3.387064218521118, |
|
"rewards/rejected": -34.02231979370117, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3415323720667232, |
|
"grad_norm": 101.95454524075308, |
|
"learning_rate": 6.673384535414718e-07, |
|
"logits/chosen": -0.9343738555908203, |
|
"logits/rejected": -0.9356783628463745, |
|
"logps/chosen": -3.1804661750793457, |
|
"logps/rejected": -3.466540575027466, |
|
"loss": 3.9923, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -31.80466079711914, |
|
"rewards/margins": 2.860745906829834, |
|
"rewards/rejected": -34.665401458740234, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.3437941758552446, |
|
"grad_norm": 94.85696940073606, |
|
"learning_rate": 6.649755736668511e-07, |
|
"logits/chosen": -0.8427945971488953, |
|
"logits/rejected": -0.8381005525588989, |
|
"logps/chosen": -2.847813367843628, |
|
"logps/rejected": -3.27738356590271, |
|
"loss": 2.7389, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -28.478132247924805, |
|
"rewards/margins": 4.295701026916504, |
|
"rewards/rejected": -32.77383804321289, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3460559796437659, |
|
"grad_norm": 92.66825859497754, |
|
"learning_rate": 6.625961008918192e-07, |
|
"logits/chosen": -0.8821796178817749, |
|
"logits/rejected": -0.8879325985908508, |
|
"logps/chosen": -2.928504467010498, |
|
"logps/rejected": -3.411726951599121, |
|
"loss": 2.8879, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -29.285043716430664, |
|
"rewards/margins": 4.832226753234863, |
|
"rewards/rejected": -34.117271423339844, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.34831778343228725, |
|
"grad_norm": 104.67373787864423, |
|
"learning_rate": 6.602001842201289e-07, |
|
"logits/chosen": -0.8627166152000427, |
|
"logits/rejected": -0.8778930902481079, |
|
"logps/chosen": -2.948608160018921, |
|
"logps/rejected": -3.2540464401245117, |
|
"loss": 3.8502, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -29.486083984375, |
|
"rewards/margins": 3.0543811321258545, |
|
"rewards/rejected": -32.540462493896484, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.3505795872208086, |
|
"grad_norm": 85.52036897226773, |
|
"learning_rate": 6.577879736852571e-07, |
|
"logits/chosen": -0.8922989368438721, |
|
"logits/rejected": -0.9067973494529724, |
|
"logps/chosen": -3.0298097133636475, |
|
"logps/rejected": -3.309849977493286, |
|
"loss": 3.789, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -30.298097610473633, |
|
"rewards/margins": 2.8003997802734375, |
|
"rewards/rejected": -33.0984992980957, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.35284139100932993, |
|
"grad_norm": 95.84181793437827, |
|
"learning_rate": 6.553596203410112e-07, |
|
"logits/chosen": -0.8902891874313354, |
|
"logits/rejected": -0.8962793946266174, |
|
"logps/chosen": -3.0103700160980225, |
|
"logps/rejected": -3.539245843887329, |
|
"loss": 2.5342, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -30.103702545166016, |
|
"rewards/margins": 5.288753509521484, |
|
"rewards/rejected": -35.392459869384766, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.3551031947978513, |
|
"grad_norm": 83.48045959211674, |
|
"learning_rate": 6.529152762520688e-07, |
|
"logits/chosen": -0.890064537525177, |
|
"logits/rejected": -0.8975638151168823, |
|
"logps/chosen": -3.166869878768921, |
|
"logps/rejected": -3.534972667694092, |
|
"loss": 3.3879, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -31.6686954498291, |
|
"rewards/margins": 3.681027889251709, |
|
"rewards/rejected": -35.34972381591797, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.3573649985863726, |
|
"grad_norm": 104.36848999624989, |
|
"learning_rate": 6.504550944844558e-07, |
|
"logits/chosen": -0.8488032221794128, |
|
"logits/rejected": -0.8643382787704468, |
|
"logps/chosen": -3.09021258354187, |
|
"logps/rejected": -3.471135139465332, |
|
"loss": 3.4617, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -30.90212631225586, |
|
"rewards/margins": 3.8092260360717773, |
|
"rewards/rejected": -34.71135330200195, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.359626802374894, |
|
"grad_norm": 104.97216908078047, |
|
"learning_rate": 6.479792290959613e-07, |
|
"logits/chosen": -0.8663011789321899, |
|
"logits/rejected": -0.8864620923995972, |
|
"logps/chosen": -3.1034600734710693, |
|
"logps/rejected": -3.6639652252197266, |
|
"loss": 3.03, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -31.03459930419922, |
|
"rewards/margins": 5.60504674911499, |
|
"rewards/rejected": -36.639652252197266, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.36188860616341534, |
|
"grad_norm": 102.25000126815084, |
|
"learning_rate": 6.454878351264906e-07, |
|
"logits/chosen": -0.8334028720855713, |
|
"logits/rejected": -0.8324363231658936, |
|
"logps/chosen": -3.0704848766326904, |
|
"logps/rejected": -3.4776694774627686, |
|
"loss": 3.5175, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -30.704849243164062, |
|
"rewards/margins": 4.07184362411499, |
|
"rewards/rejected": -34.776695251464844, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36415040995193665, |
|
"grad_norm": 109.80268605933277, |
|
"learning_rate": 6.429810685883565e-07, |
|
"logits/chosen": -0.8985482454299927, |
|
"logits/rejected": -0.8990384340286255, |
|
"logps/chosen": -3.2140891551971436, |
|
"logps/rejected": -3.6064000129699707, |
|
"loss": 3.1092, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -32.140892028808594, |
|
"rewards/margins": 3.9231081008911133, |
|
"rewards/rejected": -36.06399917602539, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.366412213740458, |
|
"grad_norm": 108.55618372141429, |
|
"learning_rate": 6.404590864565088e-07, |
|
"logits/chosen": -0.8505939245223999, |
|
"logits/rejected": -0.8520810008049011, |
|
"logps/chosen": -3.1991541385650635, |
|
"logps/rejected": -3.4929707050323486, |
|
"loss": 3.8613, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -31.991540908813477, |
|
"rewards/margins": 2.9381637573242188, |
|
"rewards/rejected": -34.92970275878906, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.3686740175289794, |
|
"grad_norm": 104.50273506595065, |
|
"learning_rate": 6.379220466587063e-07, |
|
"logits/chosen": -0.8530042171478271, |
|
"logits/rejected": -0.8485729694366455, |
|
"logps/chosen": -3.2691879272460938, |
|
"logps/rejected": -3.709401845932007, |
|
"loss": 3.0171, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -32.69187927246094, |
|
"rewards/margins": 4.4021406173706055, |
|
"rewards/rejected": -37.094017028808594, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.3709358213175007, |
|
"grad_norm": 110.29027629310306, |
|
"learning_rate": 6.353701080656254e-07, |
|
"logits/chosen": -0.8432658910751343, |
|
"logits/rejected": -0.8815178275108337, |
|
"logps/chosen": -3.3604822158813477, |
|
"logps/rejected": -3.718573808670044, |
|
"loss": 3.4067, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -33.60482406616211, |
|
"rewards/margins": 3.580916166305542, |
|
"rewards/rejected": -37.18573760986328, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.37319762510602206, |
|
"grad_norm": 89.50224289291396, |
|
"learning_rate": 6.32803430480913e-07, |
|
"logits/chosen": -0.8596310019493103, |
|
"logits/rejected": -0.8655349016189575, |
|
"logps/chosen": -3.311291456222534, |
|
"logps/rejected": -3.7834932804107666, |
|
"loss": 3.3279, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -33.1129150390625, |
|
"rewards/margins": 4.722017288208008, |
|
"rewards/rejected": -37.83493423461914, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3754594288945434, |
|
"grad_norm": 101.95377945602579, |
|
"learning_rate": 6.302221746311782e-07, |
|
"logits/chosen": -0.8723585605621338, |
|
"logits/rejected": -0.8533425331115723, |
|
"logps/chosen": -3.2129547595977783, |
|
"logps/rejected": -3.6323916912078857, |
|
"loss": 3.3934, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -32.129547119140625, |
|
"rewards/margins": 4.194368839263916, |
|
"rewards/rejected": -36.323917388916016, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.37772123268306473, |
|
"grad_norm": 96.75845720827888, |
|
"learning_rate": 6.276265021559288e-07, |
|
"logits/chosen": -0.890640377998352, |
|
"logits/rejected": -0.8925571441650391, |
|
"logps/chosen": -3.5187129974365234, |
|
"logps/rejected": -3.8824410438537598, |
|
"loss": 3.7992, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -35.187129974365234, |
|
"rewards/margins": 3.637279987335205, |
|
"rewards/rejected": -38.82441329956055, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.3799830364715861, |
|
"grad_norm": 87.40404880493536, |
|
"learning_rate": 6.250165755974487e-07, |
|
"logits/chosen": -0.8265157341957092, |
|
"logits/rejected": -0.835180938243866, |
|
"logps/chosen": -3.481316328048706, |
|
"logps/rejected": -3.9201066493988037, |
|
"loss": 3.0053, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -34.81316375732422, |
|
"rewards/margins": 4.387899398803711, |
|
"rewards/rejected": -39.20106506347656, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.3822448402601074, |
|
"grad_norm": 103.05270142062696, |
|
"learning_rate": 6.223925583906192e-07, |
|
"logits/chosen": -0.8841734528541565, |
|
"logits/rejected": -0.8769547939300537, |
|
"logps/chosen": -3.5402333736419678, |
|
"logps/rejected": -4.006646156311035, |
|
"loss": 3.0164, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -35.4023323059082, |
|
"rewards/margins": 4.664129257202148, |
|
"rewards/rejected": -40.06645965576172, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.3845066440486288, |
|
"grad_norm": 99.14006440661235, |
|
"learning_rate": 6.19754614852685e-07, |
|
"logits/chosen": -0.8786910772323608, |
|
"logits/rejected": -0.8795101046562195, |
|
"logps/chosen": -3.42849063873291, |
|
"logps/rejected": -3.857112407684326, |
|
"loss": 2.992, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -34.28490447998047, |
|
"rewards/margins": 4.286218166351318, |
|
"rewards/rejected": -38.57112503051758, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38676844783715014, |
|
"grad_norm": 101.36285623071498, |
|
"learning_rate": 6.171029101729644e-07, |
|
"logits/chosen": -0.8048484325408936, |
|
"logits/rejected": -0.8320644497871399, |
|
"logps/chosen": -3.6026647090911865, |
|
"logps/rejected": -4.107236385345459, |
|
"loss": 3.0985, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -36.02664566040039, |
|
"rewards/margins": 5.045715808868408, |
|
"rewards/rejected": -41.07236099243164, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.38903025162567145, |
|
"grad_norm": 107.03781497013733, |
|
"learning_rate": 6.144376104025055e-07, |
|
"logits/chosen": -0.895035982131958, |
|
"logits/rejected": -0.8912597894668579, |
|
"logps/chosen": -3.4788260459899902, |
|
"logps/rejected": -3.9913995265960693, |
|
"loss": 2.8953, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -34.78825759887695, |
|
"rewards/margins": 5.125735759735107, |
|
"rewards/rejected": -39.91399383544922, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.3912920554141928, |
|
"grad_norm": 111.20601005232534, |
|
"learning_rate": 6.117588824436873e-07, |
|
"logits/chosen": -0.9020571112632751, |
|
"logits/rejected": -0.9188283681869507, |
|
"logps/chosen": -3.605067491531372, |
|
"logps/rejected": -3.988225221633911, |
|
"loss": 3.5203, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -36.05067825317383, |
|
"rewards/margins": 3.8315775394439697, |
|
"rewards/rejected": -39.88224792480469, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.3935538592027142, |
|
"grad_norm": 103.4654281484595, |
|
"learning_rate": 6.090668940397688e-07, |
|
"logits/chosen": -0.8591521978378296, |
|
"logits/rejected": -0.8686568737030029, |
|
"logps/chosen": -3.537184953689575, |
|
"logps/rejected": -4.049908638000488, |
|
"loss": 2.8987, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -35.371849060058594, |
|
"rewards/margins": 5.127236843109131, |
|
"rewards/rejected": -40.499088287353516, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.3958156629912355, |
|
"grad_norm": 135.38904727694083, |
|
"learning_rate": 6.063618137643844e-07, |
|
"logits/chosen": -0.8473508358001709, |
|
"logits/rejected": -0.8524729609489441, |
|
"logps/chosen": -3.561941385269165, |
|
"logps/rejected": -4.00567626953125, |
|
"loss": 2.9943, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -35.61941146850586, |
|
"rewards/margins": 4.437344551086426, |
|
"rewards/rejected": -40.05675506591797, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.39807746677975686, |
|
"grad_norm": 110.97675460831563, |
|
"learning_rate": 6.03643811010988e-07, |
|
"logits/chosen": -0.8901565670967102, |
|
"logits/rejected": -0.9126089811325073, |
|
"logps/chosen": -3.7035725116729736, |
|
"logps/rejected": -4.154231071472168, |
|
"loss": 2.9707, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -37.035728454589844, |
|
"rewards/margins": 4.506581783294678, |
|
"rewards/rejected": -41.54230499267578, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.4003392705682782, |
|
"grad_norm": 124.1530845740034, |
|
"learning_rate": 6.009130559822453e-07, |
|
"logits/chosen": -0.9078172445297241, |
|
"logits/rejected": -0.921136736869812, |
|
"logps/chosen": -3.670597553253174, |
|
"logps/rejected": -3.9640297889709473, |
|
"loss": 4.0276, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -36.70597457885742, |
|
"rewards/margins": 2.9343202114105225, |
|
"rewards/rejected": -39.64029312133789, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.40260107435679954, |
|
"grad_norm": 112.28760201530416, |
|
"learning_rate": 5.981697196793758e-07, |
|
"logits/chosen": -0.9355087280273438, |
|
"logits/rejected": -0.9506640434265137, |
|
"logps/chosen": -3.7393956184387207, |
|
"logps/rejected": -4.199625015258789, |
|
"loss": 2.6604, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -37.39395523071289, |
|
"rewards/margins": 4.602291584014893, |
|
"rewards/rejected": -41.99625015258789, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.4048628781453209, |
|
"grad_norm": 113.95118236789975, |
|
"learning_rate": 5.954139738914446e-07, |
|
"logits/chosen": -0.9104683995246887, |
|
"logits/rejected": -0.9266977310180664, |
|
"logps/chosen": -3.624338388442993, |
|
"logps/rejected": -4.1234588623046875, |
|
"loss": 3.0403, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -36.243385314941406, |
|
"rewards/margins": 4.991206169128418, |
|
"rewards/rejected": -41.23459243774414, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.4071246819338422, |
|
"grad_norm": 116.42013410546693, |
|
"learning_rate": 5.92645991184605e-07, |
|
"logits/chosen": -0.8819618225097656, |
|
"logits/rejected": -0.9078726768493652, |
|
"logps/chosen": -3.8188674449920654, |
|
"logps/rejected": -4.311014652252197, |
|
"loss": 2.8079, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -38.18867492675781, |
|
"rewards/margins": 4.921470642089844, |
|
"rewards/rejected": -43.11014938354492, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4093864857223636, |
|
"grad_norm": 105.7185913445078, |
|
"learning_rate": 5.898659448912917e-07, |
|
"logits/chosen": -0.892175555229187, |
|
"logits/rejected": -0.9181370735168457, |
|
"logps/chosen": -3.6667990684509277, |
|
"logps/rejected": -4.1268720626831055, |
|
"loss": 3.2278, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -36.667991638183594, |
|
"rewards/margins": 4.600729465484619, |
|
"rewards/rejected": -41.26872253417969, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.41164828951088495, |
|
"grad_norm": 113.58444674434676, |
|
"learning_rate": 5.870740090993676e-07, |
|
"logits/chosen": -0.9441367983818054, |
|
"logits/rejected": -0.9531031250953674, |
|
"logps/chosen": -3.8816134929656982, |
|
"logps/rejected": -4.417888641357422, |
|
"loss": 2.726, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -38.816131591796875, |
|
"rewards/margins": 5.362754821777344, |
|
"rewards/rejected": -44.178890228271484, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.41391009329940626, |
|
"grad_norm": 132.693780202085, |
|
"learning_rate": 5.842703586412214e-07, |
|
"logits/chosen": -0.9231690168380737, |
|
"logits/rejected": -0.9451411962509155, |
|
"logps/chosen": -4.003310203552246, |
|
"logps/rejected": -4.392942428588867, |
|
"loss": 3.7509, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -40.033103942871094, |
|
"rewards/margins": 3.896320343017578, |
|
"rewards/rejected": -43.92942428588867, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.4161718970879276, |
|
"grad_norm": 117.19432439519281, |
|
"learning_rate": 5.814551690828203e-07, |
|
"logits/chosen": -0.9079042673110962, |
|
"logits/rejected": -0.9319837093353271, |
|
"logps/chosen": -3.8635096549987793, |
|
"logps/rejected": -4.329761505126953, |
|
"loss": 2.9234, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -38.635093688964844, |
|
"rewards/margins": 4.662517547607422, |
|
"rewards/rejected": -43.29761505126953, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.418433700876449, |
|
"grad_norm": 134.874370876785, |
|
"learning_rate": 5.786286167127155e-07, |
|
"logits/chosen": -0.946037769317627, |
|
"logits/rejected": -0.940789520740509, |
|
"logps/chosen": -3.8176631927490234, |
|
"logps/rejected": -4.328977584838867, |
|
"loss": 3.094, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -38.17662811279297, |
|
"rewards/margins": 5.1131415367126465, |
|
"rewards/rejected": -43.289772033691406, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4206955046649703, |
|
"grad_norm": 104.28377189007104, |
|
"learning_rate": 5.757908785310031e-07, |
|
"logits/chosen": -0.891565203666687, |
|
"logits/rejected": -0.90488600730896, |
|
"logps/chosen": -3.6350033283233643, |
|
"logps/rejected": -4.077045440673828, |
|
"loss": 3.2427, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -36.350032806396484, |
|
"rewards/margins": 4.420421600341797, |
|
"rewards/rejected": -40.770450592041016, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.42295730845349166, |
|
"grad_norm": 90.6334711650778, |
|
"learning_rate": 5.729421322382399e-07, |
|
"logits/chosen": -0.8813575506210327, |
|
"logits/rejected": -0.902955174446106, |
|
"logps/chosen": -3.487731695175171, |
|
"logps/rejected": -4.006654739379883, |
|
"loss": 2.862, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -34.8773193359375, |
|
"rewards/margins": 5.1892290115356445, |
|
"rewards/rejected": -40.06654357910156, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.42521911224201303, |
|
"grad_norm": 129.7937200032232, |
|
"learning_rate": 5.700825562243163e-07, |
|
"logits/chosen": -0.8732385039329529, |
|
"logits/rejected": -0.8936405777931213, |
|
"logps/chosen": -3.58575439453125, |
|
"logps/rejected": -4.113236427307129, |
|
"loss": 2.8991, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -35.8575439453125, |
|
"rewards/margins": 5.27481746673584, |
|
"rewards/rejected": -41.13235855102539, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.42748091603053434, |
|
"grad_norm": 108.64585507889065, |
|
"learning_rate": 5.672123295572854e-07, |
|
"logits/chosen": -0.940618634223938, |
|
"logits/rejected": -0.9647533893585205, |
|
"logps/chosen": -3.6546216011047363, |
|
"logps/rejected": -4.055443286895752, |
|
"loss": 2.8708, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -36.54621887207031, |
|
"rewards/margins": 4.008213996887207, |
|
"rewards/rejected": -40.5544319152832, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.4297427198190557, |
|
"grad_norm": 108.37528044511077, |
|
"learning_rate": 5.643316319721487e-07, |
|
"logits/chosen": -0.9153305292129517, |
|
"logits/rejected": -0.9312554001808167, |
|
"logps/chosen": -3.8006129264831543, |
|
"logps/rejected": -4.253608703613281, |
|
"loss": 3.5312, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -38.00613021850586, |
|
"rewards/margins": 4.529964923858643, |
|
"rewards/rejected": -42.536094665527344, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.432004523607577, |
|
"grad_norm": 100.72656998572613, |
|
"learning_rate": 5.614406438596026e-07, |
|
"logits/chosen": -0.9669473171234131, |
|
"logits/rejected": -0.9767682552337646, |
|
"logps/chosen": -3.853173017501831, |
|
"logps/rejected": -4.320462703704834, |
|
"loss": 3.3801, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -38.53173065185547, |
|
"rewards/margins": 4.672896385192871, |
|
"rewards/rejected": -43.20462417602539, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.4342663273960984, |
|
"grad_norm": 108.78329119316179, |
|
"learning_rate": 5.585395462547406e-07, |
|
"logits/chosen": -0.9244773983955383, |
|
"logits/rejected": -0.9188146591186523, |
|
"logps/chosen": -3.638792037963867, |
|
"logps/rejected": -3.9765026569366455, |
|
"loss": 3.4023, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -36.38792037963867, |
|
"rewards/margins": 3.3771071434020996, |
|
"rewards/rejected": -39.76502990722656, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.43652813118461975, |
|
"grad_norm": 111.88606555816419, |
|
"learning_rate": 5.55628520825718e-07, |
|
"logits/chosen": -0.9915658235549927, |
|
"logits/rejected": -0.9997342824935913, |
|
"logps/chosen": -3.658958911895752, |
|
"logps/rejected": -4.055312156677246, |
|
"loss": 3.3482, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -36.5895881652832, |
|
"rewards/margins": 3.963534355163574, |
|
"rewards/rejected": -40.553123474121094, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.43878993497314106, |
|
"grad_norm": 115.18763957837935, |
|
"learning_rate": 5.527077498623752e-07, |
|
"logits/chosen": -0.9457052946090698, |
|
"logits/rejected": -0.9517892599105835, |
|
"logps/chosen": -3.640913963317871, |
|
"logps/rejected": -4.082710266113281, |
|
"loss": 2.9719, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -36.409141540527344, |
|
"rewards/margins": 4.417965412139893, |
|
"rewards/rejected": -40.82710266113281, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.4410517387616624, |
|
"grad_norm": 102.71937253011347, |
|
"learning_rate": 5.497774162648228e-07, |
|
"logits/chosen": -0.9235740303993225, |
|
"logits/rejected": -0.9570806622505188, |
|
"logps/chosen": -3.5843145847320557, |
|
"logps/rejected": -4.175308704376221, |
|
"loss": 2.7626, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -35.84314727783203, |
|
"rewards/margins": 5.909941673278809, |
|
"rewards/rejected": -41.753082275390625, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4433135425501838, |
|
"grad_norm": 123.78104117915449, |
|
"learning_rate": 5.468377035319882e-07, |
|
"logits/chosen": -0.9676077365875244, |
|
"logits/rejected": -0.9691037535667419, |
|
"logps/chosen": -3.5839638710021973, |
|
"logps/rejected": -4.107885360717773, |
|
"loss": 3.0469, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -35.83964157104492, |
|
"rewards/margins": 5.239212989807129, |
|
"rewards/rejected": -41.078853607177734, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.4455753463387051, |
|
"grad_norm": 121.58965966459272, |
|
"learning_rate": 5.438887957501248e-07, |
|
"logits/chosen": -0.8684402108192444, |
|
"logits/rejected": -0.8744467496871948, |
|
"logps/chosen": -3.645962715148926, |
|
"logps/rejected": -4.094875335693359, |
|
"loss": 3.1804, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -36.45962905883789, |
|
"rewards/margins": 4.489128112792969, |
|
"rewards/rejected": -40.948753356933594, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.44783715012722647, |
|
"grad_norm": 109.22873019040438, |
|
"learning_rate": 5.409308775812844e-07, |
|
"logits/chosen": -0.9160696864128113, |
|
"logits/rejected": -0.9228438138961792, |
|
"logps/chosen": -3.8386149406433105, |
|
"logps/rejected": -4.307831287384033, |
|
"loss": 3.3267, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -38.38615036010742, |
|
"rewards/margins": 4.692164421081543, |
|
"rewards/rejected": -43.078311920166016, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.45009895391574783, |
|
"grad_norm": 104.87483635159766, |
|
"learning_rate": 5.379641342517541e-07, |
|
"logits/chosen": -0.9663090705871582, |
|
"logits/rejected": -0.9763889908790588, |
|
"logps/chosen": -3.617103099822998, |
|
"logps/rejected": -4.147641658782959, |
|
"loss": 2.9717, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -36.17103576660156, |
|
"rewards/margins": 5.305381774902344, |
|
"rewards/rejected": -41.47641372680664, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.45236075770426915, |
|
"grad_norm": 103.08645689813231, |
|
"learning_rate": 5.349887515404564e-07, |
|
"logits/chosen": -0.9189653992652893, |
|
"logits/rejected": -0.9432557225227356, |
|
"logps/chosen": -3.866921901702881, |
|
"logps/rejected": -4.4801025390625, |
|
"loss": 2.6106, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -38.669219970703125, |
|
"rewards/margins": 6.131805419921875, |
|
"rewards/rejected": -44.801025390625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4546225614927905, |
|
"grad_norm": 116.35515985998616, |
|
"learning_rate": 5.320049157673163e-07, |
|
"logits/chosen": -0.8698301911354065, |
|
"logits/rejected": -0.8828994631767273, |
|
"logps/chosen": -3.784053325653076, |
|
"logps/rejected": -4.32690954208374, |
|
"loss": 2.8006, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -37.84053421020508, |
|
"rewards/margins": 5.428560256958008, |
|
"rewards/rejected": -43.26909637451172, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.4568843652813118, |
|
"grad_norm": 109.47698928643487, |
|
"learning_rate": 5.290128137815938e-07, |
|
"logits/chosen": -0.9175432920455933, |
|
"logits/rejected": -0.9369080662727356, |
|
"logps/chosen": -4.0667266845703125, |
|
"logps/rejected": -4.628395080566406, |
|
"loss": 2.6128, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -40.66727066040039, |
|
"rewards/margins": 5.616676330566406, |
|
"rewards/rejected": -46.28395080566406, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.4591461690698332, |
|
"grad_norm": 111.02996873736168, |
|
"learning_rate": 5.260126329501828e-07, |
|
"logits/chosen": -0.939166784286499, |
|
"logits/rejected": -0.9610799551010132, |
|
"logps/chosen": -3.9119553565979004, |
|
"logps/rejected": -4.589254379272461, |
|
"loss": 2.4693, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -39.11955261230469, |
|
"rewards/margins": 6.772988796234131, |
|
"rewards/rejected": -45.89254379272461, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.46140797285835455, |
|
"grad_norm": 146.1981574496801, |
|
"learning_rate": 5.230045611458789e-07, |
|
"logits/chosen": -0.875460147857666, |
|
"logits/rejected": -0.9028068780899048, |
|
"logps/chosen": -3.9016761779785156, |
|
"logps/rejected": -4.389087677001953, |
|
"loss": 2.9617, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -39.016761779785156, |
|
"rewards/margins": 4.874115943908691, |
|
"rewards/rejected": -43.89087677001953, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.46366977664687586, |
|
"grad_norm": 121.56241675915379, |
|
"learning_rate": 5.199887867356143e-07, |
|
"logits/chosen": -0.8924857378005981, |
|
"logits/rejected": -0.9020816087722778, |
|
"logps/chosen": -4.1306023597717285, |
|
"logps/rejected": -4.79485559463501, |
|
"loss": 2.5375, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -41.306026458740234, |
|
"rewards/margins": 6.642524242401123, |
|
"rewards/rejected": -47.94855499267578, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.46593158043539723, |
|
"grad_norm": 122.508625760498, |
|
"learning_rate": 5.16965498568662e-07, |
|
"logits/chosen": -0.9368366599082947, |
|
"logits/rejected": -0.9245457053184509, |
|
"logps/chosen": -4.264636516571045, |
|
"logps/rejected": -5.027202129364014, |
|
"loss": 2.7563, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -42.646366119384766, |
|
"rewards/margins": 7.6256561279296875, |
|
"rewards/rejected": -50.27201843261719, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.4681933842239186, |
|
"grad_norm": 131.11773288045427, |
|
"learning_rate": 5.139348859648098e-07, |
|
"logits/chosen": -0.9146727919578552, |
|
"logits/rejected": -0.9255930185317993, |
|
"logps/chosen": -3.9246838092803955, |
|
"logps/rejected": -4.490647315979004, |
|
"loss": 2.9053, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -39.2468376159668, |
|
"rewards/margins": 5.659629821777344, |
|
"rewards/rejected": -44.90647506713867, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.4704551880124399, |
|
"grad_norm": 120.950641593037, |
|
"learning_rate": 5.10897138702506e-07, |
|
"logits/chosen": -0.8812559843063354, |
|
"logits/rejected": -0.8994000554084778, |
|
"logps/chosen": -4.020036220550537, |
|
"logps/rejected": -4.57065486907959, |
|
"loss": 3.3148, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -40.20035934448242, |
|
"rewards/margins": 5.506185531616211, |
|
"rewards/rejected": -45.70654296875, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.4727169918009613, |
|
"grad_norm": 119.1724296691641, |
|
"learning_rate": 5.078524470069743e-07, |
|
"logits/chosen": -0.9579637050628662, |
|
"logits/rejected": -0.9650572538375854, |
|
"logps/chosen": -4.186748027801514, |
|
"logps/rejected": -4.810725688934326, |
|
"loss": 2.3755, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -41.86748123168945, |
|
"rewards/margins": 6.239780426025391, |
|
"rewards/rejected": -48.107261657714844, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.47497879558948264, |
|
"grad_norm": 116.29036710628272, |
|
"learning_rate": 5.048010015383021e-07, |
|
"logits/chosen": -0.8580180406570435, |
|
"logits/rejected": -0.8542965650558472, |
|
"logps/chosen": -4.219040870666504, |
|
"logps/rejected": -4.9554338455200195, |
|
"loss": 2.3901, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -42.190399169921875, |
|
"rewards/margins": 7.363934516906738, |
|
"rewards/rejected": -49.55433654785156, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.47724059937800395, |
|
"grad_norm": 121.1251025837884, |
|
"learning_rate": 5.01742993379502e-07, |
|
"logits/chosen": -0.912419319152832, |
|
"logits/rejected": -0.9371484518051147, |
|
"logps/chosen": -4.289198398590088, |
|
"logps/rejected": -4.923559188842773, |
|
"loss": 2.4793, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -42.89198684692383, |
|
"rewards/margins": 6.343609809875488, |
|
"rewards/rejected": -49.235595703125, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.4795024031665253, |
|
"grad_norm": 143.77581690319334, |
|
"learning_rate": 4.986786140245446e-07, |
|
"logits/chosen": -0.8684762716293335, |
|
"logits/rejected": -0.8781230449676514, |
|
"logps/chosen": -4.23728084564209, |
|
"logps/rejected": -4.795536041259766, |
|
"loss": 3.0145, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -42.37281036376953, |
|
"rewards/margins": 5.582554340362549, |
|
"rewards/rejected": -47.95536422729492, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.4817642069550466, |
|
"grad_norm": 128.57500022332647, |
|
"learning_rate": 4.956080553663687e-07, |
|
"logits/chosen": -0.9273526072502136, |
|
"logits/rejected": -0.9341423511505127, |
|
"logps/chosen": -4.209490776062012, |
|
"logps/rejected": -4.829947471618652, |
|
"loss": 2.8036, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -42.09490203857422, |
|
"rewards/margins": 6.204565048217773, |
|
"rewards/rejected": -48.299468994140625, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.484026010743568, |
|
"grad_norm": 134.74973546376373, |
|
"learning_rate": 4.925315096848636e-07, |
|
"logits/chosen": -0.9303544759750366, |
|
"logits/rejected": -0.9426358938217163, |
|
"logps/chosen": -4.48342752456665, |
|
"logps/rejected": -5.268622875213623, |
|
"loss": 2.5133, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -44.83427047729492, |
|
"rewards/margins": 7.851953983306885, |
|
"rewards/rejected": -52.68622589111328, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.48628781453208936, |
|
"grad_norm": 137.87557167889346, |
|
"learning_rate": 4.894491696348293e-07, |
|
"logits/chosen": -0.933896541595459, |
|
"logits/rejected": -0.9386736154556274, |
|
"logps/chosen": -4.23392915725708, |
|
"logps/rejected": -4.687985420227051, |
|
"loss": 3.0521, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -42.33929443359375, |
|
"rewards/margins": 4.5405592918396, |
|
"rewards/rejected": -46.87985610961914, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.48854961832061067, |
|
"grad_norm": 139.11956890881316, |
|
"learning_rate": 4.863612282339116e-07, |
|
"logits/chosen": -0.8552120923995972, |
|
"logits/rejected": -0.8585663437843323, |
|
"logps/chosen": -4.637057781219482, |
|
"logps/rejected": -5.207608222961426, |
|
"loss": 3.0894, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -46.37057113647461, |
|
"rewards/margins": 5.705511093139648, |
|
"rewards/rejected": -52.07608413696289, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.49081142210913203, |
|
"grad_norm": 144.47535678356647, |
|
"learning_rate": 4.832678788505161e-07, |
|
"logits/chosen": -0.8959632515907288, |
|
"logits/rejected": -0.8897783160209656, |
|
"logps/chosen": -4.5698347091674805, |
|
"logps/rejected": -5.178661346435547, |
|
"loss": 3.2773, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -45.69834518432617, |
|
"rewards/margins": 6.0882649421691895, |
|
"rewards/rejected": -51.7866096496582, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.4930732258976534, |
|
"grad_norm": 183.85171049659183, |
|
"learning_rate": 4.801693151916985e-07, |
|
"logits/chosen": -0.9000855088233948, |
|
"logits/rejected": -0.9295123219490051, |
|
"logps/chosen": -4.563023567199707, |
|
"logps/rejected": -5.171204566955566, |
|
"loss": 2.7277, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -45.63024139404297, |
|
"rewards/margins": 6.08180570602417, |
|
"rewards/rejected": -51.71204376220703, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.4953350296861747, |
|
"grad_norm": 144.8137273177042, |
|
"learning_rate": 4.770657312910354e-07, |
|
"logits/chosen": -0.9353141784667969, |
|
"logits/rejected": -0.938468337059021, |
|
"logps/chosen": -4.625380039215088, |
|
"logps/rejected": -5.1412482261657715, |
|
"loss": 3.2197, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -46.25379943847656, |
|
"rewards/margins": 5.158687114715576, |
|
"rewards/rejected": -51.41248321533203, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.4975968334746961, |
|
"grad_norm": 163.41972624587643, |
|
"learning_rate": 4.739573214964729e-07, |
|
"logits/chosen": -0.8990980386734009, |
|
"logits/rejected": -0.9109318852424622, |
|
"logps/chosen": -4.455334663391113, |
|
"logps/rejected": -5.060534954071045, |
|
"loss": 2.939, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -44.5533447265625, |
|
"rewards/margins": 6.052001953125, |
|
"rewards/rejected": -50.60535430908203, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.49985863726321744, |
|
"grad_norm": 170.1558238963429, |
|
"learning_rate": 4.7084428045815733e-07, |
|
"logits/chosen": -0.8906704187393188, |
|
"logits/rejected": -0.9073330163955688, |
|
"logps/chosen": -4.730157375335693, |
|
"logps/rejected": -5.305697441101074, |
|
"loss": 3.0027, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -47.301578521728516, |
|
"rewards/margins": 5.755396842956543, |
|
"rewards/rejected": -53.05697250366211, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.5021204410517388, |
|
"grad_norm": 226.11746442069574, |
|
"learning_rate": 4.677268031162457e-07, |
|
"logits/chosen": -0.9023485779762268, |
|
"logits/rejected": -0.9136303067207336, |
|
"logps/chosen": -4.3702263832092285, |
|
"logps/rejected": -4.995909690856934, |
|
"loss": 2.9436, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -43.70226287841797, |
|
"rewards/margins": 6.2568359375, |
|
"rewards/rejected": -49.9590950012207, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.5043822448402601, |
|
"grad_norm": 132.7984703986409, |
|
"learning_rate": 4.646050846886985e-07, |
|
"logits/chosen": -0.814538300037384, |
|
"logits/rejected": -0.8217288255691528, |
|
"logps/chosen": -4.306761264801025, |
|
"logps/rejected": -4.975722312927246, |
|
"loss": 2.4596, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -43.06761169433594, |
|
"rewards/margins": 6.689613342285156, |
|
"rewards/rejected": -49.75722885131836, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.5066440486287814, |
|
"grad_norm": 147.9751731515365, |
|
"learning_rate": 4.6147932065905494e-07, |
|
"logits/chosen": -0.8536533117294312, |
|
"logits/rejected": -0.8585684299468994, |
|
"logps/chosen": -4.433886528015137, |
|
"logps/rejected": -4.914949893951416, |
|
"loss": 3.3481, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -44.3388671875, |
|
"rewards/margins": 4.810628890991211, |
|
"rewards/rejected": -49.14949417114258, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.5089058524173028, |
|
"grad_norm": 149.36332124550927, |
|
"learning_rate": 4.5834970676419214e-07, |
|
"logits/chosen": -0.867747962474823, |
|
"logits/rejected": -0.8860857486724854, |
|
"logps/chosen": -4.323178768157959, |
|
"logps/rejected": -4.883927822113037, |
|
"loss": 2.8019, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -43.23178482055664, |
|
"rewards/margins": 5.607486248016357, |
|
"rewards/rejected": -48.83927917480469, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5111676562058242, |
|
"grad_norm": 163.9248977367859, |
|
"learning_rate": 4.552164389820673e-07, |
|
"logits/chosen": -0.7673951983451843, |
|
"logits/rejected": -0.7906842231750488, |
|
"logps/chosen": -4.370258331298828, |
|
"logps/rejected": -5.137817859649658, |
|
"loss": 2.539, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -43.70258331298828, |
|
"rewards/margins": 7.675594329833984, |
|
"rewards/rejected": -51.378177642822266, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.5134294599943455, |
|
"grad_norm": 160.95245413332196, |
|
"learning_rate": 4.5207971351944605e-07, |
|
"logits/chosen": -0.8810624480247498, |
|
"logits/rejected": -0.8833796977996826, |
|
"logps/chosen": -4.283847808837891, |
|
"logps/rejected": -4.946835994720459, |
|
"loss": 3.3887, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -42.838478088378906, |
|
"rewards/margins": 6.629883289337158, |
|
"rewards/rejected": -49.46835708618164, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.5156912637828668, |
|
"grad_norm": 150.5220812123222, |
|
"learning_rate": 4.489397267996157e-07, |
|
"logits/chosen": -0.8820953965187073, |
|
"logits/rejected": -0.8898295164108276, |
|
"logps/chosen": -4.304379463195801, |
|
"logps/rejected": -4.843184471130371, |
|
"loss": 2.96, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -43.04379653930664, |
|
"rewards/margins": 5.388051509857178, |
|
"rewards/rejected": -48.431846618652344, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.5179530675713881, |
|
"grad_norm": 141.43842633471348, |
|
"learning_rate": 4.45796675450085e-07, |
|
"logits/chosen": -0.8514925241470337, |
|
"logits/rejected": -0.8414303064346313, |
|
"logps/chosen": -4.168148040771484, |
|
"logps/rejected": -4.858559608459473, |
|
"loss": 2.6745, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -41.68148422241211, |
|
"rewards/margins": 6.904117107391357, |
|
"rewards/rejected": -48.585601806640625, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.5202148713599095, |
|
"grad_norm": 168.87995319696404, |
|
"learning_rate": 4.4265075629027126e-07, |
|
"logits/chosen": -0.7781671285629272, |
|
"logits/rejected": -0.7910118103027344, |
|
"logps/chosen": -4.419309139251709, |
|
"logps/rejected": -4.9767374992370605, |
|
"loss": 2.7029, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -44.193092346191406, |
|
"rewards/margins": 5.57428503036499, |
|
"rewards/rejected": -49.767372131347656, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5224766751484309, |
|
"grad_norm": 131.99524587680546, |
|
"learning_rate": 4.3950216631917563e-07, |
|
"logits/chosen": -0.8382211327552795, |
|
"logits/rejected": -0.8589369058609009, |
|
"logps/chosen": -4.271603584289551, |
|
"logps/rejected": -4.960786819458008, |
|
"loss": 2.6628, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -42.71603012084961, |
|
"rewards/margins": 6.891840934753418, |
|
"rewards/rejected": -49.607872009277344, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.5247384789369522, |
|
"grad_norm": 134.3361120222247, |
|
"learning_rate": 4.3635110270304676e-07, |
|
"logits/chosen": -0.8264502286911011, |
|
"logits/rejected": -0.8120173811912537, |
|
"logps/chosen": -4.067740440368652, |
|
"logps/rejected": -4.706015586853027, |
|
"loss": 2.148, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -40.677398681640625, |
|
"rewards/margins": 6.382755279541016, |
|
"rewards/rejected": -47.06015396118164, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.5270002827254736, |
|
"grad_norm": 152.61581673294296, |
|
"learning_rate": 4.331977627630339e-07, |
|
"logits/chosen": -0.7725579738616943, |
|
"logits/rejected": -0.760904848575592, |
|
"logps/chosen": -4.003568172454834, |
|
"logps/rejected": -4.687016487121582, |
|
"loss": 2.7551, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -40.03567886352539, |
|
"rewards/margins": 6.8344855308532715, |
|
"rewards/rejected": -46.87016296386719, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.5292620865139949, |
|
"grad_norm": 147.10762801151967, |
|
"learning_rate": 4.300423439628313e-07, |
|
"logits/chosen": -0.7840836048126221, |
|
"logits/rejected": -0.8098961114883423, |
|
"logps/chosen": -4.061254024505615, |
|
"logps/rejected": -4.711936950683594, |
|
"loss": 2.5606, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -40.61254119873047, |
|
"rewards/margins": 6.506833076477051, |
|
"rewards/rejected": -47.11936950683594, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.5315238903025162, |
|
"grad_norm": 132.90712688474918, |
|
"learning_rate": 4.268850438963118e-07, |
|
"logits/chosen": -0.817150354385376, |
|
"logits/rejected": -0.8263847827911377, |
|
"logps/chosen": -4.237096309661865, |
|
"logps/rejected": -4.880129814147949, |
|
"loss": 2.584, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -42.370967864990234, |
|
"rewards/margins": 6.430337429046631, |
|
"rewards/rejected": -48.80130386352539, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5337856940910376, |
|
"grad_norm": 138.4880606332674, |
|
"learning_rate": 4.2372606027515463e-07, |
|
"logits/chosen": -0.7787373661994934, |
|
"logits/rejected": -0.7952776551246643, |
|
"logps/chosen": -3.890188694000244, |
|
"logps/rejected": -4.451065540313721, |
|
"loss": 2.9659, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -38.90188980102539, |
|
"rewards/margins": 5.60876989364624, |
|
"rewards/rejected": -44.51065444946289, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.536047497879559, |
|
"grad_norm": 144.57651076503535, |
|
"learning_rate": 4.2056559091646387e-07, |
|
"logits/chosen": -0.7900989055633545, |
|
"logits/rejected": -0.8283600211143494, |
|
"logps/chosen": -4.038181781768799, |
|
"logps/rejected": -4.595399856567383, |
|
"loss": 2.9693, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -40.38182067871094, |
|
"rewards/margins": 5.572176933288574, |
|
"rewards/rejected": -45.95399475097656, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.5383093016680803, |
|
"grad_norm": 139.26003729586844, |
|
"learning_rate": 4.1740383373038116e-07, |
|
"logits/chosen": -0.7787960171699524, |
|
"logits/rejected": -0.7999230027198792, |
|
"logps/chosen": -3.868332862854004, |
|
"logps/rejected": -4.443919658660889, |
|
"loss": 2.8123, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -38.683326721191406, |
|
"rewards/margins": 5.75586462020874, |
|
"rewards/rejected": -44.43919372558594, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.5405711054566016, |
|
"grad_norm": 142.51994348526011, |
|
"learning_rate": 4.1424098670769255e-07, |
|
"logits/chosen": -0.8288208246231079, |
|
"logits/rejected": -0.8396254777908325, |
|
"logps/chosen": -4.00309419631958, |
|
"logps/rejected": -4.4558329582214355, |
|
"loss": 3.2117, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -40.030941009521484, |
|
"rewards/margins": 4.5273871421813965, |
|
"rewards/rejected": -44.55833053588867, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.542832909245123, |
|
"grad_norm": 137.83906726980342, |
|
"learning_rate": 4.1107724790743007e-07, |
|
"logits/chosen": -0.7743911147117615, |
|
"logits/rejected": -0.8078039884567261, |
|
"logps/chosen": -3.917076587677002, |
|
"logps/rejected": -4.433419227600098, |
|
"loss": 2.8325, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -39.17076110839844, |
|
"rewards/margins": 5.163426399230957, |
|
"rewards/rejected": -44.334190368652344, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5450947130336443, |
|
"grad_norm": 139.8500358924521, |
|
"learning_rate": 4.0791281544446947e-07, |
|
"logits/chosen": -0.8219509124755859, |
|
"logits/rejected": -0.8224381804466248, |
|
"logps/chosen": -3.893245220184326, |
|
"logps/rejected": -4.521663665771484, |
|
"loss": 2.3828, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -38.932456970214844, |
|
"rewards/margins": 6.284185409545898, |
|
"rewards/rejected": -45.21664047241211, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.5473565168221657, |
|
"grad_norm": 126.08857645654825, |
|
"learning_rate": 4.0474788747712416e-07, |
|
"logits/chosen": -0.8421152830123901, |
|
"logits/rejected": -0.8481616973876953, |
|
"logps/chosen": -3.897510051727295, |
|
"logps/rejected": -4.381252765655518, |
|
"loss": 3.3288, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -38.975101470947266, |
|
"rewards/margins": 4.837425231933594, |
|
"rewards/rejected": -43.812522888183594, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.549618320610687, |
|
"grad_norm": 115.43735691500179, |
|
"learning_rate": 4.0158266219473573e-07, |
|
"logits/chosen": -0.8440154194831848, |
|
"logits/rejected": -0.8574568033218384, |
|
"logps/chosen": -3.7257094383239746, |
|
"logps/rejected": -4.348711013793945, |
|
"loss": 2.4764, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -37.25709533691406, |
|
"rewards/margins": 6.2300190925598145, |
|
"rewards/rejected": -43.48711013793945, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.5518801243992084, |
|
"grad_norm": 116.33902882228996, |
|
"learning_rate": 3.984173378052643e-07, |
|
"logits/chosen": -0.7851986885070801, |
|
"logits/rejected": -0.796712338924408, |
|
"logps/chosen": -3.6892523765563965, |
|
"logps/rejected": -4.342462539672852, |
|
"loss": 2.6619, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -36.89252471923828, |
|
"rewards/margins": 6.532103538513184, |
|
"rewards/rejected": -43.42462921142578, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.5541419281877297, |
|
"grad_norm": 124.64167796525535, |
|
"learning_rate": 3.9525211252287585e-07, |
|
"logits/chosen": -0.8931095600128174, |
|
"logits/rejected": -0.9180840849876404, |
|
"logps/chosen": -3.963862419128418, |
|
"logps/rejected": -4.748945236206055, |
|
"loss": 2.5828, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -39.63862609863281, |
|
"rewards/margins": 7.850823879241943, |
|
"rewards/rejected": -47.48944854736328, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.556403731976251, |
|
"grad_norm": 112.09055809017627, |
|
"learning_rate": 3.920871845555305e-07, |
|
"logits/chosen": -0.8313508629798889, |
|
"logits/rejected": -0.8461873531341553, |
|
"logps/chosen": -3.870025157928467, |
|
"logps/rejected": -4.447489261627197, |
|
"loss": 2.385, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -38.700252532958984, |
|
"rewards/margins": 5.774643898010254, |
|
"rewards/rejected": -44.47489547729492, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.5586655357647724, |
|
"grad_norm": 139.45655157059815, |
|
"learning_rate": 3.8892275209256984e-07, |
|
"logits/chosen": -0.8623577356338501, |
|
"logits/rejected": -0.8599537014961243, |
|
"logps/chosen": -4.079935073852539, |
|
"logps/rejected": -4.5872697830200195, |
|
"loss": 2.7693, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -40.79934310913086, |
|
"rewards/margins": 5.073347568511963, |
|
"rewards/rejected": -45.87269592285156, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.5609273395532938, |
|
"grad_norm": 128.5379031156273, |
|
"learning_rate": 3.8575901329230747e-07, |
|
"logits/chosen": -0.831263542175293, |
|
"logits/rejected": -0.8186984062194824, |
|
"logps/chosen": -4.078851222991943, |
|
"logps/rejected": -4.684617519378662, |
|
"loss": 2.9864, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -40.788516998291016, |
|
"rewards/margins": 6.057657241821289, |
|
"rewards/rejected": -46.84617614746094, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.5631891433418151, |
|
"grad_norm": 137.16938367411643, |
|
"learning_rate": 3.8259616626961886e-07, |
|
"logits/chosen": -0.8066788911819458, |
|
"logits/rejected": -0.8286083936691284, |
|
"logps/chosen": -3.8074519634246826, |
|
"logps/rejected": -4.328085422515869, |
|
"loss": 2.4375, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -38.074520111083984, |
|
"rewards/margins": 5.206332206726074, |
|
"rewards/rejected": -43.280853271484375, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.5654509471303364, |
|
"grad_norm": 114.38221061442538, |
|
"learning_rate": 3.794344090835362e-07, |
|
"logits/chosen": -0.836406946182251, |
|
"logits/rejected": -0.8421981334686279, |
|
"logps/chosen": -4.185178279876709, |
|
"logps/rejected": -4.7932024002075195, |
|
"loss": 2.8436, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -41.85177993774414, |
|
"rewards/margins": 6.080243110656738, |
|
"rewards/rejected": -47.93202590942383, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5677127509188578, |
|
"grad_norm": 173.94050832794733, |
|
"learning_rate": 3.7627393972484534e-07, |
|
"logits/chosen": -0.8843735456466675, |
|
"logits/rejected": -0.8974891901016235, |
|
"logps/chosen": -4.174497127532959, |
|
"logps/rejected": -4.667062282562256, |
|
"loss": 3.303, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -41.74496841430664, |
|
"rewards/margins": 4.92565393447876, |
|
"rewards/rejected": -46.67062759399414, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.5699745547073791, |
|
"grad_norm": 139.59908745062046, |
|
"learning_rate": 3.7311495610368823e-07, |
|
"logits/chosen": -0.8997682929039001, |
|
"logits/rejected": -0.9016697406768799, |
|
"logps/chosen": -4.343634128570557, |
|
"logps/rejected": -4.967972755432129, |
|
"loss": 2.7301, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -43.43634033203125, |
|
"rewards/margins": 6.243386268615723, |
|
"rewards/rejected": -49.679725646972656, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.5722363584959005, |
|
"grad_norm": 120.92407808146834, |
|
"learning_rate": 3.699576560371689e-07, |
|
"logits/chosen": -0.8279405236244202, |
|
"logits/rejected": -0.8453271389007568, |
|
"logps/chosen": -4.558162689208984, |
|
"logps/rejected": -5.47041654586792, |
|
"loss": 2.1306, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -45.581626892089844, |
|
"rewards/margins": 9.122541427612305, |
|
"rewards/rejected": -54.70416259765625, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.5744981622844219, |
|
"grad_norm": 144.72597313458348, |
|
"learning_rate": 3.66802237236966e-07, |
|
"logits/chosen": -0.8229357600212097, |
|
"logits/rejected": -0.8324556946754456, |
|
"logps/chosen": -4.652154445648193, |
|
"logps/rejected": -5.330699920654297, |
|
"loss": 2.4779, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -46.521549224853516, |
|
"rewards/margins": 6.78544807434082, |
|
"rewards/rejected": -53.30699920654297, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.5767599660729432, |
|
"grad_norm": 138.6610214924037, |
|
"learning_rate": 3.636488972969532e-07, |
|
"logits/chosen": -0.8425090312957764, |
|
"logits/rejected": -0.8567550182342529, |
|
"logps/chosen": -4.528759479522705, |
|
"logps/rejected": -5.228953838348389, |
|
"loss": 2.7067, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -45.287593841552734, |
|
"rewards/margins": 7.001940727233887, |
|
"rewards/rejected": -52.2895393371582, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5790217698614645, |
|
"grad_norm": 149.23473531486215, |
|
"learning_rate": 3.604978336808244e-07, |
|
"logits/chosen": -0.9809710383415222, |
|
"logits/rejected": -0.9890305399894714, |
|
"logps/chosen": -4.413546085357666, |
|
"logps/rejected": -5.061186790466309, |
|
"loss": 2.7346, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -44.13545608520508, |
|
"rewards/margins": 6.476408958435059, |
|
"rewards/rejected": -50.61186981201172, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.5812835736499858, |
|
"grad_norm": 151.69021123299453, |
|
"learning_rate": 3.5734924370972876e-07, |
|
"logits/chosen": -0.9168260097503662, |
|
"logits/rejected": -0.9385607242584229, |
|
"logps/chosen": -4.60618257522583, |
|
"logps/rejected": -5.185503959655762, |
|
"loss": 3.0486, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -46.06182861328125, |
|
"rewards/margins": 5.793212890625, |
|
"rewards/rejected": -51.855037689208984, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.5835453774385072, |
|
"grad_norm": 168.20501802658688, |
|
"learning_rate": 3.5420332454991504e-07, |
|
"logits/chosen": -0.8639505505561829, |
|
"logits/rejected": -0.8759117126464844, |
|
"logps/chosen": -4.676448822021484, |
|
"logps/rejected": -5.332930564880371, |
|
"loss": 3.0441, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -46.76448440551758, |
|
"rewards/margins": 6.564816951751709, |
|
"rewards/rejected": -53.32930374145508, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.5858071812270286, |
|
"grad_norm": 133.2402904508335, |
|
"learning_rate": 3.510602732003843e-07, |
|
"logits/chosen": -0.9353285431861877, |
|
"logits/rejected": -0.9522349834442139, |
|
"logps/chosen": -4.753300666809082, |
|
"logps/rejected": -5.492231369018555, |
|
"loss": 2.6279, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -47.53300476074219, |
|
"rewards/margins": 7.389303684234619, |
|
"rewards/rejected": -54.92231369018555, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.5880689850155499, |
|
"grad_norm": 144.95585468548657, |
|
"learning_rate": 3.4792028648055396e-07, |
|
"logits/chosen": -0.9198298454284668, |
|
"logits/rejected": -0.9325035214424133, |
|
"logps/chosen": -4.450214385986328, |
|
"logps/rejected": -5.1093268394470215, |
|
"loss": 2.8402, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -44.502140045166016, |
|
"rewards/margins": 6.591127872467041, |
|
"rewards/rejected": -51.0932731628418, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5903307888040712, |
|
"grad_norm": 155.36455892363466, |
|
"learning_rate": 3.447835610179327e-07, |
|
"logits/chosen": -0.915900707244873, |
|
"logits/rejected": -0.9240118265151978, |
|
"logps/chosen": -4.484950065612793, |
|
"logps/rejected": -5.312657833099365, |
|
"loss": 2.5099, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -44.84950637817383, |
|
"rewards/margins": 8.277070999145508, |
|
"rewards/rejected": -53.12657928466797, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.5925925925925926, |
|
"grad_norm": 178.5841356353186, |
|
"learning_rate": 3.416502932358079e-07, |
|
"logits/chosen": -0.9722024202346802, |
|
"logits/rejected": -0.9864451289176941, |
|
"logps/chosen": -4.724699974060059, |
|
"logps/rejected": -5.200839519500732, |
|
"loss": 3.191, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -47.24700164794922, |
|
"rewards/margins": 4.761399269104004, |
|
"rewards/rejected": -52.008399963378906, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.5948543963811139, |
|
"grad_norm": 155.6491240961835, |
|
"learning_rate": 3.385206793409451e-07, |
|
"logits/chosen": -0.9002339839935303, |
|
"logits/rejected": -0.917601466178894, |
|
"logps/chosen": -4.058086395263672, |
|
"logps/rejected": -4.651598930358887, |
|
"loss": 2.657, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -40.58086395263672, |
|
"rewards/margins": 5.935131072998047, |
|
"rewards/rejected": -46.515995025634766, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.5971162001696353, |
|
"grad_norm": 148.42975008344453, |
|
"learning_rate": 3.3539491531130163e-07, |
|
"logits/chosen": -0.9280572533607483, |
|
"logits/rejected": -0.9459517002105713, |
|
"logps/chosen": -4.205946922302246, |
|
"logps/rejected": -4.992376804351807, |
|
"loss": 2.4892, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -42.05946731567383, |
|
"rewards/margins": 7.864302635192871, |
|
"rewards/rejected": -49.923770904541016, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.5993780039581567, |
|
"grad_norm": 140.98014037458591, |
|
"learning_rate": 3.3227319688375426e-07, |
|
"logits/chosen": -1.005509376525879, |
|
"logits/rejected": -0.996308445930481, |
|
"logps/chosen": -4.1070556640625, |
|
"logps/rejected": -4.702898979187012, |
|
"loss": 2.6008, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -41.070556640625, |
|
"rewards/margins": 5.958428382873535, |
|
"rewards/rejected": -47.02898406982422, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.601639807746678, |
|
"grad_norm": 128.74217848218075, |
|
"learning_rate": 3.291557195418427e-07, |
|
"logits/chosen": -1.0023953914642334, |
|
"logits/rejected": -0.9917722940444946, |
|
"logps/chosen": -3.8998665809631348, |
|
"logps/rejected": -4.379611492156982, |
|
"loss": 3.1704, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -38.99866485595703, |
|
"rewards/margins": 4.797451496124268, |
|
"rewards/rejected": -43.796112060546875, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.6039016115351993, |
|
"grad_norm": 145.05497368203444, |
|
"learning_rate": 3.260426785035272e-07, |
|
"logits/chosen": -0.9523136019706726, |
|
"logits/rejected": -0.9565008878707886, |
|
"logps/chosen": -3.9325990676879883, |
|
"logps/rejected": -4.465220928192139, |
|
"loss": 3.3689, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -39.325984954833984, |
|
"rewards/margins": 5.326223850250244, |
|
"rewards/rejected": -44.6522102355957, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.6061634153237206, |
|
"grad_norm": 133.48758547591208, |
|
"learning_rate": 3.229342687089646e-07, |
|
"logits/chosen": -0.9732850193977356, |
|
"logits/rejected": -0.9786388278007507, |
|
"logps/chosen": -3.7646870613098145, |
|
"logps/rejected": -4.4038310050964355, |
|
"loss": 2.6359, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -37.646873474121094, |
|
"rewards/margins": 6.391435623168945, |
|
"rewards/rejected": -44.038307189941406, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.608425219112242, |
|
"grad_norm": 156.96228832905817, |
|
"learning_rate": 3.1983068480830143e-07, |
|
"logits/chosen": -0.9513987302780151, |
|
"logits/rejected": -0.9469345808029175, |
|
"logps/chosen": -3.7457966804504395, |
|
"logps/rejected": -4.372827529907227, |
|
"loss": 2.6094, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -37.45797348022461, |
|
"rewards/margins": 6.270303249359131, |
|
"rewards/rejected": -43.728271484375, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.6106870229007634, |
|
"grad_norm": 126.30153170952352, |
|
"learning_rate": 3.1673212114948387e-07, |
|
"logits/chosen": -0.9619816541671753, |
|
"logits/rejected": -0.9705020189285278, |
|
"logps/chosen": -3.638077974319458, |
|
"logps/rejected": -4.2800679206848145, |
|
"loss": 2.5696, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -36.380775451660156, |
|
"rewards/margins": 6.419899940490723, |
|
"rewards/rejected": -42.80067825317383, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6129488266892847, |
|
"grad_norm": 131.06230943967222, |
|
"learning_rate": 3.1363877176608845e-07, |
|
"logits/chosen": -0.9253988862037659, |
|
"logits/rejected": -0.9438823461532593, |
|
"logps/chosen": -3.546363592147827, |
|
"logps/rejected": -4.163539409637451, |
|
"loss": 2.721, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -35.46363830566406, |
|
"rewards/margins": 6.171757698059082, |
|
"rewards/rejected": -41.63539505004883, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.615210630477806, |
|
"grad_norm": 119.36603698630678, |
|
"learning_rate": 3.1055083036517076e-07, |
|
"logits/chosen": -0.931835949420929, |
|
"logits/rejected": -0.9240102767944336, |
|
"logps/chosen": -3.519230604171753, |
|
"logps/rejected": -4.135827541351318, |
|
"loss": 2.5416, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -35.19230651855469, |
|
"rewards/margins": 6.165970325469971, |
|
"rewards/rejected": -41.3582763671875, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.6174724342663274, |
|
"grad_norm": 157.21966352920222, |
|
"learning_rate": 3.074684903151364e-07, |
|
"logits/chosen": -0.8328728675842285, |
|
"logits/rejected": -0.8194759488105774, |
|
"logps/chosen": -3.2504658699035645, |
|
"logps/rejected": -3.7328827381134033, |
|
"loss": 2.4568, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -32.50465774536133, |
|
"rewards/margins": 4.824168682098389, |
|
"rewards/rejected": -37.32883071899414, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.6197342380548487, |
|
"grad_norm": 125.59777868815232, |
|
"learning_rate": 3.0439194463363136e-07, |
|
"logits/chosen": -0.9033098816871643, |
|
"logits/rejected": -0.9113788604736328, |
|
"logps/chosen": -3.432678699493408, |
|
"logps/rejected": -3.9219419956207275, |
|
"loss": 3.0769, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -34.326786041259766, |
|
"rewards/margins": 4.892629146575928, |
|
"rewards/rejected": -39.219417572021484, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.6219960418433701, |
|
"grad_norm": 113.53689202871962, |
|
"learning_rate": 3.0132138597545537e-07, |
|
"logits/chosen": -0.9494621753692627, |
|
"logits/rejected": -0.9803709983825684, |
|
"logps/chosen": -3.605886936187744, |
|
"logps/rejected": -4.303212642669678, |
|
"loss": 2.5638, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -36.058868408203125, |
|
"rewards/margins": 6.973256587982178, |
|
"rewards/rejected": -43.03212356567383, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.6242578456318915, |
|
"grad_norm": 105.00410359380473, |
|
"learning_rate": 2.982570066204981e-07, |
|
"logits/chosen": -0.9540393948554993, |
|
"logits/rejected": -0.9732652306556702, |
|
"logps/chosen": -3.5351197719573975, |
|
"logps/rejected": -4.073846340179443, |
|
"loss": 2.8045, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -35.351200103759766, |
|
"rewards/margins": 5.387262344360352, |
|
"rewards/rejected": -40.738460540771484, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.6265196494204128, |
|
"grad_norm": 154.48698784858473, |
|
"learning_rate": 2.951989984616979e-07, |
|
"logits/chosen": -0.8954455852508545, |
|
"logits/rejected": -0.9178181886672974, |
|
"logps/chosen": -3.763580799102783, |
|
"logps/rejected": -4.397812843322754, |
|
"loss": 3.2425, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -37.63581085205078, |
|
"rewards/margins": 6.342320919036865, |
|
"rewards/rejected": -43.97813034057617, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.6287814532089341, |
|
"grad_norm": 124.9996609566805, |
|
"learning_rate": 2.9214755299302584e-07, |
|
"logits/chosen": -0.9186022877693176, |
|
"logits/rejected": -0.9265093803405762, |
|
"logps/chosen": -4.027862548828125, |
|
"logps/rejected": -4.721011638641357, |
|
"loss": 2.1795, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -40.278629302978516, |
|
"rewards/margins": 6.931490421295166, |
|
"rewards/rejected": -47.210121154785156, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.6310432569974554, |
|
"grad_norm": 128.6732786280236, |
|
"learning_rate": 2.89102861297494e-07, |
|
"logits/chosen": -0.9596308469772339, |
|
"logits/rejected": -0.9828594923019409, |
|
"logps/chosen": -3.758955955505371, |
|
"logps/rejected": -4.320319175720215, |
|
"loss": 3.1683, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -37.589561462402344, |
|
"rewards/margins": 5.613630294799805, |
|
"rewards/rejected": -43.203189849853516, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.6333050607859768, |
|
"grad_norm": 129.18886507394328, |
|
"learning_rate": 2.860651140351902e-07, |
|
"logits/chosen": -0.949332058429718, |
|
"logits/rejected": -0.9587225914001465, |
|
"logps/chosen": -3.851499557495117, |
|
"logps/rejected": -4.524806022644043, |
|
"loss": 2.8367, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -38.5150032043457, |
|
"rewards/margins": 6.733060359954834, |
|
"rewards/rejected": -45.2480583190918, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6355668645744982, |
|
"grad_norm": 121.51443077771003, |
|
"learning_rate": 2.830345014313381e-07, |
|
"logits/chosen": -0.882565975189209, |
|
"logits/rejected": -0.9311519265174866, |
|
"logps/chosen": -4.010669231414795, |
|
"logps/rejected": -4.751985549926758, |
|
"loss": 2.0959, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -40.106693267822266, |
|
"rewards/margins": 7.4131622314453125, |
|
"rewards/rejected": -47.51985549926758, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.6378286683630195, |
|
"grad_norm": 124.30762639284417, |
|
"learning_rate": 2.800112132643856e-07, |
|
"logits/chosen": -0.9435937404632568, |
|
"logits/rejected": -0.9561968445777893, |
|
"logps/chosen": -4.011701583862305, |
|
"logps/rejected": -4.713750839233398, |
|
"loss": 2.6367, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -40.11701583862305, |
|
"rewards/margins": 7.020491600036621, |
|
"rewards/rejected": -47.13751220703125, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.6400904721515408, |
|
"grad_norm": 126.70176291289812, |
|
"learning_rate": 2.7699543885412105e-07, |
|
"logits/chosen": -0.9590288400650024, |
|
"logits/rejected": -0.9676195383071899, |
|
"logps/chosen": -4.167572021484375, |
|
"logps/rejected": -4.936420917510986, |
|
"loss": 2.2448, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -41.67572021484375, |
|
"rewards/margins": 7.688486099243164, |
|
"rewards/rejected": -49.36420440673828, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.6423522759400622, |
|
"grad_norm": 137.6588338492344, |
|
"learning_rate": 2.7398736704981725e-07, |
|
"logits/chosen": -0.9565585255622864, |
|
"logits/rejected": -0.9600222110748291, |
|
"logps/chosen": -4.261449337005615, |
|
"logps/rejected": -4.877924919128418, |
|
"loss": 2.4145, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -42.61449432373047, |
|
"rewards/margins": 6.164754867553711, |
|
"rewards/rejected": -48.77925109863281, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.6446140797285835, |
|
"grad_norm": 142.52301773899467, |
|
"learning_rate": 2.709871862184063e-07, |
|
"logits/chosen": -0.9420406818389893, |
|
"logits/rejected": -0.9535735249519348, |
|
"logps/chosen": -4.204644680023193, |
|
"logps/rejected": -4.8350419998168945, |
|
"loss": 2.8569, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -42.046443939208984, |
|
"rewards/margins": 6.303977012634277, |
|
"rewards/rejected": -48.35041809082031, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6468758835171049, |
|
"grad_norm": 116.83614175305792, |
|
"learning_rate": 2.679950842326837e-07, |
|
"logits/chosen": -0.9828240275382996, |
|
"logits/rejected": -0.9906516671180725, |
|
"logps/chosen": -4.424816608428955, |
|
"logps/rejected": -5.189925193786621, |
|
"loss": 2.3291, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -44.248165130615234, |
|
"rewards/margins": 7.651082515716553, |
|
"rewards/rejected": -51.89924621582031, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.6491376873056263, |
|
"grad_norm": 119.1863932565742, |
|
"learning_rate": 2.6501124845954363e-07, |
|
"logits/chosen": -0.9583408832550049, |
|
"logits/rejected": -0.9792217016220093, |
|
"logps/chosen": -4.437569618225098, |
|
"logps/rejected": -5.259509086608887, |
|
"loss": 2.0703, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -44.375701904296875, |
|
"rewards/margins": 8.219388961791992, |
|
"rewards/rejected": -52.595088958740234, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.6513994910941476, |
|
"grad_norm": 135.34347421839638, |
|
"learning_rate": 2.62035865748246e-07, |
|
"logits/chosen": -0.942946195602417, |
|
"logits/rejected": -0.9585049748420715, |
|
"logps/chosen": -4.206583023071289, |
|
"logps/rejected": -4.9610395431518555, |
|
"loss": 2.3861, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -42.065826416015625, |
|
"rewards/margins": 7.544568061828613, |
|
"rewards/rejected": -49.61039733886719, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.6536612948826689, |
|
"grad_norm": 135.7054533746377, |
|
"learning_rate": 2.5906912241871554e-07, |
|
"logits/chosen": -1.0347181558609009, |
|
"logits/rejected": -1.0334982872009277, |
|
"logps/chosen": -4.558216094970703, |
|
"logps/rejected": -5.3071675300598145, |
|
"loss": 2.4851, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -45.5821647644043, |
|
"rewards/margins": 7.489511013031006, |
|
"rewards/rejected": -53.07167434692383, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.6559230986711903, |
|
"grad_norm": 146.88601624122214, |
|
"learning_rate": 2.561112042498753e-07, |
|
"logits/chosen": -0.898168683052063, |
|
"logits/rejected": -0.9251189231872559, |
|
"logps/chosen": -4.299272537231445, |
|
"logps/rejected": -4.885739326477051, |
|
"loss": 3.2414, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -42.99272155761719, |
|
"rewards/margins": 5.864667892456055, |
|
"rewards/rejected": -48.857391357421875, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6581849024597116, |
|
"grad_norm": 149.6582755236179, |
|
"learning_rate": 2.5316229646801195e-07, |
|
"logits/chosen": -0.9464926719665527, |
|
"logits/rejected": -0.9794542789459229, |
|
"logps/chosen": -4.87380313873291, |
|
"logps/rejected": -5.561539173126221, |
|
"loss": 2.6496, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -48.7380256652832, |
|
"rewards/margins": 6.877363204956055, |
|
"rewards/rejected": -55.61539077758789, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.660446706248233, |
|
"grad_norm": 145.17127169522513, |
|
"learning_rate": 2.5022258373517714e-07, |
|
"logits/chosen": -1.0149282217025757, |
|
"logits/rejected": -1.0269863605499268, |
|
"logps/chosen": -4.728174686431885, |
|
"logps/rejected": -5.443430423736572, |
|
"loss": 2.3251, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -47.281742095947266, |
|
"rewards/margins": 7.152561187744141, |
|
"rewards/rejected": -54.43430709838867, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.6627085100367544, |
|
"grad_norm": 163.61207833360584, |
|
"learning_rate": 2.4729225013762474e-07, |
|
"logits/chosen": -1.0522814989089966, |
|
"logits/rejected": -1.0629031658172607, |
|
"logps/chosen": -5.014071464538574, |
|
"logps/rejected": -5.679079532623291, |
|
"loss": 3.3606, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -50.140708923339844, |
|
"rewards/margins": 6.650084972381592, |
|
"rewards/rejected": -56.79079818725586, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.6649703138252756, |
|
"grad_norm": 160.99675359530877, |
|
"learning_rate": 2.4437147917428203e-07, |
|
"logits/chosen": -0.9571595191955566, |
|
"logits/rejected": -0.9781152009963989, |
|
"logps/chosen": -4.976321220397949, |
|
"logps/rejected": -5.786618232727051, |
|
"loss": 2.5064, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -49.76321029663086, |
|
"rewards/margins": 8.102978706359863, |
|
"rewards/rejected": -57.866188049316406, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.667232117613797, |
|
"grad_norm": 177.52031008687038, |
|
"learning_rate": 2.414604537452595e-07, |
|
"logits/chosen": -0.9503519535064697, |
|
"logits/rejected": -0.9574546813964844, |
|
"logps/chosen": -4.793954372406006, |
|
"logps/rejected": -5.45725679397583, |
|
"loss": 2.7098, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -47.939544677734375, |
|
"rewards/margins": 6.633028030395508, |
|
"rewards/rejected": -54.572574615478516, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6694939214023183, |
|
"grad_norm": 177.92862527487938, |
|
"learning_rate": 2.385593561403974e-07, |
|
"logits/chosen": -0.9907506704330444, |
|
"logits/rejected": -1.007781744003296, |
|
"logps/chosen": -4.650373935699463, |
|
"logps/rejected": -5.429416179656982, |
|
"loss": 2.1979, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -46.50373840332031, |
|
"rewards/margins": 7.790426254272461, |
|
"rewards/rejected": -54.29416275024414, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.6717557251908397, |
|
"grad_norm": 139.27180823243322, |
|
"learning_rate": 2.3566836802785119e-07, |
|
"logits/chosen": -0.9892964363098145, |
|
"logits/rejected": -1.0037226676940918, |
|
"logps/chosen": -4.7973175048828125, |
|
"logps/rejected": -5.591341972351074, |
|
"loss": 2.467, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -47.973182678222656, |
|
"rewards/margins": 7.940241813659668, |
|
"rewards/rejected": -55.913421630859375, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.6740175289793611, |
|
"grad_norm": 153.97560579826447, |
|
"learning_rate": 2.327876704427146e-07, |
|
"logits/chosen": -0.9428931474685669, |
|
"logits/rejected": -0.9553389549255371, |
|
"logps/chosen": -4.640042304992676, |
|
"logps/rejected": -5.214030742645264, |
|
"loss": 2.8868, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -46.40042495727539, |
|
"rewards/margins": 5.739879608154297, |
|
"rewards/rejected": -52.14030838012695, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.6762793327678824, |
|
"grad_norm": 348.3808116966291, |
|
"learning_rate": 2.2991744377568358e-07, |
|
"logits/chosen": -0.9435532093048096, |
|
"logits/rejected": -0.9605557322502136, |
|
"logps/chosen": -4.827619552612305, |
|
"logps/rejected": -5.459317207336426, |
|
"loss": 2.9575, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -48.27620315551758, |
|
"rewards/margins": 6.316972255706787, |
|
"rewards/rejected": -54.593170166015625, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.6785411365564037, |
|
"grad_norm": 179.28273430158572, |
|
"learning_rate": 2.270578677617601e-07, |
|
"logits/chosen": -1.0008050203323364, |
|
"logits/rejected": -0.9990091323852539, |
|
"logps/chosen": -4.594552993774414, |
|
"logps/rejected": -5.334099292755127, |
|
"loss": 3.3764, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -45.945526123046875, |
|
"rewards/margins": 7.395462512969971, |
|
"rewards/rejected": -53.34099197387695, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6808029403449251, |
|
"grad_norm": 151.0565233114858, |
|
"learning_rate": 2.242091214689971e-07, |
|
"logits/chosen": -0.9774994850158691, |
|
"logits/rejected": -1.0008808374404907, |
|
"logps/chosen": -4.770328044891357, |
|
"logps/rejected": -5.569509506225586, |
|
"loss": 2.2267, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -47.70328140258789, |
|
"rewards/margins": 7.991814613342285, |
|
"rewards/rejected": -55.69509506225586, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.6830647441334464, |
|
"grad_norm": 159.89458772534127, |
|
"learning_rate": 2.2137138328728456e-07, |
|
"logits/chosen": -1.0418307781219482, |
|
"logits/rejected": -1.0276165008544922, |
|
"logps/chosen": -4.720486164093018, |
|
"logps/rejected": -5.296718120574951, |
|
"loss": 2.6692, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -47.20486068725586, |
|
"rewards/margins": 5.762320518493652, |
|
"rewards/rejected": -52.96717834472656, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.6853265479219678, |
|
"grad_norm": 126.48151913696319, |
|
"learning_rate": 2.1854483091717974e-07, |
|
"logits/chosen": -1.0288279056549072, |
|
"logits/rejected": -1.0501220226287842, |
|
"logps/chosen": -4.512904167175293, |
|
"logps/rejected": -5.211638927459717, |
|
"loss": 2.3552, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -45.12904357910156, |
|
"rewards/margins": 6.987349987030029, |
|
"rewards/rejected": -52.116390228271484, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.6875883517104892, |
|
"grad_norm": 135.94321383588283, |
|
"learning_rate": 2.1572964135877863e-07, |
|
"logits/chosen": -0.9905492067337036, |
|
"logits/rejected": -1.0089741945266724, |
|
"logps/chosen": -4.556299686431885, |
|
"logps/rejected": -5.186739921569824, |
|
"loss": 2.8414, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -45.5629997253418, |
|
"rewards/margins": 6.3044047355651855, |
|
"rewards/rejected": -51.867401123046875, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.6898501554990104, |
|
"grad_norm": 125.82619317763663, |
|
"learning_rate": 2.1292599090063245e-07, |
|
"logits/chosen": -1.0182273387908936, |
|
"logits/rejected": -1.0373239517211914, |
|
"logps/chosen": -4.384622573852539, |
|
"logps/rejected": -5.199775695800781, |
|
"loss": 2.0894, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -43.84622573852539, |
|
"rewards/margins": 8.151532173156738, |
|
"rewards/rejected": -51.99775695800781, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6921119592875318, |
|
"grad_norm": 144.33359341912222, |
|
"learning_rate": 2.1013405510870824e-07, |
|
"logits/chosen": -0.9348523020744324, |
|
"logits/rejected": -0.9837851524353027, |
|
"logps/chosen": -4.506128311157227, |
|
"logps/rejected": -5.2762885093688965, |
|
"loss": 2.0643, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -45.06128692626953, |
|
"rewards/margins": 7.701597690582275, |
|
"rewards/rejected": -52.762882232666016, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.6943737630760531, |
|
"grad_norm": 137.68267743107825, |
|
"learning_rate": 2.0735400881539494e-07, |
|
"logits/chosen": -0.9666372537612915, |
|
"logits/rejected": -0.9910269975662231, |
|
"logps/chosen": -4.676998138427734, |
|
"logps/rejected": -5.494973182678223, |
|
"loss": 2.4669, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -46.76997756958008, |
|
"rewards/margins": 8.179755210876465, |
|
"rewards/rejected": -54.949729919433594, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.6966355668645745, |
|
"grad_norm": 136.55337779354605, |
|
"learning_rate": 2.0458602610855536e-07, |
|
"logits/chosen": -1.0127683877944946, |
|
"logits/rejected": -1.0208343267440796, |
|
"logps/chosen": -4.608272075653076, |
|
"logps/rejected": -5.278717517852783, |
|
"loss": 2.1998, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -46.08271789550781, |
|
"rewards/margins": 6.704453945159912, |
|
"rewards/rejected": -52.787174224853516, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.6988973706530959, |
|
"grad_norm": 150.35850786814652, |
|
"learning_rate": 2.0183028032062422e-07, |
|
"logits/chosen": -0.9884274005889893, |
|
"logits/rejected": -1.007688045501709, |
|
"logps/chosen": -4.597423076629639, |
|
"logps/rejected": -5.278217315673828, |
|
"loss": 2.7826, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -45.97422790527344, |
|
"rewards/margins": 6.807945251464844, |
|
"rewards/rejected": -52.78217697143555, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.7011591744416172, |
|
"grad_norm": 151.16788260358283, |
|
"learning_rate": 1.9908694401775473e-07, |
|
"logits/chosen": -1.014832854270935, |
|
"logits/rejected": -1.0308024883270264, |
|
"logps/chosen": -4.622461318969727, |
|
"logps/rejected": -5.307525634765625, |
|
"loss": 2.4588, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -46.22461700439453, |
|
"rewards/margins": 6.8506388664245605, |
|
"rewards/rejected": -53.075252532958984, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7034209782301385, |
|
"grad_norm": 143.10054441095096, |
|
"learning_rate": 1.9635618898901196e-07, |
|
"logits/chosen": -1.0075442790985107, |
|
"logits/rejected": -1.0087586641311646, |
|
"logps/chosen": -5.013138771057129, |
|
"logps/rejected": -5.722409725189209, |
|
"loss": 2.7131, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -50.13139343261719, |
|
"rewards/margins": 7.092708587646484, |
|
"rewards/rejected": -57.22409439086914, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.7056827820186599, |
|
"grad_norm": 126.69180335404305, |
|
"learning_rate": 1.9363818623561565e-07, |
|
"logits/chosen": -0.9536194801330566, |
|
"logits/rejected": -0.9867286682128906, |
|
"logps/chosen": -4.637800693511963, |
|
"logps/rejected": -5.396078109741211, |
|
"loss": 2.3183, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -46.37800598144531, |
|
"rewards/margins": 7.582779884338379, |
|
"rewards/rejected": -53.960784912109375, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.7079445858071812, |
|
"grad_norm": 120.13641288355163, |
|
"learning_rate": 1.9093310596023108e-07, |
|
"logits/chosen": -0.9519625902175903, |
|
"logits/rejected": -0.9601365923881531, |
|
"logps/chosen": -4.475955009460449, |
|
"logps/rejected": -5.384003639221191, |
|
"loss": 2.1502, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -44.759552001953125, |
|
"rewards/margins": 9.080483436584473, |
|
"rewards/rejected": -53.84003448486328, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.7102063895957026, |
|
"grad_norm": 157.80538513211965, |
|
"learning_rate": 1.8824111755631274e-07, |
|
"logits/chosen": -1.0193778276443481, |
|
"logits/rejected": -1.0384585857391357, |
|
"logps/chosen": -4.5386857986450195, |
|
"logps/rejected": -5.142300605773926, |
|
"loss": 2.9757, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -45.38685989379883, |
|
"rewards/margins": 6.036149024963379, |
|
"rewards/rejected": -51.42300796508789, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.712468193384224, |
|
"grad_norm": 181.18937775062142, |
|
"learning_rate": 1.8556238959749457e-07, |
|
"logits/chosen": -0.9791902303695679, |
|
"logits/rejected": -0.9818335771560669, |
|
"logps/chosen": -4.927301406860352, |
|
"logps/rejected": -5.432847023010254, |
|
"loss": 3.4906, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -49.273006439208984, |
|
"rewards/margins": 5.055453777313232, |
|
"rewards/rejected": -54.32846450805664, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7147299971727452, |
|
"grad_norm": 193.86258885290945, |
|
"learning_rate": 1.8289708982703562e-07, |
|
"logits/chosen": -0.9614027142524719, |
|
"logits/rejected": -0.936948299407959, |
|
"logps/chosen": -4.739333152770996, |
|
"logps/rejected": -5.496729373931885, |
|
"loss": 3.2191, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -47.3933219909668, |
|
"rewards/margins": 7.573972702026367, |
|
"rewards/rejected": -54.9672966003418, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.7169918009612666, |
|
"grad_norm": 144.60961684057406, |
|
"learning_rate": 1.802453851473151e-07, |
|
"logits/chosen": -1.0016368627548218, |
|
"logits/rejected": -1.0029720067977905, |
|
"logps/chosen": -4.8493804931640625, |
|
"logps/rejected": -5.593348503112793, |
|
"loss": 2.2796, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -48.493804931640625, |
|
"rewards/margins": 7.4396843910217285, |
|
"rewards/rejected": -55.93348693847656, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.719253604749788, |
|
"grad_norm": 131.40245309400345, |
|
"learning_rate": 1.7760744160938093e-07, |
|
"logits/chosen": -0.9554572701454163, |
|
"logits/rejected": -0.9705080986022949, |
|
"logps/chosen": -4.564050197601318, |
|
"logps/rejected": -5.474889278411865, |
|
"loss": 2.351, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -45.6405029296875, |
|
"rewards/margins": 9.108393669128418, |
|
"rewards/rejected": -54.748897552490234, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.7215154085383093, |
|
"grad_norm": 155.43325768660566, |
|
"learning_rate": 1.7498342440255135e-07, |
|
"logits/chosen": -1.00507390499115, |
|
"logits/rejected": -1.0052752494812012, |
|
"logps/chosen": -4.724150657653809, |
|
"logps/rejected": -5.360733985900879, |
|
"loss": 2.5509, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -47.24150848388672, |
|
"rewards/margins": 6.365832805633545, |
|
"rewards/rejected": -53.60734176635742, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.7237772123268307, |
|
"grad_norm": 131.09223849172065, |
|
"learning_rate": 1.7237349784407115e-07, |
|
"logits/chosen": -1.0048574209213257, |
|
"logits/rejected": -1.0041303634643555, |
|
"logps/chosen": -4.809683799743652, |
|
"logps/rejected": -5.5847930908203125, |
|
"loss": 2.2834, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -48.09683609008789, |
|
"rewards/margins": 7.751087665557861, |
|
"rewards/rejected": -55.847923278808594, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.726039016115352, |
|
"grad_norm": 135.5800803898602, |
|
"learning_rate": 1.6977782536882178e-07, |
|
"logits/chosen": -0.934962809085846, |
|
"logits/rejected": -0.9414641857147217, |
|
"logps/chosen": -4.202589511871338, |
|
"logps/rejected": -5.056532859802246, |
|
"loss": 2.3171, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -42.02588653564453, |
|
"rewards/margins": 8.539436340332031, |
|
"rewards/rejected": -50.565330505371094, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.7283008199038733, |
|
"grad_norm": 114.28778261159624, |
|
"learning_rate": 1.6719656951908708e-07, |
|
"logits/chosen": -0.9354251027107239, |
|
"logits/rejected": -0.961618185043335, |
|
"logps/chosen": -4.163588047027588, |
|
"logps/rejected": -4.967078685760498, |
|
"loss": 2.2743, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -41.63587951660156, |
|
"rewards/margins": 8.034907341003418, |
|
"rewards/rejected": -49.6707878112793, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.7305626236923947, |
|
"grad_norm": 149.49164825917453, |
|
"learning_rate": 1.6462989193437453e-07, |
|
"logits/chosen": -1.0084627866744995, |
|
"logits/rejected": -1.0145008563995361, |
|
"logps/chosen": -4.514183521270752, |
|
"logps/rejected": -5.200974464416504, |
|
"loss": 2.7438, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -45.14183044433594, |
|
"rewards/margins": 6.867916584014893, |
|
"rewards/rejected": -52.00975036621094, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.732824427480916, |
|
"grad_norm": 164.51017995267839, |
|
"learning_rate": 1.6207795334129365e-07, |
|
"logits/chosen": -0.9625239968299866, |
|
"logits/rejected": -0.969192385673523, |
|
"logps/chosen": -4.7873430252075195, |
|
"logps/rejected": -5.481894016265869, |
|
"loss": 2.7085, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -47.8734245300293, |
|
"rewards/margins": 6.945512771606445, |
|
"rewards/rejected": -54.81894302368164, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.7350862312694374, |
|
"grad_norm": 176.57340901183855, |
|
"learning_rate": 1.5954091354349121e-07, |
|
"logits/chosen": -0.9980968832969666, |
|
"logits/rejected": -1.008570909500122, |
|
"logps/chosen": -4.490519046783447, |
|
"logps/rejected": -5.037813663482666, |
|
"loss": 3.1544, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -44.905189514160156, |
|
"rewards/margins": 5.4729437828063965, |
|
"rewards/rejected": -50.378135681152344, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7373480350579588, |
|
"grad_norm": 291.93760275647674, |
|
"learning_rate": 1.5701893141164364e-07, |
|
"logits/chosen": -0.9874584674835205, |
|
"logits/rejected": -0.9985488653182983, |
|
"logps/chosen": -4.682331085205078, |
|
"logps/rejected": -5.434433937072754, |
|
"loss": 2.9428, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -46.823307037353516, |
|
"rewards/margins": 7.52103853225708, |
|
"rewards/rejected": -54.34434509277344, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.73960983884648, |
|
"grad_norm": 159.05107721522452, |
|
"learning_rate": 1.545121648735093e-07, |
|
"logits/chosen": -0.9645061492919922, |
|
"logits/rejected": -0.9795565605163574, |
|
"logps/chosen": -4.574826717376709, |
|
"logps/rejected": -5.205280303955078, |
|
"loss": 2.9854, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -45.748268127441406, |
|
"rewards/margins": 6.304535865783691, |
|
"rewards/rejected": -52.05281066894531, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.7418716426350014, |
|
"grad_norm": 136.1527260388613, |
|
"learning_rate": 1.5202077090403863e-07, |
|
"logits/chosen": -0.99421226978302, |
|
"logits/rejected": -0.9890978336334229, |
|
"logps/chosen": -4.0757365226745605, |
|
"logps/rejected": -4.764214038848877, |
|
"loss": 2.4082, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -40.75736999511719, |
|
"rewards/margins": 6.884768486022949, |
|
"rewards/rejected": -47.64213562011719, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.7441334464235227, |
|
"grad_norm": 148.6404714088356, |
|
"learning_rate": 1.495449055155443e-07, |
|
"logits/chosen": -0.9893301725387573, |
|
"logits/rejected": -0.9949851036071777, |
|
"logps/chosen": -4.401925086975098, |
|
"logps/rejected": -5.235118865966797, |
|
"loss": 2.1108, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -44.019248962402344, |
|
"rewards/margins": 8.331939697265625, |
|
"rewards/rejected": -52.3511848449707, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.7463952502120441, |
|
"grad_norm": 146.90492941982063, |
|
"learning_rate": 1.4708472374793112e-07, |
|
"logits/chosen": -0.9245947599411011, |
|
"logits/rejected": -0.9359519481658936, |
|
"logps/chosen": -4.438968658447266, |
|
"logps/rejected": -4.909884452819824, |
|
"loss": 3.6505, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -44.389686584472656, |
|
"rewards/margins": 4.709160804748535, |
|
"rewards/rejected": -49.09885025024414, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7486570540005655, |
|
"grad_norm": 134.59594283680065, |
|
"learning_rate": 1.4464037965898878e-07, |
|
"logits/chosen": -0.9060376882553101, |
|
"logits/rejected": -0.9134998321533203, |
|
"logps/chosen": -4.321666717529297, |
|
"logps/rejected": -4.94952392578125, |
|
"loss": 2.7636, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -43.216670989990234, |
|
"rewards/margins": 6.278566360473633, |
|
"rewards/rejected": -49.4952392578125, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.7509188577890868, |
|
"grad_norm": 135.57897482412505, |
|
"learning_rate": 1.4221202631474282e-07, |
|
"logits/chosen": -0.9183069467544556, |
|
"logits/rejected": -0.9207398891448975, |
|
"logps/chosen": -4.24176549911499, |
|
"logps/rejected": -4.887630462646484, |
|
"loss": 2.6764, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -42.417659759521484, |
|
"rewards/margins": 6.458644866943359, |
|
"rewards/rejected": -48.876304626464844, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.7531806615776081, |
|
"grad_norm": 132.11305657314077, |
|
"learning_rate": 1.3979981577987113e-07, |
|
"logits/chosen": -0.9489963054656982, |
|
"logits/rejected": -0.9431209564208984, |
|
"logps/chosen": -3.9949569702148438, |
|
"logps/rejected": -4.739992141723633, |
|
"loss": 2.4723, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -39.94956588745117, |
|
"rewards/margins": 7.450351238250732, |
|
"rewards/rejected": -47.39991760253906, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.7554424653661295, |
|
"grad_norm": 136.71421838648521, |
|
"learning_rate": 1.374038991081807e-07, |
|
"logits/chosen": -0.9887750148773193, |
|
"logits/rejected": -0.9911076426506042, |
|
"logps/chosen": -4.311697483062744, |
|
"logps/rejected": -4.892322540283203, |
|
"loss": 2.8695, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -43.11697769165039, |
|
"rewards/margins": 5.80624532699585, |
|
"rewards/rejected": -48.923221588134766, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.7577042691546508, |
|
"grad_norm": 129.76539286228083, |
|
"learning_rate": 1.3502442633314882e-07, |
|
"logits/chosen": -0.9298081994056702, |
|
"logits/rejected": -0.9281089305877686, |
|
"logps/chosen": -3.8088793754577637, |
|
"logps/rejected": -4.399178981781006, |
|
"loss": 2.568, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -38.08879470825195, |
|
"rewards/margins": 5.902992248535156, |
|
"rewards/rejected": -43.991790771484375, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7599660729431722, |
|
"grad_norm": 133.48386572014678, |
|
"learning_rate": 1.3266154645852815e-07, |
|
"logits/chosen": -0.9325582981109619, |
|
"logits/rejected": -0.9300287961959839, |
|
"logps/chosen": -4.226827621459961, |
|
"logps/rejected": -4.8680830001831055, |
|
"loss": 2.4884, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -42.268280029296875, |
|
"rewards/margins": 6.4125542640686035, |
|
"rewards/rejected": -48.68083190917969, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.7622278767316936, |
|
"grad_norm": 155.31497007928593, |
|
"learning_rate": 1.303154074490152e-07, |
|
"logits/chosen": -0.9856913685798645, |
|
"logits/rejected": -0.9596735239028931, |
|
"logps/chosen": -4.013852119445801, |
|
"logps/rejected": -4.661082744598389, |
|
"loss": 2.8335, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -40.13852310180664, |
|
"rewards/margins": 6.472311973571777, |
|
"rewards/rejected": -46.6108283996582, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.7644896805202148, |
|
"grad_norm": 139.18511727180848, |
|
"learning_rate": 1.2798615622098616e-07, |
|
"logits/chosen": -0.9739874601364136, |
|
"logits/rejected": -0.9792473316192627, |
|
"logps/chosen": -3.9729883670806885, |
|
"logps/rejected": -4.684140205383301, |
|
"loss": 2.743, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -39.729881286621094, |
|
"rewards/margins": 7.111522674560547, |
|
"rewards/rejected": -46.841407775878906, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.7667514843087362, |
|
"grad_norm": 132.3553794311643, |
|
"learning_rate": 1.2567393863329523e-07, |
|
"logits/chosen": -0.9450173377990723, |
|
"logits/rejected": -0.9746794104576111, |
|
"logps/chosen": -4.164608478546143, |
|
"logps/rejected": -4.965396404266357, |
|
"loss": 2.3621, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -41.646080017089844, |
|
"rewards/margins": 8.00788688659668, |
|
"rewards/rejected": -49.65396499633789, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.7690132880972576, |
|
"grad_norm": 122.61541284883357, |
|
"learning_rate": 1.233788994781423e-07, |
|
"logits/chosen": -0.9901926517486572, |
|
"logits/rejected": -1.0221422910690308, |
|
"logps/chosen": -4.009914875030518, |
|
"logps/rejected": -4.677107334136963, |
|
"loss": 2.2977, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -40.099151611328125, |
|
"rewards/margins": 6.671916961669922, |
|
"rewards/rejected": -46.77106475830078, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7712750918857789, |
|
"grad_norm": 128.54731960578127, |
|
"learning_rate": 1.2110118247200468e-07, |
|
"logits/chosen": -0.98152756690979, |
|
"logits/rejected": -0.9877326488494873, |
|
"logps/chosen": -3.989288568496704, |
|
"logps/rejected": -4.7238030433654785, |
|
"loss": 2.0917, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -39.89288330078125, |
|
"rewards/margins": 7.3451457023620605, |
|
"rewards/rejected": -47.23802947998047, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.7735368956743003, |
|
"grad_norm": 152.52119821185264, |
|
"learning_rate": 1.1884093024663933e-07, |
|
"logits/chosen": -0.9875534772872925, |
|
"logits/rejected": -0.9850603342056274, |
|
"logps/chosen": -3.8040857315063477, |
|
"logps/rejected": -4.616514682769775, |
|
"loss": 2.5956, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -38.04085922241211, |
|
"rewards/margins": 8.124288558959961, |
|
"rewards/rejected": -46.16515350341797, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.7757986994628217, |
|
"grad_norm": 137.49550762814457, |
|
"learning_rate": 1.1659828434014886e-07, |
|
"logits/chosen": -0.9853401780128479, |
|
"logits/rejected": -0.9595975279808044, |
|
"logps/chosen": -3.8625781536102295, |
|
"logps/rejected": -4.667750835418701, |
|
"loss": 2.3451, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -38.62577819824219, |
|
"rewards/margins": 8.051729202270508, |
|
"rewards/rejected": -46.67750549316406, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.7780605032513429, |
|
"grad_norm": 151.81630882151333, |
|
"learning_rate": 1.143733851881203e-07, |
|
"logits/chosen": -1.016958236694336, |
|
"logits/rejected": -1.0146615505218506, |
|
"logps/chosen": -4.202175617218018, |
|
"logps/rejected": -5.043542861938477, |
|
"loss": 2.3046, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -42.02175521850586, |
|
"rewards/margins": 8.413676261901855, |
|
"rewards/rejected": -50.4354248046875, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.7803223070398643, |
|
"grad_norm": 132.32110407207458, |
|
"learning_rate": 1.1216637211483005e-07, |
|
"logits/chosen": -0.9708240032196045, |
|
"logits/rejected": -0.9722229242324829, |
|
"logps/chosen": -4.108154773712158, |
|
"logps/rejected": -4.720202922821045, |
|
"loss": 2.8093, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -41.08155059814453, |
|
"rewards/margins": 6.120481014251709, |
|
"rewards/rejected": -47.20203399658203, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7825841108283856, |
|
"grad_norm": 145.16817928123578, |
|
"learning_rate": 1.0997738332451936e-07, |
|
"logits/chosen": -0.9453762769699097, |
|
"logits/rejected": -0.94859778881073, |
|
"logps/chosen": -4.420743465423584, |
|
"logps/rejected": -5.103504180908203, |
|
"loss": 2.555, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -44.20743179321289, |
|
"rewards/margins": 6.827612400054932, |
|
"rewards/rejected": -51.03504943847656, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.784845914616907, |
|
"grad_norm": 152.89585809443537, |
|
"learning_rate": 1.0780655589274031e-07, |
|
"logits/chosen": -1.0250661373138428, |
|
"logits/rejected": -1.000236988067627, |
|
"logps/chosen": -4.187366962432861, |
|
"logps/rejected": -4.821606636047363, |
|
"loss": 2.2597, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -41.8736686706543, |
|
"rewards/margins": 6.34239387512207, |
|
"rewards/rejected": -48.216064453125, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.7871077184054284, |
|
"grad_norm": 127.0780346687704, |
|
"learning_rate": 1.056540257577712e-07, |
|
"logits/chosen": -0.9302332997322083, |
|
"logits/rejected": -0.9465296268463135, |
|
"logps/chosen": -4.749317646026611, |
|
"logps/rejected": -5.628003120422363, |
|
"loss": 1.8934, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -47.49317932128906, |
|
"rewards/margins": 8.786852836608887, |
|
"rewards/rejected": -56.280033111572266, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.7893695221939496, |
|
"grad_norm": 126.51351161185914, |
|
"learning_rate": 1.0351992771210554e-07, |
|
"logits/chosen": -0.9633577466011047, |
|
"logits/rejected": -0.970734179019928, |
|
"logps/chosen": -4.292740821838379, |
|
"logps/rejected": -5.064752578735352, |
|
"loss": 2.5149, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -42.92741012573242, |
|
"rewards/margins": 7.720117092132568, |
|
"rewards/rejected": -50.647525787353516, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.791631325982471, |
|
"grad_norm": 166.01639039281858, |
|
"learning_rate": 1.0140439539400953e-07, |
|
"logits/chosen": -0.9338628649711609, |
|
"logits/rejected": -0.950715184211731, |
|
"logps/chosen": -4.245103359222412, |
|
"logps/rejected": -4.8726959228515625, |
|
"loss": 3.156, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -42.4510383605957, |
|
"rewards/margins": 6.275918483734131, |
|
"rewards/rejected": -48.72695541381836, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7938931297709924, |
|
"grad_norm": 131.0281285552435, |
|
"learning_rate": 9.930756127915488e-08, |
|
"logits/chosen": -0.9479851722717285, |
|
"logits/rejected": -0.9509535431861877, |
|
"logps/chosen": -4.242725372314453, |
|
"logps/rejected": -4.937259674072266, |
|
"loss": 2.4118, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -42.4272575378418, |
|
"rewards/margins": 6.945339679718018, |
|
"rewards/rejected": -49.37260055541992, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.7961549335595137, |
|
"grad_norm": 159.53197135066353, |
|
"learning_rate": 9.722955667232242e-08, |
|
"logits/chosen": -0.9826103448867798, |
|
"logits/rejected": -0.9935145378112793, |
|
"logps/chosen": -4.630585670471191, |
|
"logps/rejected": -5.20355224609375, |
|
"loss": 3.0279, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -46.305850982666016, |
|
"rewards/margins": 5.729671955108643, |
|
"rewards/rejected": -52.0355224609375, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.7984167373480351, |
|
"grad_norm": 131.6547821736773, |
|
"learning_rate": 9.517051169918016e-08, |
|
"logits/chosen": -0.9904384613037109, |
|
"logits/rejected": -0.9811457991600037, |
|
"logps/chosen": -4.245575904846191, |
|
"logps/rejected": -4.880057334899902, |
|
"loss": 3.1398, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -42.45575714111328, |
|
"rewards/margins": 6.344809532165527, |
|
"rewards/rejected": -48.800567626953125, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.8006785411365565, |
|
"grad_norm": 142.9124100481491, |
|
"learning_rate": 9.313055529813412e-08, |
|
"logits/chosen": -0.9195039868354797, |
|
"logits/rejected": -0.9531816244125366, |
|
"logps/chosen": -4.369924545288086, |
|
"logps/rejected": -5.106759071350098, |
|
"loss": 2.3245, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -43.69923782348633, |
|
"rewards/margins": 7.368349552154541, |
|
"rewards/rejected": -51.067588806152344, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.8029403449250777, |
|
"grad_norm": 141.5454480732975, |
|
"learning_rate": 9.110981521225532e-08, |
|
"logits/chosen": -0.9761883616447449, |
|
"logits/rejected": -0.9907628297805786, |
|
"logps/chosen": -4.390708923339844, |
|
"logps/rejected": -5.035253047943115, |
|
"loss": 2.7619, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -43.907081604003906, |
|
"rewards/margins": 6.445442199707031, |
|
"rewards/rejected": -50.35253143310547, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.8052021487135991, |
|
"grad_norm": 141.24588926834747, |
|
"learning_rate": 8.910841798127884e-08, |
|
"logits/chosen": -0.932433545589447, |
|
"logits/rejected": -0.9380560517311096, |
|
"logps/chosen": -4.490683555603027, |
|
"logps/rejected": -5.1833906173706055, |
|
"loss": 2.4937, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -44.906837463378906, |
|
"rewards/margins": 6.927071571350098, |
|
"rewards/rejected": -51.83391189575195, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.8074639525021204, |
|
"grad_norm": 153.489266818857, |
|
"learning_rate": 8.712648893368139e-08, |
|
"logits/chosen": -0.9466272592544556, |
|
"logits/rejected": -0.9827693700790405, |
|
"logps/chosen": -4.324895858764648, |
|
"logps/rejected": -5.1728057861328125, |
|
"loss": 2.4164, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -43.24895477294922, |
|
"rewards/margins": 8.479098320007324, |
|
"rewards/rejected": -51.72805404663086, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.8097257562906418, |
|
"grad_norm": 134.69471486150493, |
|
"learning_rate": 8.516415217883186e-08, |
|
"logits/chosen": -0.9450441598892212, |
|
"logits/rejected": -0.9372915029525757, |
|
"logps/chosen": -4.20553731918335, |
|
"logps/rejected": -5.077816486358643, |
|
"loss": 2.1505, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -42.05537796020508, |
|
"rewards/margins": 8.722793579101562, |
|
"rewards/rejected": -50.778167724609375, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.8119875600791632, |
|
"grad_norm": 172.05066369257742, |
|
"learning_rate": 8.32215305992209e-08, |
|
"logits/chosen": -0.9893842935562134, |
|
"logits/rejected": -1.0069129467010498, |
|
"logps/chosen": -4.14910888671875, |
|
"logps/rejected": -4.812809944152832, |
|
"loss": 2.9886, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -41.491092681884766, |
|
"rewards/margins": 6.6370015144348145, |
|
"rewards/rejected": -48.12809371948242, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.8142493638676844, |
|
"grad_norm": 118.45196699685957, |
|
"learning_rate": 8.129874584276448e-08, |
|
"logits/chosen": -0.9478542804718018, |
|
"logits/rejected": -0.9472507834434509, |
|
"logps/chosen": -4.2414398193359375, |
|
"logps/rejected": -5.106539726257324, |
|
"loss": 1.8634, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -42.41439437866211, |
|
"rewards/margins": 8.650999069213867, |
|
"rewards/rejected": -51.06539535522461, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8165111676562058, |
|
"grad_norm": 147.08542318664797, |
|
"learning_rate": 7.939591831518746e-08, |
|
"logits/chosen": -0.9485371112823486, |
|
"logits/rejected": -0.9716846942901611, |
|
"logps/chosen": -4.281550407409668, |
|
"logps/rejected": -4.866559028625488, |
|
"loss": 2.4179, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -42.81550979614258, |
|
"rewards/margins": 5.850086212158203, |
|
"rewards/rejected": -48.66558837890625, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.8187729714447272, |
|
"grad_norm": 140.7832629501282, |
|
"learning_rate": 7.751316717248304e-08, |
|
"logits/chosen": -0.9519385695457458, |
|
"logits/rejected": -0.963306188583374, |
|
"logps/chosen": -4.716574668884277, |
|
"logps/rejected": -5.629094123840332, |
|
"loss": 2.4426, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -47.165740966796875, |
|
"rewards/margins": 9.125191688537598, |
|
"rewards/rejected": -56.29093933105469, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.8210347752332485, |
|
"grad_norm": 161.95945831881926, |
|
"learning_rate": 7.565061031345142e-08, |
|
"logits/chosen": -0.9452296495437622, |
|
"logits/rejected": -0.9586248993873596, |
|
"logps/chosen": -4.896112442016602, |
|
"logps/rejected": -5.624301910400391, |
|
"loss": 2.4316, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -48.961124420166016, |
|
"rewards/margins": 7.281898498535156, |
|
"rewards/rejected": -56.24302673339844, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.8232965790217699, |
|
"grad_norm": 192.2424118773627, |
|
"learning_rate": 7.380836437231686e-08, |
|
"logits/chosen": -0.9347036480903625, |
|
"logits/rejected": -0.9242918491363525, |
|
"logps/chosen": -4.409902095794678, |
|
"logps/rejected": -5.111190319061279, |
|
"loss": 2.5825, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -44.099021911621094, |
|
"rewards/margins": 7.012877941131592, |
|
"rewards/rejected": -51.111900329589844, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.8255583828102913, |
|
"grad_norm": 124.6304811777762, |
|
"learning_rate": 7.198654471142371e-08, |
|
"logits/chosen": -0.9537335634231567, |
|
"logits/rejected": -0.9453788995742798, |
|
"logps/chosen": -4.486839771270752, |
|
"logps/rejected": -5.401633262634277, |
|
"loss": 2.3187, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -44.8683967590332, |
|
"rewards/margins": 9.14793872833252, |
|
"rewards/rejected": -54.016334533691406, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.8278201865988125, |
|
"grad_norm": 163.83416320888907, |
|
"learning_rate": 7.01852654140132e-08, |
|
"logits/chosen": -1.0016599893569946, |
|
"logits/rejected": -1.0151605606079102, |
|
"logps/chosen": -4.924201965332031, |
|
"logps/rejected": -5.710483074188232, |
|
"loss": 2.5367, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -49.24201965332031, |
|
"rewards/margins": 7.862810134887695, |
|
"rewards/rejected": -57.104827880859375, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.8300819903873339, |
|
"grad_norm": 144.1303882492354, |
|
"learning_rate": 6.840463927707833e-08, |
|
"logits/chosen": -0.9487999677658081, |
|
"logits/rejected": -0.9631211757659912, |
|
"logps/chosen": -4.902750015258789, |
|
"logps/rejected": -5.617919445037842, |
|
"loss": 2.5045, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -49.027503967285156, |
|
"rewards/margins": 7.1516900062561035, |
|
"rewards/rejected": -56.179195404052734, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.8323437941758552, |
|
"grad_norm": 144.87253676271803, |
|
"learning_rate": 6.664477780430138e-08, |
|
"logits/chosen": -0.952880859375, |
|
"logits/rejected": -0.9492167234420776, |
|
"logps/chosen": -4.776792049407959, |
|
"logps/rejected": -5.370887756347656, |
|
"loss": 2.717, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -47.767921447753906, |
|
"rewards/margins": 5.940953731536865, |
|
"rewards/rejected": -53.7088737487793, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.8346055979643766, |
|
"grad_norm": 184.09075370725176, |
|
"learning_rate": 6.49057911990711e-08, |
|
"logits/chosen": -0.9280183911323547, |
|
"logits/rejected": -0.9295321702957153, |
|
"logps/chosen": -4.6214423179626465, |
|
"logps/rejected": -5.288618087768555, |
|
"loss": 2.8896, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -46.21442413330078, |
|
"rewards/margins": 6.671757221221924, |
|
"rewards/rejected": -52.88618469238281, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.836867401752898, |
|
"grad_norm": 156.0668928651448, |
|
"learning_rate": 6.318778835758189e-08, |
|
"logits/chosen": -0.9676793813705444, |
|
"logits/rejected": -0.9772645235061646, |
|
"logps/chosen": -4.79730224609375, |
|
"logps/rejected": -5.520888328552246, |
|
"loss": 2.0356, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -47.9730224609375, |
|
"rewards/margins": 7.235863208770752, |
|
"rewards/rejected": -55.20888137817383, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8391292055414192, |
|
"grad_norm": 181.31703942045965, |
|
"learning_rate": 6.149087686201433e-08, |
|
"logits/chosen": -0.9715522527694702, |
|
"logits/rejected": -0.9828546047210693, |
|
"logps/chosen": -4.468597888946533, |
|
"logps/rejected": -5.149814605712891, |
|
"loss": 3.3273, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -44.685977935791016, |
|
"rewards/margins": 6.812170505523682, |
|
"rewards/rejected": -51.498146057128906, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.8413910093299406, |
|
"grad_norm": 157.64317964257543, |
|
"learning_rate": 5.98151629737988e-08, |
|
"logits/chosen": -0.9902997016906738, |
|
"logits/rejected": -0.9751971960067749, |
|
"logps/chosen": -4.583035469055176, |
|
"logps/rejected": -5.4460954666137695, |
|
"loss": 2.1937, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -45.83035659790039, |
|
"rewards/margins": 8.630597114562988, |
|
"rewards/rejected": -54.46095275878906, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.843652813118462, |
|
"grad_norm": 126.70730972040158, |
|
"learning_rate": 5.816075162696097e-08, |
|
"logits/chosen": -0.9920557737350464, |
|
"logits/rejected": -1.0163335800170898, |
|
"logps/chosen": -4.634104251861572, |
|
"logps/rejected": -5.2726593017578125, |
|
"loss": 2.3016, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -46.341041564941406, |
|
"rewards/margins": 6.3855462074279785, |
|
"rewards/rejected": -52.72658920288086, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.8459146169069833, |
|
"grad_norm": 136.57228607111298, |
|
"learning_rate": 5.6527746421551046e-08, |
|
"logits/chosen": -0.9222903847694397, |
|
"logits/rejected": -0.9167163372039795, |
|
"logps/chosen": -4.5692830085754395, |
|
"logps/rejected": -5.321811676025391, |
|
"loss": 2.4519, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -45.69283676147461, |
|
"rewards/margins": 7.525275230407715, |
|
"rewards/rejected": -53.218109130859375, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.8481764206955047, |
|
"grad_norm": 147.12870026365698, |
|
"learning_rate": 5.4916249617156064e-08, |
|
"logits/chosen": -0.9463968873023987, |
|
"logits/rejected": -0.9552465677261353, |
|
"logps/chosen": -4.313798427581787, |
|
"logps/rejected": -4.9934186935424805, |
|
"loss": 2.6259, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -43.13798141479492, |
|
"rewards/margins": 6.796201705932617, |
|
"rewards/rejected": -49.93418502807617, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8504382244840261, |
|
"grad_norm": 161.79684082712205, |
|
"learning_rate": 5.332636212649646e-08, |
|
"logits/chosen": -0.9481571316719055, |
|
"logits/rejected": -0.9492475986480713, |
|
"logps/chosen": -4.502243995666504, |
|
"logps/rejected": -5.292466640472412, |
|
"loss": 2.0549, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -45.02243423461914, |
|
"rewards/margins": 7.902227878570557, |
|
"rewards/rejected": -52.924659729003906, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.8527000282725473, |
|
"grad_norm": 158.20408842262404, |
|
"learning_rate": 5.17581835091069e-08, |
|
"logits/chosen": -0.940109372138977, |
|
"logits/rejected": -0.9614642858505249, |
|
"logps/chosen": -4.670576095581055, |
|
"logps/rejected": -5.383021831512451, |
|
"loss": 2.734, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -46.70576095581055, |
|
"rewards/margins": 7.124454021453857, |
|
"rewards/rejected": -53.83021545410156, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.8549618320610687, |
|
"grad_norm": 131.07408134426578, |
|
"learning_rate": 5.02118119651016e-08, |
|
"logits/chosen": -0.9807331562042236, |
|
"logits/rejected": -0.98893803358078, |
|
"logps/chosen": -4.549108028411865, |
|
"logps/rejected": -5.304394721984863, |
|
"loss": 2.3562, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -45.49108123779297, |
|
"rewards/margins": 7.552868366241455, |
|
"rewards/rejected": -53.043949127197266, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.85722363584959, |
|
"grad_norm": 159.82635061305217, |
|
"learning_rate": 4.868734432902526e-08, |
|
"logits/chosen": -1.0292322635650635, |
|
"logits/rejected": -1.0086599588394165, |
|
"logps/chosen": -4.577394485473633, |
|
"logps/rejected": -5.43131160736084, |
|
"loss": 2.9397, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -45.77394104003906, |
|
"rewards/margins": 8.539175987243652, |
|
"rewards/rejected": -54.313114166259766, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.8594854396381114, |
|
"grad_norm": 153.5753175656312, |
|
"learning_rate": 4.7184876063789134e-08, |
|
"logits/chosen": -0.973901629447937, |
|
"logits/rejected": -0.9794867634773254, |
|
"logps/chosen": -4.038912296295166, |
|
"logps/rejected": -4.743659019470215, |
|
"loss": 2.5866, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -40.38911437988281, |
|
"rewards/margins": 7.0474653244018555, |
|
"rewards/rejected": -47.43658447265625, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8617472434266328, |
|
"grad_norm": 127.21695474983532, |
|
"learning_rate": 4.570450125469314e-08, |
|
"logits/chosen": -0.9439151287078857, |
|
"logits/rejected": -0.9651936292648315, |
|
"logps/chosen": -4.656189441680908, |
|
"logps/rejected": -5.580712795257568, |
|
"loss": 2.0476, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -46.561893463134766, |
|
"rewards/margins": 9.245238304138184, |
|
"rewards/rejected": -55.80712890625, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.864009047215154, |
|
"grad_norm": 140.9365653388638, |
|
"learning_rate": 4.424631260353378e-08, |
|
"logits/chosen": -0.9873952865600586, |
|
"logits/rejected": -0.9998773336410522, |
|
"logps/chosen": -4.442070960998535, |
|
"logps/rejected": -5.129306316375732, |
|
"loss": 2.568, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -44.42071533203125, |
|
"rewards/margins": 6.872350215911865, |
|
"rewards/rejected": -51.293060302734375, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.8662708510036754, |
|
"grad_norm": 123.99421688298398, |
|
"learning_rate": 4.281040142280008e-08, |
|
"logits/chosen": -1.0014389753341675, |
|
"logits/rejected": -1.0032625198364258, |
|
"logps/chosen": -4.253451347351074, |
|
"logps/rejected": -5.129884719848633, |
|
"loss": 1.7937, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -42.534515380859375, |
|
"rewards/margins": 8.764336585998535, |
|
"rewards/rejected": -51.298851013183594, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.8685326547921968, |
|
"grad_norm": 147.9606819458139, |
|
"learning_rate": 4.1396857629954286e-08, |
|
"logits/chosen": -0.9735297560691833, |
|
"logits/rejected": -0.9905420541763306, |
|
"logps/chosen": -4.999640464782715, |
|
"logps/rejected": -5.769815921783447, |
|
"loss": 2.6449, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -49.99640655517578, |
|
"rewards/margins": 7.7017502784729, |
|
"rewards/rejected": -57.698158264160156, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.8707944585807181, |
|
"grad_norm": 145.63899265924684, |
|
"learning_rate": 4.000576974180232e-08, |
|
"logits/chosen": -0.9300609827041626, |
|
"logits/rejected": -0.9512342214584351, |
|
"logps/chosen": -4.364050388336182, |
|
"logps/rejected": -5.139545917510986, |
|
"loss": 2.5334, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -43.6405029296875, |
|
"rewards/margins": 7.7549591064453125, |
|
"rewards/rejected": -51.39546203613281, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8730562623692395, |
|
"grad_norm": 134.08121981811036, |
|
"learning_rate": 3.8637224868950066e-08, |
|
"logits/chosen": -0.9386020302772522, |
|
"logits/rejected": -0.9424499273300171, |
|
"logps/chosen": -4.373361587524414, |
|
"logps/rejected": -5.010212421417236, |
|
"loss": 2.7981, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -43.73361587524414, |
|
"rewards/margins": 6.368506908416748, |
|
"rewards/rejected": -50.10212326049805, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.8753180661577609, |
|
"grad_norm": 154.64601318433506, |
|
"learning_rate": 3.729130871034885e-08, |
|
"logits/chosen": -0.95456862449646, |
|
"logits/rejected": -0.9573556184768677, |
|
"logps/chosen": -4.52392578125, |
|
"logps/rejected": -5.276218891143799, |
|
"loss": 2.5976, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -45.239253997802734, |
|
"rewards/margins": 7.5229339599609375, |
|
"rewards/rejected": -52.76219177246094, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.8775798699462821, |
|
"grad_norm": 154.26105591417908, |
|
"learning_rate": 3.596810554792888e-08, |
|
"logits/chosen": -0.9463018178939819, |
|
"logits/rejected": -0.954681396484375, |
|
"logps/chosen": -4.434272289276123, |
|
"logps/rejected": -5.201148509979248, |
|
"loss": 2.6181, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -44.34272003173828, |
|
"rewards/margins": 7.668770790100098, |
|
"rewards/rejected": -52.01148986816406, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.8798416737348035, |
|
"grad_norm": 147.27634883503094, |
|
"learning_rate": 3.466769824132116e-08, |
|
"logits/chosen": -0.9583290219306946, |
|
"logits/rejected": -0.9514294862747192, |
|
"logps/chosen": -4.45271110534668, |
|
"logps/rejected": -5.209290981292725, |
|
"loss": 2.2404, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -44.52710723876953, |
|
"rewards/margins": 7.565799236297607, |
|
"rewards/rejected": -52.09291076660156, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.8821034775233249, |
|
"grad_norm": 152.7393534346966, |
|
"learning_rate": 3.339016822266925e-08, |
|
"logits/chosen": -0.9181097149848938, |
|
"logits/rejected": -0.9469025135040283, |
|
"logps/chosen": -4.571746826171875, |
|
"logps/rejected": -5.5013909339904785, |
|
"loss": 1.7452, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -45.717464447021484, |
|
"rewards/margins": 9.296442031860352, |
|
"rewards/rejected": -55.01390838623047, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8843652813118462, |
|
"grad_norm": 147.69164041581885, |
|
"learning_rate": 3.213559549152958e-08, |
|
"logits/chosen": -0.977473795413971, |
|
"logits/rejected": -0.9800957441329956, |
|
"logps/chosen": -4.443673610687256, |
|
"logps/rejected": -5.2254157066345215, |
|
"loss": 2.5852, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -44.436729431152344, |
|
"rewards/margins": 7.817424774169922, |
|
"rewards/rejected": -52.2541618347168, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.8866270851003676, |
|
"grad_norm": 148.653528854335, |
|
"learning_rate": 3.090405860986203e-08, |
|
"logits/chosen": -0.9800371527671814, |
|
"logits/rejected": -1.016266942024231, |
|
"logps/chosen": -4.722820281982422, |
|
"logps/rejected": -5.609055519104004, |
|
"loss": 2.4299, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -47.22819900512695, |
|
"rewards/margins": 8.86235523223877, |
|
"rewards/rejected": -56.090553283691406, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 144.92734187701728, |
|
"learning_rate": 2.9695634697110315e-08, |
|
"logits/chosen": -0.9289108514785767, |
|
"logits/rejected": -0.9394564032554626, |
|
"logps/chosen": -4.453829765319824, |
|
"logps/rejected": -5.3411054611206055, |
|
"loss": 2.6235, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -44.53829574584961, |
|
"rewards/margins": 8.872754096984863, |
|
"rewards/rejected": -53.41105651855469, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.8911506926774102, |
|
"grad_norm": 161.40695882406106, |
|
"learning_rate": 2.8510399425372766e-08, |
|
"logits/chosen": -0.9479678273200989, |
|
"logits/rejected": -0.9467705488204956, |
|
"logps/chosen": -4.576265811920166, |
|
"logps/rejected": -5.270918846130371, |
|
"loss": 2.6714, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -45.76266098022461, |
|
"rewards/margins": 6.946530342102051, |
|
"rewards/rejected": -52.70918655395508, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.8934124964659316, |
|
"grad_norm": 162.16334658512758, |
|
"learning_rate": 2.734842701466329e-08, |
|
"logits/chosen": -0.9540742635726929, |
|
"logits/rejected": -0.9456802010536194, |
|
"logps/chosen": -4.940086364746094, |
|
"logps/rejected": -5.668696880340576, |
|
"loss": 2.2655, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -49.4008674621582, |
|
"rewards/margins": 7.286104202270508, |
|
"rewards/rejected": -56.686973571777344, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8956743002544529, |
|
"grad_norm": 154.19569156235414, |
|
"learning_rate": 2.6209790228264438e-08, |
|
"logits/chosen": -0.9714961647987366, |
|
"logits/rejected": -0.9679863452911377, |
|
"logps/chosen": -4.16938591003418, |
|
"logps/rejected": -4.939549446105957, |
|
"loss": 2.2384, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -41.69386291503906, |
|
"rewards/margins": 7.701635837554932, |
|
"rewards/rejected": -49.39550018310547, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.8979361040429743, |
|
"grad_norm": 149.68348291535114, |
|
"learning_rate": 2.5094560368170305e-08, |
|
"logits/chosen": -0.9434354305267334, |
|
"logits/rejected": -0.9568273425102234, |
|
"logps/chosen": -4.757465839385986, |
|
"logps/rejected": -5.512124061584473, |
|
"loss": 2.3075, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -47.57465744018555, |
|
"rewards/margins": 7.546583652496338, |
|
"rewards/rejected": -55.12124252319336, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.9001979078314957, |
|
"grad_norm": 139.83871846925578, |
|
"learning_rate": 2.4002807270621893e-08, |
|
"logits/chosen": -0.9892213344573975, |
|
"logits/rejected": -0.9795714020729065, |
|
"logps/chosen": -4.536479473114014, |
|
"logps/rejected": -5.248366832733154, |
|
"loss": 2.4449, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -45.36479187011719, |
|
"rewards/margins": 7.118875026702881, |
|
"rewards/rejected": -52.48366928100586, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.9024597116200169, |
|
"grad_norm": 166.9049138837263, |
|
"learning_rate": 2.293459930173354e-08, |
|
"logits/chosen": -0.9818339943885803, |
|
"logits/rejected": -0.9937196969985962, |
|
"logps/chosen": -4.615837574005127, |
|
"logps/rejected": -5.3938493728637695, |
|
"loss": 2.3729, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -46.15837478637695, |
|
"rewards/margins": 7.780113220214844, |
|
"rewards/rejected": -53.93849182128906, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.9047215154085383, |
|
"grad_norm": 140.62439250035044, |
|
"learning_rate": 2.189000335321256e-08, |
|
"logits/chosen": -0.946580171585083, |
|
"logits/rejected": -0.9496131539344788, |
|
"logps/chosen": -4.469082355499268, |
|
"logps/rejected": -5.115677833557129, |
|
"loss": 2.9326, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -44.690818786621094, |
|
"rewards/margins": 6.465954780578613, |
|
"rewards/rejected": -51.156776428222656, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9069833191970597, |
|
"grad_norm": 160.13785787767887, |
|
"learning_rate": 2.086908483816954e-08, |
|
"logits/chosen": -0.9588191509246826, |
|
"logits/rejected": -0.9561234712600708, |
|
"logps/chosen": -4.833827495574951, |
|
"logps/rejected": -5.5546088218688965, |
|
"loss": 2.5806, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -48.33827590942383, |
|
"rewards/margins": 7.207812309265137, |
|
"rewards/rejected": -55.54608917236328, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.909245122985581, |
|
"grad_norm": 143.62250331217595, |
|
"learning_rate": 1.9871907687022717e-08, |
|
"logits/chosen": -0.9328145980834961, |
|
"logits/rejected": -0.9461118578910828, |
|
"logps/chosen": -4.385026931762695, |
|
"logps/rejected": -5.071187496185303, |
|
"loss": 2.4165, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -43.85027313232422, |
|
"rewards/margins": 6.861605167388916, |
|
"rewards/rejected": -50.71187973022461, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.9115069267741024, |
|
"grad_norm": 134.2795025444658, |
|
"learning_rate": 1.889853434349451e-08, |
|
"logits/chosen": -0.9710885286331177, |
|
"logits/rejected": -0.9686489105224609, |
|
"logps/chosen": -4.349798679351807, |
|
"logps/rejected": -5.178468227386475, |
|
"loss": 2.441, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -43.49799346923828, |
|
"rewards/margins": 8.286693572998047, |
|
"rewards/rejected": -51.78468322753906, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.9137687305626236, |
|
"grad_norm": 164.30503356987847, |
|
"learning_rate": 1.7949025760701164e-08, |
|
"logits/chosen": -0.950221836566925, |
|
"logits/rejected": -0.9404230117797852, |
|
"logps/chosen": -4.71327543258667, |
|
"logps/rejected": -5.3966755867004395, |
|
"loss": 2.4213, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -47.132755279541016, |
|
"rewards/margins": 6.8340020179748535, |
|
"rewards/rejected": -53.96676254272461, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.916030534351145, |
|
"grad_norm": 158.87149794688602, |
|
"learning_rate": 1.7023441397336023e-08, |
|
"logits/chosen": -0.973084568977356, |
|
"logits/rejected": -0.98138827085495, |
|
"logps/chosen": -4.376833438873291, |
|
"logps/rejected": -5.152012825012207, |
|
"loss": 2.1802, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -43.76833724975586, |
|
"rewards/margins": 7.751791954040527, |
|
"rewards/rejected": -51.52012634277344, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9182923381396664, |
|
"grad_norm": 136.71185170412178, |
|
"learning_rate": 1.6121839213945854e-08, |
|
"logits/chosen": -0.943014919757843, |
|
"logits/rejected": -0.9729979634284973, |
|
"logps/chosen": -4.3754682540893555, |
|
"logps/rejected": -5.180696487426758, |
|
"loss": 2.6478, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -43.75468826293945, |
|
"rewards/margins": 8.052278518676758, |
|
"rewards/rejected": -51.80696487426758, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.9205541419281877, |
|
"grad_norm": 184.17485871176777, |
|
"learning_rate": 1.5244275669301777e-08, |
|
"logits/chosen": -0.9659703373908997, |
|
"logits/rejected": -0.9741247892379761, |
|
"logps/chosen": -4.513559818267822, |
|
"logps/rejected": -5.29529333114624, |
|
"loss": 2.6662, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -45.135597229003906, |
|
"rewards/margins": 7.817337512969971, |
|
"rewards/rejected": -52.95293426513672, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.9228159457167091, |
|
"grad_norm": 153.29528491119703, |
|
"learning_rate": 1.4390805716863398e-08, |
|
"logits/chosen": -0.9317939281463623, |
|
"logits/rejected": -0.9431477785110474, |
|
"logps/chosen": -4.3477582931518555, |
|
"logps/rejected": -4.961559295654297, |
|
"loss": 3.0725, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -43.47758102416992, |
|
"rewards/margins": 6.138009071350098, |
|
"rewards/rejected": -49.615596771240234, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.9250777495052305, |
|
"grad_norm": 147.4396268964287, |
|
"learning_rate": 1.3561482801337908e-08, |
|
"logits/chosen": -0.9190540909767151, |
|
"logits/rejected": -0.9394365549087524, |
|
"logps/chosen": -4.4133453369140625, |
|
"logps/rejected": -5.246624946594238, |
|
"loss": 2.8243, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -44.13344955444336, |
|
"rewards/margins": 8.332796096801758, |
|
"rewards/rejected": -52.46624755859375, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.9273395532937517, |
|
"grad_norm": 147.41157160150556, |
|
"learning_rate": 1.2756358855332904e-08, |
|
"logits/chosen": -0.9719132781028748, |
|
"logits/rejected": -0.9848355054855347, |
|
"logps/chosen": -4.328832626342773, |
|
"logps/rejected": -4.96996545791626, |
|
"loss": 3.201, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -43.288326263427734, |
|
"rewards/margins": 6.4113287925720215, |
|
"rewards/rejected": -49.69965362548828, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9296013570822731, |
|
"grad_norm": 147.810411578744, |
|
"learning_rate": 1.1975484296105154e-08, |
|
"logits/chosen": -0.9302911758422852, |
|
"logits/rejected": -0.9466784596443176, |
|
"logps/chosen": -4.584578037261963, |
|
"logps/rejected": -5.3103132247924805, |
|
"loss": 2.8047, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -45.845787048339844, |
|
"rewards/margins": 7.2573466300964355, |
|
"rewards/rejected": -53.10313415527344, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.9318631608707945, |
|
"grad_norm": 152.41484142524448, |
|
"learning_rate": 1.1218908022402374e-08, |
|
"logits/chosen": -0.928033173084259, |
|
"logits/rejected": -0.940939724445343, |
|
"logps/chosen": -4.2157416343688965, |
|
"logps/rejected": -5.0531816482543945, |
|
"loss": 2.3413, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -42.157413482666016, |
|
"rewards/margins": 8.374399185180664, |
|
"rewards/rejected": -50.53181457519531, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.9341249646593158, |
|
"grad_norm": 159.91966152195383, |
|
"learning_rate": 1.0486677411402079e-08, |
|
"logits/chosen": -1.0264177322387695, |
|
"logits/rejected": -1.0172362327575684, |
|
"logps/chosen": -4.612265110015869, |
|
"logps/rejected": -5.4898295402526855, |
|
"loss": 2.7482, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -46.12265396118164, |
|
"rewards/margins": 8.77564525604248, |
|
"rewards/rejected": -54.89829635620117, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.9363867684478372, |
|
"grad_norm": 147.1808101672586, |
|
"learning_rate": 9.778838315744353e-09, |
|
"logits/chosen": -0.9884222149848938, |
|
"logits/rejected": -0.9936903715133667, |
|
"logps/chosen": -4.626962661743164, |
|
"logps/rejected": -5.381559371948242, |
|
"loss": 2.2191, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -46.269622802734375, |
|
"rewards/margins": 7.545971393585205, |
|
"rewards/rejected": -53.81559371948242, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.9386485722363584, |
|
"grad_norm": 158.74475786885856, |
|
"learning_rate": 9.095435060660595e-09, |
|
"logits/chosen": -0.9156569242477417, |
|
"logits/rejected": -0.9195461273193359, |
|
"logps/chosen": -4.509635925292969, |
|
"logps/rejected": -5.232195854187012, |
|
"loss": 2.663, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -45.09635543823242, |
|
"rewards/margins": 7.225605010986328, |
|
"rewards/rejected": -52.321964263916016, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.9409103760248798, |
|
"grad_norm": 178.1295778190553, |
|
"learning_rate": 8.436510441197864e-09, |
|
"logits/chosen": -0.9454046487808228, |
|
"logits/rejected": -0.9776556491851807, |
|
"logps/chosen": -4.444124221801758, |
|
"logps/rejected": -5.162923336029053, |
|
"loss": 2.9006, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -44.44124221801758, |
|
"rewards/margins": 7.187990188598633, |
|
"rewards/rejected": -51.62923812866211, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.9431721798134012, |
|
"grad_norm": 245.5564838113027, |
|
"learning_rate": 7.802105719539076e-09, |
|
"logits/chosen": -0.9548664689064026, |
|
"logits/rejected": -0.9679651260375977, |
|
"logps/chosen": -4.6692795753479, |
|
"logps/rejected": -5.344656944274902, |
|
"loss": 3.2472, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -46.69279861450195, |
|
"rewards/margins": 6.753774642944336, |
|
"rewards/rejected": -53.44656753540039, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.9454339836019225, |
|
"grad_norm": 138.25589021962577, |
|
"learning_rate": 7.1922606224192e-09, |
|
"logits/chosen": -0.9730819463729858, |
|
"logits/rejected": -0.9867460131645203, |
|
"logps/chosen": -4.586428642272949, |
|
"logps/rejected": -5.304539680480957, |
|
"loss": 2.396, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -45.864295959472656, |
|
"rewards/margins": 7.181105613708496, |
|
"rewards/rejected": -53.0453987121582, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.9476957873904439, |
|
"grad_norm": 155.31797780611356, |
|
"learning_rate": 6.6070133386372906e-09, |
|
"logits/chosen": -0.951643168926239, |
|
"logits/rejected": -0.9600397348403931, |
|
"logps/chosen": -4.466057777404785, |
|
"logps/rejected": -5.151131629943848, |
|
"loss": 2.7637, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -44.66057586669922, |
|
"rewards/margins": 6.850742340087891, |
|
"rewards/rejected": -51.51131820678711, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.9499575911789653, |
|
"grad_norm": 149.2139797209097, |
|
"learning_rate": 6.046400516665384e-09, |
|
"logits/chosen": -0.9598467946052551, |
|
"logits/rejected": -0.9552338123321533, |
|
"logps/chosen": -4.396927833557129, |
|
"logps/rejected": -5.164035320281982, |
|
"loss": 2.9147, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -43.969276428222656, |
|
"rewards/margins": 7.671075344085693, |
|
"rewards/rejected": -51.640357971191406, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9522193949674865, |
|
"grad_norm": 128.23107358922502, |
|
"learning_rate": 5.510457262353396e-09, |
|
"logits/chosen": -1.0141513347625732, |
|
"logits/rejected": -1.0092058181762695, |
|
"logps/chosen": -4.431485176086426, |
|
"logps/rejected": -5.144137382507324, |
|
"loss": 2.3305, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -44.314849853515625, |
|
"rewards/margins": 7.126527309417725, |
|
"rewards/rejected": -51.44137954711914, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.9544811987560079, |
|
"grad_norm": 137.41821363865725, |
|
"learning_rate": 4.9992171367309265e-09, |
|
"logits/chosen": -0.9783574342727661, |
|
"logits/rejected": -0.9639812111854553, |
|
"logps/chosen": -4.1773505210876465, |
|
"logps/rejected": -4.8878397941589355, |
|
"loss": 2.2812, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -41.773502349853516, |
|
"rewards/margins": 7.104894161224365, |
|
"rewards/rejected": -48.878395080566406, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.9567430025445293, |
|
"grad_norm": 150.80335007353597, |
|
"learning_rate": 4.5127121539052955e-09, |
|
"logits/chosen": -0.9814745187759399, |
|
"logits/rejected": -0.9915533661842346, |
|
"logps/chosen": -4.722474098205566, |
|
"logps/rejected": -5.471231937408447, |
|
"loss": 2.4137, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -47.224735260009766, |
|
"rewards/margins": 7.487585544586182, |
|
"rewards/rejected": -54.712318420410156, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.9590048063330506, |
|
"grad_norm": 190.30249728923863, |
|
"learning_rate": 4.050972779057327e-09, |
|
"logits/chosen": -0.8814293146133423, |
|
"logits/rejected": -0.8948724865913391, |
|
"logps/chosen": -4.208849906921387, |
|
"logps/rejected": -4.926727294921875, |
|
"loss": 2.7246, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -42.0885009765625, |
|
"rewards/margins": 7.178774833679199, |
|
"rewards/rejected": -49.267276763916016, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.961266610121572, |
|
"grad_norm": 170.25417436856634, |
|
"learning_rate": 3.6140279265330477e-09, |
|
"logits/chosen": -0.9410693645477295, |
|
"logits/rejected": -0.9508557319641113, |
|
"logps/chosen": -4.573657035827637, |
|
"logps/rejected": -5.300447463989258, |
|
"loss": 2.6723, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -45.736572265625, |
|
"rewards/margins": 7.267903804779053, |
|
"rewards/rejected": -53.004478454589844, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9635284139100933, |
|
"grad_norm": 153.10970515350425, |
|
"learning_rate": 3.2019049580335853e-09, |
|
"logits/chosen": -0.9724865555763245, |
|
"logits/rejected": -0.9711145162582397, |
|
"logps/chosen": -4.299941062927246, |
|
"logps/rejected": -4.872091770172119, |
|
"loss": 3.2617, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -42.99940872192383, |
|
"rewards/margins": 5.721508979797363, |
|
"rewards/rejected": -48.720916748046875, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.9657902176986146, |
|
"grad_norm": 135.53392350882962, |
|
"learning_rate": 2.814629680901337e-09, |
|
"logits/chosen": -0.9825209379196167, |
|
"logits/rejected": -1.0047634840011597, |
|
"logps/chosen": -4.572093486785889, |
|
"logps/rejected": -5.286868095397949, |
|
"loss": 2.4462, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -45.7209358215332, |
|
"rewards/margins": 7.1477484703063965, |
|
"rewards/rejected": -52.86868667602539, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.968052021487136, |
|
"grad_norm": 142.7322468520036, |
|
"learning_rate": 2.4522263465041937e-09, |
|
"logits/chosen": -0.9486875534057617, |
|
"logits/rejected": -0.9619259238243103, |
|
"logps/chosen": -4.454163551330566, |
|
"logps/rejected": -5.211050033569336, |
|
"loss": 2.1732, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -44.54163360595703, |
|
"rewards/margins": 7.5688652992248535, |
|
"rewards/rejected": -52.11050033569336, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.9703138252756573, |
|
"grad_norm": 136.69603531498632, |
|
"learning_rate": 2.114717648716713e-09, |
|
"logits/chosen": -0.9177216291427612, |
|
"logits/rejected": -0.9293065667152405, |
|
"logps/chosen": -4.46873140335083, |
|
"logps/rejected": -5.2916765213012695, |
|
"loss": 2.2091, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -44.687313079833984, |
|
"rewards/margins": 8.22945499420166, |
|
"rewards/rejected": -52.91676712036133, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.9725756290641787, |
|
"grad_norm": 152.57420599911407, |
|
"learning_rate": 1.802124722499121e-09, |
|
"logits/chosen": -0.9466343522071838, |
|
"logits/rejected": -0.9572174549102783, |
|
"logps/chosen": -4.5352349281311035, |
|
"logps/rejected": -5.298283100128174, |
|
"loss": 2.5129, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -45.352352142333984, |
|
"rewards/margins": 7.6304826736450195, |
|
"rewards/rejected": -52.98283386230469, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9748374328527001, |
|
"grad_norm": 148.47683038194563, |
|
"learning_rate": 1.5144671425737499e-09, |
|
"logits/chosen": -0.93849116563797, |
|
"logits/rejected": -0.9434796571731567, |
|
"logps/chosen": -4.241235256195068, |
|
"logps/rejected": -4.951510906219482, |
|
"loss": 2.8765, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -42.412353515625, |
|
"rewards/margins": 7.102751731872559, |
|
"rewards/rejected": -49.515106201171875, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.9770992366412213, |
|
"grad_norm": 156.65455975752164, |
|
"learning_rate": 1.251762922199484e-09, |
|
"logits/chosen": -0.9012454748153687, |
|
"logits/rejected": -0.9075654149055481, |
|
"logps/chosen": -4.4999308586120605, |
|
"logps/rejected": -5.314447402954102, |
|
"loss": 1.9835, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -44.999305725097656, |
|
"rewards/margins": 8.14516544342041, |
|
"rewards/rejected": -53.144466400146484, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.9793610404297427, |
|
"grad_norm": 142.53224570983375, |
|
"learning_rate": 1.0140285120433744e-09, |
|
"logits/chosen": -0.973222017288208, |
|
"logits/rejected": -0.9794344305992126, |
|
"logps/chosen": -4.608139991760254, |
|
"logps/rejected": -5.329331874847412, |
|
"loss": 2.527, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -46.081398010253906, |
|
"rewards/margins": 7.211921691894531, |
|
"rewards/rejected": -53.29331970214844, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.9816228442182641, |
|
"grad_norm": 162.55094493344626, |
|
"learning_rate": 8.012787991508396e-10, |
|
"logits/chosen": -0.9416544437408447, |
|
"logits/rejected": -0.9537829160690308, |
|
"logps/chosen": -4.446515083312988, |
|
"logps/rejected": -5.29661226272583, |
|
"loss": 2.8152, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -44.465152740478516, |
|
"rewards/margins": 8.50097370147705, |
|
"rewards/rejected": -52.966129302978516, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.9838846480067854, |
|
"grad_norm": 145.96078846455643, |
|
"learning_rate": 6.135271060133007e-10, |
|
"logits/chosen": -0.9080808758735657, |
|
"logits/rejected": -0.9170538187026978, |
|
"logps/chosen": -4.4007487297058105, |
|
"logps/rejected": -5.0969038009643555, |
|
"loss": 2.9644, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -44.00749206542969, |
|
"rewards/margins": 6.96155309677124, |
|
"rewards/rejected": -50.96904373168945, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9861464517953068, |
|
"grad_norm": 128.81354346980564, |
|
"learning_rate": 4.50785189733871e-10, |
|
"logits/chosen": -0.9219609498977661, |
|
"logits/rejected": -0.9404686689376831, |
|
"logps/chosen": -4.3535308837890625, |
|
"logps/rejected": -5.156708240509033, |
|
"loss": 2.1384, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -43.53531265258789, |
|
"rewards/margins": 8.031768798828125, |
|
"rewards/rejected": -51.56707763671875, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.988408255583828, |
|
"grad_norm": 144.04083799644908, |
|
"learning_rate": 3.1306324129118935e-10, |
|
"logits/chosen": -0.9294695854187012, |
|
"logits/rejected": -0.9402070045471191, |
|
"logps/chosen": -4.544949531555176, |
|
"logps/rejected": -5.239452362060547, |
|
"loss": 2.6014, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -45.449493408203125, |
|
"rewards/margins": 6.945026874542236, |
|
"rewards/rejected": -52.39452362060547, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.9906700593723494, |
|
"grad_norm": 155.89550905311364, |
|
"learning_rate": 2.003698849011748e-10, |
|
"logits/chosen": -0.9938583374023438, |
|
"logits/rejected": -1.0007983446121216, |
|
"logps/chosen": -4.665548801422119, |
|
"logps/rejected": -5.291666507720947, |
|
"loss": 2.7518, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -46.65549087524414, |
|
"rewards/margins": 6.261169910430908, |
|
"rewards/rejected": -52.91666030883789, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.9929318631608708, |
|
"grad_norm": 144.03429193625178, |
|
"learning_rate": 1.1271217747714779e-10, |
|
"logits/chosen": -0.957481324672699, |
|
"logits/rejected": -0.9813300371170044, |
|
"logps/chosen": -4.5693769454956055, |
|
"logps/rejected": -5.249251842498779, |
|
"loss": 2.7764, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -45.693763732910156, |
|
"rewards/margins": 6.798752784729004, |
|
"rewards/rejected": -52.49251937866211, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.9951936669493922, |
|
"grad_norm": 144.95200313917013, |
|
"learning_rate": 5.0095608187739055e-11, |
|
"logits/chosen": -0.9488322734832764, |
|
"logits/rejected": -0.9575316309928894, |
|
"logps/chosen": -4.304515361785889, |
|
"logps/rejected": -4.995253086090088, |
|
"loss": 2.7284, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -43.0451545715332, |
|
"rewards/margins": 6.907373428344727, |
|
"rewards/rejected": -49.9525260925293, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9974554707379135, |
|
"grad_norm": 171.65037355117758, |
|
"learning_rate": 1.2524098113209092e-11, |
|
"logits/chosen": -0.9583221077919006, |
|
"logits/rejected": -0.9742617607116699, |
|
"logps/chosen": -4.497997283935547, |
|
"logps/rejected": -5.199645042419434, |
|
"loss": 2.9133, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -44.979976654052734, |
|
"rewards/margins": 7.016475677490234, |
|
"rewards/rejected": -51.99645233154297, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.9997172745264349, |
|
"grad_norm": 135.08490682698582, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.9516457319259644, |
|
"logits/rejected": -0.9772913455963135, |
|
"logps/chosen": -4.331707000732422, |
|
"logps/rejected": -5.04747200012207, |
|
"loss": 2.838, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -43.317073822021484, |
|
"rewards/margins": 7.15764856338501, |
|
"rewards/rejected": -50.47472381591797, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9997172745264349, |
|
"eval_logits/chosen": -0.9504738450050354, |
|
"eval_logits/rejected": -0.9573480486869812, |
|
"eval_logps/chosen": -4.477466106414795, |
|
"eval_logps/rejected": -5.212347030639648, |
|
"eval_loss": 2.455941915512085, |
|
"eval_rewards/accuracies": 0.8001337051391602, |
|
"eval_rewards/chosen": -44.774662017822266, |
|
"eval_rewards/margins": 7.348804473876953, |
|
"eval_rewards/rejected": -52.12346649169922, |
|
"eval_runtime": 100.7219, |
|
"eval_samples_per_second": 29.576, |
|
"eval_steps_per_second": 1.857, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9997172745264349, |
|
"step": 442, |
|
"total_flos": 134366991482880.0, |
|
"train_loss": 3.275575773208929, |
|
"train_runtime": 7589.4389, |
|
"train_samples_per_second": 7.456, |
|
"train_steps_per_second": 0.058 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 442, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 134366991482880.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|