|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997172745264349, |
|
"eval_steps": 500, |
|
"global_step": 442, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0022618037885213456, |
|
"grad_norm": 81.80145490381507, |
|
"learning_rate": 1.7777777777777777e-08, |
|
"logits/chosen": -0.932767391204834, |
|
"logits/rejected": -0.9067894220352173, |
|
"logps/chosen": -1.6399004459381104, |
|
"logps/rejected": -1.747880220413208, |
|
"loss": 5.8965, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -16.399005889892578, |
|
"rewards/margins": 1.079796552658081, |
|
"rewards/rejected": -17.478801727294922, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004523607577042691, |
|
"grad_norm": 90.52652005666872, |
|
"learning_rate": 3.5555555555555554e-08, |
|
"logits/chosen": -1.002763032913208, |
|
"logits/rejected": -0.9918949007987976, |
|
"logps/chosen": -1.6947053670883179, |
|
"logps/rejected": -1.6421573162078857, |
|
"loss": 6.1548, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -16.947053909301758, |
|
"rewards/margins": -0.5254815816879272, |
|
"rewards/rejected": -16.421573638916016, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006785411365564037, |
|
"grad_norm": 120.8672945601523, |
|
"learning_rate": 5.333333333333333e-08, |
|
"logits/chosen": -0.9584744572639465, |
|
"logits/rejected": -0.9756154417991638, |
|
"logps/chosen": -1.7169272899627686, |
|
"logps/rejected": -1.9392247200012207, |
|
"loss": 6.3628, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -17.169273376464844, |
|
"rewards/margins": 2.2229740619659424, |
|
"rewards/rejected": -19.39224624633789, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.009047215154085382, |
|
"grad_norm": 93.35996874941277, |
|
"learning_rate": 7.111111111111111e-08, |
|
"logits/chosen": -0.9799128174781799, |
|
"logits/rejected": -0.956102192401886, |
|
"logps/chosen": -1.6881718635559082, |
|
"logps/rejected": -1.6591113805770874, |
|
"loss": 6.1904, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -16.8817195892334, |
|
"rewards/margins": -0.290606290102005, |
|
"rewards/rejected": -16.591114044189453, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01130901894260673, |
|
"grad_norm": 82.98748385212801, |
|
"learning_rate": 8.888888888888888e-08, |
|
"logits/chosen": -0.9929681420326233, |
|
"logits/rejected": -0.9702396988868713, |
|
"logps/chosen": -1.6399867534637451, |
|
"logps/rejected": -1.6883800029754639, |
|
"loss": 5.6442, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -16.399866104125977, |
|
"rewards/margins": 0.48393285274505615, |
|
"rewards/rejected": -16.883798599243164, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.013570822731128074, |
|
"grad_norm": 84.15256473607785, |
|
"learning_rate": 1.0666666666666666e-07, |
|
"logits/chosen": -1.0631664991378784, |
|
"logits/rejected": -1.0385210514068604, |
|
"logps/chosen": -1.6836899518966675, |
|
"logps/rejected": -1.6393909454345703, |
|
"loss": 6.278, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -16.836902618408203, |
|
"rewards/margins": -0.44299259781837463, |
|
"rewards/rejected": -16.393909454345703, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01583262651964942, |
|
"grad_norm": 40.27523637385657, |
|
"learning_rate": 1.2444444444444443e-07, |
|
"logits/chosen": -0.97728431224823, |
|
"logits/rejected": -0.9518231153488159, |
|
"logps/chosen": -1.5182483196258545, |
|
"logps/rejected": -1.8960487842559814, |
|
"loss": 4.1003, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -15.182482719421387, |
|
"rewards/margins": 3.7780063152313232, |
|
"rewards/rejected": -18.96048927307129, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.018094430308170765, |
|
"grad_norm": 103.67927618904142, |
|
"learning_rate": 1.4222222222222222e-07, |
|
"logits/chosen": -0.9407067894935608, |
|
"logits/rejected": -0.9416890144348145, |
|
"logps/chosen": -1.6324212551116943, |
|
"logps/rejected": -1.7221814393997192, |
|
"loss": 5.7843, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -16.324209213256836, |
|
"rewards/margins": 0.8976033329963684, |
|
"rewards/rejected": -17.22181510925293, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.020356234096692113, |
|
"grad_norm": 78.6113677065651, |
|
"learning_rate": 1.6e-07, |
|
"logits/chosen": -1.003924012184143, |
|
"logits/rejected": -0.9841476082801819, |
|
"logps/chosen": -1.5638519525527954, |
|
"logps/rejected": -1.7627131938934326, |
|
"loss": 5.5396, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -15.638519287109375, |
|
"rewards/margins": 1.9886122941970825, |
|
"rewards/rejected": -17.627132415771484, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02261803788521346, |
|
"grad_norm": 81.83194237492314, |
|
"learning_rate": 1.7777777777777776e-07, |
|
"logits/chosen": -0.9899390935897827, |
|
"logits/rejected": -0.9986801743507385, |
|
"logps/chosen": -1.716883897781372, |
|
"logps/rejected": -1.6546330451965332, |
|
"loss": 6.0548, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -17.168838500976562, |
|
"rewards/margins": -0.6225085258483887, |
|
"rewards/rejected": -16.54633140563965, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.024879841673734804, |
|
"grad_norm": 114.69497185419166, |
|
"learning_rate": 1.9555555555555555e-07, |
|
"logits/chosen": -1.058133602142334, |
|
"logits/rejected": -1.0552024841308594, |
|
"logps/chosen": -1.6923774480819702, |
|
"logps/rejected": -1.9249680042266846, |
|
"loss": 5.4408, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -16.92377471923828, |
|
"rewards/margins": 2.3259057998657227, |
|
"rewards/rejected": -19.24968147277832, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02714164546225615, |
|
"grad_norm": 109.54034347578924, |
|
"learning_rate": 2.133333333333333e-07, |
|
"logits/chosen": -0.9806447625160217, |
|
"logits/rejected": -0.9427556991577148, |
|
"logps/chosen": -1.8206449747085571, |
|
"logps/rejected": -1.8985960483551025, |
|
"loss": 6.3971, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -18.206449508666992, |
|
"rewards/margins": 0.7795112133026123, |
|
"rewards/rejected": -18.985960006713867, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.029403449250777494, |
|
"grad_norm": 102.04167414429497, |
|
"learning_rate": 2.3111111111111107e-07, |
|
"logits/chosen": -0.955179750919342, |
|
"logits/rejected": -0.9119459986686707, |
|
"logps/chosen": -1.7387391328811646, |
|
"logps/rejected": -1.6549574136734009, |
|
"loss": 6.2289, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -17.38739013671875, |
|
"rewards/margins": -0.8378164768218994, |
|
"rewards/rejected": -16.549575805664062, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03166525303929884, |
|
"grad_norm": 46.89991038205965, |
|
"learning_rate": 2.4888888888888886e-07, |
|
"logits/chosen": -0.9808716177940369, |
|
"logits/rejected": -0.9642709493637085, |
|
"logps/chosen": -1.4120961427688599, |
|
"logps/rejected": -1.6382782459259033, |
|
"loss": 4.5308, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -14.120962142944336, |
|
"rewards/margins": 2.261821746826172, |
|
"rewards/rejected": -16.382781982421875, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.033927056827820185, |
|
"grad_norm": 90.27156602929041, |
|
"learning_rate": 2.666666666666666e-07, |
|
"logits/chosen": -0.9588332772254944, |
|
"logits/rejected": -0.9422608017921448, |
|
"logps/chosen": -1.554206371307373, |
|
"logps/rejected": -1.6721653938293457, |
|
"loss": 5.0591, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -15.542062759399414, |
|
"rewards/margins": 1.179589867591858, |
|
"rewards/rejected": -16.721651077270508, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03618886061634153, |
|
"grad_norm": 78.86328741004293, |
|
"learning_rate": 2.8444444444444443e-07, |
|
"logits/chosen": -1.0150483846664429, |
|
"logits/rejected": -0.9907156229019165, |
|
"logps/chosen": -1.6835522651672363, |
|
"logps/rejected": -1.6670693159103394, |
|
"loss": 6.2923, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -16.83552360534668, |
|
"rewards/margins": -0.16482998430728912, |
|
"rewards/rejected": -16.670692443847656, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.038450664404862875, |
|
"grad_norm": 95.19038026650476, |
|
"learning_rate": 3.022222222222222e-07, |
|
"logits/chosen": -1.0418639183044434, |
|
"logits/rejected": -0.9912959337234497, |
|
"logps/chosen": -1.516981601715088, |
|
"logps/rejected": -1.4668210744857788, |
|
"loss": 6.417, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -15.169816017150879, |
|
"rewards/margins": -0.501606285572052, |
|
"rewards/rejected": -14.668210983276367, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04071246819338423, |
|
"grad_norm": 111.60218368821454, |
|
"learning_rate": 3.2e-07, |
|
"logits/chosen": -1.0298357009887695, |
|
"logits/rejected": -1.0163593292236328, |
|
"logps/chosen": -1.998802661895752, |
|
"logps/rejected": -1.9897127151489258, |
|
"loss": 6.3954, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -19.98802947998047, |
|
"rewards/margins": -0.09090101718902588, |
|
"rewards/rejected": -19.897127151489258, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04297427198190557, |
|
"grad_norm": 89.45347136541167, |
|
"learning_rate": 3.3777777777777777e-07, |
|
"logits/chosen": -1.0412547588348389, |
|
"logits/rejected": -1.0211807489395142, |
|
"logps/chosen": -1.6870794296264648, |
|
"logps/rejected": -1.7022802829742432, |
|
"loss": 5.4772, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -16.87079620361328, |
|
"rewards/margins": 0.15200670063495636, |
|
"rewards/rejected": -17.022802352905273, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04523607577042692, |
|
"grad_norm": 43.66556603777872, |
|
"learning_rate": 3.5555555555555553e-07, |
|
"logits/chosen": -1.0372862815856934, |
|
"logits/rejected": -1.0256741046905518, |
|
"logps/chosen": -1.5196560621261597, |
|
"logps/rejected": -1.760719895362854, |
|
"loss": 4.2799, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -15.196558952331543, |
|
"rewards/margins": 2.4106411933898926, |
|
"rewards/rejected": -17.607200622558594, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04749787955894826, |
|
"grad_norm": 59.46046316628326, |
|
"learning_rate": 3.7333333333333334e-07, |
|
"logits/chosen": -0.9277665615081787, |
|
"logits/rejected": -0.9204395413398743, |
|
"logps/chosen": -1.5353920459747314, |
|
"logps/rejected": -1.5475002527236938, |
|
"loss": 5.4632, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -15.353920936584473, |
|
"rewards/margins": 0.12108239531517029, |
|
"rewards/rejected": -15.475003242492676, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.04975968334746961, |
|
"grad_norm": 59.72766214636501, |
|
"learning_rate": 3.911111111111111e-07, |
|
"logits/chosen": -0.9958820343017578, |
|
"logits/rejected": -0.9598875045776367, |
|
"logps/chosen": -1.5884332656860352, |
|
"logps/rejected": -1.6671531200408936, |
|
"loss": 4.6498, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -15.884333610534668, |
|
"rewards/margins": 0.787197470664978, |
|
"rewards/rejected": -16.671531677246094, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05202148713599095, |
|
"grad_norm": 77.59302084478524, |
|
"learning_rate": 4.0888888888888886e-07, |
|
"logits/chosen": -0.9856783151626587, |
|
"logits/rejected": -0.959747314453125, |
|
"logps/chosen": -1.5501296520233154, |
|
"logps/rejected": -1.556693196296692, |
|
"loss": 5.8485, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -15.501296043395996, |
|
"rewards/margins": 0.06563292443752289, |
|
"rewards/rejected": -15.566930770874023, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0542832909245123, |
|
"grad_norm": 56.05673157725325, |
|
"learning_rate": 4.266666666666666e-07, |
|
"logits/chosen": -0.9900195002555847, |
|
"logits/rejected": -0.9757727384567261, |
|
"logps/chosen": -1.3831775188446045, |
|
"logps/rejected": -1.4707231521606445, |
|
"loss": 5.0462, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -13.831775665283203, |
|
"rewards/margins": 0.8754545450210571, |
|
"rewards/rejected": -14.707229614257812, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05654509471303364, |
|
"grad_norm": 91.2883930453631, |
|
"learning_rate": 4.4444444444444444e-07, |
|
"logits/chosen": -0.979412317276001, |
|
"logits/rejected": -0.9593000411987305, |
|
"logps/chosen": -1.71976637840271, |
|
"logps/rejected": -1.7193169593811035, |
|
"loss": 5.6676, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -17.197664260864258, |
|
"rewards/margins": -0.004494845867156982, |
|
"rewards/rejected": -17.19317054748535, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05880689850155499, |
|
"grad_norm": 80.55549235370725, |
|
"learning_rate": 4.6222222222222214e-07, |
|
"logits/chosen": -1.0057790279388428, |
|
"logits/rejected": -0.9638312458992004, |
|
"logps/chosen": -1.7310502529144287, |
|
"logps/rejected": -1.713616132736206, |
|
"loss": 5.721, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -17.310503005981445, |
|
"rewards/margins": -0.1743413209915161, |
|
"rewards/rejected": -17.13616180419922, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.061068702290076333, |
|
"grad_norm": 72.52729139912653, |
|
"learning_rate": 4.8e-07, |
|
"logits/chosen": -1.0027204751968384, |
|
"logits/rejected": -1.0046546459197998, |
|
"logps/chosen": -1.7155876159667969, |
|
"logps/rejected": -1.6314939260482788, |
|
"loss": 6.2267, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -17.15587615966797, |
|
"rewards/margins": -0.8409355878829956, |
|
"rewards/rejected": -16.314939498901367, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.06333050607859768, |
|
"grad_norm": 60.28479460121429, |
|
"learning_rate": 4.977777777777777e-07, |
|
"logits/chosen": -1.0015887022018433, |
|
"logits/rejected": -0.9888956546783447, |
|
"logps/chosen": -1.5956369638442993, |
|
"logps/rejected": -1.6741609573364258, |
|
"loss": 5.7275, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -15.956369400024414, |
|
"rewards/margins": 0.7852407097816467, |
|
"rewards/rejected": -16.741609573364258, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.06559230986711903, |
|
"grad_norm": 105.47774070470895, |
|
"learning_rate": 5.155555555555556e-07, |
|
"logits/chosen": -1.0296828746795654, |
|
"logits/rejected": -1.0122419595718384, |
|
"logps/chosen": -1.617253303527832, |
|
"logps/rejected": -1.6238123178482056, |
|
"loss": 5.5968, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -16.172534942626953, |
|
"rewards/margins": 0.06558918952941895, |
|
"rewards/rejected": -16.238122940063477, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.06785411365564037, |
|
"grad_norm": 98.42561066357203, |
|
"learning_rate": 5.333333333333332e-07, |
|
"logits/chosen": -1.0032376050949097, |
|
"logits/rejected": -0.9752593636512756, |
|
"logps/chosen": -1.6265501976013184, |
|
"logps/rejected": -1.6735022068023682, |
|
"loss": 6.0847, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -16.2655029296875, |
|
"rewards/margins": 0.46952024102211, |
|
"rewards/rejected": -16.735021591186523, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07011591744416172, |
|
"grad_norm": 86.21463482055418, |
|
"learning_rate": 5.511111111111111e-07, |
|
"logits/chosen": -1.0222843885421753, |
|
"logits/rejected": -1.0005640983581543, |
|
"logps/chosen": -1.6079727411270142, |
|
"logps/rejected": -1.5775009393692017, |
|
"loss": 5.7657, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -16.079727172851562, |
|
"rewards/margins": -0.30471527576446533, |
|
"rewards/rejected": -15.77501106262207, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.07237772123268306, |
|
"grad_norm": 82.23057088609903, |
|
"learning_rate": 5.688888888888889e-07, |
|
"logits/chosen": -1.0451083183288574, |
|
"logits/rejected": -1.0329225063323975, |
|
"logps/chosen": -1.549233078956604, |
|
"logps/rejected": -1.5078167915344238, |
|
"loss": 5.7865, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -15.492328643798828, |
|
"rewards/margins": -0.4141616225242615, |
|
"rewards/rejected": -15.078166961669922, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.07463952502120441, |
|
"grad_norm": 49.332916215780955, |
|
"learning_rate": 5.866666666666666e-07, |
|
"logits/chosen": -0.9852883219718933, |
|
"logits/rejected": -0.959178626537323, |
|
"logps/chosen": -1.5177154541015625, |
|
"logps/rejected": -1.7086073160171509, |
|
"loss": 4.6781, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -15.177152633666992, |
|
"rewards/margins": 1.908919095993042, |
|
"rewards/rejected": -17.08607292175293, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.07690132880972575, |
|
"grad_norm": 57.4491220604369, |
|
"learning_rate": 6.044444444444444e-07, |
|
"logits/chosen": -1.0274769067764282, |
|
"logits/rejected": -0.9789716005325317, |
|
"logps/chosen": -1.4377570152282715, |
|
"logps/rejected": -1.6012887954711914, |
|
"loss": 4.463, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -14.377568244934082, |
|
"rewards/margins": 1.6353192329406738, |
|
"rewards/rejected": -16.012889862060547, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0791631325982471, |
|
"grad_norm": 102.30769729205291, |
|
"learning_rate": 6.222222222222223e-07, |
|
"logits/chosen": -1.06728196144104, |
|
"logits/rejected": -1.0222796201705933, |
|
"logps/chosen": -1.6283169984817505, |
|
"logps/rejected": -1.7765767574310303, |
|
"loss": 5.1919, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -16.28316879272461, |
|
"rewards/margins": 1.482597827911377, |
|
"rewards/rejected": -17.765766143798828, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08142493638676845, |
|
"grad_norm": 86.85943744483095, |
|
"learning_rate": 6.4e-07, |
|
"logits/chosen": -0.9614496231079102, |
|
"logits/rejected": -0.956063985824585, |
|
"logps/chosen": -1.5405638217926025, |
|
"logps/rejected": -1.7228600978851318, |
|
"loss": 4.895, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -15.405637741088867, |
|
"rewards/margins": 1.8229618072509766, |
|
"rewards/rejected": -17.228599548339844, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.08368674017528979, |
|
"grad_norm": 62.68839336745218, |
|
"learning_rate": 6.577777777777777e-07, |
|
"logits/chosen": -0.9705076813697815, |
|
"logits/rejected": -0.9391173124313354, |
|
"logps/chosen": -1.3898224830627441, |
|
"logps/rejected": -1.393123745918274, |
|
"loss": 5.2912, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -13.898225784301758, |
|
"rewards/margins": 0.03301262855529785, |
|
"rewards/rejected": -13.931238174438477, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.08594854396381114, |
|
"grad_norm": 82.79087394861669, |
|
"learning_rate": 6.755555555555555e-07, |
|
"logits/chosen": -1.0617159605026245, |
|
"logits/rejected": -1.0448527336120605, |
|
"logps/chosen": -1.5596251487731934, |
|
"logps/rejected": -1.5767797231674194, |
|
"loss": 5.2661, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -15.59625244140625, |
|
"rewards/margins": 0.17154589295387268, |
|
"rewards/rejected": -15.767797470092773, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.08821034775233248, |
|
"grad_norm": 82.19504082670701, |
|
"learning_rate": 6.933333333333333e-07, |
|
"logits/chosen": -1.0380398035049438, |
|
"logits/rejected": -1.0502073764801025, |
|
"logps/chosen": -1.5372322797775269, |
|
"logps/rejected": -1.5468776226043701, |
|
"loss": 5.1939, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -15.372323036193848, |
|
"rewards/margins": 0.0964524894952774, |
|
"rewards/rejected": -15.468774795532227, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.09047215154085383, |
|
"grad_norm": 47.49669984185076, |
|
"learning_rate": 7.111111111111111e-07, |
|
"logits/chosen": -0.9995688199996948, |
|
"logits/rejected": -0.9623959064483643, |
|
"logps/chosen": -1.411413550376892, |
|
"logps/rejected": -1.6402108669281006, |
|
"loss": 3.9605, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -14.1141357421875, |
|
"rewards/margins": 2.2879717350006104, |
|
"rewards/rejected": -16.402109146118164, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09273395532937517, |
|
"grad_norm": 42.861396640850224, |
|
"learning_rate": 7.288888888888888e-07, |
|
"logits/chosen": -1.0610852241516113, |
|
"logits/rejected": -1.0178624391555786, |
|
"logps/chosen": -1.4081141948699951, |
|
"logps/rejected": -1.575732707977295, |
|
"loss": 4.4087, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -14.081141471862793, |
|
"rewards/margins": 1.6761858463287354, |
|
"rewards/rejected": -15.757328033447266, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.09499575911789652, |
|
"grad_norm": 74.18851579735109, |
|
"learning_rate": 7.466666666666667e-07, |
|
"logits/chosen": -0.9311728477478027, |
|
"logits/rejected": -0.8810253143310547, |
|
"logps/chosen": -1.4972470998764038, |
|
"logps/rejected": -1.6159507036209106, |
|
"loss": 4.9615, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -14.9724702835083, |
|
"rewards/margins": 1.187035083770752, |
|
"rewards/rejected": -16.159507751464844, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.09725756290641786, |
|
"grad_norm": 68.32841303137472, |
|
"learning_rate": 7.644444444444444e-07, |
|
"logits/chosen": -1.0198612213134766, |
|
"logits/rejected": -0.9880091547966003, |
|
"logps/chosen": -1.4781789779663086, |
|
"logps/rejected": -1.5365804433822632, |
|
"loss": 4.9515, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -14.78178882598877, |
|
"rewards/margins": 0.5840151309967041, |
|
"rewards/rejected": -15.365804672241211, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.09951936669493922, |
|
"grad_norm": 52.05451037282791, |
|
"learning_rate": 7.822222222222222e-07, |
|
"logits/chosen": -0.9488211274147034, |
|
"logits/rejected": -0.9243355989456177, |
|
"logps/chosen": -1.35234797000885, |
|
"logps/rejected": -1.4985325336456299, |
|
"loss": 4.2171, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -13.523480415344238, |
|
"rewards/margins": 1.4618456363677979, |
|
"rewards/rejected": -14.985325813293457, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.10178117048346055, |
|
"grad_norm": 44.13903740045985, |
|
"learning_rate": 8e-07, |
|
"logits/chosen": -1.0067428350448608, |
|
"logits/rejected": -0.9953335523605347, |
|
"logps/chosen": -1.3989877700805664, |
|
"logps/rejected": -1.5204812288284302, |
|
"loss": 4.562, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -13.98987865447998, |
|
"rewards/margins": 1.2149348258972168, |
|
"rewards/rejected": -15.204813957214355, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.1040429742719819, |
|
"grad_norm": 81.35008402323405, |
|
"learning_rate": 7.999874759018868e-07, |
|
"logits/chosen": -1.0461455583572388, |
|
"logits/rejected": -1.0266019105911255, |
|
"logps/chosen": -1.6549426317214966, |
|
"logps/rejected": -1.809093952178955, |
|
"loss": 4.6032, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -16.54942512512207, |
|
"rewards/margins": 1.5415133237838745, |
|
"rewards/rejected": -18.090938568115234, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.10630477806050326, |
|
"grad_norm": 50.00858913025788, |
|
"learning_rate": 7.999499043918123e-07, |
|
"logits/chosen": -1.0584049224853516, |
|
"logits/rejected": -1.0710422992706299, |
|
"logps/chosen": -1.479085922241211, |
|
"logps/rejected": -1.540130853652954, |
|
"loss": 5.059, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -14.79085922241211, |
|
"rewards/margins": 0.6104493141174316, |
|
"rewards/rejected": -15.401309967041016, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.1085665818490246, |
|
"grad_norm": 49.79402718739552, |
|
"learning_rate": 7.998872878225228e-07, |
|
"logits/chosen": -0.9640560150146484, |
|
"logits/rejected": -0.9456281661987305, |
|
"logps/chosen": -1.507000207901001, |
|
"logps/rejected": -1.6637290716171265, |
|
"loss": 4.3536, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -15.070001602172852, |
|
"rewards/margins": 1.5672889947891235, |
|
"rewards/rejected": -16.637290954589844, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.11082838563754595, |
|
"grad_norm": 48.520949434884415, |
|
"learning_rate": 7.997996301150987e-07, |
|
"logits/chosen": -0.9819774627685547, |
|
"logits/rejected": -0.9651643633842468, |
|
"logps/chosen": -1.4353156089782715, |
|
"logps/rejected": -1.5485820770263672, |
|
"loss": 4.6161, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -14.353155136108398, |
|
"rewards/margins": 1.1326675415039062, |
|
"rewards/rejected": -15.485823631286621, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.11309018942606729, |
|
"grad_norm": 55.11914259402325, |
|
"learning_rate": 7.996869367587088e-07, |
|
"logits/chosen": -0.9383893609046936, |
|
"logits/rejected": -0.9197224378585815, |
|
"logps/chosen": -1.5217969417572021, |
|
"logps/rejected": -1.6309021711349487, |
|
"loss": 4.6737, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -15.21796989440918, |
|
"rewards/margins": 1.0910515785217285, |
|
"rewards/rejected": -16.30902099609375, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11535199321458864, |
|
"grad_norm": 47.6960438841783, |
|
"learning_rate": 7.99549214810266e-07, |
|
"logits/chosen": -0.9459134936332703, |
|
"logits/rejected": -0.937053918838501, |
|
"logps/chosen": -1.5119497776031494, |
|
"logps/rejected": -1.6049790382385254, |
|
"loss": 4.5856, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -15.119497299194336, |
|
"rewards/margins": 0.9302919507026672, |
|
"rewards/rejected": -16.049789428710938, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.11761379700310998, |
|
"grad_norm": 51.14850272343972, |
|
"learning_rate": 7.993864728939867e-07, |
|
"logits/chosen": -0.9789059162139893, |
|
"logits/rejected": -0.9087902307510376, |
|
"logps/chosen": -1.4775077104568481, |
|
"logps/rejected": -1.5865354537963867, |
|
"loss": 4.8286, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -14.775075912475586, |
|
"rewards/margins": 1.090277910232544, |
|
"rewards/rejected": -15.865354537963867, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.11987560079163133, |
|
"grad_norm": 55.812549437850066, |
|
"learning_rate": 7.991987212008491e-07, |
|
"logits/chosen": -0.9828415513038635, |
|
"logits/rejected": -0.9556114673614502, |
|
"logps/chosen": -1.543431043624878, |
|
"logps/rejected": -1.7568646669387817, |
|
"loss": 4.322, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -15.434309005737305, |
|
"rewards/margins": 2.134336471557617, |
|
"rewards/rejected": -17.568645477294922, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.12213740458015267, |
|
"grad_norm": 58.73374282502916, |
|
"learning_rate": 7.989859714879565e-07, |
|
"logits/chosen": -1.019083857536316, |
|
"logits/rejected": -0.9894014596939087, |
|
"logps/chosen": -1.4954748153686523, |
|
"logps/rejected": -1.5853471755981445, |
|
"loss": 4.8631, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -14.95474624633789, |
|
"rewards/margins": 0.8987252712249756, |
|
"rewards/rejected": -15.853471755981445, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.12439920836867402, |
|
"grad_norm": 50.33505759556518, |
|
"learning_rate": 7.987482370778005e-07, |
|
"logits/chosen": -0.9894726276397705, |
|
"logits/rejected": -0.9615195989608765, |
|
"logps/chosen": -1.509278416633606, |
|
"logps/rejected": -1.638422966003418, |
|
"loss": 4.6408, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -15.092784881591797, |
|
"rewards/margins": 1.2914447784423828, |
|
"rewards/rejected": -16.38422966003418, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12666101215719536, |
|
"grad_norm": 57.19617019865859, |
|
"learning_rate": 7.984855328574262e-07, |
|
"logits/chosen": -0.8349502086639404, |
|
"logits/rejected": -0.8322585225105286, |
|
"logps/chosen": -1.4673779010772705, |
|
"logps/rejected": -1.5953435897827148, |
|
"loss": 4.3717, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -14.673778533935547, |
|
"rewards/margins": 1.2796574831008911, |
|
"rewards/rejected": -15.953435897827148, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.1289228159457167, |
|
"grad_norm": 81.97595614859871, |
|
"learning_rate": 7.981978752775009e-07, |
|
"logits/chosen": -0.9377725124359131, |
|
"logits/rejected": -0.9315369129180908, |
|
"logps/chosen": -1.559471845626831, |
|
"logps/rejected": -1.6976258754730225, |
|
"loss": 4.6026, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -15.594717025756836, |
|
"rewards/margins": 1.3815417289733887, |
|
"rewards/rejected": -16.976261138916016, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.13118461973423806, |
|
"grad_norm": 82.53460988639671, |
|
"learning_rate": 7.978852823512833e-07, |
|
"logits/chosen": -0.9619041681289673, |
|
"logits/rejected": -0.9108754992485046, |
|
"logps/chosen": -1.6628988981246948, |
|
"logps/rejected": -1.804010272026062, |
|
"loss": 4.9115, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -16.628990173339844, |
|
"rewards/margins": 1.4111130237579346, |
|
"rewards/rejected": -18.040103912353516, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.1334464235227594, |
|
"grad_norm": 82.64443469883086, |
|
"learning_rate": 7.975477736534957e-07, |
|
"logits/chosen": -0.9662675857543945, |
|
"logits/rejected": -0.9529209136962891, |
|
"logps/chosen": -1.6256754398345947, |
|
"logps/rejected": -1.8393034934997559, |
|
"loss": 4.3491, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -16.25675392150879, |
|
"rewards/margins": 2.1362812519073486, |
|
"rewards/rejected": -18.393035888671875, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.13570822731128074, |
|
"grad_norm": 71.03771820170468, |
|
"learning_rate": 7.971853703190986e-07, |
|
"logits/chosen": -0.9512439966201782, |
|
"logits/rejected": -0.9367961883544922, |
|
"logps/chosen": -1.6175850629806519, |
|
"logps/rejected": -1.7883929014205933, |
|
"loss": 4.5502, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -16.175851821899414, |
|
"rewards/margins": 1.708077073097229, |
|
"rewards/rejected": -17.883928298950195, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1379700310998021, |
|
"grad_norm": 63.587178138885655, |
|
"learning_rate": 7.967980950419664e-07, |
|
"logits/chosen": -0.8925920128822327, |
|
"logits/rejected": -0.8741836547851562, |
|
"logps/chosen": -1.5468474626541138, |
|
"logps/rejected": -1.7149834632873535, |
|
"loss": 4.4498, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -15.468475341796875, |
|
"rewards/margins": 1.6813607215881348, |
|
"rewards/rejected": -17.14983558654785, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.14023183488832344, |
|
"grad_norm": 57.0000546813876, |
|
"learning_rate": 7.963859720734669e-07, |
|
"logits/chosen": -0.976127564907074, |
|
"logits/rejected": -0.9543916583061218, |
|
"logps/chosen": -1.4541046619415283, |
|
"logps/rejected": -1.6479957103729248, |
|
"loss": 4.2985, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -14.541044235229492, |
|
"rewards/margins": 1.9389113187789917, |
|
"rewards/rejected": -16.479955673217773, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.14249363867684478, |
|
"grad_norm": 45.921859975209195, |
|
"learning_rate": 7.959490272209427e-07, |
|
"logits/chosen": -0.955858051776886, |
|
"logits/rejected": -0.9108732342720032, |
|
"logps/chosen": -1.5097980499267578, |
|
"logps/rejected": -1.7698363065719604, |
|
"loss": 3.8473, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -15.097979545593262, |
|
"rewards/margins": 2.6003851890563965, |
|
"rewards/rejected": -17.6983642578125, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.14475544246536612, |
|
"grad_norm": 51.49591408070723, |
|
"learning_rate": 7.954872878460946e-07, |
|
"logits/chosen": -0.9854045510292053, |
|
"logits/rejected": -0.929128885269165, |
|
"logps/chosen": -1.5844659805297852, |
|
"logps/rejected": -1.8169422149658203, |
|
"loss": 3.9946, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -15.844658851623535, |
|
"rewards/margins": 2.3247623443603516, |
|
"rewards/rejected": -18.169422149658203, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.14701724625388748, |
|
"grad_norm": 62.69058123930268, |
|
"learning_rate": 7.950007828632691e-07, |
|
"logits/chosen": -0.923812210559845, |
|
"logits/rejected": -0.9139933586120605, |
|
"logps/chosen": -1.6581015586853027, |
|
"logps/rejected": -1.9306143522262573, |
|
"loss": 4.1669, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -16.58101463317871, |
|
"rewards/margins": 2.7251291275024414, |
|
"rewards/rejected": -19.30614471435547, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14927905004240882, |
|
"grad_norm": 58.35372759599982, |
|
"learning_rate": 7.944895427376465e-07, |
|
"logits/chosen": -0.9305150508880615, |
|
"logits/rejected": -0.9124011397361755, |
|
"logps/chosen": -1.647985816001892, |
|
"logps/rejected": -1.9000327587127686, |
|
"loss": 4.2476, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -16.4798583984375, |
|
"rewards/margins": 2.520467758178711, |
|
"rewards/rejected": -19.000324249267578, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.15154085383093016, |
|
"grad_norm": 43.09008708056084, |
|
"learning_rate": 7.939535994833345e-07, |
|
"logits/chosen": -0.8772422671318054, |
|
"logits/rejected": -0.8756166696548462, |
|
"logps/chosen": -1.5184426307678223, |
|
"logps/rejected": -1.8162511587142944, |
|
"loss": 3.9122, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -15.184427261352539, |
|
"rewards/margins": 2.9780831336975098, |
|
"rewards/rejected": -18.16250991821289, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.1538026576194515, |
|
"grad_norm": 59.93504396897099, |
|
"learning_rate": 7.933929866613628e-07, |
|
"logits/chosen": -0.910822331905365, |
|
"logits/rejected": -0.90012526512146, |
|
"logps/chosen": -1.5802690982818604, |
|
"logps/rejected": -1.7131223678588867, |
|
"loss": 4.5565, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -15.802693367004395, |
|
"rewards/margins": 1.3285305500030518, |
|
"rewards/rejected": -17.131221771240234, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.15606446140797287, |
|
"grad_norm": 55.814177720185704, |
|
"learning_rate": 7.928077393775808e-07, |
|
"logits/chosen": -0.9053980112075806, |
|
"logits/rejected": -0.914262592792511, |
|
"logps/chosen": -1.612609624862671, |
|
"logps/rejected": -1.943806529045105, |
|
"loss": 3.6111, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -16.1260986328125, |
|
"rewards/margins": 3.3119657039642334, |
|
"rewards/rejected": -19.438064575195312, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.1583262651964942, |
|
"grad_norm": 78.47927861004493, |
|
"learning_rate": 7.921978942804609e-07, |
|
"logits/chosen": -0.8953057527542114, |
|
"logits/rejected": -0.8986684679985046, |
|
"logps/chosen": -1.62074613571167, |
|
"logps/rejected": -1.8778736591339111, |
|
"loss": 4.0181, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -16.207462310791016, |
|
"rewards/margins": 2.5712764263153076, |
|
"rewards/rejected": -18.778738021850586, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16058806898501554, |
|
"grad_norm": 75.9518818004111, |
|
"learning_rate": 7.915634895588021e-07, |
|
"logits/chosen": -0.9190117120742798, |
|
"logits/rejected": -0.9021117687225342, |
|
"logps/chosen": -1.7251794338226318, |
|
"logps/rejected": -1.880922794342041, |
|
"loss": 4.5789, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -17.251794815063477, |
|
"rewards/margins": 1.557431697845459, |
|
"rewards/rejected": -18.809226989746094, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.1628498727735369, |
|
"grad_norm": 61.022269041978284, |
|
"learning_rate": 7.909045649393394e-07, |
|
"logits/chosen": -0.9734601974487305, |
|
"logits/rejected": -0.9675798416137695, |
|
"logps/chosen": -1.5918437242507935, |
|
"logps/rejected": -1.7174441814422607, |
|
"loss": 4.5249, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -15.918436050415039, |
|
"rewards/margins": 1.2560052871704102, |
|
"rewards/rejected": -17.174442291259766, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.16511167656205825, |
|
"grad_norm": 51.255067484840446, |
|
"learning_rate": 7.902211616842556e-07, |
|
"logits/chosen": -0.929259181022644, |
|
"logits/rejected": -0.9242822527885437, |
|
"logps/chosen": -1.6214507818222046, |
|
"logps/rejected": -1.8945387601852417, |
|
"loss": 4.097, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -16.214509963989258, |
|
"rewards/margins": 2.730879545211792, |
|
"rewards/rejected": -18.945388793945312, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.16737348035057958, |
|
"grad_norm": 72.87980549926093, |
|
"learning_rate": 7.89513322588598e-07, |
|
"logits/chosen": -0.9137905240058899, |
|
"logits/rejected": -0.9027823209762573, |
|
"logps/chosen": -1.6158413887023926, |
|
"logps/rejected": -1.8116414546966553, |
|
"loss": 3.9823, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -16.158414840698242, |
|
"rewards/margins": 1.957998514175415, |
|
"rewards/rejected": -18.116413116455078, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.16963528413910092, |
|
"grad_norm": 55.44552897547286, |
|
"learning_rate": 7.887810919775976e-07, |
|
"logits/chosen": -0.8373405933380127, |
|
"logits/rejected": -0.8267748951911926, |
|
"logps/chosen": -1.739768385887146, |
|
"logps/rejected": -1.942664384841919, |
|
"loss": 4.0906, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -17.397686004638672, |
|
"rewards/margins": 2.0289597511291504, |
|
"rewards/rejected": -19.42664337158203, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1718970879276223, |
|
"grad_norm": 46.14322271087677, |
|
"learning_rate": 7.880245157038949e-07, |
|
"logits/chosen": -0.9083539843559265, |
|
"logits/rejected": -0.8803065419197083, |
|
"logps/chosen": -1.6886913776397705, |
|
"logps/rejected": -1.9166233539581299, |
|
"loss": 4.1824, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -16.886913299560547, |
|
"rewards/margins": 2.279320001602173, |
|
"rewards/rejected": -19.16623306274414, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.17415889171614363, |
|
"grad_norm": 91.21439455118843, |
|
"learning_rate": 7.872436411446671e-07, |
|
"logits/chosen": -0.9421958923339844, |
|
"logits/rejected": -0.9557660818099976, |
|
"logps/chosen": -1.7618883848190308, |
|
"logps/rejected": -1.9200146198272705, |
|
"loss": 4.6535, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -17.61888313293457, |
|
"rewards/margins": 1.5812623500823975, |
|
"rewards/rejected": -19.200145721435547, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.17642069550466496, |
|
"grad_norm": 54.571609458712125, |
|
"learning_rate": 7.86438517198662e-07, |
|
"logits/chosen": -0.8783223628997803, |
|
"logits/rejected": -0.8604764342308044, |
|
"logps/chosen": -1.6228320598602295, |
|
"logps/rejected": -1.8209893703460693, |
|
"loss": 4.2662, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -16.228322982788086, |
|
"rewards/margins": 1.9815720319747925, |
|
"rewards/rejected": -18.20989227294922, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1786824992931863, |
|
"grad_norm": 49.70647980269609, |
|
"learning_rate": 7.856091942831366e-07, |
|
"logits/chosen": -0.8506362438201904, |
|
"logits/rejected": -0.8499268889427185, |
|
"logps/chosen": -1.6531625986099243, |
|
"logps/rejected": -1.858489990234375, |
|
"loss": 4.5771, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -16.531625747680664, |
|
"rewards/margins": 2.053274154663086, |
|
"rewards/rejected": -18.58489990234375, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.18094430308170767, |
|
"grad_norm": 58.13564388580717, |
|
"learning_rate": 7.847557243306982e-07, |
|
"logits/chosen": -0.9484611749649048, |
|
"logits/rejected": -0.9265221357345581, |
|
"logps/chosen": -1.6786153316497803, |
|
"logps/rejected": -1.9018019437789917, |
|
"loss": 4.0914, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -16.786151885986328, |
|
"rewards/margins": 2.2318663597106934, |
|
"rewards/rejected": -19.01801872253418, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.183206106870229, |
|
"grad_norm": 62.4321346494523, |
|
"learning_rate": 7.838781607860541e-07, |
|
"logits/chosen": -0.9189624786376953, |
|
"logits/rejected": -0.9065747857093811, |
|
"logps/chosen": -1.7250890731811523, |
|
"logps/rejected": -1.9335139989852905, |
|
"loss": 3.7718, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -17.250890731811523, |
|
"rewards/margins": 2.0842509269714355, |
|
"rewards/rejected": -19.335142135620117, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.18546791065875035, |
|
"grad_norm": 54.82908329001109, |
|
"learning_rate": 7.82976558602664e-07, |
|
"logits/chosen": -0.9506435394287109, |
|
"logits/rejected": -0.9545921087265015, |
|
"logps/chosen": -1.6549060344696045, |
|
"logps/rejected": -1.8563690185546875, |
|
"loss": 4.0786, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -16.549062728881836, |
|
"rewards/margins": 2.0146291255950928, |
|
"rewards/rejected": -18.563692092895508, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.1877297144472717, |
|
"grad_norm": 73.24163989154486, |
|
"learning_rate": 7.820509742392988e-07, |
|
"logits/chosen": -0.9353307485580444, |
|
"logits/rejected": -0.9192267656326294, |
|
"logps/chosen": -1.8069275617599487, |
|
"logps/rejected": -1.9718246459960938, |
|
"loss": 4.1425, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -18.06927490234375, |
|
"rewards/margins": 1.648969292640686, |
|
"rewards/rejected": -19.718246459960938, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.18999151823579305, |
|
"grad_norm": 83.82148038528139, |
|
"learning_rate": 7.811014656565054e-07, |
|
"logits/chosen": -0.949898362159729, |
|
"logits/rejected": -0.9215849041938782, |
|
"logps/chosen": -1.7020446062088013, |
|
"logps/rejected": -2.0518198013305664, |
|
"loss": 3.7671, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -17.02044677734375, |
|
"rewards/margins": 3.4977519512176514, |
|
"rewards/rejected": -20.518198013305664, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.1922533220243144, |
|
"grad_norm": 63.84735072335941, |
|
"learning_rate": 7.801280923129773e-07, |
|
"logits/chosen": -0.9368395805358887, |
|
"logits/rejected": -0.9302814602851868, |
|
"logps/chosen": -1.7603507041931152, |
|
"logps/rejected": -1.9069691896438599, |
|
"loss": 4.6105, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -17.603506088256836, |
|
"rewards/margins": 1.4661844968795776, |
|
"rewards/rejected": -19.069690704345703, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19451512581283573, |
|
"grad_norm": 74.09676772081293, |
|
"learning_rate": 7.791309151618305e-07, |
|
"logits/chosen": -0.9267253875732422, |
|
"logits/rejected": -0.9177088141441345, |
|
"logps/chosen": -1.857153296470642, |
|
"logps/rejected": -2.063552141189575, |
|
"loss": 4.1447, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -18.571533203125, |
|
"rewards/margins": 2.063988447189331, |
|
"rewards/rejected": -20.635520935058594, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.1967769296013571, |
|
"grad_norm": 52.1715347907722, |
|
"learning_rate": 7.781099966467874e-07, |
|
"logits/chosen": -0.9667163491249084, |
|
"logits/rejected": -0.9519374966621399, |
|
"logps/chosen": -1.6798536777496338, |
|
"logps/rejected": -1.856392502784729, |
|
"loss": 3.8298, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -16.798538208007812, |
|
"rewards/margins": 1.765390396118164, |
|
"rewards/rejected": -18.563926696777344, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.19903873338987843, |
|
"grad_norm": 80.36263067206907, |
|
"learning_rate": 7.770654006982664e-07, |
|
"logits/chosen": -0.9384152889251709, |
|
"logits/rejected": -0.8907789587974548, |
|
"logps/chosen": -1.950421929359436, |
|
"logps/rejected": -2.1496126651763916, |
|
"loss": 4.4707, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -19.50421714782715, |
|
"rewards/margins": 1.9919090270996094, |
|
"rewards/rejected": -21.496124267578125, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.20130053717839977, |
|
"grad_norm": 75.5591779672025, |
|
"learning_rate": 7.759971927293781e-07, |
|
"logits/chosen": -0.9437240362167358, |
|
"logits/rejected": -0.9195570945739746, |
|
"logps/chosen": -1.7843701839447021, |
|
"logps/rejected": -1.9893090724945068, |
|
"loss": 4.1294, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -17.843700408935547, |
|
"rewards/margins": 2.0493886470794678, |
|
"rewards/rejected": -19.893091201782227, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.2035623409669211, |
|
"grad_norm": 67.15891286792656, |
|
"learning_rate": 7.749054396318297e-07, |
|
"logits/chosen": -0.9150665402412415, |
|
"logits/rejected": -0.901638925075531, |
|
"logps/chosen": -1.9084649085998535, |
|
"logps/rejected": -2.08243989944458, |
|
"loss": 4.2923, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -19.08464813232422, |
|
"rewards/margins": 1.7397501468658447, |
|
"rewards/rejected": -20.824398040771484, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20582414475544247, |
|
"grad_norm": 108.00560453735488, |
|
"learning_rate": 7.737902097717356e-07, |
|
"logits/chosen": -0.8958998918533325, |
|
"logits/rejected": -0.9058135747909546, |
|
"logps/chosen": -1.8446322679519653, |
|
"logps/rejected": -2.1106581687927246, |
|
"loss": 4.4077, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -18.44632339477539, |
|
"rewards/margins": 2.6602604389190674, |
|
"rewards/rejected": -21.106584548950195, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.2080859485439638, |
|
"grad_norm": 75.63120201277066, |
|
"learning_rate": 7.726515729853367e-07, |
|
"logits/chosen": -0.8995416760444641, |
|
"logits/rejected": -0.9007349014282227, |
|
"logps/chosen": -1.8412714004516602, |
|
"logps/rejected": -1.9961681365966797, |
|
"loss": 4.4973, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -18.4127140045166, |
|
"rewards/margins": 1.5489662885665894, |
|
"rewards/rejected": -19.961679458618164, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.21034775233248515, |
|
"grad_norm": 88.12288150261115, |
|
"learning_rate": 7.714896005746272e-07, |
|
"logits/chosen": -0.9239206314086914, |
|
"logits/rejected": -0.9102018475532532, |
|
"logps/chosen": -1.909857988357544, |
|
"logps/rejected": -2.197936773300171, |
|
"loss": 3.6919, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -19.09857940673828, |
|
"rewards/margins": 2.880788803100586, |
|
"rewards/rejected": -21.979368209838867, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.21260955612100652, |
|
"grad_norm": 87.36974125605845, |
|
"learning_rate": 7.703043653028896e-07, |
|
"logits/chosen": -0.9769506454467773, |
|
"logits/rejected": -0.9598399996757507, |
|
"logps/chosen": -2.121058464050293, |
|
"logps/rejected": -2.3538308143615723, |
|
"loss": 4.1322, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -21.210582733154297, |
|
"rewards/margins": 2.327723979949951, |
|
"rewards/rejected": -23.538307189941406, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.21487135990952785, |
|
"grad_norm": 85.03774833702244, |
|
"learning_rate": 7.690959413901379e-07, |
|
"logits/chosen": -0.9238636493682861, |
|
"logits/rejected": -0.8953381180763245, |
|
"logps/chosen": -1.991039752960205, |
|
"logps/rejected": -2.2404017448425293, |
|
"loss": 4.0353, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -19.910398483276367, |
|
"rewards/margins": 2.4936180114746094, |
|
"rewards/rejected": -22.404016494750977, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2171331636980492, |
|
"grad_norm": 72.36376692256862, |
|
"learning_rate": 7.678644045084704e-07, |
|
"logits/chosen": -0.8678968548774719, |
|
"logits/rejected": -0.8794847726821899, |
|
"logps/chosen": -1.8975541591644287, |
|
"logps/rejected": -2.146233558654785, |
|
"loss": 4.2224, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -18.975540161132812, |
|
"rewards/margins": 2.486798048019409, |
|
"rewards/rejected": -21.462339401245117, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.21939496748657053, |
|
"grad_norm": 69.60095446938993, |
|
"learning_rate": 7.666098317773308e-07, |
|
"logits/chosen": -0.9311771392822266, |
|
"logits/rejected": -0.937899649143219, |
|
"logps/chosen": -2.0553524494171143, |
|
"logps/rejected": -2.284069299697876, |
|
"loss": 3.7902, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -20.553525924682617, |
|
"rewards/margins": 2.2871673107147217, |
|
"rewards/rejected": -22.8406925201416, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.2216567712750919, |
|
"grad_norm": 62.371207962205936, |
|
"learning_rate": 7.653323017586789e-07, |
|
"logits/chosen": -0.9331353306770325, |
|
"logits/rejected": -0.9246288537979126, |
|
"logps/chosen": -1.8696472644805908, |
|
"logps/rejected": -2.085547685623169, |
|
"loss": 3.8258, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -18.69647216796875, |
|
"rewards/margins": 2.1590051651000977, |
|
"rewards/rejected": -20.85547637939453, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.22391857506361323, |
|
"grad_norm": 75.06088082843614, |
|
"learning_rate": 7.640318944520711e-07, |
|
"logits/chosen": -0.9489973783493042, |
|
"logits/rejected": -0.9453101754188538, |
|
"logps/chosen": -2.065215826034546, |
|
"logps/rejected": -2.2743237018585205, |
|
"loss": 3.9136, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -20.652158737182617, |
|
"rewards/margins": 2.0910775661468506, |
|
"rewards/rejected": -22.743234634399414, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.22618037885213457, |
|
"grad_norm": 81.98491165956241, |
|
"learning_rate": 7.627086912896511e-07, |
|
"logits/chosen": -0.8288049697875977, |
|
"logits/rejected": -0.8540509939193726, |
|
"logps/chosen": -1.9648536443710327, |
|
"logps/rejected": -2.2276651859283447, |
|
"loss": 3.7455, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -19.648534774780273, |
|
"rewards/margins": 2.6281163692474365, |
|
"rewards/rejected": -22.27665138244629, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2284421826406559, |
|
"grad_norm": 65.93723986609388, |
|
"learning_rate": 7.613627751310499e-07, |
|
"logits/chosen": -0.974036693572998, |
|
"logits/rejected": -0.9655519127845764, |
|
"logps/chosen": -2.1046342849731445, |
|
"logps/rejected": -2.335761547088623, |
|
"loss": 3.5216, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -21.046340942382812, |
|
"rewards/margins": 2.311272144317627, |
|
"rewards/rejected": -23.357616424560547, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.23070398642917728, |
|
"grad_norm": 87.06110168746535, |
|
"learning_rate": 7.599942302581977e-07, |
|
"logits/chosen": -0.9455628395080566, |
|
"logits/rejected": -0.9504780769348145, |
|
"logps/chosen": -2.1105945110321045, |
|
"logps/rejected": -2.4446237087249756, |
|
"loss": 3.5095, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -21.105945587158203, |
|
"rewards/margins": 3.3402912616729736, |
|
"rewards/rejected": -24.446237564086914, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.23296579021769862, |
|
"grad_norm": 80.47217791681035, |
|
"learning_rate": 7.586031423700457e-07, |
|
"logits/chosen": -0.9198256731033325, |
|
"logits/rejected": -0.9162503480911255, |
|
"logps/chosen": -2.081482410430908, |
|
"logps/rejected": -2.323981285095215, |
|
"loss": 3.9101, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -20.814823150634766, |
|
"rewards/margins": 2.424990177154541, |
|
"rewards/rejected": -23.23981475830078, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.23522759400621995, |
|
"grad_norm": 84.59324902974666, |
|
"learning_rate": 7.571895985772e-07, |
|
"logits/chosen": -0.8880999684333801, |
|
"logits/rejected": -0.8877614140510559, |
|
"logps/chosen": -2.112489700317383, |
|
"logps/rejected": -2.463395595550537, |
|
"loss": 3.2862, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -21.124897003173828, |
|
"rewards/margins": 3.50905704498291, |
|
"rewards/rejected": -24.633956909179688, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.23748939779474132, |
|
"grad_norm": 93.88519238767245, |
|
"learning_rate": 7.557536873964661e-07, |
|
"logits/chosen": -0.9578956365585327, |
|
"logits/rejected": -0.9488856196403503, |
|
"logps/chosen": -2.4465017318725586, |
|
"logps/rejected": -2.6574926376342773, |
|
"loss": 4.2785, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -24.465015411376953, |
|
"rewards/margins": 2.1099109649658203, |
|
"rewards/rejected": -26.57492446899414, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23975120158326266, |
|
"grad_norm": 79.55620950019336, |
|
"learning_rate": 7.542954987453069e-07, |
|
"logits/chosen": -0.9276089072227478, |
|
"logits/rejected": -0.9335793852806091, |
|
"logps/chosen": -2.2928051948547363, |
|
"logps/rejected": -2.54110050201416, |
|
"loss": 3.7542, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -22.928054809570312, |
|
"rewards/margins": 2.4829494953155518, |
|
"rewards/rejected": -25.4110050201416, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.242013005371784, |
|
"grad_norm": 91.27838484081917, |
|
"learning_rate": 7.528151239362108e-07, |
|
"logits/chosen": -0.9437668323516846, |
|
"logits/rejected": -0.9462485313415527, |
|
"logps/chosen": -2.3910629749298096, |
|
"logps/rejected": -2.6854913234710693, |
|
"loss": 3.5219, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -23.91063117980957, |
|
"rewards/margins": 2.9442789554595947, |
|
"rewards/rejected": -26.85491180419922, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.24427480916030533, |
|
"grad_norm": 88.68410449777316, |
|
"learning_rate": 7.513126556709748e-07, |
|
"logits/chosen": -0.9345430135726929, |
|
"logits/rejected": -0.9268801808357239, |
|
"logps/chosen": -2.4017930030822754, |
|
"logps/rejected": -2.769888401031494, |
|
"loss": 3.408, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -24.01793098449707, |
|
"rewards/margins": 3.68095326423645, |
|
"rewards/rejected": -27.69888687133789, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2465366129488267, |
|
"grad_norm": 102.0857656020729, |
|
"learning_rate": 7.497881880348984e-07, |
|
"logits/chosen": -0.8913191556930542, |
|
"logits/rejected": -0.8778472542762756, |
|
"logps/chosen": -2.472365379333496, |
|
"logps/rejected": -2.761366367340088, |
|
"loss": 3.7704, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -24.723657608032227, |
|
"rewards/margins": 2.890007495880127, |
|
"rewards/rejected": -27.613662719726562, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.24879841673734804, |
|
"grad_norm": 105.73063880460941, |
|
"learning_rate": 7.482418164908931e-07, |
|
"logits/chosen": -0.8976746797561646, |
|
"logits/rejected": -0.9002203941345215, |
|
"logps/chosen": -2.6170969009399414, |
|
"logps/rejected": -2.8415138721466064, |
|
"loss": 4.0031, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -26.17096710205078, |
|
"rewards/margins": 2.244166374206543, |
|
"rewards/rejected": -28.415136337280273, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2510602205258694, |
|
"grad_norm": 97.59315977325764, |
|
"learning_rate": 7.466736378735035e-07, |
|
"logits/chosen": -0.8856804370880127, |
|
"logits/rejected": -0.8837353587150574, |
|
"logps/chosen": -2.677194356918335, |
|
"logps/rejected": -3.0208263397216797, |
|
"loss": 3.6384, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -26.771942138671875, |
|
"rewards/margins": 3.4363198280334473, |
|
"rewards/rejected": -30.208263397216797, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.2533220243143907, |
|
"grad_norm": 126.79312096317705, |
|
"learning_rate": 7.450837503828439e-07, |
|
"logits/chosen": -0.8715759515762329, |
|
"logits/rejected": -0.8573368787765503, |
|
"logps/chosen": -2.7976884841918945, |
|
"logps/rejected": -3.2237420082092285, |
|
"loss": 3.5204, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -27.976884841918945, |
|
"rewards/margins": 4.260531902313232, |
|
"rewards/rejected": -32.23741912841797, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.2555838281029121, |
|
"grad_norm": 87.07706648937445, |
|
"learning_rate": 7.43472253578449e-07, |
|
"logits/chosen": -0.8923739790916443, |
|
"logits/rejected": -0.8869914412498474, |
|
"logps/chosen": -2.504340410232544, |
|
"logps/rejected": -2.828713893890381, |
|
"loss": 3.6506, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -25.04340362548828, |
|
"rewards/margins": 3.2437362670898438, |
|
"rewards/rejected": -28.287139892578125, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.2578456318914334, |
|
"grad_norm": 104.48863847079933, |
|
"learning_rate": 7.418392483730389e-07, |
|
"logits/chosen": -0.8830041289329529, |
|
"logits/rejected": -0.8893088102340698, |
|
"logps/chosen": -2.6945056915283203, |
|
"logps/rejected": -3.040527582168579, |
|
"loss": 3.3466, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -26.94506072998047, |
|
"rewards/margins": 3.460216522216797, |
|
"rewards/rejected": -30.405275344848633, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.26010743567995476, |
|
"grad_norm": 91.32754287184382, |
|
"learning_rate": 7.401848370262012e-07, |
|
"logits/chosen": -0.9312195181846619, |
|
"logits/rejected": -0.9182373285293579, |
|
"logps/chosen": -2.703903913497925, |
|
"logps/rejected": -2.9744763374328613, |
|
"loss": 3.6787, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -27.039039611816406, |
|
"rewards/margins": 2.7057249546051025, |
|
"rewards/rejected": -29.744762420654297, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2623692394684761, |
|
"grad_norm": 125.7427227518665, |
|
"learning_rate": 7.385091231379856e-07, |
|
"logits/chosen": -0.8779256343841553, |
|
"logits/rejected": -0.8929042220115662, |
|
"logps/chosen": -2.9074618816375732, |
|
"logps/rejected": -3.239941120147705, |
|
"loss": 3.9438, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -29.07461929321289, |
|
"rewards/margins": 3.3247933387756348, |
|
"rewards/rejected": -32.399410247802734, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.26463104325699743, |
|
"grad_norm": 119.85008448842645, |
|
"learning_rate": 7.368122116424182e-07, |
|
"logits/chosen": -0.8717849254608154, |
|
"logits/rejected": -0.8825671076774597, |
|
"logps/chosen": -2.822004795074463, |
|
"logps/rejected": -3.154021739959717, |
|
"loss": 3.857, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -28.220046997070312, |
|
"rewards/margins": 3.3201732635498047, |
|
"rewards/rejected": -31.540220260620117, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.2668928470455188, |
|
"grad_norm": 113.06206788907483, |
|
"learning_rate": 7.350942088009289e-07, |
|
"logits/chosen": -0.9109733700752258, |
|
"logits/rejected": -0.9194462299346924, |
|
"logps/chosen": -2.891629695892334, |
|
"logps/rejected": -3.2021901607513428, |
|
"loss": 3.4546, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -28.916297912597656, |
|
"rewards/margins": 3.105602741241455, |
|
"rewards/rejected": -32.02190017700195, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.26915465083404017, |
|
"grad_norm": 127.60563608136493, |
|
"learning_rate": 7.333552221956986e-07, |
|
"logits/chosen": -1.0249210596084595, |
|
"logits/rejected": -1.0173821449279785, |
|
"logps/chosen": -3.0308027267456055, |
|
"logps/rejected": -3.419254779815674, |
|
"loss": 3.5686, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -30.308027267456055, |
|
"rewards/margins": 3.8845183849334717, |
|
"rewards/rejected": -34.192543029785156, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.2714164546225615, |
|
"grad_norm": 100.45773101215859, |
|
"learning_rate": 7.315953607229217e-07, |
|
"logits/chosen": -0.9121578335762024, |
|
"logits/rejected": -0.9298517107963562, |
|
"logps/chosen": -3.1007490158081055, |
|
"logps/rejected": -3.4755465984344482, |
|
"loss": 3.4094, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -31.007490158081055, |
|
"rewards/margins": 3.747976303100586, |
|
"rewards/rejected": -34.75546646118164, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27367825841108284, |
|
"grad_norm": 109.20037330739903, |
|
"learning_rate": 7.298147345859869e-07, |
|
"logits/chosen": -0.9282402396202087, |
|
"logits/rejected": -0.9546459317207336, |
|
"logps/chosen": -2.9003353118896484, |
|
"logps/rejected": -3.2729110717773438, |
|
"loss": 3.4825, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -29.003353118896484, |
|
"rewards/margins": 3.7257604598999023, |
|
"rewards/rejected": -32.72911071777344, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.2759400621996042, |
|
"grad_norm": 131.78377393504542, |
|
"learning_rate": 7.280134552885762e-07, |
|
"logits/chosen": -0.8907239437103271, |
|
"logits/rejected": -0.8915092945098877, |
|
"logps/chosen": -3.0040464401245117, |
|
"logps/rejected": -3.404412269592285, |
|
"loss": 3.2541, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -30.040464401245117, |
|
"rewards/margins": 4.00365686416626, |
|
"rewards/rejected": -34.04412078857422, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.2782018659881255, |
|
"grad_norm": 100.9284768839828, |
|
"learning_rate": 7.261916356276831e-07, |
|
"logits/chosen": -0.8974347114562988, |
|
"logits/rejected": -0.8869479894638062, |
|
"logps/chosen": -3.0903728008270264, |
|
"logps/rejected": -3.5891921520233154, |
|
"loss": 2.9826, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -30.90372657775879, |
|
"rewards/margins": 4.988193035125732, |
|
"rewards/rejected": -35.89192199707031, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.2804636697766469, |
|
"grad_norm": 100.52698585430983, |
|
"learning_rate": 7.243493896865486e-07, |
|
"logits/chosen": -0.9068385362625122, |
|
"logits/rejected": -0.9022574424743652, |
|
"logps/chosen": -2.895165205001831, |
|
"logps/rejected": -3.168149471282959, |
|
"loss": 3.6795, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -28.95165252685547, |
|
"rewards/margins": 2.7298452854156494, |
|
"rewards/rejected": -31.68149757385254, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.2827254735651682, |
|
"grad_norm": 112.06965575315508, |
|
"learning_rate": 7.224868328275169e-07, |
|
"logits/chosen": -0.895264744758606, |
|
"logits/rejected": -0.889894962310791, |
|
"logps/chosen": -2.9756557941436768, |
|
"logps/rejected": -3.372425079345703, |
|
"loss": 3.509, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -29.756555557250977, |
|
"rewards/margins": 3.967694044113159, |
|
"rewards/rejected": -33.72425079345703, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.28498727735368956, |
|
"grad_norm": 200.69722210464988, |
|
"learning_rate": 7.206040816848126e-07, |
|
"logits/chosen": -0.9270356893539429, |
|
"logits/rejected": -0.9209667444229126, |
|
"logps/chosen": -3.1612963676452637, |
|
"logps/rejected": -3.414872407913208, |
|
"loss": 4.2247, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -31.612964630126953, |
|
"rewards/margins": 2.535757064819336, |
|
"rewards/rejected": -34.14872360229492, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.2872490811422109, |
|
"grad_norm": 113.76490541559193, |
|
"learning_rate": 7.187012541572356e-07, |
|
"logits/chosen": -0.9891903400421143, |
|
"logits/rejected": -0.9760682582855225, |
|
"logps/chosen": -3.2219786643981934, |
|
"logps/rejected": -3.6162261962890625, |
|
"loss": 3.646, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -32.219783782958984, |
|
"rewards/margins": 3.9424736499786377, |
|
"rewards/rejected": -36.162261962890625, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.28951088493073224, |
|
"grad_norm": 161.28339588297143, |
|
"learning_rate": 7.167784694007791e-07, |
|
"logits/chosen": -0.9055167436599731, |
|
"logits/rejected": -0.9182201623916626, |
|
"logps/chosen": -3.0764412879943848, |
|
"logps/rejected": -3.50889253616333, |
|
"loss": 3.4768, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -30.76441192626953, |
|
"rewards/margins": 4.324512958526611, |
|
"rewards/rejected": -35.088924407958984, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.2917726887192536, |
|
"grad_norm": 130.49520984475546, |
|
"learning_rate": 7.148358478211682e-07, |
|
"logits/chosen": -0.9700244665145874, |
|
"logits/rejected": -0.9521669149398804, |
|
"logps/chosen": -3.1916344165802, |
|
"logps/rejected": -3.6720499992370605, |
|
"loss": 2.8, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -31.916339874267578, |
|
"rewards/margins": 4.804160118103027, |
|
"rewards/rejected": -36.72050476074219, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.29403449250777497, |
|
"grad_norm": 95.95845462646429, |
|
"learning_rate": 7.128735110663187e-07, |
|
"logits/chosen": -0.9428349733352661, |
|
"logits/rejected": -0.9141571521759033, |
|
"logps/chosen": -2.8836987018585205, |
|
"logps/rejected": -3.3434090614318848, |
|
"loss": 3.2795, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -28.836990356445312, |
|
"rewards/margins": 4.59710168838501, |
|
"rewards/rejected": -33.43408966064453, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2962962962962963, |
|
"grad_norm": 133.0597558266743, |
|
"learning_rate": 7.108915820187211e-07, |
|
"logits/chosen": -0.9095150232315063, |
|
"logits/rejected": -0.9167226552963257, |
|
"logps/chosen": -3.4271678924560547, |
|
"logps/rejected": -3.8848564624786377, |
|
"loss": 3.8022, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -34.27167510986328, |
|
"rewards/margins": 4.576888084411621, |
|
"rewards/rejected": -38.84856414794922, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.29855810008481765, |
|
"grad_norm": 138.52287769507078, |
|
"learning_rate": 7.088901847877447e-07, |
|
"logits/chosen": -0.9113332033157349, |
|
"logits/rejected": -0.9178464412689209, |
|
"logps/chosen": -3.193387269973755, |
|
"logps/rejected": -3.468454122543335, |
|
"loss": 4.6596, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -31.93387222290039, |
|
"rewards/margins": 2.7506697177886963, |
|
"rewards/rejected": -34.68454360961914, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.300819903873339, |
|
"grad_norm": 145.32503447631242, |
|
"learning_rate": 7.068694447018658e-07, |
|
"logits/chosen": -0.9390870928764343, |
|
"logits/rejected": -0.9346014857292175, |
|
"logps/chosen": -3.2123446464538574, |
|
"logps/rejected": -3.691742420196533, |
|
"loss": 3.4082, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -32.12344741821289, |
|
"rewards/margins": 4.793976306915283, |
|
"rewards/rejected": -36.91742706298828, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.3030817076618603, |
|
"grad_norm": 109.50807225224436, |
|
"learning_rate": 7.048294883008199e-07, |
|
"logits/chosen": -0.9084957838058472, |
|
"logits/rejected": -0.9136879444122314, |
|
"logps/chosen": -3.135310649871826, |
|
"logps/rejected": -3.5266218185424805, |
|
"loss": 3.414, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -31.353107452392578, |
|
"rewards/margins": 3.9131107330322266, |
|
"rewards/rejected": -35.26622009277344, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.3053435114503817, |
|
"grad_norm": 98.50668335135161, |
|
"learning_rate": 7.027704433276776e-07, |
|
"logits/chosen": -0.8919773697853088, |
|
"logits/rejected": -0.8786300420761108, |
|
"logps/chosen": -3.161726236343384, |
|
"logps/rejected": -3.697993755340576, |
|
"loss": 3.2742, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -31.617259979248047, |
|
"rewards/margins": 5.3626790046691895, |
|
"rewards/rejected": -36.97993850708008, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.307605315238903, |
|
"grad_norm": 146.8115216507012, |
|
"learning_rate": 7.006924387208452e-07, |
|
"logits/chosen": -0.8836665749549866, |
|
"logits/rejected": -0.8726370334625244, |
|
"logps/chosen": -3.013075828552246, |
|
"logps/rejected": -3.3784284591674805, |
|
"loss": 3.3517, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -30.13075828552246, |
|
"rewards/margins": 3.653524398803711, |
|
"rewards/rejected": -33.78428268432617, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.30986711902742436, |
|
"grad_norm": 127.23140329263654, |
|
"learning_rate": 6.985956046059904e-07, |
|
"logits/chosen": -0.8876525163650513, |
|
"logits/rejected": -0.8859204053878784, |
|
"logps/chosen": -2.833786964416504, |
|
"logps/rejected": -3.3170793056488037, |
|
"loss": 3.5162, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -28.337867736816406, |
|
"rewards/margins": 4.832923889160156, |
|
"rewards/rejected": -33.17079162597656, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.31212892281594573, |
|
"grad_norm": 101.84038444893979, |
|
"learning_rate": 6.964800722878945e-07, |
|
"logits/chosen": -0.8228280544281006, |
|
"logits/rejected": -0.8289366364479065, |
|
"logps/chosen": -3.1117374897003174, |
|
"logps/rejected": -3.662769317626953, |
|
"loss": 3.0821, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -31.117374420166016, |
|
"rewards/margins": 5.510316848754883, |
|
"rewards/rejected": -36.62769317626953, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.31439072660446704, |
|
"grad_norm": 99.28663041889389, |
|
"learning_rate": 6.943459742422287e-07, |
|
"logits/chosen": -0.8386703729629517, |
|
"logits/rejected": -0.8169263005256653, |
|
"logps/chosen": -3.0303568840026855, |
|
"logps/rejected": -3.5507583618164062, |
|
"loss": 3.4395, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -30.303569793701172, |
|
"rewards/margins": 5.204019069671631, |
|
"rewards/rejected": -35.50758361816406, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3166525303929884, |
|
"grad_norm": 113.73196679076027, |
|
"learning_rate": 6.921934441072597e-07, |
|
"logits/chosen": -0.9092671871185303, |
|
"logits/rejected": -0.8999166488647461, |
|
"logps/chosen": -3.266817331314087, |
|
"logps/rejected": -3.6174590587615967, |
|
"loss": 4.0401, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -32.66817855834961, |
|
"rewards/margins": 3.5064148902893066, |
|
"rewards/rejected": -36.174591064453125, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3189143341815098, |
|
"grad_norm": 145.6035076738277, |
|
"learning_rate": 6.900226166754807e-07, |
|
"logits/chosen": -0.877338707447052, |
|
"logits/rejected": -0.8949355483055115, |
|
"logps/chosen": -3.3075101375579834, |
|
"logps/rejected": -3.648167133331299, |
|
"loss": 3.8983, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -33.075103759765625, |
|
"rewards/margins": 3.40657114982605, |
|
"rewards/rejected": -36.48167037963867, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.3211761379700311, |
|
"grad_norm": 109.20506732018467, |
|
"learning_rate": 6.8783362788517e-07, |
|
"logits/chosen": -0.8769165277481079, |
|
"logits/rejected": -0.8802676796913147, |
|
"logps/chosen": -3.2150471210479736, |
|
"logps/rejected": -3.6389479637145996, |
|
"loss": 4.0594, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -32.15047073364258, |
|
"rewards/margins": 4.239009380340576, |
|
"rewards/rejected": -36.38947677612305, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.32343794175855245, |
|
"grad_norm": 105.68748626637623, |
|
"learning_rate": 6.856266148118796e-07, |
|
"logits/chosen": -0.8756333589553833, |
|
"logits/rejected": -0.8771237134933472, |
|
"logps/chosen": -2.903958797454834, |
|
"logps/rejected": -3.4417638778686523, |
|
"loss": 3.1671, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -29.039588928222656, |
|
"rewards/margins": 5.378047943115234, |
|
"rewards/rejected": -34.417640686035156, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.3256997455470738, |
|
"grad_norm": 116.34718602170548, |
|
"learning_rate": 6.834017156598512e-07, |
|
"logits/chosen": -0.8410471081733704, |
|
"logits/rejected": -0.8324297666549683, |
|
"logps/chosen": -3.194199800491333, |
|
"logps/rejected": -3.6479578018188477, |
|
"loss": 3.3653, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -31.941993713378906, |
|
"rewards/margins": 4.537581920623779, |
|
"rewards/rejected": -36.47957992553711, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.3279615493355951, |
|
"grad_norm": 82.46812031176178, |
|
"learning_rate": 6.811590697533607e-07, |
|
"logits/chosen": -0.9150828719139099, |
|
"logits/rejected": -0.9395722150802612, |
|
"logps/chosen": -3.1108005046844482, |
|
"logps/rejected": -3.5240511894226074, |
|
"loss": 3.4543, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -31.108003616333008, |
|
"rewards/margins": 4.132503032684326, |
|
"rewards/rejected": -35.240509033203125, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.3302233531241165, |
|
"grad_norm": 147.40171952386683, |
|
"learning_rate": 6.788988175279951e-07, |
|
"logits/chosen": -0.8900083899497986, |
|
"logits/rejected": -0.8809686303138733, |
|
"logps/chosen": -3.0602874755859375, |
|
"logps/rejected": -3.421034574508667, |
|
"loss": 3.8574, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -30.602872848510742, |
|
"rewards/margins": 3.607468605041504, |
|
"rewards/rejected": -34.21034240722656, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3324851569126378, |
|
"grad_norm": 91.95584038459033, |
|
"learning_rate": 6.766211005218577e-07, |
|
"logits/chosen": -0.860228419303894, |
|
"logits/rejected": -0.8647564053535461, |
|
"logps/chosen": -3.031662940979004, |
|
"logps/rejected": -3.60959529876709, |
|
"loss": 2.8285, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -30.316631317138672, |
|
"rewards/margins": 5.779325008392334, |
|
"rewards/rejected": -36.09595489501953, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.33474696070115917, |
|
"grad_norm": 102.75334355234361, |
|
"learning_rate": 6.743260613667047e-07, |
|
"logits/chosen": -0.9344837665557861, |
|
"logits/rejected": -0.9322720170021057, |
|
"logps/chosen": -2.962679147720337, |
|
"logps/rejected": -3.4261035919189453, |
|
"loss": 3.3547, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -29.626792907714844, |
|
"rewards/margins": 4.634244918823242, |
|
"rewards/rejected": -34.26103973388672, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.33700876448968053, |
|
"grad_norm": 81.59653105672612, |
|
"learning_rate": 6.720138437790139e-07, |
|
"logits/chosen": -0.8822786211967468, |
|
"logits/rejected": -0.8795664310455322, |
|
"logps/chosen": -2.886265516281128, |
|
"logps/rejected": -3.3533620834350586, |
|
"loss": 3.1003, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -28.862653732299805, |
|
"rewards/margins": 4.670964241027832, |
|
"rewards/rejected": -33.53361892700195, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.33927056827820185, |
|
"grad_norm": 128.46033594147116, |
|
"learning_rate": 6.696845925509848e-07, |
|
"logits/chosen": -0.924140989780426, |
|
"logits/rejected": -0.9208129048347473, |
|
"logps/chosen": -2.98405122756958, |
|
"logps/rejected": -3.2943079471588135, |
|
"loss": 3.9016, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -29.840513229370117, |
|
"rewards/margins": 3.1025681495666504, |
|
"rewards/rejected": -32.94308090209961, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3415323720667232, |
|
"grad_norm": 102.1950847032747, |
|
"learning_rate": 6.673384535414718e-07, |
|
"logits/chosen": -0.9400888085365295, |
|
"logits/rejected": -0.9220947027206421, |
|
"logps/chosen": -3.0953633785247803, |
|
"logps/rejected": -3.3882498741149902, |
|
"loss": 4.0013, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -30.953636169433594, |
|
"rewards/margins": 2.9288623332977295, |
|
"rewards/rejected": -33.88249588012695, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.3437941758552446, |
|
"grad_norm": 107.39573001679527, |
|
"learning_rate": 6.649755736668511e-07, |
|
"logits/chosen": -0.8626015186309814, |
|
"logits/rejected": -0.8548362255096436, |
|
"logps/chosen": -2.697676181793213, |
|
"logps/rejected": -3.1207501888275146, |
|
"loss": 2.8599, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -26.976764678955078, |
|
"rewards/margins": 4.230738639831543, |
|
"rewards/rejected": -31.207502365112305, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3460559796437659, |
|
"grad_norm": 104.71174534676842, |
|
"learning_rate": 6.625961008918192e-07, |
|
"logits/chosen": -0.8862609267234802, |
|
"logits/rejected": -0.8788568377494812, |
|
"logps/chosen": -2.73195481300354, |
|
"logps/rejected": -3.2243716716766357, |
|
"loss": 2.858, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -27.319549560546875, |
|
"rewards/margins": 4.924167633056641, |
|
"rewards/rejected": -32.243717193603516, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.34831778343228725, |
|
"grad_norm": 99.07029502577936, |
|
"learning_rate": 6.602001842201289e-07, |
|
"logits/chosen": -0.8615016341209412, |
|
"logits/rejected": -0.865327000617981, |
|
"logps/chosen": -2.728917360305786, |
|
"logps/rejected": -3.0277836322784424, |
|
"loss": 3.9863, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -27.28917694091797, |
|
"rewards/margins": 2.988664150238037, |
|
"rewards/rejected": -30.27783966064453, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.3505795872208086, |
|
"grad_norm": 101.52470029021832, |
|
"learning_rate": 6.577879736852571e-07, |
|
"logits/chosen": -0.9043705463409424, |
|
"logits/rejected": -0.9043326377868652, |
|
"logps/chosen": -2.790837287902832, |
|
"logps/rejected": -3.0531444549560547, |
|
"loss": 3.947, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -27.908374786376953, |
|
"rewards/margins": 2.6230695247650146, |
|
"rewards/rejected": -30.531444549560547, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.35284139100932993, |
|
"grad_norm": 96.22259310348454, |
|
"learning_rate": 6.553596203410112e-07, |
|
"logits/chosen": -0.9045987129211426, |
|
"logits/rejected": -0.8944877982139587, |
|
"logps/chosen": -2.755389451980591, |
|
"logps/rejected": -3.288483142852783, |
|
"loss": 2.4869, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -27.55389404296875, |
|
"rewards/margins": 5.330938816070557, |
|
"rewards/rejected": -32.884830474853516, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.3551031947978513, |
|
"grad_norm": 93.67685988363229, |
|
"learning_rate": 6.529152762520688e-07, |
|
"logits/chosen": -0.8912635445594788, |
|
"logits/rejected": -0.9007562398910522, |
|
"logps/chosen": -2.8563380241394043, |
|
"logps/rejected": -3.2389822006225586, |
|
"loss": 3.3839, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -28.563379287719727, |
|
"rewards/margins": 3.826446056365967, |
|
"rewards/rejected": -32.38982391357422, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.3573649985863726, |
|
"grad_norm": 120.7356829678153, |
|
"learning_rate": 6.504550944844558e-07, |
|
"logits/chosen": -0.8484708666801453, |
|
"logits/rejected": -0.8691813945770264, |
|
"logps/chosen": -2.738534450531006, |
|
"logps/rejected": -3.144260883331299, |
|
"loss": 3.3977, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -27.385345458984375, |
|
"rewards/margins": 4.057261943817139, |
|
"rewards/rejected": -31.44260597229004, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.359626802374894, |
|
"grad_norm": 101.61751894995689, |
|
"learning_rate": 6.479792290959613e-07, |
|
"logits/chosen": -0.856887698173523, |
|
"logits/rejected": -0.8730866312980652, |
|
"logps/chosen": -2.881028413772583, |
|
"logps/rejected": -3.426086664199829, |
|
"loss": 3.0172, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -28.810283660888672, |
|
"rewards/margins": 5.450582504272461, |
|
"rewards/rejected": -34.2608642578125, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.36188860616341534, |
|
"grad_norm": 100.3697480798816, |
|
"learning_rate": 6.454878351264906e-07, |
|
"logits/chosen": -0.846646249294281, |
|
"logits/rejected": -0.8335630893707275, |
|
"logps/chosen": -2.741071939468384, |
|
"logps/rejected": -3.175828456878662, |
|
"loss": 3.2459, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -27.410720825195312, |
|
"rewards/margins": 4.347564220428467, |
|
"rewards/rejected": -31.758285522460938, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36415040995193665, |
|
"grad_norm": 110.19538748311565, |
|
"learning_rate": 6.429810685883565e-07, |
|
"logits/chosen": -0.8944506645202637, |
|
"logits/rejected": -0.8965756297111511, |
|
"logps/chosen": -2.9672818183898926, |
|
"logps/rejected": -3.3783416748046875, |
|
"loss": 3.0929, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -29.672819137573242, |
|
"rewards/margins": 4.110595226287842, |
|
"rewards/rejected": -33.783416748046875, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.366412213740458, |
|
"grad_norm": 158.64429402578588, |
|
"learning_rate": 6.404590864565088e-07, |
|
"logits/chosen": -0.8493690490722656, |
|
"logits/rejected": -0.8430997133255005, |
|
"logps/chosen": -2.9426426887512207, |
|
"logps/rejected": -3.213247537612915, |
|
"loss": 3.8471, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -29.426427841186523, |
|
"rewards/margins": 2.7060494422912598, |
|
"rewards/rejected": -32.132476806640625, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.3686740175289794, |
|
"grad_norm": 141.2904311013793, |
|
"learning_rate": 6.379220466587063e-07, |
|
"logits/chosen": -0.8736861348152161, |
|
"logits/rejected": -0.8479949235916138, |
|
"logps/chosen": -2.944532871246338, |
|
"logps/rejected": -3.3523709774017334, |
|
"loss": 3.1949, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -29.445331573486328, |
|
"rewards/margins": 4.078380107879639, |
|
"rewards/rejected": -33.523712158203125, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.3709358213175007, |
|
"grad_norm": 109.95165872611736, |
|
"learning_rate": 6.353701080656254e-07, |
|
"logits/chosen": -0.8461288809776306, |
|
"logits/rejected": -0.8680405616760254, |
|
"logps/chosen": -3.0780773162841797, |
|
"logps/rejected": -3.4420604705810547, |
|
"loss": 3.5518, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -30.780773162841797, |
|
"rewards/margins": 3.6398301124572754, |
|
"rewards/rejected": -34.42060470581055, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.37319762510602206, |
|
"grad_norm": 115.807734337384, |
|
"learning_rate": 6.32803430480913e-07, |
|
"logits/chosen": -0.8767830729484558, |
|
"logits/rejected": -0.8601579666137695, |
|
"logps/chosen": -3.0346415042877197, |
|
"logps/rejected": -3.4634854793548584, |
|
"loss": 3.4447, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -30.346412658691406, |
|
"rewards/margins": 4.288439750671387, |
|
"rewards/rejected": -34.63485336303711, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3754594288945434, |
|
"grad_norm": 113.20880426701919, |
|
"learning_rate": 6.302221746311782e-07, |
|
"logits/chosen": -0.8934029936790466, |
|
"logits/rejected": -0.8591723442077637, |
|
"logps/chosen": -2.9002223014831543, |
|
"logps/rejected": -3.3115527629852295, |
|
"loss": 3.4717, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -29.002216339111328, |
|
"rewards/margins": 4.113308906555176, |
|
"rewards/rejected": -33.11552810668945, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.37772123268306473, |
|
"grad_norm": 106.94671927380018, |
|
"learning_rate": 6.276265021559288e-07, |
|
"logits/chosen": -0.8767241835594177, |
|
"logits/rejected": -0.8691208362579346, |
|
"logps/chosen": -3.1625356674194336, |
|
"logps/rejected": -3.526569128036499, |
|
"loss": 3.7261, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -31.625356674194336, |
|
"rewards/margins": 3.6403377056121826, |
|
"rewards/rejected": -35.265689849853516, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.3799830364715861, |
|
"grad_norm": 94.77582430883331, |
|
"learning_rate": 6.250165755974487e-07, |
|
"logits/chosen": -0.8312730193138123, |
|
"logits/rejected": -0.8397226333618164, |
|
"logps/chosen": -3.107558250427246, |
|
"logps/rejected": -3.5196921825408936, |
|
"loss": 3.0451, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -31.07558250427246, |
|
"rewards/margins": 4.121337890625, |
|
"rewards/rejected": -35.196922302246094, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.3822448402601074, |
|
"grad_norm": 113.24260436647522, |
|
"learning_rate": 6.223925583906192e-07, |
|
"logits/chosen": -0.9056702852249146, |
|
"logits/rejected": -0.8984103798866272, |
|
"logps/chosen": -3.167409896850586, |
|
"logps/rejected": -3.6634039878845215, |
|
"loss": 2.7232, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -31.67409896850586, |
|
"rewards/margins": 4.959937572479248, |
|
"rewards/rejected": -36.634037017822266, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.3845066440486288, |
|
"grad_norm": 110.27954792472926, |
|
"learning_rate": 6.19754614852685e-07, |
|
"logits/chosen": -0.887773871421814, |
|
"logits/rejected": -0.8898996114730835, |
|
"logps/chosen": -3.052790641784668, |
|
"logps/rejected": -3.4982476234436035, |
|
"loss": 3.0573, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -30.527912139892578, |
|
"rewards/margins": 4.454569339752197, |
|
"rewards/rejected": -34.982479095458984, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38676844783715014, |
|
"grad_norm": 104.0326748803761, |
|
"learning_rate": 6.171029101729644e-07, |
|
"logits/chosen": -0.8020922541618347, |
|
"logits/rejected": -0.8171724677085876, |
|
"logps/chosen": -3.3534414768218994, |
|
"logps/rejected": -3.8365039825439453, |
|
"loss": 3.2169, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -33.53441619873047, |
|
"rewards/margins": 4.830626487731934, |
|
"rewards/rejected": -38.36503982543945, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.38903025162567145, |
|
"grad_norm": 115.8006667215364, |
|
"learning_rate": 6.144376104025055e-07, |
|
"logits/chosen": -0.8859556317329407, |
|
"logits/rejected": -0.8754591941833496, |
|
"logps/chosen": -3.1711325645446777, |
|
"logps/rejected": -3.650319814682007, |
|
"loss": 3.061, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -31.71132469177246, |
|
"rewards/margins": 4.791874408721924, |
|
"rewards/rejected": -36.503196716308594, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.3912920554141928, |
|
"grad_norm": 116.32227065050793, |
|
"learning_rate": 6.117588824436873e-07, |
|
"logits/chosen": -0.9079657793045044, |
|
"logits/rejected": -0.9178179502487183, |
|
"logps/chosen": -3.2952144145965576, |
|
"logps/rejected": -3.7104384899139404, |
|
"loss": 3.5394, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -32.952144622802734, |
|
"rewards/margins": 4.1522393226623535, |
|
"rewards/rejected": -37.10438537597656, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.3935538592027142, |
|
"grad_norm": 117.90694024279327, |
|
"learning_rate": 6.090668940397688e-07, |
|
"logits/chosen": -0.8631306290626526, |
|
"logits/rejected": -0.8745336532592773, |
|
"logps/chosen": -3.2797317504882812, |
|
"logps/rejected": -3.7525224685668945, |
|
"loss": 3.3501, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -32.79732131958008, |
|
"rewards/margins": 4.727904319763184, |
|
"rewards/rejected": -37.52522277832031, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.3958156629912355, |
|
"grad_norm": 137.57570732430528, |
|
"learning_rate": 6.063618137643844e-07, |
|
"logits/chosen": -0.8693101406097412, |
|
"logits/rejected": -0.8659628629684448, |
|
"logps/chosen": -3.2315754890441895, |
|
"logps/rejected": -3.66987681388855, |
|
"loss": 3.1902, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -32.315757751464844, |
|
"rewards/margins": 4.383011817932129, |
|
"rewards/rejected": -36.698768615722656, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.39807746677975686, |
|
"grad_norm": 129.17148648807188, |
|
"learning_rate": 6.03643811010988e-07, |
|
"logits/chosen": -0.9099083542823792, |
|
"logits/rejected": -0.9289382696151733, |
|
"logps/chosen": -3.4815526008605957, |
|
"logps/rejected": -3.9359803199768066, |
|
"loss": 3.0441, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -34.81552505493164, |
|
"rewards/margins": 4.544279098510742, |
|
"rewards/rejected": -39.359806060791016, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.4003392705682782, |
|
"grad_norm": 151.3346504007205, |
|
"learning_rate": 6.009130559822453e-07, |
|
"logits/chosen": -0.9238193035125732, |
|
"logits/rejected": -0.9373087286949158, |
|
"logps/chosen": -3.4238133430480957, |
|
"logps/rejected": -3.734138011932373, |
|
"loss": 3.9058, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -34.238136291503906, |
|
"rewards/margins": 3.1032469272613525, |
|
"rewards/rejected": -37.34138107299805, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.40260107435679954, |
|
"grad_norm": 119.98025767660518, |
|
"learning_rate": 5.981697196793758e-07, |
|
"logits/chosen": -0.9748789072036743, |
|
"logits/rejected": -0.9766548871994019, |
|
"logps/chosen": -3.595698356628418, |
|
"logps/rejected": -4.107189655303955, |
|
"loss": 2.5614, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -35.95698165893555, |
|
"rewards/margins": 5.114919662475586, |
|
"rewards/rejected": -41.0718994140625, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.4048628781453209, |
|
"grad_norm": 149.68316275003738, |
|
"learning_rate": 5.954139738914446e-07, |
|
"logits/chosen": -0.9417423009872437, |
|
"logits/rejected": -0.9513258934020996, |
|
"logps/chosen": -3.444281578063965, |
|
"logps/rejected": -3.8950817584991455, |
|
"loss": 3.329, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -34.44281768798828, |
|
"rewards/margins": 4.507997512817383, |
|
"rewards/rejected": -38.95081329345703, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.4071246819338422, |
|
"grad_norm": 134.08149703977352, |
|
"learning_rate": 5.92645991184605e-07, |
|
"logits/chosen": -0.9165428876876831, |
|
"logits/rejected": -0.9207024574279785, |
|
"logps/chosen": -3.5991551876068115, |
|
"logps/rejected": -4.07746696472168, |
|
"loss": 2.9381, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -35.991554260253906, |
|
"rewards/margins": 4.783118724822998, |
|
"rewards/rejected": -40.77467346191406, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4093864857223636, |
|
"grad_norm": 114.52146902238273, |
|
"learning_rate": 5.898659448912917e-07, |
|
"logits/chosen": -0.9070485830307007, |
|
"logits/rejected": -0.931403398513794, |
|
"logps/chosen": -3.4507501125335693, |
|
"logps/rejected": -3.940732717514038, |
|
"loss": 3.1991, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -34.507503509521484, |
|
"rewards/margins": 4.899825572967529, |
|
"rewards/rejected": -39.407325744628906, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.41164828951088495, |
|
"grad_norm": 126.10902385051384, |
|
"learning_rate": 5.870740090993676e-07, |
|
"logits/chosen": -0.9509062767028809, |
|
"logits/rejected": -0.9542712569236755, |
|
"logps/chosen": -3.7261013984680176, |
|
"logps/rejected": -4.24366569519043, |
|
"loss": 2.9464, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -37.26101303100586, |
|
"rewards/margins": 5.175642013549805, |
|
"rewards/rejected": -42.43665313720703, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.41391009329940626, |
|
"grad_norm": 147.18106120122667, |
|
"learning_rate": 5.842703586412214e-07, |
|
"logits/chosen": -0.9539936184883118, |
|
"logits/rejected": -0.9632295370101929, |
|
"logps/chosen": -3.779463291168213, |
|
"logps/rejected": -4.1752824783325195, |
|
"loss": 3.9504, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -37.794639587402344, |
|
"rewards/margins": 3.958191394805908, |
|
"rewards/rejected": -41.752830505371094, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.4161718970879276, |
|
"grad_norm": 123.95369052981256, |
|
"learning_rate": 5.814551690828203e-07, |
|
"logits/chosen": -0.9254580140113831, |
|
"logits/rejected": -0.9361709356307983, |
|
"logps/chosen": -3.641896963119507, |
|
"logps/rejected": -4.133872032165527, |
|
"loss": 2.8252, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -36.41897201538086, |
|
"rewards/margins": 4.919750690460205, |
|
"rewards/rejected": -41.338722229003906, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.418433700876449, |
|
"grad_norm": 165.77999607048756, |
|
"learning_rate": 5.786286167127155e-07, |
|
"logits/chosen": -0.9551374912261963, |
|
"logits/rejected": -0.9540834426879883, |
|
"logps/chosen": -3.5414278507232666, |
|
"logps/rejected": -4.075000762939453, |
|
"loss": 3.3449, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -35.414276123046875, |
|
"rewards/margins": 5.335728168487549, |
|
"rewards/rejected": -40.750003814697266, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4206955046649703, |
|
"grad_norm": 123.48982790968616, |
|
"learning_rate": 5.757908785310031e-07, |
|
"logits/chosen": -0.8877527117729187, |
|
"logits/rejected": -0.9115056395530701, |
|
"logps/chosen": -3.351555585861206, |
|
"logps/rejected": -3.8177757263183594, |
|
"loss": 3.1598, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -33.51555633544922, |
|
"rewards/margins": 4.66220235824585, |
|
"rewards/rejected": -38.177757263183594, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.42295730845349166, |
|
"grad_norm": 111.92086682513862, |
|
"learning_rate": 5.729421322382399e-07, |
|
"logits/chosen": -0.9053335189819336, |
|
"logits/rejected": -0.9254867434501648, |
|
"logps/chosen": -3.23016357421875, |
|
"logps/rejected": -3.751375675201416, |
|
"loss": 3.1103, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -32.301631927490234, |
|
"rewards/margins": 5.212121963500977, |
|
"rewards/rejected": -37.513755798339844, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.42521911224201303, |
|
"grad_norm": 136.4736713997996, |
|
"learning_rate": 5.700825562243163e-07, |
|
"logits/chosen": -0.8880442976951599, |
|
"logits/rejected": -0.8915799856185913, |
|
"logps/chosen": -3.2851951122283936, |
|
"logps/rejected": -3.7715601921081543, |
|
"loss": 3.0085, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -32.851951599121094, |
|
"rewards/margins": 4.863647937774658, |
|
"rewards/rejected": -37.71560287475586, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.42748091603053434, |
|
"grad_norm": 123.81512596023282, |
|
"learning_rate": 5.672123295572854e-07, |
|
"logits/chosen": -0.9444673657417297, |
|
"logits/rejected": -0.9649919271469116, |
|
"logps/chosen": -3.375215530395508, |
|
"logps/rejected": -3.71567964553833, |
|
"loss": 3.2624, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -33.75215530395508, |
|
"rewards/margins": 3.4046430587768555, |
|
"rewards/rejected": -37.15679931640625, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.4297427198190557, |
|
"grad_norm": 123.00524832180807, |
|
"learning_rate": 5.643316319721487e-07, |
|
"logits/chosen": -0.931572437286377, |
|
"logits/rejected": -0.9433319568634033, |
|
"logps/chosen": -3.548570156097412, |
|
"logps/rejected": -3.98114013671875, |
|
"loss": 3.5326, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -35.48570251464844, |
|
"rewards/margins": 4.3256988525390625, |
|
"rewards/rejected": -39.8114013671875, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.432004523607577, |
|
"grad_norm": 121.3462365535433, |
|
"learning_rate": 5.614406438596026e-07, |
|
"logits/chosen": -0.9843615889549255, |
|
"logits/rejected": -0.9843348264694214, |
|
"logps/chosen": -3.5935218334198, |
|
"logps/rejected": -4.021365642547607, |
|
"loss": 3.6033, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -35.935218811035156, |
|
"rewards/margins": 4.278437614440918, |
|
"rewards/rejected": -40.21365737915039, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.4342663273960984, |
|
"grad_norm": 118.58776393660315, |
|
"learning_rate": 5.585395462547406e-07, |
|
"logits/chosen": -0.9345005750656128, |
|
"logits/rejected": -0.9257915019989014, |
|
"logps/chosen": -3.3855388164520264, |
|
"logps/rejected": -3.6936349868774414, |
|
"loss": 3.6903, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -33.85539245605469, |
|
"rewards/margins": 3.080961227416992, |
|
"rewards/rejected": -36.93634796142578, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.43652813118461975, |
|
"grad_norm": 124.09729948342199, |
|
"learning_rate": 5.55628520825718e-07, |
|
"logits/chosen": -1.0164012908935547, |
|
"logits/rejected": -1.0249830484390259, |
|
"logps/chosen": -3.3843159675598145, |
|
"logps/rejected": -3.7799105644226074, |
|
"loss": 3.4471, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -33.84315872192383, |
|
"rewards/margins": 3.955946922302246, |
|
"rewards/rejected": -37.799102783203125, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.43878993497314106, |
|
"grad_norm": 111.07716830837144, |
|
"learning_rate": 5.527077498623752e-07, |
|
"logits/chosen": -0.969367265701294, |
|
"logits/rejected": -0.9817877411842346, |
|
"logps/chosen": -3.272331714630127, |
|
"logps/rejected": -3.6942224502563477, |
|
"loss": 3.0369, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -32.72331619262695, |
|
"rewards/margins": 4.218908786773682, |
|
"rewards/rejected": -36.942222595214844, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.4410517387616624, |
|
"grad_norm": 110.82898924664865, |
|
"learning_rate": 5.497774162648228e-07, |
|
"logits/chosen": -0.9307990670204163, |
|
"logits/rejected": -0.9575142860412598, |
|
"logps/chosen": -3.230003833770752, |
|
"logps/rejected": -3.7915592193603516, |
|
"loss": 2.9101, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -32.3000373840332, |
|
"rewards/margins": 5.615551948547363, |
|
"rewards/rejected": -37.91558837890625, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4433135425501838, |
|
"grad_norm": 109.94667588199378, |
|
"learning_rate": 5.468377035319882e-07, |
|
"logits/chosen": -1.0030239820480347, |
|
"logits/rejected": -0.9989161491394043, |
|
"logps/chosen": -3.2055721282958984, |
|
"logps/rejected": -3.716688394546509, |
|
"loss": 3.0171, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -32.055721282958984, |
|
"rewards/margins": 5.11116361618042, |
|
"rewards/rejected": -37.1668815612793, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.4455753463387051, |
|
"grad_norm": 125.27040886315275, |
|
"learning_rate": 5.438887957501248e-07, |
|
"logits/chosen": -0.8769274353981018, |
|
"logits/rejected": -0.8877967596054077, |
|
"logps/chosen": -3.203533172607422, |
|
"logps/rejected": -3.6154625415802, |
|
"loss": 3.2528, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -32.03533172607422, |
|
"rewards/margins": 4.119295120239258, |
|
"rewards/rejected": -36.154624938964844, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.44783715012722647, |
|
"grad_norm": 103.81022792804097, |
|
"learning_rate": 5.409308775812844e-07, |
|
"logits/chosen": -0.9267116189002991, |
|
"logits/rejected": -0.9381961822509766, |
|
"logps/chosen": -3.330132246017456, |
|
"logps/rejected": -3.7476911544799805, |
|
"loss": 3.5717, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -33.30132293701172, |
|
"rewards/margins": 4.175592422485352, |
|
"rewards/rejected": -37.47691345214844, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.45009895391574783, |
|
"grad_norm": 99.94564050121403, |
|
"learning_rate": 5.379641342517541e-07, |
|
"logits/chosen": -0.986073911190033, |
|
"logits/rejected": -0.9813234210014343, |
|
"logps/chosen": -3.126271963119507, |
|
"logps/rejected": -3.6492385864257812, |
|
"loss": 3.0423, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -31.262718200683594, |
|
"rewards/margins": 5.2296671867370605, |
|
"rewards/rejected": -36.49238204956055, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.45236075770426915, |
|
"grad_norm": 111.98428978609695, |
|
"learning_rate": 5.349887515404564e-07, |
|
"logits/chosen": -0.9647108316421509, |
|
"logits/rejected": -0.9899980425834656, |
|
"logps/chosen": -3.3272621631622314, |
|
"logps/rejected": -3.902920722961426, |
|
"loss": 2.7821, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -33.272621154785156, |
|
"rewards/margins": 5.756584167480469, |
|
"rewards/rejected": -39.029205322265625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4546225614927905, |
|
"grad_norm": 107.57636910311598, |
|
"learning_rate": 5.320049157673163e-07, |
|
"logits/chosen": -0.89461350440979, |
|
"logits/rejected": -0.8987120389938354, |
|
"logps/chosen": -3.157144546508789, |
|
"logps/rejected": -3.6261062622070312, |
|
"loss": 3.0688, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -31.57144546508789, |
|
"rewards/margins": 4.6896138191223145, |
|
"rewards/rejected": -36.26105880737305, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.4568843652813118, |
|
"grad_norm": 108.84345703602494, |
|
"learning_rate": 5.290128137815938e-07, |
|
"logits/chosen": -0.9613075256347656, |
|
"logits/rejected": -0.9883145093917847, |
|
"logps/chosen": -3.33732533454895, |
|
"logps/rejected": -3.883516550064087, |
|
"loss": 2.5808, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -33.37324905395508, |
|
"rewards/margins": 5.461914539337158, |
|
"rewards/rejected": -38.835166931152344, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.4591461690698332, |
|
"grad_norm": 100.3756469658318, |
|
"learning_rate": 5.260126329501828e-07, |
|
"logits/chosen": -1.0020052194595337, |
|
"logits/rejected": -0.9960248470306396, |
|
"logps/chosen": -3.233987331390381, |
|
"logps/rejected": -3.822892665863037, |
|
"loss": 2.6001, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -32.339874267578125, |
|
"rewards/margins": 5.889050483703613, |
|
"rewards/rejected": -38.22892379760742, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.46140797285835455, |
|
"grad_norm": 117.92031866066928, |
|
"learning_rate": 5.230045611458789e-07, |
|
"logits/chosen": -0.9191077947616577, |
|
"logits/rejected": -0.9465888738632202, |
|
"logps/chosen": -3.157311201095581, |
|
"logps/rejected": -3.581965923309326, |
|
"loss": 3.2322, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -31.57311248779297, |
|
"rewards/margins": 4.24654483795166, |
|
"rewards/rejected": -35.81965637207031, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.46366977664687586, |
|
"grad_norm": 108.59013513036588, |
|
"learning_rate": 5.199887867356143e-07, |
|
"logits/chosen": -0.9234997034072876, |
|
"logits/rejected": -0.9457142353057861, |
|
"logps/chosen": -3.4247939586639404, |
|
"logps/rejected": -4.004105567932129, |
|
"loss": 2.3777, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -34.24794006347656, |
|
"rewards/margins": 5.793112754821777, |
|
"rewards/rejected": -40.041053771972656, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.46593158043539723, |
|
"grad_norm": 109.86606090598335, |
|
"learning_rate": 5.16965498568662e-07, |
|
"logits/chosen": -0.9631933569908142, |
|
"logits/rejected": -0.985203206539154, |
|
"logps/chosen": -3.5760979652404785, |
|
"logps/rejected": -4.27176570892334, |
|
"loss": 2.6549, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -35.76097869873047, |
|
"rewards/margins": 6.9566802978515625, |
|
"rewards/rejected": -42.71765899658203, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.4681933842239186, |
|
"grad_norm": 109.37570846064393, |
|
"learning_rate": 5.139348859648098e-07, |
|
"logits/chosen": -0.9843616485595703, |
|
"logits/rejected": -0.9931260943412781, |
|
"logps/chosen": -3.337782382965088, |
|
"logps/rejected": -3.815596580505371, |
|
"loss": 2.752, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -33.37782669067383, |
|
"rewards/margins": 4.778145790100098, |
|
"rewards/rejected": -38.15597152709961, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.4704551880124399, |
|
"grad_norm": 123.84513628663106, |
|
"learning_rate": 5.10897138702506e-07, |
|
"logits/chosen": -0.924982488155365, |
|
"logits/rejected": -0.9445345401763916, |
|
"logps/chosen": -3.5001182556152344, |
|
"logps/rejected": -4.009296417236328, |
|
"loss": 3.4707, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -35.00117874145508, |
|
"rewards/margins": 5.091784477233887, |
|
"rewards/rejected": -40.09296417236328, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.4727169918009613, |
|
"grad_norm": 115.40500395416548, |
|
"learning_rate": 5.078524470069743e-07, |
|
"logits/chosen": -1.0325067043304443, |
|
"logits/rejected": -1.0381354093551636, |
|
"logps/chosen": -3.7030789852142334, |
|
"logps/rejected": -4.310380935668945, |
|
"loss": 2.4021, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -37.030792236328125, |
|
"rewards/margins": 6.073019504547119, |
|
"rewards/rejected": -43.10380935668945, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.47497879558948264, |
|
"grad_norm": 117.29555270641235, |
|
"learning_rate": 5.048010015383021e-07, |
|
"logits/chosen": -0.932005763053894, |
|
"logits/rejected": -0.9313934445381165, |
|
"logps/chosen": -3.7217373847961426, |
|
"logps/rejected": -4.419771671295166, |
|
"loss": 2.5579, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -37.217376708984375, |
|
"rewards/margins": 6.980340957641602, |
|
"rewards/rejected": -44.197715759277344, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.47724059937800395, |
|
"grad_norm": 99.0605590955242, |
|
"learning_rate": 5.01742993379502e-07, |
|
"logits/chosen": -0.9642963409423828, |
|
"logits/rejected": -0.9944700002670288, |
|
"logps/chosen": -3.8130156993865967, |
|
"logps/rejected": -4.41444206237793, |
|
"loss": 2.4226, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -38.130157470703125, |
|
"rewards/margins": 6.0142645835876465, |
|
"rewards/rejected": -44.14441680908203, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.4795024031665253, |
|
"grad_norm": 137.92065301035657, |
|
"learning_rate": 4.986786140245446e-07, |
|
"logits/chosen": -0.941580057144165, |
|
"logits/rejected": -0.9521224498748779, |
|
"logps/chosen": -3.7896742820739746, |
|
"logps/rejected": -4.292535305023193, |
|
"loss": 3.3326, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -37.8967399597168, |
|
"rewards/margins": 5.028613090515137, |
|
"rewards/rejected": -42.92535400390625, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.4817642069550466, |
|
"grad_norm": 140.2469732339002, |
|
"learning_rate": 4.956080553663687e-07, |
|
"logits/chosen": -1.0177710056304932, |
|
"logits/rejected": -1.0228863954544067, |
|
"logps/chosen": -3.8647103309631348, |
|
"logps/rejected": -4.441557884216309, |
|
"loss": 3.1669, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -38.64710998535156, |
|
"rewards/margins": 5.7684736251831055, |
|
"rewards/rejected": -44.41558074951172, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.484026010743568, |
|
"grad_norm": 138.7141757029019, |
|
"learning_rate": 4.925315096848636e-07, |
|
"logits/chosen": -1.0070769786834717, |
|
"logits/rejected": -1.0411479473114014, |
|
"logps/chosen": -4.106783866882324, |
|
"logps/rejected": -4.8575568199157715, |
|
"loss": 2.7284, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -41.067840576171875, |
|
"rewards/margins": 7.507728576660156, |
|
"rewards/rejected": -48.57556915283203, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.48628781453208936, |
|
"grad_norm": 131.4422460143438, |
|
"learning_rate": 4.894491696348293e-07, |
|
"logits/chosen": -1.0420957803726196, |
|
"logits/rejected": -1.0374058485031128, |
|
"logps/chosen": -3.8714852333068848, |
|
"logps/rejected": -4.324233055114746, |
|
"loss": 3.2868, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -38.71485137939453, |
|
"rewards/margins": 4.527476787567139, |
|
"rewards/rejected": -43.24232864379883, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.48854961832061067, |
|
"grad_norm": 119.956771774232, |
|
"learning_rate": 4.863612282339116e-07, |
|
"logits/chosen": -0.9629250764846802, |
|
"logits/rejected": -0.9726973176002502, |
|
"logps/chosen": -4.18977165222168, |
|
"logps/rejected": -4.725983619689941, |
|
"loss": 3.3963, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -41.8977165222168, |
|
"rewards/margins": 5.362123489379883, |
|
"rewards/rejected": -47.25984191894531, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.49081142210913203, |
|
"grad_norm": 126.39733995056336, |
|
"learning_rate": 4.832678788505161e-07, |
|
"logits/chosen": -0.9921486973762512, |
|
"logits/rejected": -0.9952837824821472, |
|
"logps/chosen": -4.200188636779785, |
|
"logps/rejected": -4.784489631652832, |
|
"loss": 3.3473, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -42.001888275146484, |
|
"rewards/margins": 5.843010902404785, |
|
"rewards/rejected": -47.84489822387695, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.4930732258976534, |
|
"grad_norm": 145.1832593985925, |
|
"learning_rate": 4.801693151916985e-07, |
|
"logits/chosen": -1.0200079679489136, |
|
"logits/rejected": -1.0674527883529663, |
|
"logps/chosen": -4.111248016357422, |
|
"logps/rejected": -4.704890251159668, |
|
"loss": 2.7314, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -41.11248016357422, |
|
"rewards/margins": 5.936426639556885, |
|
"rewards/rejected": -47.04890060424805, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.4953350296861747, |
|
"grad_norm": 122.8894691842728, |
|
"learning_rate": 4.770657312910354e-07, |
|
"logits/chosen": -1.0514639616012573, |
|
"logits/rejected": -1.0606637001037598, |
|
"logps/chosen": -4.16422176361084, |
|
"logps/rejected": -4.627882957458496, |
|
"loss": 3.4181, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -41.642215728759766, |
|
"rewards/margins": 4.6366143226623535, |
|
"rewards/rejected": -46.278831481933594, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.4975968334746961, |
|
"grad_norm": 183.15201644549884, |
|
"learning_rate": 4.739573214964729e-07, |
|
"logits/chosen": -1.019283413887024, |
|
"logits/rejected": -1.0272706747055054, |
|
"logps/chosen": -4.017280101776123, |
|
"logps/rejected": -4.622611045837402, |
|
"loss": 2.8836, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -40.17280197143555, |
|
"rewards/margins": 6.053311347961426, |
|
"rewards/rejected": -46.226104736328125, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.49985863726321744, |
|
"grad_norm": 161.1665347902561, |
|
"learning_rate": 4.7084428045815733e-07, |
|
"logits/chosen": -1.0222294330596924, |
|
"logits/rejected": -1.0282480716705322, |
|
"logps/chosen": -4.2613067626953125, |
|
"logps/rejected": -4.757590293884277, |
|
"loss": 3.294, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -42.61307144165039, |
|
"rewards/margins": 4.962835311889648, |
|
"rewards/rejected": -47.575904846191406, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.5021204410517388, |
|
"grad_norm": 172.53802465037856, |
|
"learning_rate": 4.677268031162457e-07, |
|
"logits/chosen": -1.0452945232391357, |
|
"logits/rejected": -1.0611770153045654, |
|
"logps/chosen": -4.031656265258789, |
|
"logps/rejected": -4.551456928253174, |
|
"loss": 3.239, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -40.31656265258789, |
|
"rewards/margins": 5.198005199432373, |
|
"rewards/rejected": -45.514564514160156, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.5043822448402601, |
|
"grad_norm": 126.00401758603967, |
|
"learning_rate": 4.646050846886985e-07, |
|
"logits/chosen": -0.9657736420631409, |
|
"logits/rejected": -0.983400285243988, |
|
"logps/chosen": -3.8987081050872803, |
|
"logps/rejected": -4.5452728271484375, |
|
"loss": 2.4758, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -38.987083435058594, |
|
"rewards/margins": 6.465641975402832, |
|
"rewards/rejected": -45.452728271484375, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.5066440486287814, |
|
"grad_norm": 133.2213689896863, |
|
"learning_rate": 4.6147932065905494e-07, |
|
"logits/chosen": -1.0288223028182983, |
|
"logits/rejected": -1.0277618169784546, |
|
"logps/chosen": -4.195347309112549, |
|
"logps/rejected": -4.668145179748535, |
|
"loss": 3.3766, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -41.9534797668457, |
|
"rewards/margins": 4.727977752685547, |
|
"rewards/rejected": -46.681453704833984, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.5089058524173028, |
|
"grad_norm": 131.6600388105565, |
|
"learning_rate": 4.5834970676419214e-07, |
|
"logits/chosen": -1.0146931409835815, |
|
"logits/rejected": -1.035142183303833, |
|
"logps/chosen": -4.00057315826416, |
|
"logps/rejected": -4.482962608337402, |
|
"loss": 3.1903, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -40.005733489990234, |
|
"rewards/margins": 4.823896408081055, |
|
"rewards/rejected": -44.829627990722656, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5111676562058242, |
|
"grad_norm": 162.562796593888, |
|
"learning_rate": 4.552164389820673e-07, |
|
"logits/chosen": -0.9230223894119263, |
|
"logits/rejected": -0.95595782995224, |
|
"logps/chosen": -4.004783630371094, |
|
"logps/rejected": -4.67029333114624, |
|
"loss": 2.8204, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -40.04783630371094, |
|
"rewards/margins": 6.655096530914307, |
|
"rewards/rejected": -46.70293045043945, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.5134294599943455, |
|
"grad_norm": 135.08666773974417, |
|
"learning_rate": 4.5207971351944605e-07, |
|
"logits/chosen": -1.0635424852371216, |
|
"logits/rejected": -1.0724692344665527, |
|
"logps/chosen": -4.0045270919799805, |
|
"logps/rejected": -4.564865589141846, |
|
"loss": 3.483, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -40.04526901245117, |
|
"rewards/margins": 5.60338830947876, |
|
"rewards/rejected": -45.648658752441406, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.5156912637828668, |
|
"grad_norm": 149.84066704705657, |
|
"learning_rate": 4.489397267996157e-07, |
|
"logits/chosen": -1.0270864963531494, |
|
"logits/rejected": -1.0296591520309448, |
|
"logps/chosen": -3.937476634979248, |
|
"logps/rejected": -4.4472575187683105, |
|
"loss": 2.9656, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -39.37476348876953, |
|
"rewards/margins": 5.097811222076416, |
|
"rewards/rejected": -44.472572326660156, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.5179530675713881, |
|
"grad_norm": 159.5920659793943, |
|
"learning_rate": 4.45796675450085e-07, |
|
"logits/chosen": -1.0151578187942505, |
|
"logits/rejected": -1.0253784656524658, |
|
"logps/chosen": -3.902965545654297, |
|
"logps/rejected": -4.4983906745910645, |
|
"loss": 2.7879, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -39.0296630859375, |
|
"rewards/margins": 5.954242706298828, |
|
"rewards/rejected": -44.98390197753906, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.5202148713599095, |
|
"grad_norm": 137.17327043394232, |
|
"learning_rate": 4.4265075629027126e-07, |
|
"logits/chosen": -0.9616280794143677, |
|
"logits/rejected": -0.9747657775878906, |
|
"logps/chosen": -4.009452819824219, |
|
"logps/rejected": -4.5196943283081055, |
|
"loss": 2.8877, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -40.09452819824219, |
|
"rewards/margins": 5.102418422698975, |
|
"rewards/rejected": -45.19694900512695, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5224766751484309, |
|
"grad_norm": 127.83635262123063, |
|
"learning_rate": 4.3950216631917563e-07, |
|
"logits/chosen": -1.0352201461791992, |
|
"logits/rejected": -1.0629258155822754, |
|
"logps/chosen": -3.9494400024414062, |
|
"logps/rejected": -4.613613128662109, |
|
"loss": 2.6865, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -39.49440002441406, |
|
"rewards/margins": 6.641729354858398, |
|
"rewards/rejected": -46.136131286621094, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.5247384789369522, |
|
"grad_norm": 116.98731975875538, |
|
"learning_rate": 4.3635110270304676e-07, |
|
"logits/chosen": -1.012160062789917, |
|
"logits/rejected": -1.006544828414917, |
|
"logps/chosen": -3.773225784301758, |
|
"logps/rejected": -4.404544353485107, |
|
"loss": 2.1975, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -37.732261657714844, |
|
"rewards/margins": 6.3131866455078125, |
|
"rewards/rejected": -44.04544448852539, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.5270002827254736, |
|
"grad_norm": 157.36112184734307, |
|
"learning_rate": 4.331977627630339e-07, |
|
"logits/chosen": -0.9593961238861084, |
|
"logits/rejected": -0.9582856297492981, |
|
"logps/chosen": -3.712995767593384, |
|
"logps/rejected": -4.420125961303711, |
|
"loss": 2.4962, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -37.12995529174805, |
|
"rewards/margins": 7.071303367614746, |
|
"rewards/rejected": -44.20125961303711, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.5292620865139949, |
|
"grad_norm": 125.73537522423167, |
|
"learning_rate": 4.300423439628313e-07, |
|
"logits/chosen": -1.0273864269256592, |
|
"logits/rejected": -1.0561580657958984, |
|
"logps/chosen": -3.85683536529541, |
|
"logps/rejected": -4.466761589050293, |
|
"loss": 2.5333, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -38.568355560302734, |
|
"rewards/margins": 6.099256992340088, |
|
"rewards/rejected": -44.66761016845703, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.5315238903025162, |
|
"grad_norm": 130.86792641111873, |
|
"learning_rate": 4.268850438963118e-07, |
|
"logits/chosen": -1.02294921875, |
|
"logits/rejected": -1.044440507888794, |
|
"logps/chosen": -4.049023628234863, |
|
"logps/rejected": -4.641694068908691, |
|
"loss": 2.5942, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -40.490230560302734, |
|
"rewards/margins": 5.926709175109863, |
|
"rewards/rejected": -46.41694641113281, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5337856940910376, |
|
"grad_norm": 113.36493907903346, |
|
"learning_rate": 4.2372606027515463e-07, |
|
"logits/chosen": -0.9929027557373047, |
|
"logits/rejected": -1.00745689868927, |
|
"logps/chosen": -3.740131378173828, |
|
"logps/rejected": -4.291561126708984, |
|
"loss": 2.8976, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -37.40131378173828, |
|
"rewards/margins": 5.514293670654297, |
|
"rewards/rejected": -42.915611267089844, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.536047497879559, |
|
"grad_norm": 156.56338161214597, |
|
"learning_rate": 4.2056559091646387e-07, |
|
"logits/chosen": -1.0055474042892456, |
|
"logits/rejected": -1.0448949337005615, |
|
"logps/chosen": -3.886705160140991, |
|
"logps/rejected": -4.394184112548828, |
|
"loss": 3.2048, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -38.86705780029297, |
|
"rewards/margins": 5.074785232543945, |
|
"rewards/rejected": -43.941837310791016, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.5383093016680803, |
|
"grad_norm": 127.47130440167564, |
|
"learning_rate": 4.1740383373038116e-07, |
|
"logits/chosen": -0.9955140352249146, |
|
"logits/rejected": -1.0308566093444824, |
|
"logps/chosen": -3.7038443088531494, |
|
"logps/rejected": -4.284459114074707, |
|
"loss": 3.0277, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -37.0384407043457, |
|
"rewards/margins": 5.806148052215576, |
|
"rewards/rejected": -42.84458923339844, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.5405711054566016, |
|
"grad_norm": 117.61173290234126, |
|
"learning_rate": 4.1424098670769255e-07, |
|
"logits/chosen": -1.0755598545074463, |
|
"logits/rejected": -1.0830613374710083, |
|
"logps/chosen": -3.8486387729644775, |
|
"logps/rejected": -4.272940158843994, |
|
"loss": 3.0995, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -38.48638916015625, |
|
"rewards/margins": 4.243013858795166, |
|
"rewards/rejected": -42.729400634765625, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.542832909245123, |
|
"grad_norm": 128.52803563206294, |
|
"learning_rate": 4.1107724790743007e-07, |
|
"logits/chosen": -0.9770523309707642, |
|
"logits/rejected": -1.0023921728134155, |
|
"logps/chosen": -3.738056182861328, |
|
"logps/rejected": -4.222794055938721, |
|
"loss": 2.7684, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -37.38056182861328, |
|
"rewards/margins": 4.847377777099609, |
|
"rewards/rejected": -42.22793960571289, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5450947130336443, |
|
"grad_norm": 147.2490235837458, |
|
"learning_rate": 4.0791281544446947e-07, |
|
"logits/chosen": -1.021409511566162, |
|
"logits/rejected": -1.0223335027694702, |
|
"logps/chosen": -3.826664447784424, |
|
"logps/rejected": -4.393285751342773, |
|
"loss": 2.7042, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -38.26664733886719, |
|
"rewards/margins": 5.666211128234863, |
|
"rewards/rejected": -43.932861328125, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.5473565168221657, |
|
"grad_norm": 129.31123041885093, |
|
"learning_rate": 4.0474788747712416e-07, |
|
"logits/chosen": -1.0275057554244995, |
|
"logits/rejected": -1.0452649593353271, |
|
"logps/chosen": -3.818359375, |
|
"logps/rejected": -4.30177640914917, |
|
"loss": 3.2596, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -38.183597564697266, |
|
"rewards/margins": 4.83416748046875, |
|
"rewards/rejected": -43.01776885986328, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.549618320610687, |
|
"grad_norm": 120.8533355064889, |
|
"learning_rate": 4.0158266219473573e-07, |
|
"logits/chosen": -1.0068129301071167, |
|
"logits/rejected": -1.0314866304397583, |
|
"logps/chosen": -3.585325002670288, |
|
"logps/rejected": -4.2265400886535645, |
|
"loss": 2.4481, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -35.853248596191406, |
|
"rewards/margins": 6.412153244018555, |
|
"rewards/rejected": -42.265403747558594, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.5518801243992084, |
|
"grad_norm": 110.90045627236604, |
|
"learning_rate": 3.984173378052643e-07, |
|
"logits/chosen": -0.9716352820396423, |
|
"logits/rejected": -0.9799646139144897, |
|
"logps/chosen": -3.5723931789398193, |
|
"logps/rejected": -4.194863319396973, |
|
"loss": 2.5185, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -35.72392654418945, |
|
"rewards/margins": 6.224704742431641, |
|
"rewards/rejected": -41.948638916015625, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.5541419281877297, |
|
"grad_norm": 120.07980210184144, |
|
"learning_rate": 3.9525211252287585e-07, |
|
"logits/chosen": -1.058106541633606, |
|
"logits/rejected": -1.0559909343719482, |
|
"logps/chosen": -3.759784698486328, |
|
"logps/rejected": -4.45266580581665, |
|
"loss": 2.938, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -37.59784698486328, |
|
"rewards/margins": 6.928810119628906, |
|
"rewards/rejected": -44.52665710449219, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.556403731976251, |
|
"grad_norm": 106.61134125279938, |
|
"learning_rate": 3.920871845555305e-07, |
|
"logits/chosen": -0.9979184865951538, |
|
"logits/rejected": -0.9970757961273193, |
|
"logps/chosen": -3.738825559616089, |
|
"logps/rejected": -4.284128189086914, |
|
"loss": 2.5864, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -37.38825607299805, |
|
"rewards/margins": 5.453031539916992, |
|
"rewards/rejected": -42.841285705566406, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.5586655357647724, |
|
"grad_norm": 119.36615767473035, |
|
"learning_rate": 3.8892275209256984e-07, |
|
"logits/chosen": -1.0302151441574097, |
|
"logits/rejected": -1.0194628238677979, |
|
"logps/chosen": -3.9706714153289795, |
|
"logps/rejected": -4.4482197761535645, |
|
"loss": 2.8411, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -39.70671844482422, |
|
"rewards/margins": 4.775484561920166, |
|
"rewards/rejected": -44.482200622558594, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.5609273395532938, |
|
"grad_norm": 130.80267894092015, |
|
"learning_rate": 3.8575901329230747e-07, |
|
"logits/chosen": -0.9776235818862915, |
|
"logits/rejected": -0.9871199727058411, |
|
"logps/chosen": -3.9265997409820557, |
|
"logps/rejected": -4.459488391876221, |
|
"loss": 3.1604, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -39.265995025634766, |
|
"rewards/margins": 5.328887462615967, |
|
"rewards/rejected": -44.594886779785156, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.5631891433418151, |
|
"grad_norm": 125.25589598162007, |
|
"learning_rate": 3.8259616626961886e-07, |
|
"logits/chosen": -0.9738092422485352, |
|
"logits/rejected": -0.9954218864440918, |
|
"logps/chosen": -3.7458341121673584, |
|
"logps/rejected": -4.224867343902588, |
|
"loss": 2.6759, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -37.45833969116211, |
|
"rewards/margins": 4.790335178375244, |
|
"rewards/rejected": -42.24867630004883, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.5654509471303364, |
|
"grad_norm": 130.56420647255675, |
|
"learning_rate": 3.794344090835362e-07, |
|
"logits/chosen": -0.9899100661277771, |
|
"logits/rejected": -0.9924334287643433, |
|
"logps/chosen": -3.9572811126708984, |
|
"logps/rejected": -4.470721244812012, |
|
"loss": 3.0459, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -39.57280731201172, |
|
"rewards/margins": 5.134402275085449, |
|
"rewards/rejected": -44.707210540771484, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5677127509188578, |
|
"grad_norm": 142.26182043370164, |
|
"learning_rate": 3.7627393972484534e-07, |
|
"logits/chosen": -1.0790722370147705, |
|
"logits/rejected": -1.0966429710388184, |
|
"logps/chosen": -3.9179506301879883, |
|
"logps/rejected": -4.354313850402832, |
|
"loss": 3.2659, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -39.17950439453125, |
|
"rewards/margins": 4.363633155822754, |
|
"rewards/rejected": -43.54313659667969, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.5699745547073791, |
|
"grad_norm": 140.00427052937326, |
|
"learning_rate": 3.7311495610368823e-07, |
|
"logits/chosen": -1.0594093799591064, |
|
"logits/rejected": -1.072257399559021, |
|
"logps/chosen": -3.9446516036987305, |
|
"logps/rejected": -4.463018417358398, |
|
"loss": 3.0563, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -39.44651412963867, |
|
"rewards/margins": 5.1836748123168945, |
|
"rewards/rejected": -44.630191802978516, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.5722363584959005, |
|
"grad_norm": 122.25132186267928, |
|
"learning_rate": 3.699576560371689e-07, |
|
"logits/chosen": -0.993110716342926, |
|
"logits/rejected": -1.021927833557129, |
|
"logps/chosen": -4.053295135498047, |
|
"logps/rejected": -4.832113265991211, |
|
"loss": 2.2737, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -40.53295135498047, |
|
"rewards/margins": 7.788183689117432, |
|
"rewards/rejected": -48.321136474609375, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.5744981622844219, |
|
"grad_norm": 129.41061855221696, |
|
"learning_rate": 3.66802237236966e-07, |
|
"logits/chosen": -0.9714381694793701, |
|
"logits/rejected": -0.9900146126747131, |
|
"logps/chosen": -4.126524448394775, |
|
"logps/rejected": -4.7076215744018555, |
|
"loss": 2.8103, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -41.26524353027344, |
|
"rewards/margins": 5.810970783233643, |
|
"rewards/rejected": -47.076210021972656, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.5767599660729432, |
|
"grad_norm": 120.66659888681296, |
|
"learning_rate": 3.636488972969532e-07, |
|
"logits/chosen": -0.9955747127532959, |
|
"logits/rejected": -1.003379225730896, |
|
"logps/chosen": -3.956935405731201, |
|
"logps/rejected": -4.526253700256348, |
|
"loss": 2.7392, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -39.56935501098633, |
|
"rewards/margins": 5.6931843757629395, |
|
"rewards/rejected": -45.26253890991211, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5790217698614645, |
|
"grad_norm": 124.78591584478642, |
|
"learning_rate": 3.604978336808244e-07, |
|
"logits/chosen": -1.1119791269302368, |
|
"logits/rejected": -1.1065473556518555, |
|
"logps/chosen": -3.9045979976654053, |
|
"logps/rejected": -4.47547721862793, |
|
"loss": 2.8609, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -39.04597854614258, |
|
"rewards/margins": 5.708797454833984, |
|
"rewards/rejected": -44.75477981567383, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.5812835736499858, |
|
"grad_norm": 153.07201378471103, |
|
"learning_rate": 3.5734924370972876e-07, |
|
"logits/chosen": -1.0215953588485718, |
|
"logits/rejected": -1.0469621419906616, |
|
"logps/chosen": -3.8699734210968018, |
|
"logps/rejected": -4.371584892272949, |
|
"loss": 3.1003, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -38.699729919433594, |
|
"rewards/margins": 5.016113758087158, |
|
"rewards/rejected": -43.71584701538086, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.5835453774385072, |
|
"grad_norm": 124.41939420392593, |
|
"learning_rate": 3.5420332454991504e-07, |
|
"logits/chosen": -0.9726427793502808, |
|
"logits/rejected": -0.9951732158660889, |
|
"logps/chosen": -3.9074888229370117, |
|
"logps/rejected": -4.492865085601807, |
|
"loss": 3.0649, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -39.074886322021484, |
|
"rewards/margins": 5.853763580322266, |
|
"rewards/rejected": -44.92864990234375, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.5858071812270286, |
|
"grad_norm": 112.37511160787908, |
|
"learning_rate": 3.510602732003843e-07, |
|
"logits/chosen": -1.049844741821289, |
|
"logits/rejected": -1.076419711112976, |
|
"logps/chosen": -4.0514678955078125, |
|
"logps/rejected": -4.697297096252441, |
|
"loss": 2.5823, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -40.514678955078125, |
|
"rewards/margins": 6.458294868469238, |
|
"rewards/rejected": -46.97297668457031, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.5880689850155499, |
|
"grad_norm": 139.5835828699119, |
|
"learning_rate": 3.4792028648055396e-07, |
|
"logits/chosen": -1.008234977722168, |
|
"logits/rejected": -1.0256637334823608, |
|
"logps/chosen": -3.886525869369507, |
|
"logps/rejected": -4.43724250793457, |
|
"loss": 3.0132, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -38.86526107788086, |
|
"rewards/margins": 5.5071635246276855, |
|
"rewards/rejected": -44.37242126464844, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5903307888040712, |
|
"grad_norm": 118.37128244370328, |
|
"learning_rate": 3.447835610179327e-07, |
|
"logits/chosen": -0.9956722855567932, |
|
"logits/rejected": -0.9972206354141235, |
|
"logps/chosen": -3.823888063430786, |
|
"logps/rejected": -4.58199405670166, |
|
"loss": 2.3816, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -38.23887634277344, |
|
"rewards/margins": 7.5810627937316895, |
|
"rewards/rejected": -45.8199462890625, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.5925925925925926, |
|
"grad_norm": 160.9457186044303, |
|
"learning_rate": 3.416502932358079e-07, |
|
"logits/chosen": -1.0353963375091553, |
|
"logits/rejected": -1.050615906715393, |
|
"logps/chosen": -4.162081718444824, |
|
"logps/rejected": -4.554502487182617, |
|
"loss": 3.3382, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -41.620819091796875, |
|
"rewards/margins": 3.924206018447876, |
|
"rewards/rejected": -45.54502487182617, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.5948543963811139, |
|
"grad_norm": 161.7727781170275, |
|
"learning_rate": 3.385206793409451e-07, |
|
"logits/chosen": -0.9707048535346985, |
|
"logits/rejected": -0.9927971363067627, |
|
"logps/chosen": -3.6864235401153564, |
|
"logps/rejected": -4.235956192016602, |
|
"loss": 2.7797, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -36.864234924316406, |
|
"rewards/margins": 5.495333194732666, |
|
"rewards/rejected": -42.35956573486328, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.5971162001696353, |
|
"grad_norm": 137.02910196773473, |
|
"learning_rate": 3.3539491531130163e-07, |
|
"logits/chosen": -0.9960398077964783, |
|
"logits/rejected": -1.0142368078231812, |
|
"logps/chosen": -3.9248194694519043, |
|
"logps/rejected": -4.580231189727783, |
|
"loss": 2.7295, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -39.24819564819336, |
|
"rewards/margins": 6.554119110107422, |
|
"rewards/rejected": -45.80231475830078, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.5993780039581567, |
|
"grad_norm": 128.6769155355823, |
|
"learning_rate": 3.3227319688375426e-07, |
|
"logits/chosen": -1.0612592697143555, |
|
"logits/rejected": -1.0557100772857666, |
|
"logps/chosen": -3.9213783740997314, |
|
"logps/rejected": -4.47890043258667, |
|
"loss": 2.5451, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -39.213783264160156, |
|
"rewards/margins": 5.575222015380859, |
|
"rewards/rejected": -44.78900909423828, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.601639807746678, |
|
"grad_norm": 138.67796847601616, |
|
"learning_rate": 3.291557195418427e-07, |
|
"logits/chosen": -1.034224033355713, |
|
"logits/rejected": -1.036610722541809, |
|
"logps/chosen": -3.7480900287628174, |
|
"logps/rejected": -4.198832035064697, |
|
"loss": 3.2608, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -37.48090362548828, |
|
"rewards/margins": 4.507420063018799, |
|
"rewards/rejected": -41.98832321166992, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.6039016115351993, |
|
"grad_norm": 160.19433700002344, |
|
"learning_rate": 3.260426785035272e-07, |
|
"logits/chosen": -1.0255311727523804, |
|
"logits/rejected": -1.019267201423645, |
|
"logps/chosen": -3.8694424629211426, |
|
"logps/rejected": -4.41411018371582, |
|
"loss": 3.3589, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -38.69442367553711, |
|
"rewards/margins": 5.446678161621094, |
|
"rewards/rejected": -44.1411018371582, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.6061634153237206, |
|
"grad_norm": 121.3163905223523, |
|
"learning_rate": 3.229342687089646e-07, |
|
"logits/chosen": -1.0178768634796143, |
|
"logits/rejected": -1.0326604843139648, |
|
"logps/chosen": -3.775817394256592, |
|
"logps/rejected": -4.42494010925293, |
|
"loss": 2.5944, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -37.75817108154297, |
|
"rewards/margins": 6.491227626800537, |
|
"rewards/rejected": -44.2494010925293, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.608425219112242, |
|
"grad_norm": 131.50615333630248, |
|
"learning_rate": 3.1983068480830143e-07, |
|
"logits/chosen": -1.0081015825271606, |
|
"logits/rejected": -1.0148911476135254, |
|
"logps/chosen": -3.7946343421936035, |
|
"logps/rejected": -4.409883499145508, |
|
"loss": 2.8688, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -37.94634246826172, |
|
"rewards/margins": 6.152492523193359, |
|
"rewards/rejected": -44.09883499145508, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.6106870229007634, |
|
"grad_norm": 107.57877969566793, |
|
"learning_rate": 3.1673212114948387e-07, |
|
"logits/chosen": -1.007430911064148, |
|
"logits/rejected": -1.0249990224838257, |
|
"logps/chosen": -3.656942367553711, |
|
"logps/rejected": -4.346622943878174, |
|
"loss": 2.3841, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -36.56942367553711, |
|
"rewards/margins": 6.8968048095703125, |
|
"rewards/rejected": -43.46622848510742, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6129488266892847, |
|
"grad_norm": 147.68119768244156, |
|
"learning_rate": 3.1363877176608845e-07, |
|
"logits/chosen": -0.971410870552063, |
|
"logits/rejected": -0.9872971177101135, |
|
"logps/chosen": -3.5491039752960205, |
|
"logps/rejected": -4.17361307144165, |
|
"loss": 3.0515, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -35.49103927612305, |
|
"rewards/margins": 6.245091438293457, |
|
"rewards/rejected": -41.73612976074219, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.615210630477806, |
|
"grad_norm": 123.34689558332101, |
|
"learning_rate": 3.1055083036517076e-07, |
|
"logits/chosen": -0.9573481678962708, |
|
"logits/rejected": -0.9527941346168518, |
|
"logps/chosen": -3.5805444717407227, |
|
"logps/rejected": -4.212943077087402, |
|
"loss": 2.6692, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -35.80544662475586, |
|
"rewards/margins": 6.323984146118164, |
|
"rewards/rejected": -42.129425048828125, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.6174724342663274, |
|
"grad_norm": 160.545824011332, |
|
"learning_rate": 3.074684903151364e-07, |
|
"logits/chosen": -0.886579692363739, |
|
"logits/rejected": -0.8759438991546631, |
|
"logps/chosen": -3.2595725059509277, |
|
"logps/rejected": -3.7537169456481934, |
|
"loss": 2.6308, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -32.595726013183594, |
|
"rewards/margins": 4.9414472579956055, |
|
"rewards/rejected": -37.537174224853516, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.6197342380548487, |
|
"grad_norm": 125.66829712347311, |
|
"learning_rate": 3.0439194463363136e-07, |
|
"logits/chosen": -0.9526737332344055, |
|
"logits/rejected": -0.9451663494110107, |
|
"logps/chosen": -3.394702672958374, |
|
"logps/rejected": -3.9073362350463867, |
|
"loss": 3.125, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -33.947025299072266, |
|
"rewards/margins": 5.126337051391602, |
|
"rewards/rejected": -39.0733642578125, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.6219960418433701, |
|
"grad_norm": 104.78396204673304, |
|
"learning_rate": 3.0132138597545537e-07, |
|
"logits/chosen": -0.9857915639877319, |
|
"logits/rejected": -1.0121065378189087, |
|
"logps/chosen": -3.5853991508483887, |
|
"logps/rejected": -4.302824020385742, |
|
"loss": 2.4616, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -35.85398864746094, |
|
"rewards/margins": 7.174255847930908, |
|
"rewards/rejected": -43.02824401855469, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.6242578456318915, |
|
"grad_norm": 108.85090956828184, |
|
"learning_rate": 2.982570066204981e-07, |
|
"logits/chosen": -0.9941725730895996, |
|
"logits/rejected": -1.0057865381240845, |
|
"logps/chosen": -3.477489709854126, |
|
"logps/rejected": -4.046513557434082, |
|
"loss": 2.7425, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -34.77489471435547, |
|
"rewards/margins": 5.690241813659668, |
|
"rewards/rejected": -40.46513366699219, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.6265196494204128, |
|
"grad_norm": 135.32599122966644, |
|
"learning_rate": 2.951989984616979e-07, |
|
"logits/chosen": -0.9097891449928284, |
|
"logits/rejected": -0.9378990530967712, |
|
"logps/chosen": -3.668203353881836, |
|
"logps/rejected": -4.28189754486084, |
|
"loss": 3.1229, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -36.68203353881836, |
|
"rewards/margins": 6.1369452476501465, |
|
"rewards/rejected": -42.81897735595703, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.6287814532089341, |
|
"grad_norm": 139.27661482355427, |
|
"learning_rate": 2.9214755299302584e-07, |
|
"logits/chosen": -0.9209241271018982, |
|
"logits/rejected": -0.9369145035743713, |
|
"logps/chosen": -3.863503932952881, |
|
"logps/rejected": -4.510960578918457, |
|
"loss": 2.4385, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -38.635040283203125, |
|
"rewards/margins": 6.4745659828186035, |
|
"rewards/rejected": -45.1096076965332, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.6310432569974554, |
|
"grad_norm": 123.59593876525113, |
|
"learning_rate": 2.89102861297494e-07, |
|
"logits/chosen": -1.0065804719924927, |
|
"logits/rejected": -1.0266199111938477, |
|
"logps/chosen": -3.5933847427368164, |
|
"logps/rejected": -4.118598937988281, |
|
"loss": 3.2899, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -35.93384552001953, |
|
"rewards/margins": 5.2521491050720215, |
|
"rewards/rejected": -41.18599319458008, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.6333050607859768, |
|
"grad_norm": 146.8835649043664, |
|
"learning_rate": 2.860651140351902e-07, |
|
"logits/chosen": -0.9877956509590149, |
|
"logits/rejected": -0.9763099551200867, |
|
"logps/chosen": -3.7720227241516113, |
|
"logps/rejected": -4.390742301940918, |
|
"loss": 3.0084, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -37.72023010253906, |
|
"rewards/margins": 6.187195777893066, |
|
"rewards/rejected": -43.90742492675781, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6355668645744982, |
|
"grad_norm": 136.87735762041433, |
|
"learning_rate": 2.830345014313381e-07, |
|
"logits/chosen": -0.9115749001502991, |
|
"logits/rejected": -0.9458879232406616, |
|
"logps/chosen": -3.7744479179382324, |
|
"logps/rejected": -4.492808818817139, |
|
"loss": 2.1555, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -37.744476318359375, |
|
"rewards/margins": 7.183604717254639, |
|
"rewards/rejected": -44.92808532714844, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.6378286683630195, |
|
"grad_norm": 264.5592737704986, |
|
"learning_rate": 2.800112132643856e-07, |
|
"logits/chosen": -0.9835478067398071, |
|
"logits/rejected": -0.9920839071273804, |
|
"logps/chosen": -3.7704484462738037, |
|
"logps/rejected": -4.425080299377441, |
|
"loss": 2.748, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -37.704490661621094, |
|
"rewards/margins": 6.5463151931762695, |
|
"rewards/rejected": -44.25080108642578, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.6400904721515408, |
|
"grad_norm": 140.02954229421476, |
|
"learning_rate": 2.7699543885412105e-07, |
|
"logits/chosen": -0.9847967624664307, |
|
"logits/rejected": -0.9947928190231323, |
|
"logps/chosen": -3.926264762878418, |
|
"logps/rejected": -4.609628677368164, |
|
"loss": 2.6621, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -39.26264572143555, |
|
"rewards/margins": 6.833641529083252, |
|
"rewards/rejected": -46.096290588378906, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.6423522759400622, |
|
"grad_norm": 129.82255460172894, |
|
"learning_rate": 2.7398736704981725e-07, |
|
"logits/chosen": -0.9884201884269714, |
|
"logits/rejected": -0.9775363206863403, |
|
"logps/chosen": -3.9083778858184814, |
|
"logps/rejected": -4.508337497711182, |
|
"loss": 2.6882, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -39.083778381347656, |
|
"rewards/margins": 5.999593734741211, |
|
"rewards/rejected": -45.083377838134766, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.6446140797285835, |
|
"grad_norm": 121.35553644312031, |
|
"learning_rate": 2.709871862184063e-07, |
|
"logits/chosen": -0.9748336672782898, |
|
"logits/rejected": -0.9760329723358154, |
|
"logps/chosen": -3.854752779006958, |
|
"logps/rejected": -4.452006816864014, |
|
"loss": 2.9473, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -38.54752731323242, |
|
"rewards/margins": 5.972541809082031, |
|
"rewards/rejected": -44.52007293701172, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6468758835171049, |
|
"grad_norm": 125.06664837032388, |
|
"learning_rate": 2.679950842326837e-07, |
|
"logits/chosen": -0.9941256642341614, |
|
"logits/rejected": -1.0105838775634766, |
|
"logps/chosen": -3.97147536277771, |
|
"logps/rejected": -4.675477027893066, |
|
"loss": 2.4702, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -39.71475601196289, |
|
"rewards/margins": 7.0400166511535645, |
|
"rewards/rejected": -46.75476837158203, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.6491376873056263, |
|
"grad_norm": 117.15372256913174, |
|
"learning_rate": 2.6501124845954363e-07, |
|
"logits/chosen": -0.9609854221343994, |
|
"logits/rejected": -0.97614985704422, |
|
"logps/chosen": -3.9064829349517822, |
|
"logps/rejected": -4.611234664916992, |
|
"loss": 2.3138, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -39.06482696533203, |
|
"rewards/margins": 7.047514915466309, |
|
"rewards/rejected": -46.112342834472656, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.6513994910941476, |
|
"grad_norm": 121.11552693153162, |
|
"learning_rate": 2.62035865748246e-07, |
|
"logits/chosen": -0.9279154539108276, |
|
"logits/rejected": -0.9519023895263672, |
|
"logps/chosen": -3.7087767124176025, |
|
"logps/rejected": -4.319790840148926, |
|
"loss": 2.7549, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -37.0877685546875, |
|
"rewards/margins": 6.110138893127441, |
|
"rewards/rejected": -43.19790267944336, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.6536612948826689, |
|
"grad_norm": 130.26218993087767, |
|
"learning_rate": 2.5906912241871554e-07, |
|
"logits/chosen": -1.0201935768127441, |
|
"logits/rejected": -1.043340802192688, |
|
"logps/chosen": -3.9793386459350586, |
|
"logps/rejected": -4.5773797035217285, |
|
"loss": 2.7682, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -39.79338836669922, |
|
"rewards/margins": 5.980408668518066, |
|
"rewards/rejected": -45.77379608154297, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.6559230986711903, |
|
"grad_norm": 125.55185782665302, |
|
"learning_rate": 2.561112042498753e-07, |
|
"logits/chosen": -0.8966621160507202, |
|
"logits/rejected": -0.9353015422821045, |
|
"logps/chosen": -3.6716508865356445, |
|
"logps/rejected": -4.206057071685791, |
|
"loss": 3.198, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -36.71651077270508, |
|
"rewards/margins": 5.344060897827148, |
|
"rewards/rejected": -42.060569763183594, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6581849024597116, |
|
"grad_norm": 144.3268094681645, |
|
"learning_rate": 2.5316229646801195e-07, |
|
"logits/chosen": -0.932185173034668, |
|
"logits/rejected": -0.9732425808906555, |
|
"logps/chosen": -4.166126251220703, |
|
"logps/rejected": -4.736380100250244, |
|
"loss": 2.7184, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -41.661258697509766, |
|
"rewards/margins": 5.702537536621094, |
|
"rewards/rejected": -47.363800048828125, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.660446706248233, |
|
"grad_norm": 126.32353599732778, |
|
"learning_rate": 2.5022258373517714e-07, |
|
"logits/chosen": -1.027467966079712, |
|
"logits/rejected": -1.0455117225646973, |
|
"logps/chosen": -4.024359703063965, |
|
"logps/rejected": -4.604401111602783, |
|
"loss": 2.4376, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -40.24359130859375, |
|
"rewards/margins": 5.8004150390625, |
|
"rewards/rejected": -46.04401397705078, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.6627085100367544, |
|
"grad_norm": 153.1292132880523, |
|
"learning_rate": 2.4729225013762474e-07, |
|
"logits/chosen": -1.0765081644058228, |
|
"logits/rejected": -1.0887564420700073, |
|
"logps/chosen": -4.230800151824951, |
|
"logps/rejected": -4.803038120269775, |
|
"loss": 3.3685, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -42.30799865722656, |
|
"rewards/margins": 5.722385406494141, |
|
"rewards/rejected": -48.03038787841797, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.6649703138252756, |
|
"grad_norm": 174.69077104466194, |
|
"learning_rate": 2.4437147917428203e-07, |
|
"logits/chosen": -0.9644549489021301, |
|
"logits/rejected": -0.9886618852615356, |
|
"logps/chosen": -4.125370979309082, |
|
"logps/rejected": -4.838853359222412, |
|
"loss": 2.4304, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -41.25370788574219, |
|
"rewards/margins": 7.13482666015625, |
|
"rewards/rejected": -48.38853454589844, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.667232117613797, |
|
"grad_norm": 141.58375978060164, |
|
"learning_rate": 2.414604537452595e-07, |
|
"logits/chosen": -0.9465200901031494, |
|
"logits/rejected": -0.9769154787063599, |
|
"logps/chosen": -3.9610066413879395, |
|
"logps/rejected": -4.540956020355225, |
|
"loss": 2.7019, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -39.61006546020508, |
|
"rewards/margins": 5.799488067626953, |
|
"rewards/rejected": -45.4095573425293, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6694939214023183, |
|
"grad_norm": 127.64444965210518, |
|
"learning_rate": 2.385593561403974e-07, |
|
"logits/chosen": -0.9842420816421509, |
|
"logits/rejected": -1.0134546756744385, |
|
"logps/chosen": -3.8516955375671387, |
|
"logps/rejected": -4.495049476623535, |
|
"loss": 2.3919, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -38.5169563293457, |
|
"rewards/margins": 6.433538436889648, |
|
"rewards/rejected": -44.95048904418945, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.6717557251908397, |
|
"grad_norm": 144.10890298479302, |
|
"learning_rate": 2.3566836802785119e-07, |
|
"logits/chosen": -0.979082465171814, |
|
"logits/rejected": -1.023681879043579, |
|
"logps/chosen": -3.9952311515808105, |
|
"logps/rejected": -4.672858715057373, |
|
"loss": 2.6128, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -39.95231246948242, |
|
"rewards/margins": 6.776278495788574, |
|
"rewards/rejected": -46.72859191894531, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.6740175289793611, |
|
"grad_norm": 146.40552176309802, |
|
"learning_rate": 2.327876704427146e-07, |
|
"logits/chosen": -0.9357240200042725, |
|
"logits/rejected": -0.9553586840629578, |
|
"logps/chosen": -3.953981399536133, |
|
"logps/rejected": -4.403899192810059, |
|
"loss": 3.1675, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -39.539817810058594, |
|
"rewards/margins": 4.499179363250732, |
|
"rewards/rejected": -44.038997650146484, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.6762793327678824, |
|
"grad_norm": 187.31050943295546, |
|
"learning_rate": 2.2991744377568358e-07, |
|
"logits/chosen": -0.970415472984314, |
|
"logits/rejected": -0.9577158093452454, |
|
"logps/chosen": -4.1939377784729, |
|
"logps/rejected": -4.748856067657471, |
|
"loss": 2.9641, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -41.93937683105469, |
|
"rewards/margins": 5.549182415008545, |
|
"rewards/rejected": -47.48855972290039, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.6785411365564037, |
|
"grad_norm": 135.98924741006712, |
|
"learning_rate": 2.270578677617601e-07, |
|
"logits/chosen": -1.0224244594573975, |
|
"logits/rejected": -1.027580976486206, |
|
"logps/chosen": -3.9916560649871826, |
|
"logps/rejected": -4.7170891761779785, |
|
"loss": 2.9469, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -39.916561126708984, |
|
"rewards/margins": 7.254332065582275, |
|
"rewards/rejected": -47.170894622802734, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6808029403449251, |
|
"grad_norm": 136.640613281119, |
|
"learning_rate": 2.242091214689971e-07, |
|
"logits/chosen": -0.9999783635139465, |
|
"logits/rejected": -1.0360140800476074, |
|
"logps/chosen": -4.102837085723877, |
|
"logps/rejected": -4.871516704559326, |
|
"loss": 2.1939, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -41.02837371826172, |
|
"rewards/margins": 7.686791896820068, |
|
"rewards/rejected": -48.71516418457031, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.6830647441334464, |
|
"grad_norm": 154.17837760004184, |
|
"learning_rate": 2.2137138328728456e-07, |
|
"logits/chosen": -1.068799614906311, |
|
"logits/rejected": -1.0587490797042847, |
|
"logps/chosen": -4.274501800537109, |
|
"logps/rejected": -4.7902913093566895, |
|
"loss": 2.7619, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -42.74501419067383, |
|
"rewards/margins": 5.157898902893066, |
|
"rewards/rejected": -47.90291213989258, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.6853265479219678, |
|
"grad_norm": 131.10065185492104, |
|
"learning_rate": 2.1854483091717974e-07, |
|
"logits/chosen": -1.0478172302246094, |
|
"logits/rejected": -1.082123875617981, |
|
"logps/chosen": -4.067604064941406, |
|
"logps/rejected": -4.748412132263184, |
|
"loss": 2.3182, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -40.67604064941406, |
|
"rewards/margins": 6.808077812194824, |
|
"rewards/rejected": -47.4841194152832, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.6875883517104892, |
|
"grad_norm": 141.99343439282094, |
|
"learning_rate": 2.1572964135877863e-07, |
|
"logits/chosen": -1.0282924175262451, |
|
"logits/rejected": -1.044624924659729, |
|
"logps/chosen": -4.233683109283447, |
|
"logps/rejected": -4.781349182128906, |
|
"loss": 3.0716, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -42.33683395385742, |
|
"rewards/margins": 5.47665548324585, |
|
"rewards/rejected": -47.81349182128906, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.6898501554990104, |
|
"grad_norm": 132.95424309349204, |
|
"learning_rate": 2.1292599090063245e-07, |
|
"logits/chosen": -1.051018238067627, |
|
"logits/rejected": -1.0652154684066772, |
|
"logps/chosen": -4.106940269470215, |
|
"logps/rejected": -4.771925926208496, |
|
"loss": 2.7157, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -41.06940460205078, |
|
"rewards/margins": 6.64985466003418, |
|
"rewards/rejected": -47.719261169433594, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6921119592875318, |
|
"grad_norm": 168.53760998662602, |
|
"learning_rate": 2.1013405510870824e-07, |
|
"logits/chosen": -0.9595056772232056, |
|
"logits/rejected": -1.0049453973770142, |
|
"logps/chosen": -4.183114051818848, |
|
"logps/rejected": -4.8534088134765625, |
|
"loss": 2.2945, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -41.83113479614258, |
|
"rewards/margins": 6.702949047088623, |
|
"rewards/rejected": -48.53408432006836, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.6943737630760531, |
|
"grad_norm": 131.40781932802983, |
|
"learning_rate": 2.0735400881539494e-07, |
|
"logits/chosen": -0.9997584819793701, |
|
"logits/rejected": -1.0140373706817627, |
|
"logps/chosen": -4.345150947570801, |
|
"logps/rejected": -5.112905979156494, |
|
"loss": 2.5635, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -43.45151138305664, |
|
"rewards/margins": 7.677546977996826, |
|
"rewards/rejected": -51.129058837890625, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.6966355668645745, |
|
"grad_norm": 146.1006073159469, |
|
"learning_rate": 2.0458602610855536e-07, |
|
"logits/chosen": -1.0719635486602783, |
|
"logits/rejected": -1.0923492908477783, |
|
"logps/chosen": -4.266014575958252, |
|
"logps/rejected": -4.919377326965332, |
|
"loss": 2.1382, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -42.6601448059082, |
|
"rewards/margins": 6.533631324768066, |
|
"rewards/rejected": -49.19377517700195, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.6988973706530959, |
|
"grad_norm": 134.08733031208772, |
|
"learning_rate": 2.0183028032062422e-07, |
|
"logits/chosen": -1.0127052068710327, |
|
"logits/rejected": -1.0283401012420654, |
|
"logps/chosen": -4.235683917999268, |
|
"logps/rejected": -4.89675235748291, |
|
"loss": 2.6057, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -42.356834411621094, |
|
"rewards/margins": 6.610686779022217, |
|
"rewards/rejected": -48.967525482177734, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.7011591744416172, |
|
"grad_norm": 141.92746181345072, |
|
"learning_rate": 1.9908694401775473e-07, |
|
"logits/chosen": -1.0361559391021729, |
|
"logits/rejected": -1.0541431903839111, |
|
"logps/chosen": -4.365784645080566, |
|
"logps/rejected": -4.999772071838379, |
|
"loss": 2.4663, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -43.6578483581543, |
|
"rewards/margins": 6.339878082275391, |
|
"rewards/rejected": -49.99772644042969, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7034209782301385, |
|
"grad_norm": 142.1330410448072, |
|
"learning_rate": 1.9635618898901196e-07, |
|
"logits/chosen": -1.0413570404052734, |
|
"logits/rejected": -1.0654895305633545, |
|
"logps/chosen": -4.698367118835449, |
|
"logps/rejected": -5.39738655090332, |
|
"loss": 2.783, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -46.98366165161133, |
|
"rewards/margins": 6.990198135375977, |
|
"rewards/rejected": -53.97386169433594, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.7056827820186599, |
|
"grad_norm": 145.89263720825656, |
|
"learning_rate": 1.9363818623561565e-07, |
|
"logits/chosen": -0.9755901098251343, |
|
"logits/rejected": -1.0139052867889404, |
|
"logps/chosen": -4.391622066497803, |
|
"logps/rejected": -5.099162578582764, |
|
"loss": 2.3118, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -43.916221618652344, |
|
"rewards/margins": 7.075403690338135, |
|
"rewards/rejected": -50.99162292480469, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.7079445858071812, |
|
"grad_norm": 162.43440436504977, |
|
"learning_rate": 1.9093310596023108e-07, |
|
"logits/chosen": -0.9699859619140625, |
|
"logits/rejected": -0.989612340927124, |
|
"logps/chosen": -4.177772045135498, |
|
"logps/rejected": -4.972480773925781, |
|
"loss": 2.3045, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -41.7777214050293, |
|
"rewards/margins": 7.947083950042725, |
|
"rewards/rejected": -49.72480773925781, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.7102063895957026, |
|
"grad_norm": 145.42479603932756, |
|
"learning_rate": 1.8824111755631274e-07, |
|
"logits/chosen": -1.040204405784607, |
|
"logits/rejected": -1.0809576511383057, |
|
"logps/chosen": -4.195197582244873, |
|
"logps/rejected": -4.8409929275512695, |
|
"loss": 2.6452, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -41.95197677612305, |
|
"rewards/margins": 6.457963466644287, |
|
"rewards/rejected": -48.409934997558594, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.712468193384224, |
|
"grad_norm": 184.2900445939564, |
|
"learning_rate": 1.8556238959749457e-07, |
|
"logits/chosen": -1.0256731510162354, |
|
"logits/rejected": -1.0392942428588867, |
|
"logps/chosen": -4.564783573150635, |
|
"logps/rejected": -5.029587745666504, |
|
"loss": 3.6971, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -45.64784240722656, |
|
"rewards/margins": 4.648043155670166, |
|
"rewards/rejected": -50.29588317871094, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7147299971727452, |
|
"grad_norm": 167.31951186290343, |
|
"learning_rate": 1.8289708982703562e-07, |
|
"logits/chosen": -0.9890896677970886, |
|
"logits/rejected": -0.9944027662277222, |
|
"logps/chosen": -4.358548164367676, |
|
"logps/rejected": -5.011772632598877, |
|
"loss": 3.4896, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -43.585479736328125, |
|
"rewards/margins": 6.532249450683594, |
|
"rewards/rejected": -50.117733001708984, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.7169918009612666, |
|
"grad_norm": 157.74959564317945, |
|
"learning_rate": 1.802453851473151e-07, |
|
"logits/chosen": -1.0457000732421875, |
|
"logits/rejected": -1.049147367477417, |
|
"logps/chosen": -4.55790901184082, |
|
"logps/rejected": -5.1913676261901855, |
|
"loss": 2.8568, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -45.57909393310547, |
|
"rewards/margins": 6.334589958190918, |
|
"rewards/rejected": -51.91367721557617, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.719253604749788, |
|
"grad_norm": 124.17425017836192, |
|
"learning_rate": 1.7760744160938093e-07, |
|
"logits/chosen": -0.9741401672363281, |
|
"logits/rejected": -0.9875414967536926, |
|
"logps/chosen": -4.3163018226623535, |
|
"logps/rejected": -5.189938545227051, |
|
"loss": 2.4261, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -43.16301727294922, |
|
"rewards/margins": 8.736367225646973, |
|
"rewards/rejected": -51.899383544921875, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.7215154085383093, |
|
"grad_norm": 143.2333139435795, |
|
"learning_rate": 1.7498342440255135e-07, |
|
"logits/chosen": -1.02549409866333, |
|
"logits/rejected": -1.0476658344268799, |
|
"logps/chosen": -4.412936687469482, |
|
"logps/rejected": -5.046929836273193, |
|
"loss": 2.3563, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -44.129364013671875, |
|
"rewards/margins": 6.339935302734375, |
|
"rewards/rejected": -50.46929931640625, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.7237772123268307, |
|
"grad_norm": 129.7693981499202, |
|
"learning_rate": 1.7237349784407115e-07, |
|
"logits/chosen": -1.0487139225006104, |
|
"logits/rejected": -1.0472726821899414, |
|
"logps/chosen": -4.422894477844238, |
|
"logps/rejected": -5.140397071838379, |
|
"loss": 2.4265, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -44.22894287109375, |
|
"rewards/margins": 7.175027847290039, |
|
"rewards/rejected": -51.40396499633789, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.726039016115352, |
|
"grad_norm": 173.50321052624827, |
|
"learning_rate": 1.6977782536882178e-07, |
|
"logits/chosen": -0.965328574180603, |
|
"logits/rejected": -0.9848321676254272, |
|
"logps/chosen": -4.0111260414123535, |
|
"logps/rejected": -4.735203266143799, |
|
"loss": 2.9344, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -40.11125946044922, |
|
"rewards/margins": 7.240774154663086, |
|
"rewards/rejected": -47.35203170776367, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.7283008199038733, |
|
"grad_norm": 123.39857018353256, |
|
"learning_rate": 1.6719656951908708e-07, |
|
"logits/chosen": -0.9558125138282776, |
|
"logits/rejected": -0.9725270867347717, |
|
"logps/chosen": -3.8042140007019043, |
|
"logps/rejected": -4.5209832191467285, |
|
"loss": 2.3297, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -38.04214096069336, |
|
"rewards/margins": 7.167694568634033, |
|
"rewards/rejected": -45.209835052490234, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.7305626236923947, |
|
"grad_norm": 130.55176101349375, |
|
"learning_rate": 1.6462989193437453e-07, |
|
"logits/chosen": -1.0484962463378906, |
|
"logits/rejected": -1.0619350671768188, |
|
"logps/chosen": -4.2033610343933105, |
|
"logps/rejected": -4.800261974334717, |
|
"loss": 2.7066, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -42.033607482910156, |
|
"rewards/margins": 5.96901273727417, |
|
"rewards/rejected": -48.002620697021484, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.732824427480916, |
|
"grad_norm": 158.82804097297827, |
|
"learning_rate": 1.6207795334129365e-07, |
|
"logits/chosen": -1.0020612478256226, |
|
"logits/rejected": -1.013137936592102, |
|
"logps/chosen": -4.441118240356445, |
|
"logps/rejected": -5.05643367767334, |
|
"loss": 2.8291, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -44.41117858886719, |
|
"rewards/margins": 6.15315580368042, |
|
"rewards/rejected": -50.56433868408203, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.7350862312694374, |
|
"grad_norm": 150.76796968458723, |
|
"learning_rate": 1.5954091354349121e-07, |
|
"logits/chosen": -1.03180992603302, |
|
"logits/rejected": -1.0410631895065308, |
|
"logps/chosen": -4.214303016662598, |
|
"logps/rejected": -4.780424118041992, |
|
"loss": 3.0735, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -42.143028259277344, |
|
"rewards/margins": 5.661211013793945, |
|
"rewards/rejected": -47.80424118041992, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7373480350579588, |
|
"grad_norm": 218.6742952589567, |
|
"learning_rate": 1.5701893141164364e-07, |
|
"logits/chosen": -1.017613410949707, |
|
"logits/rejected": -1.0423823595046997, |
|
"logps/chosen": -4.3751220703125, |
|
"logps/rejected": -5.050397872924805, |
|
"loss": 3.5092, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -43.75121307373047, |
|
"rewards/margins": 6.752757549285889, |
|
"rewards/rejected": -50.503971099853516, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.73960983884648, |
|
"grad_norm": 160.23254624175624, |
|
"learning_rate": 1.545121648735093e-07, |
|
"logits/chosen": -0.9994143843650818, |
|
"logits/rejected": -1.0151207447052002, |
|
"logps/chosen": -4.263593673706055, |
|
"logps/rejected": -4.8338093757629395, |
|
"loss": 3.1735, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -42.63593673706055, |
|
"rewards/margins": 5.702155113220215, |
|
"rewards/rejected": -48.33809280395508, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.7418716426350014, |
|
"grad_norm": 163.32673670478113, |
|
"learning_rate": 1.5202077090403863e-07, |
|
"logits/chosen": -1.0353153944015503, |
|
"logits/rejected": -1.0160064697265625, |
|
"logps/chosen": -3.8241469860076904, |
|
"logps/rejected": -4.438475608825684, |
|
"loss": 2.3941, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -38.24147033691406, |
|
"rewards/margins": 6.143289089202881, |
|
"rewards/rejected": -44.38475799560547, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.7441334464235227, |
|
"grad_norm": 130.92998188860977, |
|
"learning_rate": 1.495449055155443e-07, |
|
"logits/chosen": -1.0376348495483398, |
|
"logits/rejected": -1.0563832521438599, |
|
"logps/chosen": -4.206333160400391, |
|
"logps/rejected": -5.007129192352295, |
|
"loss": 2.0791, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -42.06332778930664, |
|
"rewards/margins": 8.007962226867676, |
|
"rewards/rejected": -50.071292877197266, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.7463952502120441, |
|
"grad_norm": 142.24197159740774, |
|
"learning_rate": 1.4708472374793112e-07, |
|
"logits/chosen": -0.9717074036598206, |
|
"logits/rejected": -0.9877137541770935, |
|
"logps/chosen": -4.200423717498779, |
|
"logps/rejected": -4.679144382476807, |
|
"loss": 3.3547, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -42.004234313964844, |
|
"rewards/margins": 4.787207126617432, |
|
"rewards/rejected": -46.791446685791016, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7486570540005655, |
|
"grad_norm": 146.37687615562592, |
|
"learning_rate": 1.4464037965898878e-07, |
|
"logits/chosen": -0.9298246502876282, |
|
"logits/rejected": -0.9464177489280701, |
|
"logps/chosen": -4.0088372230529785, |
|
"logps/rejected": -4.592347145080566, |
|
"loss": 2.8655, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -40.0883674621582, |
|
"rewards/margins": 5.835107326507568, |
|
"rewards/rejected": -45.9234733581543, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.7509188577890868, |
|
"grad_norm": 127.84221615734145, |
|
"learning_rate": 1.4221202631474282e-07, |
|
"logits/chosen": -0.9620643854141235, |
|
"logits/rejected": -0.9797786474227905, |
|
"logps/chosen": -4.01053524017334, |
|
"logps/rejected": -4.633822917938232, |
|
"loss": 2.7568, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -40.105350494384766, |
|
"rewards/margins": 6.232873439788818, |
|
"rewards/rejected": -46.338226318359375, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.7531806615776081, |
|
"grad_norm": 150.03647459536558, |
|
"learning_rate": 1.3979981577987113e-07, |
|
"logits/chosen": -0.9951493740081787, |
|
"logits/rejected": -0.9948168992996216, |
|
"logps/chosen": -3.800516128540039, |
|
"logps/rejected": -4.444062232971191, |
|
"loss": 2.7132, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -38.00516128540039, |
|
"rewards/margins": 6.43546199798584, |
|
"rewards/rejected": -44.44062805175781, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.7554424653661295, |
|
"grad_norm": 135.7290132406485, |
|
"learning_rate": 1.374038991081807e-07, |
|
"logits/chosen": -1.027406930923462, |
|
"logits/rejected": -1.036132574081421, |
|
"logps/chosen": -3.998603582382202, |
|
"logps/rejected": -4.531172752380371, |
|
"loss": 3.0388, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -39.98603439331055, |
|
"rewards/margins": 5.325689315795898, |
|
"rewards/rejected": -45.31172180175781, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.7577042691546508, |
|
"grad_norm": 125.23444933932889, |
|
"learning_rate": 1.3502442633314882e-07, |
|
"logits/chosen": -0.9691722393035889, |
|
"logits/rejected": -0.9745641946792603, |
|
"logps/chosen": -3.532977819442749, |
|
"logps/rejected": -4.133716106414795, |
|
"loss": 2.4349, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -35.329776763916016, |
|
"rewards/margins": 6.007383346557617, |
|
"rewards/rejected": -41.337162017822266, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7599660729431722, |
|
"grad_norm": 128.92513394998343, |
|
"learning_rate": 1.3266154645852815e-07, |
|
"logits/chosen": -0.954643726348877, |
|
"logits/rejected": -0.9673725366592407, |
|
"logps/chosen": -3.8769335746765137, |
|
"logps/rejected": -4.439427375793457, |
|
"loss": 2.5734, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -38.76933288574219, |
|
"rewards/margins": 5.624938011169434, |
|
"rewards/rejected": -44.39426803588867, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.7622278767316936, |
|
"grad_norm": 147.76541455493768, |
|
"learning_rate": 1.303154074490152e-07, |
|
"logits/chosen": -1.0287764072418213, |
|
"logits/rejected": -1.0113904476165771, |
|
"logps/chosen": -3.786878824234009, |
|
"logps/rejected": -4.46217155456543, |
|
"loss": 2.5939, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -37.86878967285156, |
|
"rewards/margins": 6.752928733825684, |
|
"rewards/rejected": -44.6217155456543, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.7644896805202148, |
|
"grad_norm": 142.75305434316238, |
|
"learning_rate": 1.2798615622098616e-07, |
|
"logits/chosen": -1.0177001953125, |
|
"logits/rejected": -1.0262219905853271, |
|
"logps/chosen": -3.7984120845794678, |
|
"logps/rejected": -4.458211898803711, |
|
"loss": 2.9229, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -37.98412322998047, |
|
"rewards/margins": 6.597997188568115, |
|
"rewards/rejected": -44.58211898803711, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.7667514843087362, |
|
"grad_norm": 112.30213958654507, |
|
"learning_rate": 1.2567393863329523e-07, |
|
"logits/chosen": -0.9927775859832764, |
|
"logits/rejected": -1.0197595357894897, |
|
"logps/chosen": -3.857407808303833, |
|
"logps/rejected": -4.5602006912231445, |
|
"loss": 2.5316, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -38.57407760620117, |
|
"rewards/margins": 7.027931213378906, |
|
"rewards/rejected": -45.602012634277344, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.7690132880972576, |
|
"grad_norm": 108.74864691561903, |
|
"learning_rate": 1.233788994781423e-07, |
|
"logits/chosen": -1.0283674001693726, |
|
"logits/rejected": -1.0587186813354492, |
|
"logps/chosen": -3.7999324798583984, |
|
"logps/rejected": -4.409298896789551, |
|
"loss": 2.5153, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -37.99932861328125, |
|
"rewards/margins": 6.0936665534973145, |
|
"rewards/rejected": -44.092994689941406, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7712750918857789, |
|
"grad_norm": 129.95842547216532, |
|
"learning_rate": 1.2110118247200468e-07, |
|
"logits/chosen": -1.0152841806411743, |
|
"logits/rejected": -1.024249792098999, |
|
"logps/chosen": -3.7863681316375732, |
|
"logps/rejected": -4.425829887390137, |
|
"loss": 2.3635, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -37.863685607910156, |
|
"rewards/margins": 6.394619941711426, |
|
"rewards/rejected": -44.258304595947266, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.7735368956743003, |
|
"grad_norm": 146.66216567516653, |
|
"learning_rate": 1.1884093024663933e-07, |
|
"logits/chosen": -1.0188724994659424, |
|
"logits/rejected": -1.0249488353729248, |
|
"logps/chosen": -3.599705219268799, |
|
"logps/rejected": -4.3782243728637695, |
|
"loss": 2.8172, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -35.99705123901367, |
|
"rewards/margins": 7.78519344329834, |
|
"rewards/rejected": -43.78224182128906, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.7757986994628217, |
|
"grad_norm": 146.372388655652, |
|
"learning_rate": 1.1659828434014886e-07, |
|
"logits/chosen": -1.0295227766036987, |
|
"logits/rejected": -1.0136163234710693, |
|
"logps/chosen": -3.5961639881134033, |
|
"logps/rejected": -4.3412322998046875, |
|
"loss": 2.3208, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -35.961639404296875, |
|
"rewards/margins": 7.450683116912842, |
|
"rewards/rejected": -43.41232681274414, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.7780605032513429, |
|
"grad_norm": 175.70856414947238, |
|
"learning_rate": 1.143733851881203e-07, |
|
"logits/chosen": -1.0678043365478516, |
|
"logits/rejected": -1.0645167827606201, |
|
"logps/chosen": -3.902024030685425, |
|
"logps/rejected": -4.638064861297607, |
|
"loss": 2.7146, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -39.020240783691406, |
|
"rewards/margins": 7.360406875610352, |
|
"rewards/rejected": -46.38064956665039, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.7803223070398643, |
|
"grad_norm": 121.75335598450164, |
|
"learning_rate": 1.1216637211483005e-07, |
|
"logits/chosen": -0.9865887761116028, |
|
"logits/rejected": -1.0157550573349, |
|
"logps/chosen": -3.750114679336548, |
|
"logps/rejected": -4.309493064880371, |
|
"loss": 2.8548, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -37.50114059448242, |
|
"rewards/margins": 5.593784332275391, |
|
"rewards/rejected": -43.094932556152344, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7825841108283856, |
|
"grad_norm": 136.96698465160594, |
|
"learning_rate": 1.0997738332451936e-07, |
|
"logits/chosen": -0.9917511343955994, |
|
"logits/rejected": -1.0070972442626953, |
|
"logps/chosen": -4.089888572692871, |
|
"logps/rejected": -4.696536064147949, |
|
"loss": 2.6429, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -40.898887634277344, |
|
"rewards/margins": 6.066469192504883, |
|
"rewards/rejected": -46.96535873413086, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.784845914616907, |
|
"grad_norm": 131.29458632017975, |
|
"learning_rate": 1.0780655589274031e-07, |
|
"logits/chosen": -1.059531331062317, |
|
"logits/rejected": -1.0492100715637207, |
|
"logps/chosen": -3.8942158222198486, |
|
"logps/rejected": -4.510371685028076, |
|
"loss": 2.3025, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -38.94215774536133, |
|
"rewards/margins": 6.161560535430908, |
|
"rewards/rejected": -45.103721618652344, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.7871077184054284, |
|
"grad_norm": 142.84107622082357, |
|
"learning_rate": 1.056540257577712e-07, |
|
"logits/chosen": -0.983575165271759, |
|
"logits/rejected": -1.002815842628479, |
|
"logps/chosen": -4.463508129119873, |
|
"logps/rejected": -5.253409385681152, |
|
"loss": 1.9854, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -44.635074615478516, |
|
"rewards/margins": 7.899021625518799, |
|
"rewards/rejected": -52.534095764160156, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.7893695221939496, |
|
"grad_norm": 146.34376837206838, |
|
"learning_rate": 1.0351992771210554e-07, |
|
"logits/chosen": -0.9950528144836426, |
|
"logits/rejected": -1.0003957748413086, |
|
"logps/chosen": -3.912632942199707, |
|
"logps/rejected": -4.543516635894775, |
|
"loss": 2.7445, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -39.1263313293457, |
|
"rewards/margins": 6.308837890625, |
|
"rewards/rejected": -45.43516540527344, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.791631325982471, |
|
"grad_norm": 153.38059619748054, |
|
"learning_rate": 1.0140439539400953e-07, |
|
"logits/chosen": -0.9723285436630249, |
|
"logits/rejected": -1.006958246231079, |
|
"logps/chosen": -3.9608702659606934, |
|
"logps/rejected": -4.5546956062316895, |
|
"loss": 3.0106, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -39.608699798583984, |
|
"rewards/margins": 5.938253402709961, |
|
"rewards/rejected": -45.54695510864258, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7938931297709924, |
|
"grad_norm": 124.90800941340382, |
|
"learning_rate": 9.930756127915488e-08, |
|
"logits/chosen": -0.9936292767524719, |
|
"logits/rejected": -1.0269596576690674, |
|
"logps/chosen": -3.848879337310791, |
|
"logps/rejected": -4.502224445343018, |
|
"loss": 2.5, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -38.488792419433594, |
|
"rewards/margins": 6.53345251083374, |
|
"rewards/rejected": -45.022247314453125, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.7961549335595137, |
|
"grad_norm": 154.38244112243044, |
|
"learning_rate": 9.722955667232242e-08, |
|
"logits/chosen": -1.054602861404419, |
|
"logits/rejected": -1.071683406829834, |
|
"logps/chosen": -4.191054344177246, |
|
"logps/rejected": -4.7031779289245605, |
|
"loss": 3.0974, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -41.91053771972656, |
|
"rewards/margins": 5.121236324310303, |
|
"rewards/rejected": -47.03177261352539, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.7984167373480351, |
|
"grad_norm": 132.92438904995544, |
|
"learning_rate": 9.517051169918016e-08, |
|
"logits/chosen": -1.0513854026794434, |
|
"logits/rejected": -1.054985523223877, |
|
"logps/chosen": -3.903560161590576, |
|
"logps/rejected": -4.443456172943115, |
|
"loss": 3.2099, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -39.03560256958008, |
|
"rewards/margins": 5.398958683013916, |
|
"rewards/rejected": -44.43456268310547, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.8006785411365565, |
|
"grad_norm": 124.21259625496651, |
|
"learning_rate": 9.313055529813412e-08, |
|
"logits/chosen": -0.9662154912948608, |
|
"logits/rejected": -0.9941588044166565, |
|
"logps/chosen": -3.924314022064209, |
|
"logps/rejected": -4.602962017059326, |
|
"loss": 2.4673, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -39.24313735961914, |
|
"rewards/margins": 6.786487102508545, |
|
"rewards/rejected": -46.029624938964844, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.8029403449250777, |
|
"grad_norm": 179.47089863851778, |
|
"learning_rate": 9.110981521225532e-08, |
|
"logits/chosen": -1.0300958156585693, |
|
"logits/rejected": -1.064093828201294, |
|
"logps/chosen": -3.959928035736084, |
|
"logps/rejected": -4.51727819442749, |
|
"loss": 3.009, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -39.599281311035156, |
|
"rewards/margins": 5.573493957519531, |
|
"rewards/rejected": -45.17278289794922, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.8052021487135991, |
|
"grad_norm": 136.0902085837272, |
|
"learning_rate": 8.910841798127884e-08, |
|
"logits/chosen": -0.9911931753158569, |
|
"logits/rejected": -1.0163114070892334, |
|
"logps/chosen": -4.0560173988342285, |
|
"logps/rejected": -4.719514846801758, |
|
"loss": 2.5832, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -40.56017303466797, |
|
"rewards/margins": 6.634976863861084, |
|
"rewards/rejected": -47.19514846801758, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.8074639525021204, |
|
"grad_norm": 132.18080891905515, |
|
"learning_rate": 8.712648893368139e-08, |
|
"logits/chosen": -1.0100892782211304, |
|
"logits/rejected": -1.0391135215759277, |
|
"logps/chosen": -3.988699197769165, |
|
"logps/rejected": -4.807621002197266, |
|
"loss": 2.3239, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -39.88698959350586, |
|
"rewards/margins": 8.189220428466797, |
|
"rewards/rejected": -48.076210021972656, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.8097257562906418, |
|
"grad_norm": 130.44096460023937, |
|
"learning_rate": 8.516415217883186e-08, |
|
"logits/chosen": -0.9774779081344604, |
|
"logits/rejected": -0.9853562116622925, |
|
"logps/chosen": -3.8554654121398926, |
|
"logps/rejected": -4.613882064819336, |
|
"loss": 2.4051, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -38.554656982421875, |
|
"rewards/margins": 7.584163665771484, |
|
"rewards/rejected": -46.138816833496094, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.8119875600791632, |
|
"grad_norm": 153.29331364976971, |
|
"learning_rate": 8.32215305992209e-08, |
|
"logits/chosen": -1.0638753175735474, |
|
"logits/rejected": -1.078364610671997, |
|
"logps/chosen": -3.8455119132995605, |
|
"logps/rejected": -4.452728271484375, |
|
"loss": 2.9144, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -38.455116271972656, |
|
"rewards/margins": 6.072165489196777, |
|
"rewards/rejected": -44.52728271484375, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.8142493638676844, |
|
"grad_norm": 135.16199186349922, |
|
"learning_rate": 8.129874584276448e-08, |
|
"logits/chosen": -1.0097771883010864, |
|
"logits/rejected": -1.0268316268920898, |
|
"logps/chosen": -3.8960204124450684, |
|
"logps/rejected": -4.635379791259766, |
|
"loss": 2.171, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -38.960205078125, |
|
"rewards/margins": 7.393592834472656, |
|
"rewards/rejected": -46.353790283203125, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8165111676562058, |
|
"grad_norm": 152.76637850843736, |
|
"learning_rate": 7.939591831518746e-08, |
|
"logits/chosen": -1.0360825061798096, |
|
"logits/rejected": -1.0462737083435059, |
|
"logps/chosen": -3.917339563369751, |
|
"logps/rejected": -4.442863464355469, |
|
"loss": 2.4861, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -39.173397064208984, |
|
"rewards/margins": 5.2552361488342285, |
|
"rewards/rejected": -44.42863464355469, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.8187729714447272, |
|
"grad_norm": 148.10930386230368, |
|
"learning_rate": 7.751316717248304e-08, |
|
"logits/chosen": -1.016704797744751, |
|
"logits/rejected": -1.0399643182754517, |
|
"logps/chosen": -4.277474880218506, |
|
"logps/rejected": -5.120466232299805, |
|
"loss": 2.5569, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -42.774749755859375, |
|
"rewards/margins": 8.429914474487305, |
|
"rewards/rejected": -51.20466232299805, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.8210347752332485, |
|
"grad_norm": 140.6170950732965, |
|
"learning_rate": 7.565061031345142e-08, |
|
"logits/chosen": -0.9965285062789917, |
|
"logits/rejected": -1.018410325050354, |
|
"logps/chosen": -4.43507194519043, |
|
"logps/rejected": -5.124161243438721, |
|
"loss": 2.5493, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -44.35072708129883, |
|
"rewards/margins": 6.890886306762695, |
|
"rewards/rejected": -51.24161148071289, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.8232965790217699, |
|
"grad_norm": 154.50546969387295, |
|
"learning_rate": 7.380836437231686e-08, |
|
"logits/chosen": -1.0115327835083008, |
|
"logits/rejected": -1.012681245803833, |
|
"logps/chosen": -3.950958251953125, |
|
"logps/rejected": -4.639514923095703, |
|
"loss": 2.4455, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -39.50958251953125, |
|
"rewards/margins": 6.885568141937256, |
|
"rewards/rejected": -46.39514923095703, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.8255583828102913, |
|
"grad_norm": 121.79022691907734, |
|
"learning_rate": 7.198654471142371e-08, |
|
"logits/chosen": -1.0323009490966797, |
|
"logits/rejected": -1.0337766408920288, |
|
"logps/chosen": -4.018780708312988, |
|
"logps/rejected": -4.8869147300720215, |
|
"loss": 2.0292, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -40.18780517578125, |
|
"rewards/margins": 8.681337356567383, |
|
"rewards/rejected": -48.869144439697266, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.8278201865988125, |
|
"grad_norm": 162.4036903117769, |
|
"learning_rate": 7.01852654140132e-08, |
|
"logits/chosen": -1.0495915412902832, |
|
"logits/rejected": -1.0743041038513184, |
|
"logps/chosen": -4.471255779266357, |
|
"logps/rejected": -5.142270565032959, |
|
"loss": 2.72, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -44.712554931640625, |
|
"rewards/margins": 6.710155487060547, |
|
"rewards/rejected": -51.42271041870117, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.8300819903873339, |
|
"grad_norm": 139.11619645731545, |
|
"learning_rate": 6.840463927707833e-08, |
|
"logits/chosen": -1.0255920886993408, |
|
"logits/rejected": -1.0401992797851562, |
|
"logps/chosen": -4.438215255737305, |
|
"logps/rejected": -5.0541253089904785, |
|
"loss": 2.5918, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -44.38215637207031, |
|
"rewards/margins": 6.159101963043213, |
|
"rewards/rejected": -50.541259765625, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.8323437941758552, |
|
"grad_norm": 130.09438336462927, |
|
"learning_rate": 6.664477780430138e-08, |
|
"logits/chosen": -1.0260366201400757, |
|
"logits/rejected": -1.042960286140442, |
|
"logps/chosen": -4.194912910461426, |
|
"logps/rejected": -4.732971668243408, |
|
"loss": 2.8488, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -41.949127197265625, |
|
"rewards/margins": 5.380587100982666, |
|
"rewards/rejected": -47.329715728759766, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.8346055979643766, |
|
"grad_norm": 179.70180806771685, |
|
"learning_rate": 6.49057911990711e-08, |
|
"logits/chosen": -0.9928116798400879, |
|
"logits/rejected": -1.0055856704711914, |
|
"logps/chosen": -4.191740989685059, |
|
"logps/rejected": -4.796926975250244, |
|
"loss": 2.7647, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -41.91741180419922, |
|
"rewards/margins": 6.051861763000488, |
|
"rewards/rejected": -47.969268798828125, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.836867401752898, |
|
"grad_norm": 149.49310718136232, |
|
"learning_rate": 6.318778835758189e-08, |
|
"logits/chosen": -1.0360987186431885, |
|
"logits/rejected": -1.054109811782837, |
|
"logps/chosen": -4.3690643310546875, |
|
"logps/rejected": -5.046197414398193, |
|
"loss": 2.0524, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -43.690635681152344, |
|
"rewards/margins": 6.7713398933410645, |
|
"rewards/rejected": -50.461978912353516, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8391292055414192, |
|
"grad_norm": 159.83555289195797, |
|
"learning_rate": 6.149087686201433e-08, |
|
"logits/chosen": -1.0573538541793823, |
|
"logits/rejected": -1.0731968879699707, |
|
"logps/chosen": -4.134281158447266, |
|
"logps/rejected": -4.711938858032227, |
|
"loss": 3.5669, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -41.34281539916992, |
|
"rewards/margins": 5.7765703201293945, |
|
"rewards/rejected": -47.119384765625, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.8413910093299406, |
|
"grad_norm": 150.5855978421956, |
|
"learning_rate": 5.98151629737988e-08, |
|
"logits/chosen": -1.038978099822998, |
|
"logits/rejected": -1.0429781675338745, |
|
"logps/chosen": -4.25129508972168, |
|
"logps/rejected": -5.000112056732178, |
|
"loss": 2.3346, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -42.512943267822266, |
|
"rewards/margins": 7.488168716430664, |
|
"rewards/rejected": -50.00111389160156, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.843652813118462, |
|
"grad_norm": 124.21372308680823, |
|
"learning_rate": 5.816075162696097e-08, |
|
"logits/chosen": -1.064916729927063, |
|
"logits/rejected": -1.099676251411438, |
|
"logps/chosen": -4.10329532623291, |
|
"logps/rejected": -4.680285930633545, |
|
"loss": 2.4539, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -41.032955169677734, |
|
"rewards/margins": 5.769906044006348, |
|
"rewards/rejected": -46.8028564453125, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.8459146169069833, |
|
"grad_norm": 126.22142568488516, |
|
"learning_rate": 5.6527746421551046e-08, |
|
"logits/chosen": -1.015106439590454, |
|
"logits/rejected": -1.009599208831787, |
|
"logps/chosen": -4.1230268478393555, |
|
"logps/rejected": -4.814833641052246, |
|
"loss": 2.4592, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -41.23027038574219, |
|
"rewards/margins": 6.918064594268799, |
|
"rewards/rejected": -48.14833450317383, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.8481764206955047, |
|
"grad_norm": 147.2128802344832, |
|
"learning_rate": 5.4916249617156064e-08, |
|
"logits/chosen": -1.013479232788086, |
|
"logits/rejected": -1.0405837297439575, |
|
"logps/chosen": -3.972564935684204, |
|
"logps/rejected": -4.571763515472412, |
|
"loss": 2.7557, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -39.725650787353516, |
|
"rewards/margins": 5.991988182067871, |
|
"rewards/rejected": -45.71763610839844, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8504382244840261, |
|
"grad_norm": 161.4912221497581, |
|
"learning_rate": 5.332636212649646e-08, |
|
"logits/chosen": -1.0066248178482056, |
|
"logits/rejected": -1.0287178754806519, |
|
"logps/chosen": -4.124461650848389, |
|
"logps/rejected": -4.8116021156311035, |
|
"loss": 2.2421, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -41.24461364746094, |
|
"rewards/margins": 6.871405601501465, |
|
"rewards/rejected": -48.11602020263672, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.8527000282725473, |
|
"grad_norm": 160.8395714187991, |
|
"learning_rate": 5.17581835091069e-08, |
|
"logits/chosen": -1.0193694829940796, |
|
"logits/rejected": -1.0548650026321411, |
|
"logps/chosen": -4.25200891494751, |
|
"logps/rejected": -4.873752593994141, |
|
"loss": 3.0174, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -42.520084381103516, |
|
"rewards/margins": 6.217440605163574, |
|
"rewards/rejected": -48.73752975463867, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.8549618320610687, |
|
"grad_norm": 138.12661901206238, |
|
"learning_rate": 5.02118119651016e-08, |
|
"logits/chosen": -1.0508525371551514, |
|
"logits/rejected": -1.0576024055480957, |
|
"logps/chosen": -4.178540229797363, |
|
"logps/rejected": -4.849919319152832, |
|
"loss": 2.749, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -41.785404205322266, |
|
"rewards/margins": 6.713788032531738, |
|
"rewards/rejected": -48.49919128417969, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.85722363584959, |
|
"grad_norm": 165.96686025369587, |
|
"learning_rate": 4.868734432902526e-08, |
|
"logits/chosen": -1.1101027727127075, |
|
"logits/rejected": -1.1102192401885986, |
|
"logps/chosen": -4.186249256134033, |
|
"logps/rejected": -4.9721903800964355, |
|
"loss": 3.1448, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -41.86249542236328, |
|
"rewards/margins": 7.859411239624023, |
|
"rewards/rejected": -49.721900939941406, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.8594854396381114, |
|
"grad_norm": 144.04636682752445, |
|
"learning_rate": 4.7184876063789134e-08, |
|
"logits/chosen": -1.0436012744903564, |
|
"logits/rejected": -1.0544618368148804, |
|
"logps/chosen": -3.689404010772705, |
|
"logps/rejected": -4.364074230194092, |
|
"loss": 2.4507, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -36.894039154052734, |
|
"rewards/margins": 6.746699333190918, |
|
"rewards/rejected": -43.64073944091797, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8617472434266328, |
|
"grad_norm": 131.27661299667287, |
|
"learning_rate": 4.570450125469314e-08, |
|
"logits/chosen": -1.0156922340393066, |
|
"logits/rejected": -1.0324246883392334, |
|
"logps/chosen": -4.311813831329346, |
|
"logps/rejected": -5.098617076873779, |
|
"loss": 2.3979, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -43.118141174316406, |
|
"rewards/margins": 7.868029594421387, |
|
"rewards/rejected": -50.986167907714844, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.864009047215154, |
|
"grad_norm": 161.93080890480886, |
|
"learning_rate": 4.424631260353378e-08, |
|
"logits/chosen": -1.0678380727767944, |
|
"logits/rejected": -1.0773905515670776, |
|
"logps/chosen": -4.031643390655518, |
|
"logps/rejected": -4.666827201843262, |
|
"loss": 2.6249, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -40.31643295288086, |
|
"rewards/margins": 6.351841449737549, |
|
"rewards/rejected": -46.66827392578125, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.8662708510036754, |
|
"grad_norm": 127.36488209439509, |
|
"learning_rate": 4.281040142280008e-08, |
|
"logits/chosen": -1.090057134628296, |
|
"logits/rejected": -1.100411295890808, |
|
"logps/chosen": -3.895373582839966, |
|
"logps/rejected": -4.652320861816406, |
|
"loss": 1.9867, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -38.9537353515625, |
|
"rewards/margins": 7.569474697113037, |
|
"rewards/rejected": -46.52320861816406, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.8685326547921968, |
|
"grad_norm": 160.26273709564055, |
|
"learning_rate": 4.1396857629954286e-08, |
|
"logits/chosen": -1.0430915355682373, |
|
"logits/rejected": -1.0619637966156006, |
|
"logps/chosen": -4.50911808013916, |
|
"logps/rejected": -5.252225399017334, |
|
"loss": 2.6351, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -45.09117889404297, |
|
"rewards/margins": 7.431077003479004, |
|
"rewards/rejected": -52.52225875854492, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.8707944585807181, |
|
"grad_norm": 144.9016539338167, |
|
"learning_rate": 4.000576974180232e-08, |
|
"logits/chosen": -1.0110925436019897, |
|
"logits/rejected": -1.0365351438522339, |
|
"logps/chosen": -4.012753009796143, |
|
"logps/rejected": -4.618809700012207, |
|
"loss": 2.9777, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -40.12752914428711, |
|
"rewards/margins": 6.060567378997803, |
|
"rewards/rejected": -46.1880989074707, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8730562623692395, |
|
"grad_norm": 166.9766215023394, |
|
"learning_rate": 3.8637224868950066e-08, |
|
"logits/chosen": -1.0025962591171265, |
|
"logits/rejected": -1.008257269859314, |
|
"logps/chosen": -4.001153469085693, |
|
"logps/rejected": -4.589109897613525, |
|
"loss": 2.7268, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -40.011531829833984, |
|
"rewards/margins": 5.879566669464111, |
|
"rewards/rejected": -45.8911018371582, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.8753180661577609, |
|
"grad_norm": 135.00904920288568, |
|
"learning_rate": 3.729130871034885e-08, |
|
"logits/chosen": -1.0388230085372925, |
|
"logits/rejected": -1.0440409183502197, |
|
"logps/chosen": -4.16123104095459, |
|
"logps/rejected": -4.8505730628967285, |
|
"loss": 2.5713, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -41.612308502197266, |
|
"rewards/margins": 6.893416881561279, |
|
"rewards/rejected": -48.505733489990234, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.8775798699462821, |
|
"grad_norm": 152.39451592755424, |
|
"learning_rate": 3.596810554792888e-08, |
|
"logits/chosen": -1.041504144668579, |
|
"logits/rejected": -1.059584140777588, |
|
"logps/chosen": -4.13081169128418, |
|
"logps/rejected": -4.7521257400512695, |
|
"loss": 2.9434, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -41.30812072753906, |
|
"rewards/margins": 6.213135719299316, |
|
"rewards/rejected": -47.5212516784668, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.8798416737348035, |
|
"grad_norm": 133.10218079382537, |
|
"learning_rate": 3.466769824132116e-08, |
|
"logits/chosen": -1.0184890031814575, |
|
"logits/rejected": -1.0346693992614746, |
|
"logps/chosen": -4.073379039764404, |
|
"logps/rejected": -4.787722587585449, |
|
"loss": 2.1193, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -40.733787536621094, |
|
"rewards/margins": 7.143433094024658, |
|
"rewards/rejected": -47.87722396850586, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.8821034775233249, |
|
"grad_norm": 140.51203203229483, |
|
"learning_rate": 3.339016822266925e-08, |
|
"logits/chosen": -0.9923376441001892, |
|
"logits/rejected": -1.0220943689346313, |
|
"logps/chosen": -4.294771671295166, |
|
"logps/rejected": -5.1391282081604, |
|
"loss": 1.7011, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -42.94771957397461, |
|
"rewards/margins": 8.443564414978027, |
|
"rewards/rejected": -51.39128112792969, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8843652813118462, |
|
"grad_norm": 144.39270574029896, |
|
"learning_rate": 3.213559549152958e-08, |
|
"logits/chosen": -1.0516475439071655, |
|
"logits/rejected": -1.0599684715270996, |
|
"logps/chosen": -3.9776928424835205, |
|
"logps/rejected": -4.740298271179199, |
|
"loss": 2.5164, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -39.77692413330078, |
|
"rewards/margins": 7.626054763793945, |
|
"rewards/rejected": -47.402984619140625, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.8866270851003676, |
|
"grad_norm": 143.12851378448838, |
|
"learning_rate": 3.090405860986203e-08, |
|
"logits/chosen": -1.0642292499542236, |
|
"logits/rejected": -1.1000754833221436, |
|
"logps/chosen": -4.253165245056152, |
|
"logps/rejected": -5.0864386558532715, |
|
"loss": 2.4326, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -42.53165054321289, |
|
"rewards/margins": 8.332735061645508, |
|
"rewards/rejected": -50.86438751220703, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 133.84734565217644, |
|
"learning_rate": 2.9695634697110315e-08, |
|
"logits/chosen": -0.9916976690292358, |
|
"logits/rejected": -1.0187020301818848, |
|
"logps/chosen": -4.004427909851074, |
|
"logps/rejected": -4.804125785827637, |
|
"loss": 2.6551, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -40.04427719116211, |
|
"rewards/margins": 7.9969801902771, |
|
"rewards/rejected": -48.041255950927734, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.8911506926774102, |
|
"grad_norm": 151.41243641809086, |
|
"learning_rate": 2.8510399425372766e-08, |
|
"logits/chosen": -1.0137168169021606, |
|
"logits/rejected": -1.0167896747589111, |
|
"logps/chosen": -4.214908123016357, |
|
"logps/rejected": -4.8761210441589355, |
|
"loss": 2.6268, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -42.14908218383789, |
|
"rewards/margins": 6.612125873565674, |
|
"rewards/rejected": -48.76121139526367, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.8934124964659316, |
|
"grad_norm": 176.36974589271762, |
|
"learning_rate": 2.734842701466329e-08, |
|
"logits/chosen": -1.0456002950668335, |
|
"logits/rejected": -1.030018925666809, |
|
"logps/chosen": -4.534852027893066, |
|
"logps/rejected": -5.177453517913818, |
|
"loss": 2.7321, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -45.3485221862793, |
|
"rewards/margins": 6.4260101318359375, |
|
"rewards/rejected": -51.7745361328125, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8956743002544529, |
|
"grad_norm": 136.05060209221426, |
|
"learning_rate": 2.6209790228264438e-08, |
|
"logits/chosen": -1.0385910272598267, |
|
"logits/rejected": -1.0548242330551147, |
|
"logps/chosen": -3.89418888092041, |
|
"logps/rejected": -4.614352703094482, |
|
"loss": 2.1896, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -38.94188690185547, |
|
"rewards/margins": 7.201638698577881, |
|
"rewards/rejected": -46.14352798461914, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.8979361040429743, |
|
"grad_norm": 137.77899483207824, |
|
"learning_rate": 2.5094560368170305e-08, |
|
"logits/chosen": -1.0086749792099, |
|
"logits/rejected": -1.0264006853103638, |
|
"logps/chosen": -4.3441243171691895, |
|
"logps/rejected": -4.981533050537109, |
|
"loss": 2.5835, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -43.44124221801758, |
|
"rewards/margins": 6.374091148376465, |
|
"rewards/rejected": -49.81533432006836, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.9001979078314957, |
|
"grad_norm": 139.85039508236764, |
|
"learning_rate": 2.4002807270621893e-08, |
|
"logits/chosen": -1.0533496141433716, |
|
"logits/rejected": -1.0745186805725098, |
|
"logps/chosen": -4.139496326446533, |
|
"logps/rejected": -4.774450302124023, |
|
"loss": 2.5903, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -41.394962310791016, |
|
"rewards/margins": 6.349542617797852, |
|
"rewards/rejected": -47.7445068359375, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.9024597116200169, |
|
"grad_norm": 140.65962937425047, |
|
"learning_rate": 2.293459930173354e-08, |
|
"logits/chosen": -1.063666820526123, |
|
"logits/rejected": -1.0821877717971802, |
|
"logps/chosen": -4.26739501953125, |
|
"logps/rejected": -4.949607849121094, |
|
"loss": 2.552, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -42.6739501953125, |
|
"rewards/margins": 6.8221306800842285, |
|
"rewards/rejected": -49.49607849121094, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.9047215154085383, |
|
"grad_norm": 162.51821792297238, |
|
"learning_rate": 2.189000335321256e-08, |
|
"logits/chosen": -0.9957281351089478, |
|
"logits/rejected": -1.0204914808273315, |
|
"logps/chosen": -4.09246826171875, |
|
"logps/rejected": -4.667595386505127, |
|
"loss": 3.0649, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -40.9246826171875, |
|
"rewards/margins": 5.751271724700928, |
|
"rewards/rejected": -46.67595291137695, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9069833191970597, |
|
"grad_norm": 163.77450188740653, |
|
"learning_rate": 2.086908483816954e-08, |
|
"logits/chosen": -1.045667052268982, |
|
"logits/rejected": -1.048872470855713, |
|
"logps/chosen": -4.371889114379883, |
|
"logps/rejected": -4.995105743408203, |
|
"loss": 2.538, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -43.718894958496094, |
|
"rewards/margins": 6.232160568237305, |
|
"rewards/rejected": -49.95105743408203, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.909245122985581, |
|
"grad_norm": 137.1467300025307, |
|
"learning_rate": 1.9871907687022717e-08, |
|
"logits/chosen": -1.021450161933899, |
|
"logits/rejected": -1.0458714962005615, |
|
"logps/chosen": -4.020608901977539, |
|
"logps/rejected": -4.647358417510986, |
|
"loss": 2.6527, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -40.206092834472656, |
|
"rewards/margins": 6.267497539520264, |
|
"rewards/rejected": -46.47358322143555, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.9115069267741024, |
|
"grad_norm": 123.97603513782903, |
|
"learning_rate": 1.889853434349451e-08, |
|
"logits/chosen": -1.0321791172027588, |
|
"logits/rejected": -1.0512281656265259, |
|
"logps/chosen": -3.998810291290283, |
|
"logps/rejected": -4.7578511238098145, |
|
"loss": 2.4665, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -39.988101959228516, |
|
"rewards/margins": 7.5904083251953125, |
|
"rewards/rejected": -47.57851028442383, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.9137687305626236, |
|
"grad_norm": 157.09942325279997, |
|
"learning_rate": 1.7949025760701164e-08, |
|
"logits/chosen": -1.035341501235962, |
|
"logits/rejected": -1.0430874824523926, |
|
"logps/chosen": -4.379273414611816, |
|
"logps/rejected": -4.940178871154785, |
|
"loss": 2.6808, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -43.7927360534668, |
|
"rewards/margins": 5.609053611755371, |
|
"rewards/rejected": -49.40178680419922, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.916030534351145, |
|
"grad_norm": 140.82151938023563, |
|
"learning_rate": 1.7023441397336023e-08, |
|
"logits/chosen": -1.0455622673034668, |
|
"logits/rejected": -1.0693451166152954, |
|
"logps/chosen": -4.000870704650879, |
|
"logps/rejected": -4.7458295822143555, |
|
"loss": 2.3305, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -40.00870895385742, |
|
"rewards/margins": 7.449586868286133, |
|
"rewards/rejected": -47.45829772949219, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9182923381396664, |
|
"grad_norm": 131.6986145998449, |
|
"learning_rate": 1.6121839213945854e-08, |
|
"logits/chosen": -1.0103528499603271, |
|
"logits/rejected": -1.0649735927581787, |
|
"logps/chosen": -4.078824996948242, |
|
"logps/rejected": -4.82664680480957, |
|
"loss": 2.7537, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -40.78825378417969, |
|
"rewards/margins": 7.478220462799072, |
|
"rewards/rejected": -48.2664680480957, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.9205541419281877, |
|
"grad_norm": 142.46269327778631, |
|
"learning_rate": 1.5244275669301777e-08, |
|
"logits/chosen": -1.0364878177642822, |
|
"logits/rejected": -1.066939115524292, |
|
"logps/chosen": -4.2033491134643555, |
|
"logps/rejected": -4.9280171394348145, |
|
"loss": 2.4872, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -42.03349304199219, |
|
"rewards/margins": 7.246674537658691, |
|
"rewards/rejected": -49.28017044067383, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.9228159457167091, |
|
"grad_norm": 130.32810490131635, |
|
"learning_rate": 1.4390805716863398e-08, |
|
"logits/chosen": -1.0064778327941895, |
|
"logits/rejected": -1.0294207334518433, |
|
"logps/chosen": -4.072377681732178, |
|
"logps/rejected": -4.647193431854248, |
|
"loss": 3.0416, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -40.72377395629883, |
|
"rewards/margins": 5.748159408569336, |
|
"rewards/rejected": -46.47193145751953, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.9250777495052305, |
|
"grad_norm": 130.1100780335673, |
|
"learning_rate": 1.3561482801337908e-08, |
|
"logits/chosen": -0.9784885048866272, |
|
"logits/rejected": -1.0064728260040283, |
|
"logps/chosen": -4.038685321807861, |
|
"logps/rejected": -4.811799049377441, |
|
"loss": 2.7997, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -40.38685607910156, |
|
"rewards/margins": 7.73114013671875, |
|
"rewards/rejected": -48.11799621582031, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.9273395532937517, |
|
"grad_norm": 141.3638914034757, |
|
"learning_rate": 1.2756358855332904e-08, |
|
"logits/chosen": -1.0268959999084473, |
|
"logits/rejected": -1.0494606494903564, |
|
"logps/chosen": -4.104990482330322, |
|
"logps/rejected": -4.68447208404541, |
|
"loss": 3.2425, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -41.04990768432617, |
|
"rewards/margins": 5.794814586639404, |
|
"rewards/rejected": -46.844722747802734, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9296013570822731, |
|
"grad_norm": 180.11716995625815, |
|
"learning_rate": 1.1975484296105154e-08, |
|
"logits/chosen": -1.0106980800628662, |
|
"logits/rejected": -1.0233476161956787, |
|
"logps/chosen": -4.163227081298828, |
|
"logps/rejected": -4.8278961181640625, |
|
"loss": 3.008, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -41.632266998291016, |
|
"rewards/margins": 6.646695613861084, |
|
"rewards/rejected": -48.27896499633789, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.9318631608707945, |
|
"grad_norm": 161.87689449482465, |
|
"learning_rate": 1.1218908022402374e-08, |
|
"logits/chosen": -1.0240004062652588, |
|
"logits/rejected": -1.0363930463790894, |
|
"logps/chosen": -3.9222424030303955, |
|
"logps/rejected": -4.67780876159668, |
|
"loss": 2.3157, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -39.2224235534668, |
|
"rewards/margins": 7.555658340454102, |
|
"rewards/rejected": -46.778076171875, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.9341249646593158, |
|
"grad_norm": 143.6381951292034, |
|
"learning_rate": 1.0486677411402079e-08, |
|
"logits/chosen": -1.1034907102584839, |
|
"logits/rejected": -1.1017773151397705, |
|
"logps/chosen": -4.310455322265625, |
|
"logps/rejected": -5.140730381011963, |
|
"loss": 2.6336, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -43.10455322265625, |
|
"rewards/margins": 8.302750587463379, |
|
"rewards/rejected": -51.40729904174805, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.9363867684478372, |
|
"grad_norm": 140.42812592146282, |
|
"learning_rate": 9.778838315744353e-09, |
|
"logits/chosen": -1.048314094543457, |
|
"logits/rejected": -1.0627115964889526, |
|
"logps/chosen": -4.331496238708496, |
|
"logps/rejected": -5.030874252319336, |
|
"loss": 2.5785, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -43.31496047973633, |
|
"rewards/margins": 6.993783950805664, |
|
"rewards/rejected": -50.308746337890625, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.9386485722363584, |
|
"grad_norm": 351.7697826217826, |
|
"learning_rate": 9.095435060660595e-09, |
|
"logits/chosen": -1.0014101266860962, |
|
"logits/rejected": -1.0172253847122192, |
|
"logps/chosen": -4.207537651062012, |
|
"logps/rejected": -4.894926071166992, |
|
"loss": 2.8102, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -42.07537841796875, |
|
"rewards/margins": 6.873883247375488, |
|
"rewards/rejected": -48.94926071166992, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.9409103760248798, |
|
"grad_norm": 179.96209865058057, |
|
"learning_rate": 8.436510441197864e-09, |
|
"logits/chosen": -1.0143978595733643, |
|
"logits/rejected": -1.0375560522079468, |
|
"logps/chosen": -4.138617515563965, |
|
"logps/rejected": -4.8338751792907715, |
|
"loss": 2.8654, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -41.38618087768555, |
|
"rewards/margins": 6.952574729919434, |
|
"rewards/rejected": -48.33875274658203, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.9431721798134012, |
|
"grad_norm": 203.25622727123965, |
|
"learning_rate": 7.802105719539076e-09, |
|
"logits/chosen": -1.039108157157898, |
|
"logits/rejected": -1.0598160028457642, |
|
"logps/chosen": -4.279730796813965, |
|
"logps/rejected": -4.866627216339111, |
|
"loss": 3.1143, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -42.79730987548828, |
|
"rewards/margins": 5.868966102600098, |
|
"rewards/rejected": -48.6662712097168, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.9454339836019225, |
|
"grad_norm": 133.2202097597037, |
|
"learning_rate": 7.1922606224192e-09, |
|
"logits/chosen": -1.0421922206878662, |
|
"logits/rejected": -1.050264835357666, |
|
"logps/chosen": -4.236346244812012, |
|
"logps/rejected": -4.931118011474609, |
|
"loss": 2.4184, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -42.36345672607422, |
|
"rewards/margins": 6.947718143463135, |
|
"rewards/rejected": -49.311180114746094, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.9476957873904439, |
|
"grad_norm": 140.76593367995287, |
|
"learning_rate": 6.6070133386372906e-09, |
|
"logits/chosen": -1.028511643409729, |
|
"logits/rejected": -1.053504467010498, |
|
"logps/chosen": -4.154304504394531, |
|
"logps/rejected": -4.801187038421631, |
|
"loss": 2.7198, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -41.54304885864258, |
|
"rewards/margins": 6.468821048736572, |
|
"rewards/rejected": -48.011863708496094, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.9499575911789653, |
|
"grad_norm": 166.91435918950876, |
|
"learning_rate": 6.046400516665384e-09, |
|
"logits/chosen": -1.041597604751587, |
|
"logits/rejected": -1.0513004064559937, |
|
"logps/chosen": -4.086845397949219, |
|
"logps/rejected": -4.8092474937438965, |
|
"loss": 2.8818, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -40.86845397949219, |
|
"rewards/margins": 7.224020481109619, |
|
"rewards/rejected": -48.09246826171875, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9522193949674865, |
|
"grad_norm": 137.10007565839797, |
|
"learning_rate": 5.510457262353396e-09, |
|
"logits/chosen": -1.0585057735443115, |
|
"logits/rejected": -1.0793222188949585, |
|
"logps/chosen": -4.073187351226807, |
|
"logps/rejected": -4.68537712097168, |
|
"loss": 2.4793, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -40.73187255859375, |
|
"rewards/margins": 6.121899127960205, |
|
"rewards/rejected": -46.8537712097168, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.9544811987560079, |
|
"grad_norm": 142.9290850450619, |
|
"learning_rate": 4.9992171367309265e-09, |
|
"logits/chosen": -1.0340638160705566, |
|
"logits/rejected": -1.0444408655166626, |
|
"logps/chosen": -3.9371700286865234, |
|
"logps/rejected": -4.585000038146973, |
|
"loss": 2.3349, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -39.3717041015625, |
|
"rewards/margins": 6.478300094604492, |
|
"rewards/rejected": -45.849998474121094, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.9567430025445293, |
|
"grad_norm": 159.95768684423896, |
|
"learning_rate": 4.5127121539052955e-09, |
|
"logits/chosen": -1.0674660205841064, |
|
"logits/rejected": -1.083560824394226, |
|
"logps/chosen": -4.3573174476623535, |
|
"logps/rejected": -5.064486503601074, |
|
"loss": 2.5332, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -43.57317352294922, |
|
"rewards/margins": 7.07169771194458, |
|
"rewards/rejected": -50.64486312866211, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.9590048063330506, |
|
"grad_norm": 133.1800397752489, |
|
"learning_rate": 4.050972779057327e-09, |
|
"logits/chosen": -0.9619131684303284, |
|
"logits/rejected": -0.9836653470993042, |
|
"logps/chosen": -3.862929105758667, |
|
"logps/rejected": -4.547606468200684, |
|
"loss": 2.6159, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -38.62928771972656, |
|
"rewards/margins": 6.846776962280273, |
|
"rewards/rejected": -45.476070404052734, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.961266610121572, |
|
"grad_norm": 151.97121884898982, |
|
"learning_rate": 3.6140279265330477e-09, |
|
"logits/chosen": -1.0054244995117188, |
|
"logits/rejected": -1.0335164070129395, |
|
"logps/chosen": -4.238347053527832, |
|
"logps/rejected": -4.858821392059326, |
|
"loss": 2.7745, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -42.38346862792969, |
|
"rewards/margins": 6.204741477966309, |
|
"rewards/rejected": -48.58821105957031, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9635284139100933, |
|
"grad_norm": 133.89260556520827, |
|
"learning_rate": 3.2019049580335853e-09, |
|
"logits/chosen": -1.0441107749938965, |
|
"logits/rejected": -1.0574443340301514, |
|
"logps/chosen": -4.1341938972473145, |
|
"logps/rejected": -4.6629719734191895, |
|
"loss": 3.3466, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -41.34193420410156, |
|
"rewards/margins": 5.287779808044434, |
|
"rewards/rejected": -46.62971878051758, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.9657902176986146, |
|
"grad_norm": 136.44570395336234, |
|
"learning_rate": 2.814629680901337e-09, |
|
"logits/chosen": -1.0538722276687622, |
|
"logits/rejected": -1.082961082458496, |
|
"logps/chosen": -4.21182918548584, |
|
"logps/rejected": -4.85057258605957, |
|
"loss": 2.4631, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -42.1182975769043, |
|
"rewards/margins": 6.3874311447143555, |
|
"rewards/rejected": -48.50572967529297, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.968052021487136, |
|
"grad_norm": 155.16390948205506, |
|
"learning_rate": 2.4522263465041937e-09, |
|
"logits/chosen": -1.0136550664901733, |
|
"logits/rejected": -1.0397028923034668, |
|
"logps/chosen": -4.1881914138793945, |
|
"logps/rejected": -4.900773048400879, |
|
"loss": 2.2036, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -41.88191604614258, |
|
"rewards/margins": 7.12581729888916, |
|
"rewards/rejected": -49.007728576660156, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.9703138252756573, |
|
"grad_norm": 167.79707116620713, |
|
"learning_rate": 2.114717648716713e-09, |
|
"logits/chosen": -0.9971777200698853, |
|
"logits/rejected": -1.0160534381866455, |
|
"logps/chosen": -4.099374771118164, |
|
"logps/rejected": -4.867983341217041, |
|
"loss": 2.5572, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -40.993751525878906, |
|
"rewards/margins": 7.68608283996582, |
|
"rewards/rejected": -48.67983627319336, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.9725756290641787, |
|
"grad_norm": 154.44159972952917, |
|
"learning_rate": 1.802124722499121e-09, |
|
"logits/chosen": -1.0254014730453491, |
|
"logits/rejected": -1.025618076324463, |
|
"logps/chosen": -4.222236156463623, |
|
"logps/rejected": -4.938643455505371, |
|
"loss": 2.6617, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -42.22235870361328, |
|
"rewards/margins": 7.164079189300537, |
|
"rewards/rejected": -49.386436462402344, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9748374328527001, |
|
"grad_norm": 172.965776611708, |
|
"learning_rate": 1.5144671425737499e-09, |
|
"logits/chosen": -1.0241471529006958, |
|
"logits/rejected": -1.0436562299728394, |
|
"logps/chosen": -4.061724662780762, |
|
"logps/rejected": -4.724231243133545, |
|
"loss": 2.9197, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -40.61724853515625, |
|
"rewards/margins": 6.62506628036499, |
|
"rewards/rejected": -47.242313385009766, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.9770992366412213, |
|
"grad_norm": 140.1675403441603, |
|
"learning_rate": 1.251762922199484e-09, |
|
"logits/chosen": -0.955613911151886, |
|
"logits/rejected": -0.9706370234489441, |
|
"logps/chosen": -4.233730316162109, |
|
"logps/rejected": -4.96435022354126, |
|
"loss": 2.0341, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -42.33729934692383, |
|
"rewards/margins": 7.306206703186035, |
|
"rewards/rejected": -49.64350128173828, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.9793610404297427, |
|
"grad_norm": 141.09041534498175, |
|
"learning_rate": 1.0140285120433744e-09, |
|
"logits/chosen": -1.0407795906066895, |
|
"logits/rejected": -1.0590593814849854, |
|
"logps/chosen": -4.238687515258789, |
|
"logps/rejected": -4.867761611938477, |
|
"loss": 2.7513, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -42.386871337890625, |
|
"rewards/margins": 6.290744781494141, |
|
"rewards/rejected": -48.67761993408203, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.9816228442182641, |
|
"grad_norm": 137.0478600644848, |
|
"learning_rate": 8.012787991508396e-10, |
|
"logits/chosen": -0.9884441494941711, |
|
"logits/rejected": -1.0191917419433594, |
|
"logps/chosen": -4.157409191131592, |
|
"logps/rejected": -4.948687553405762, |
|
"loss": 2.8435, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -41.574092864990234, |
|
"rewards/margins": 7.912780284881592, |
|
"rewards/rejected": -49.48686981201172, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.9838846480067854, |
|
"grad_norm": 145.21121550974632, |
|
"learning_rate": 6.135271060133007e-10, |
|
"logits/chosen": -0.9823290109634399, |
|
"logits/rejected": -0.9896907210350037, |
|
"logps/chosen": -4.1680145263671875, |
|
"logps/rejected": -4.843698024749756, |
|
"loss": 2.5662, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -41.68014907836914, |
|
"rewards/margins": 6.756835460662842, |
|
"rewards/rejected": -48.436981201171875, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9861464517953068, |
|
"grad_norm": 132.44462548580682, |
|
"learning_rate": 4.50785189733871e-10, |
|
"logits/chosen": -0.9916869401931763, |
|
"logits/rejected": -1.0404301881790161, |
|
"logps/chosen": -3.9972922801971436, |
|
"logps/rejected": -4.756333351135254, |
|
"loss": 1.9819, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -39.97291946411133, |
|
"rewards/margins": 7.590409278869629, |
|
"rewards/rejected": -47.563331604003906, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.988408255583828, |
|
"grad_norm": 155.76964083931634, |
|
"learning_rate": 3.1306324129118935e-10, |
|
"logits/chosen": -1.003543496131897, |
|
"logits/rejected": -1.0272934436798096, |
|
"logps/chosen": -4.250460147857666, |
|
"logps/rejected": -4.891244888305664, |
|
"loss": 2.7662, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -42.50460433959961, |
|
"rewards/margins": 6.407839775085449, |
|
"rewards/rejected": -48.912445068359375, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.9906700593723494, |
|
"grad_norm": 143.82254177555114, |
|
"learning_rate": 2.003698849011748e-10, |
|
"logits/chosen": -1.058904767036438, |
|
"logits/rejected": -1.0692777633666992, |
|
"logps/chosen": -4.345088958740234, |
|
"logps/rejected": -4.9252610206604, |
|
"loss": 2.7411, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -43.45088577270508, |
|
"rewards/margins": 5.801725387573242, |
|
"rewards/rejected": -49.25261306762695, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.9929318631608708, |
|
"grad_norm": 144.96350837968777, |
|
"learning_rate": 1.1271217747714779e-10, |
|
"logits/chosen": -1.0338635444641113, |
|
"logits/rejected": -1.0704212188720703, |
|
"logps/chosen": -4.210494041442871, |
|
"logps/rejected": -4.849006652832031, |
|
"loss": 2.8569, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -42.104942321777344, |
|
"rewards/margins": 6.38512659072876, |
|
"rewards/rejected": -48.49006652832031, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.9951936669493922, |
|
"grad_norm": 126.89028026080639, |
|
"learning_rate": 5.0095608187739055e-11, |
|
"logits/chosen": -1.0046041011810303, |
|
"logits/rejected": -1.0206345319747925, |
|
"logps/chosen": -3.8843600749969482, |
|
"logps/rejected": -4.559191703796387, |
|
"loss": 2.5909, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -38.843605041503906, |
|
"rewards/margins": 6.748312473297119, |
|
"rewards/rejected": -45.591915130615234, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9974554707379135, |
|
"grad_norm": 154.00756589483856, |
|
"learning_rate": 1.2524098113209092e-11, |
|
"logits/chosen": -1.048063039779663, |
|
"logits/rejected": -1.0505824089050293, |
|
"logps/chosen": -4.204846382141113, |
|
"logps/rejected": -4.801551818847656, |
|
"loss": 3.2727, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -42.048465728759766, |
|
"rewards/margins": 5.967052936553955, |
|
"rewards/rejected": -48.01551818847656, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.9997172745264349, |
|
"grad_norm": 157.06146350653665, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.0390198230743408, |
|
"logits/rejected": -1.047530174255371, |
|
"logps/chosen": -4.086942672729492, |
|
"logps/rejected": -4.746278762817383, |
|
"loss": 2.8273, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -40.86943054199219, |
|
"rewards/margins": 6.593357563018799, |
|
"rewards/rejected": -47.46278381347656, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9997172745264349, |
|
"eval_logits/chosen": -1.0220762491226196, |
|
"eval_logits/rejected": -1.0401626825332642, |
|
"eval_logps/chosen": -4.172447204589844, |
|
"eval_logps/rejected": -4.85194206237793, |
|
"eval_loss": 2.5764122009277344, |
|
"eval_rewards/accuracies": 0.7931150197982788, |
|
"eval_rewards/chosen": -41.72446823120117, |
|
"eval_rewards/margins": 6.794951915740967, |
|
"eval_rewards/rejected": -48.51941680908203, |
|
"eval_runtime": 62.9581, |
|
"eval_samples_per_second": 47.317, |
|
"eval_steps_per_second": 2.97, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9997172745264349, |
|
"step": 442, |
|
"total_flos": 134366991482880.0, |
|
"train_loss": 3.350108276664941, |
|
"train_runtime": 3771.9425, |
|
"train_samples_per_second": 15.003, |
|
"train_steps_per_second": 0.117 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 442, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 134366991482880.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|