|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997172745264349, |
|
"eval_steps": 500, |
|
"global_step": 442, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0022618037885213456, |
|
"grad_norm": 115.54104008253113, |
|
"learning_rate": 1.7777777777777777e-08, |
|
"logits/chosen": -0.8356236219406128, |
|
"logits/rejected": -0.8113616108894348, |
|
"logps/chosen": -1.5973824262619019, |
|
"logps/rejected": -1.7205333709716797, |
|
"loss": 5.7448, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -15.973824501037598, |
|
"rewards/margins": 1.2315096855163574, |
|
"rewards/rejected": -17.205333709716797, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004523607577042691, |
|
"grad_norm": 74.3673874458241, |
|
"learning_rate": 3.5555555555555554e-08, |
|
"logits/chosen": -0.880168616771698, |
|
"logits/rejected": -0.8785539269447327, |
|
"logps/chosen": -1.676809310913086, |
|
"logps/rejected": -1.6232023239135742, |
|
"loss": 6.1494, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -16.76809310913086, |
|
"rewards/margins": -0.5360711812973022, |
|
"rewards/rejected": -16.23202133178711, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006785411365564037, |
|
"grad_norm": 103.24134323529078, |
|
"learning_rate": 5.333333333333333e-08, |
|
"logits/chosen": -0.8413803577423096, |
|
"logits/rejected": -0.8578035831451416, |
|
"logps/chosen": -1.679062843322754, |
|
"logps/rejected": -1.8749037981033325, |
|
"loss": 6.2783, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -16.79062843322754, |
|
"rewards/margins": 1.9584112167358398, |
|
"rewards/rejected": -18.749040603637695, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.009047215154085382, |
|
"grad_norm": 118.3827028625394, |
|
"learning_rate": 7.111111111111111e-08, |
|
"logits/chosen": -0.8771331906318665, |
|
"logits/rejected": -0.8562486171722412, |
|
"logps/chosen": -1.6714611053466797, |
|
"logps/rejected": -1.6346337795257568, |
|
"loss": 6.182, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -16.71461296081543, |
|
"rewards/margins": -0.368274062871933, |
|
"rewards/rejected": -16.346338272094727, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01130901894260673, |
|
"grad_norm": 80.57413510413119, |
|
"learning_rate": 8.888888888888888e-08, |
|
"logits/chosen": -0.8716552257537842, |
|
"logits/rejected": -0.8481063842773438, |
|
"logps/chosen": -1.5923399925231934, |
|
"logps/rejected": -1.6487252712249756, |
|
"loss": 5.4404, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -15.92340087890625, |
|
"rewards/margins": 0.5638511776924133, |
|
"rewards/rejected": -16.48725128173828, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.013570822731128074, |
|
"grad_norm": 87.54382641921318, |
|
"learning_rate": 1.0666666666666666e-07, |
|
"logits/chosen": -0.9280990958213806, |
|
"logits/rejected": -0.9075251221656799, |
|
"logps/chosen": -1.659511685371399, |
|
"logps/rejected": -1.6108341217041016, |
|
"loss": 6.2758, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -16.595117568969727, |
|
"rewards/margins": -0.48677870631217957, |
|
"rewards/rejected": -16.108339309692383, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01583262651964942, |
|
"grad_norm": 39.60022994902477, |
|
"learning_rate": 1.2444444444444443e-07, |
|
"logits/chosen": -0.8669524192810059, |
|
"logits/rejected": -0.8501181602478027, |
|
"logps/chosen": -1.4954458475112915, |
|
"logps/rejected": -1.8645169734954834, |
|
"loss": 4.1135, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -14.954458236694336, |
|
"rewards/margins": 3.6907100677490234, |
|
"rewards/rejected": -18.64516830444336, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.018094430308170765, |
|
"grad_norm": 97.49420569209839, |
|
"learning_rate": 1.4222222222222222e-07, |
|
"logits/chosen": -0.8323963284492493, |
|
"logits/rejected": -0.8386867046356201, |
|
"logps/chosen": -1.5902166366577148, |
|
"logps/rejected": -1.6685606241226196, |
|
"loss": 5.7368, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -15.902165412902832, |
|
"rewards/margins": 0.7834409475326538, |
|
"rewards/rejected": -16.685604095458984, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.020356234096692113, |
|
"grad_norm": 65.1210073833328, |
|
"learning_rate": 1.6e-07, |
|
"logits/chosen": -0.8797612190246582, |
|
"logits/rejected": -0.8696941137313843, |
|
"logps/chosen": -1.5322370529174805, |
|
"logps/rejected": -1.739979863166809, |
|
"loss": 5.4506, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -15.322370529174805, |
|
"rewards/margins": 2.0774283409118652, |
|
"rewards/rejected": -17.399799346923828, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02261803788521346, |
|
"grad_norm": 87.75880998151953, |
|
"learning_rate": 1.7777777777777776e-07, |
|
"logits/chosen": -0.8726012706756592, |
|
"logits/rejected": -0.8817980885505676, |
|
"logps/chosen": -1.692103385925293, |
|
"logps/rejected": -1.6219866275787354, |
|
"loss": 6.0529, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -16.92103385925293, |
|
"rewards/margins": -0.7011662721633911, |
|
"rewards/rejected": -16.219867706298828, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.024879841673734804, |
|
"grad_norm": 94.08875549981737, |
|
"learning_rate": 1.9555555555555555e-07, |
|
"logits/chosen": -0.9308934211730957, |
|
"logits/rejected": -0.9283267259597778, |
|
"logps/chosen": -1.6734390258789062, |
|
"logps/rejected": -1.9049830436706543, |
|
"loss": 5.3677, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -16.734390258789062, |
|
"rewards/margins": 2.315438747406006, |
|
"rewards/rejected": -19.049829483032227, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02714164546225615, |
|
"grad_norm": 114.82744103438529, |
|
"learning_rate": 2.133333333333333e-07, |
|
"logits/chosen": -0.8485775589942932, |
|
"logits/rejected": -0.8313932418823242, |
|
"logps/chosen": -1.7942991256713867, |
|
"logps/rejected": -1.8555328845977783, |
|
"loss": 6.4051, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -17.942991256713867, |
|
"rewards/margins": 0.6123358607292175, |
|
"rewards/rejected": -18.555328369140625, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.029403449250777494, |
|
"grad_norm": 97.6559617021603, |
|
"learning_rate": 2.3111111111111107e-07, |
|
"logits/chosen": -0.8471003770828247, |
|
"logits/rejected": -0.8123136162757874, |
|
"logps/chosen": -1.7098432779312134, |
|
"logps/rejected": -1.626631736755371, |
|
"loss": 6.1671, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -17.098432540893555, |
|
"rewards/margins": -0.8321163654327393, |
|
"rewards/rejected": -16.26631736755371, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03166525303929884, |
|
"grad_norm": 48.43299087579507, |
|
"learning_rate": 2.4888888888888886e-07, |
|
"logits/chosen": -0.8456010222434998, |
|
"logits/rejected": -0.843168318271637, |
|
"logps/chosen": -1.3908941745758057, |
|
"logps/rejected": -1.59244704246521, |
|
"loss": 4.5976, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -13.908943176269531, |
|
"rewards/margins": 2.0155270099639893, |
|
"rewards/rejected": -15.924469947814941, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.033927056827820185, |
|
"grad_norm": 94.32433506251559, |
|
"learning_rate": 2.666666666666666e-07, |
|
"logits/chosen": -0.8408608436584473, |
|
"logits/rejected": -0.8317903280258179, |
|
"logps/chosen": -1.5308924913406372, |
|
"logps/rejected": -1.621803879737854, |
|
"loss": 5.1224, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -15.308926582336426, |
|
"rewards/margins": 0.9091131091117859, |
|
"rewards/rejected": -16.218036651611328, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03618886061634153, |
|
"grad_norm": 80.0199360911188, |
|
"learning_rate": 2.8444444444444443e-07, |
|
"logits/chosen": -0.896875262260437, |
|
"logits/rejected": -0.8800469636917114, |
|
"logps/chosen": -1.6712013483047485, |
|
"logps/rejected": -1.6556079387664795, |
|
"loss": 6.2495, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -16.71201515197754, |
|
"rewards/margins": -0.15593338012695312, |
|
"rewards/rejected": -16.556079864501953, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.038450664404862875, |
|
"grad_norm": 95.62781163862564, |
|
"learning_rate": 3.022222222222222e-07, |
|
"logits/chosen": -0.9052500128746033, |
|
"logits/rejected": -0.8847813010215759, |
|
"logps/chosen": -1.4807989597320557, |
|
"logps/rejected": -1.447709321975708, |
|
"loss": 6.2111, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -14.807989120483398, |
|
"rewards/margins": -0.33089762926101685, |
|
"rewards/rejected": -14.477092742919922, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04071246819338423, |
|
"grad_norm": 99.53047146451797, |
|
"learning_rate": 3.2e-07, |
|
"logits/chosen": -0.9046046733856201, |
|
"logits/rejected": -0.8962881565093994, |
|
"logps/chosen": -1.9553179740905762, |
|
"logps/rejected": -1.9541630744934082, |
|
"loss": 6.2661, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -19.553178787231445, |
|
"rewards/margins": -0.011548399925231934, |
|
"rewards/rejected": -19.5416316986084, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04297427198190557, |
|
"grad_norm": 94.64334054203071, |
|
"learning_rate": 3.3777777777777777e-07, |
|
"logits/chosen": -0.9112716913223267, |
|
"logits/rejected": -0.8977913856506348, |
|
"logps/chosen": -1.6549549102783203, |
|
"logps/rejected": -1.672560214996338, |
|
"loss": 5.3987, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -16.549549102783203, |
|
"rewards/margins": 0.1760539710521698, |
|
"rewards/rejected": -16.725605010986328, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04523607577042692, |
|
"grad_norm": 41.05939278803522, |
|
"learning_rate": 3.5555555555555553e-07, |
|
"logits/chosen": -0.9180342555046082, |
|
"logits/rejected": -0.9136630892753601, |
|
"logps/chosen": -1.5036756992340088, |
|
"logps/rejected": -1.7418506145477295, |
|
"loss": 4.306, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -15.03675651550293, |
|
"rewards/margins": 2.3817477226257324, |
|
"rewards/rejected": -17.41850471496582, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04749787955894826, |
|
"grad_norm": 65.62889809973544, |
|
"learning_rate": 3.7333333333333334e-07, |
|
"logits/chosen": -0.8245253562927246, |
|
"logits/rejected": -0.8135088086128235, |
|
"logps/chosen": -1.5167511701583862, |
|
"logps/rejected": -1.5217550992965698, |
|
"loss": 5.4951, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -15.167511940002441, |
|
"rewards/margins": 0.05004033446311951, |
|
"rewards/rejected": -15.217550277709961, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.04975968334746961, |
|
"grad_norm": 53.92197856426591, |
|
"learning_rate": 3.911111111111111e-07, |
|
"logits/chosen": -0.8624619245529175, |
|
"logits/rejected": -0.8261862397193909, |
|
"logps/chosen": -1.559888243675232, |
|
"logps/rejected": -1.6315239667892456, |
|
"loss": 4.6857, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -15.598882675170898, |
|
"rewards/margins": 0.716356098651886, |
|
"rewards/rejected": -16.31523895263672, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05202148713599095, |
|
"grad_norm": 71.94913453042106, |
|
"learning_rate": 4.0888888888888886e-07, |
|
"logits/chosen": -0.8572225570678711, |
|
"logits/rejected": -0.8356618881225586, |
|
"logps/chosen": -1.5296409130096436, |
|
"logps/rejected": -1.5351814031600952, |
|
"loss": 5.7403, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -15.296407699584961, |
|
"rewards/margins": 0.055405229330062866, |
|
"rewards/rejected": -15.351814270019531, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0542832909245123, |
|
"grad_norm": 53.61180470189225, |
|
"learning_rate": 4.266666666666666e-07, |
|
"logits/chosen": -0.8729988932609558, |
|
"logits/rejected": -0.8457622528076172, |
|
"logps/chosen": -1.371631383895874, |
|
"logps/rejected": -1.4574888944625854, |
|
"loss": 5.0383, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -13.716312408447266, |
|
"rewards/margins": 0.8585769534111023, |
|
"rewards/rejected": -14.574889183044434, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05654509471303364, |
|
"grad_norm": 98.16261763681565, |
|
"learning_rate": 4.4444444444444444e-07, |
|
"logits/chosen": -0.862544596195221, |
|
"logits/rejected": -0.8518227934837341, |
|
"logps/chosen": -1.6941993236541748, |
|
"logps/rejected": -1.677493691444397, |
|
"loss": 5.7118, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -16.941993713378906, |
|
"rewards/margins": -0.16705602407455444, |
|
"rewards/rejected": -16.77493667602539, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05880689850155499, |
|
"grad_norm": 94.80833058904163, |
|
"learning_rate": 4.6222222222222214e-07, |
|
"logits/chosen": -0.8756837844848633, |
|
"logits/rejected": -0.8487232327461243, |
|
"logps/chosen": -1.6833150386810303, |
|
"logps/rejected": -1.6622823476791382, |
|
"loss": 5.6915, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -16.83315086364746, |
|
"rewards/margins": -0.21032753586769104, |
|
"rewards/rejected": -16.622821807861328, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.061068702290076333, |
|
"grad_norm": 81.08001128654294, |
|
"learning_rate": 4.8e-07, |
|
"logits/chosen": -0.8789094686508179, |
|
"logits/rejected": -0.8827879428863525, |
|
"logps/chosen": -1.6698274612426758, |
|
"logps/rejected": -1.5913212299346924, |
|
"loss": 6.1429, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -16.698274612426758, |
|
"rewards/margins": -0.7850615978240967, |
|
"rewards/rejected": -15.913213729858398, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.06333050607859768, |
|
"grad_norm": 56.644003389915696, |
|
"learning_rate": 4.977777777777777e-07, |
|
"logits/chosen": -0.8799877166748047, |
|
"logits/rejected": -0.870951235294342, |
|
"logps/chosen": -1.5632425546646118, |
|
"logps/rejected": -1.624694585800171, |
|
"loss": 5.6969, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -15.632424354553223, |
|
"rewards/margins": 0.6145212054252625, |
|
"rewards/rejected": -16.246946334838867, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.06559230986711903, |
|
"grad_norm": 74.85771393756472, |
|
"learning_rate": 5.155555555555556e-07, |
|
"logits/chosen": -0.8934893608093262, |
|
"logits/rejected": -0.8896267414093018, |
|
"logps/chosen": -1.5920103788375854, |
|
"logps/rejected": -1.6025701761245728, |
|
"loss": 5.5342, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -15.920103073120117, |
|
"rewards/margins": 0.10559805482625961, |
|
"rewards/rejected": -16.02570152282715, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.06785411365564037, |
|
"grad_norm": 76.84594367688287, |
|
"learning_rate": 5.333333333333332e-07, |
|
"logits/chosen": -0.8594059944152832, |
|
"logits/rejected": -0.8437649607658386, |
|
"logps/chosen": -1.5912779569625854, |
|
"logps/rejected": -1.6219682693481445, |
|
"loss": 5.9684, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -15.912779808044434, |
|
"rewards/margins": 0.3069048821926117, |
|
"rewards/rejected": -16.219684600830078, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07011591744416172, |
|
"grad_norm": 78.87037036995574, |
|
"learning_rate": 5.511111111111111e-07, |
|
"logits/chosen": -0.8989169001579285, |
|
"logits/rejected": -0.88699871301651, |
|
"logps/chosen": -1.577941656112671, |
|
"logps/rejected": -1.548736572265625, |
|
"loss": 5.6791, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -15.779415130615234, |
|
"rewards/margins": -0.2920517921447754, |
|
"rewards/rejected": -15.487363815307617, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.07237772123268306, |
|
"grad_norm": 70.30306886857991, |
|
"learning_rate": 5.688888888888889e-07, |
|
"logits/chosen": -0.9056158661842346, |
|
"logits/rejected": -0.9041393399238586, |
|
"logps/chosen": -1.5190542936325073, |
|
"logps/rejected": -1.482797622680664, |
|
"loss": 5.7225, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -15.190543174743652, |
|
"rewards/margins": -0.36256617307662964, |
|
"rewards/rejected": -14.827978134155273, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.07463952502120441, |
|
"grad_norm": 52.019953616790154, |
|
"learning_rate": 5.866666666666666e-07, |
|
"logits/chosen": -0.8431529998779297, |
|
"logits/rejected": -0.8326103687286377, |
|
"logps/chosen": -1.4754631519317627, |
|
"logps/rejected": -1.6542396545410156, |
|
"loss": 4.6444, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -14.754631042480469, |
|
"rewards/margins": 1.787764072418213, |
|
"rewards/rejected": -16.542396545410156, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.07690132880972575, |
|
"grad_norm": 51.76600092399858, |
|
"learning_rate": 6.044444444444444e-07, |
|
"logits/chosen": -0.8958278298377991, |
|
"logits/rejected": -0.8506935834884644, |
|
"logps/chosen": -1.4213745594024658, |
|
"logps/rejected": -1.5557016134262085, |
|
"loss": 4.5416, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -14.213743209838867, |
|
"rewards/margins": 1.343271017074585, |
|
"rewards/rejected": -15.557015419006348, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0791631325982471, |
|
"grad_norm": 88.73341217553781, |
|
"learning_rate": 6.222222222222223e-07, |
|
"logits/chosen": -0.9224306344985962, |
|
"logits/rejected": -0.8935542106628418, |
|
"logps/chosen": -1.5873973369598389, |
|
"logps/rejected": -1.7230992317199707, |
|
"loss": 5.1296, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -15.87397289276123, |
|
"rewards/margins": 1.3570194244384766, |
|
"rewards/rejected": -17.230993270874023, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08142493638676845, |
|
"grad_norm": 81.48522456937111, |
|
"learning_rate": 6.4e-07, |
|
"logits/chosen": -0.8501981496810913, |
|
"logits/rejected": -0.8491517305374146, |
|
"logps/chosen": -1.5095704793930054, |
|
"logps/rejected": -1.6728523969650269, |
|
"loss": 4.9032, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -15.095704078674316, |
|
"rewards/margins": 1.632819652557373, |
|
"rewards/rejected": -16.72852325439453, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.08368674017528979, |
|
"grad_norm": 63.09197425067475, |
|
"learning_rate": 6.577777777777777e-07, |
|
"logits/chosen": -0.8523389101028442, |
|
"logits/rejected": -0.8278622627258301, |
|
"logps/chosen": -1.3732750415802002, |
|
"logps/rejected": -1.3724522590637207, |
|
"loss": 5.2905, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -13.732749938964844, |
|
"rewards/margins": -0.008226484060287476, |
|
"rewards/rejected": -13.724522590637207, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.08594854396381114, |
|
"grad_norm": 73.86459203067565, |
|
"learning_rate": 6.755555555555555e-07, |
|
"logits/chosen": -0.9427972435951233, |
|
"logits/rejected": -0.9414781332015991, |
|
"logps/chosen": -1.5264731645584106, |
|
"logps/rejected": -1.5371237993240356, |
|
"loss": 5.2678, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -15.264732360839844, |
|
"rewards/margins": 0.1065058782696724, |
|
"rewards/rejected": -15.371236801147461, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.08821034775233248, |
|
"grad_norm": 73.25463666536884, |
|
"learning_rate": 6.933333333333333e-07, |
|
"logits/chosen": -0.926520586013794, |
|
"logits/rejected": -0.9318759441375732, |
|
"logps/chosen": -1.5116084814071655, |
|
"logps/rejected": -1.524423360824585, |
|
"loss": 5.1166, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -15.116085052490234, |
|
"rewards/margins": 0.12814898788928986, |
|
"rewards/rejected": -15.244234085083008, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.09047215154085383, |
|
"grad_norm": 47.01597449801661, |
|
"learning_rate": 7.111111111111111e-07, |
|
"logits/chosen": -0.8796355128288269, |
|
"logits/rejected": -0.8566000461578369, |
|
"logps/chosen": -1.3858391046524048, |
|
"logps/rejected": -1.5868655443191528, |
|
"loss": 4.0668, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -13.858390808105469, |
|
"rewards/margins": 2.0102648735046387, |
|
"rewards/rejected": -15.868656158447266, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09273395532937517, |
|
"grad_norm": 38.92083571265448, |
|
"learning_rate": 7.288888888888888e-07, |
|
"logits/chosen": -0.9404792189598083, |
|
"logits/rejected": -0.9076958894729614, |
|
"logps/chosen": -1.3758432865142822, |
|
"logps/rejected": -1.5328952074050903, |
|
"loss": 4.4094, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -13.75843334197998, |
|
"rewards/margins": 1.5705193281173706, |
|
"rewards/rejected": -15.32895278930664, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.09499575911789652, |
|
"grad_norm": 68.82170323755115, |
|
"learning_rate": 7.466666666666667e-07, |
|
"logits/chosen": -0.8351485729217529, |
|
"logits/rejected": -0.7955107092857361, |
|
"logps/chosen": -1.487162709236145, |
|
"logps/rejected": -1.5933465957641602, |
|
"loss": 5.0699, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -14.871627807617188, |
|
"rewards/margins": 1.0618385076522827, |
|
"rewards/rejected": -15.933464050292969, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.09725756290641786, |
|
"grad_norm": 63.18032695061353, |
|
"learning_rate": 7.644444444444444e-07, |
|
"logits/chosen": -0.9111210703849792, |
|
"logits/rejected": -0.8793379664421082, |
|
"logps/chosen": -1.4616880416870117, |
|
"logps/rejected": -1.5058850049972534, |
|
"loss": 5.005, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -14.616881370544434, |
|
"rewards/margins": 0.44196972250938416, |
|
"rewards/rejected": -15.058850288391113, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.09951936669493922, |
|
"grad_norm": 48.381985529172866, |
|
"learning_rate": 7.822222222222222e-07, |
|
"logits/chosen": -0.8437673449516296, |
|
"logits/rejected": -0.8208142518997192, |
|
"logps/chosen": -1.3148137331008911, |
|
"logps/rejected": -1.4531042575836182, |
|
"loss": 4.179, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -13.148136138916016, |
|
"rewards/margins": 1.38290536403656, |
|
"rewards/rejected": -14.53104305267334, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.10178117048346055, |
|
"grad_norm": 38.81813502976088, |
|
"learning_rate": 8e-07, |
|
"logits/chosen": -0.9005692005157471, |
|
"logits/rejected": -0.8871059417724609, |
|
"logps/chosen": -1.3741270303726196, |
|
"logps/rejected": -1.4992985725402832, |
|
"loss": 4.5246, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -13.741270065307617, |
|
"rewards/margins": 1.2517166137695312, |
|
"rewards/rejected": -14.992988586425781, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.1040429742719819, |
|
"grad_norm": 89.05318759018981, |
|
"learning_rate": 7.999874759018868e-07, |
|
"logits/chosen": -0.9439775943756104, |
|
"logits/rejected": -0.9177378416061401, |
|
"logps/chosen": -1.6424872875213623, |
|
"logps/rejected": -1.7683295011520386, |
|
"loss": 4.7224, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -16.42487144470215, |
|
"rewards/margins": 1.2584227323532104, |
|
"rewards/rejected": -17.68329429626465, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.10630477806050326, |
|
"grad_norm": 43.17715042286116, |
|
"learning_rate": 7.999499043918123e-07, |
|
"logits/chosen": -0.934738278388977, |
|
"logits/rejected": -0.9424084424972534, |
|
"logps/chosen": -1.4421114921569824, |
|
"logps/rejected": -1.5015398263931274, |
|
"loss": 5.0113, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -14.421113967895508, |
|
"rewards/margins": 0.5942831635475159, |
|
"rewards/rejected": -15.015397071838379, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.1085665818490246, |
|
"grad_norm": 43.641796737833445, |
|
"learning_rate": 7.998872878225228e-07, |
|
"logits/chosen": -0.8617913722991943, |
|
"logits/rejected": -0.8524473905563354, |
|
"logps/chosen": -1.48220694065094, |
|
"logps/rejected": -1.628198504447937, |
|
"loss": 4.4078, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -14.822070121765137, |
|
"rewards/margins": 1.4599149227142334, |
|
"rewards/rejected": -16.281984329223633, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.11082838563754595, |
|
"grad_norm": 44.9928271242027, |
|
"learning_rate": 7.997996301150987e-07, |
|
"logits/chosen": -0.8672093152999878, |
|
"logits/rejected": -0.8628696203231812, |
|
"logps/chosen": -1.4041790962219238, |
|
"logps/rejected": -1.5184260606765747, |
|
"loss": 4.6114, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -14.041790962219238, |
|
"rewards/margins": 1.142470121383667, |
|
"rewards/rejected": -15.184259414672852, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.11309018942606729, |
|
"grad_norm": 48.332832486571874, |
|
"learning_rate": 7.996869367587088e-07, |
|
"logits/chosen": -0.836407482624054, |
|
"logits/rejected": -0.8215224146842957, |
|
"logps/chosen": -1.4828814268112183, |
|
"logps/rejected": -1.5967737436294556, |
|
"loss": 4.6077, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -14.828814506530762, |
|
"rewards/margins": 1.1389241218566895, |
|
"rewards/rejected": -15.967738151550293, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11535199321458864, |
|
"grad_norm": 42.46620935554636, |
|
"learning_rate": 7.99549214810266e-07, |
|
"logits/chosen": -0.8490492105484009, |
|
"logits/rejected": -0.8362867832183838, |
|
"logps/chosen": -1.4633221626281738, |
|
"logps/rejected": -1.5538841485977173, |
|
"loss": 4.5936, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -14.633221626281738, |
|
"rewards/margins": 0.9056205153465271, |
|
"rewards/rejected": -15.538841247558594, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.11761379700310998, |
|
"grad_norm": 46.23878461965418, |
|
"learning_rate": 7.993864728939867e-07, |
|
"logits/chosen": -0.8653365969657898, |
|
"logits/rejected": -0.8207730650901794, |
|
"logps/chosen": -1.4526644945144653, |
|
"logps/rejected": -1.5614793300628662, |
|
"loss": 4.8368, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -14.526643753051758, |
|
"rewards/margins": 1.0881470441818237, |
|
"rewards/rejected": -15.614792823791504, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.11987560079163133, |
|
"grad_norm": 50.746711219977314, |
|
"learning_rate": 7.991987212008491e-07, |
|
"logits/chosen": -0.8787316083908081, |
|
"logits/rejected": -0.8544822931289673, |
|
"logps/chosen": -1.524681568145752, |
|
"logps/rejected": -1.7203454971313477, |
|
"loss": 4.3884, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -15.246816635131836, |
|
"rewards/margins": 1.9566391706466675, |
|
"rewards/rejected": -17.203454971313477, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.12213740458015267, |
|
"grad_norm": 51.98936145640891, |
|
"learning_rate": 7.989859714879565e-07, |
|
"logits/chosen": -0.9071463346481323, |
|
"logits/rejected": -0.8824944496154785, |
|
"logps/chosen": -1.4744333028793335, |
|
"logps/rejected": -1.5566731691360474, |
|
"loss": 4.8618, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -14.744333267211914, |
|
"rewards/margins": 0.8223981261253357, |
|
"rewards/rejected": -15.566731452941895, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.12439920836867402, |
|
"grad_norm": 44.93136877143668, |
|
"learning_rate": 7.987482370778005e-07, |
|
"logits/chosen": -0.8825117349624634, |
|
"logits/rejected": -0.8596429824829102, |
|
"logps/chosen": -1.500649094581604, |
|
"logps/rejected": -1.6202951669692993, |
|
"loss": 4.696, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -15.006490707397461, |
|
"rewards/margins": 1.1964606046676636, |
|
"rewards/rejected": -16.20294952392578, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12666101215719536, |
|
"grad_norm": 57.11711572983479, |
|
"learning_rate": 7.984855328574262e-07, |
|
"logits/chosen": -0.748485803604126, |
|
"logits/rejected": -0.7519769668579102, |
|
"logps/chosen": -1.4509243965148926, |
|
"logps/rejected": -1.5625280141830444, |
|
"loss": 4.4574, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -14.50924301147461, |
|
"rewards/margins": 1.1160372495651245, |
|
"rewards/rejected": -15.625280380249023, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.1289228159457167, |
|
"grad_norm": 70.17018909190087, |
|
"learning_rate": 7.981978752775009e-07, |
|
"logits/chosen": -0.8194972276687622, |
|
"logits/rejected": -0.8117552399635315, |
|
"logps/chosen": -1.5257998704910278, |
|
"logps/rejected": -1.6556179523468018, |
|
"loss": 4.665, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -15.257999420166016, |
|
"rewards/margins": 1.298180341720581, |
|
"rewards/rejected": -16.55617904663086, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.13118461973423806, |
|
"grad_norm": 79.53233397371731, |
|
"learning_rate": 7.978852823512833e-07, |
|
"logits/chosen": -0.8595327138900757, |
|
"logits/rejected": -0.8340020179748535, |
|
"logps/chosen": -1.641236424446106, |
|
"logps/rejected": -1.7583504915237427, |
|
"loss": 4.9327, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -16.412364959716797, |
|
"rewards/margins": 1.1711419820785522, |
|
"rewards/rejected": -17.583507537841797, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.1334464235227594, |
|
"grad_norm": 69.60803219031307, |
|
"learning_rate": 7.975477736534957e-07, |
|
"logits/chosen": -0.8586044907569885, |
|
"logits/rejected": -0.8539649844169617, |
|
"logps/chosen": -1.5942871570587158, |
|
"logps/rejected": -1.808882474899292, |
|
"loss": 4.3286, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -15.942872047424316, |
|
"rewards/margins": 2.1459531784057617, |
|
"rewards/rejected": -18.088825225830078, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.13570822731128074, |
|
"grad_norm": 62.64898369105346, |
|
"learning_rate": 7.971853703190986e-07, |
|
"logits/chosen": -0.8574209213256836, |
|
"logits/rejected": -0.8404501080513, |
|
"logps/chosen": -1.5743780136108398, |
|
"logps/rejected": -1.7517762184143066, |
|
"loss": 4.4022, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -15.743781089782715, |
|
"rewards/margins": 1.773982048034668, |
|
"rewards/rejected": -17.517763137817383, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1379700310998021, |
|
"grad_norm": 59.54476826787708, |
|
"learning_rate": 7.967980950419664e-07, |
|
"logits/chosen": -0.8027121424674988, |
|
"logits/rejected": -0.7864540815353394, |
|
"logps/chosen": -1.5260121822357178, |
|
"logps/rejected": -1.6937767267227173, |
|
"loss": 4.4368, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -15.260122299194336, |
|
"rewards/margins": 1.6776450872421265, |
|
"rewards/rejected": -16.937767028808594, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.14023183488832344, |
|
"grad_norm": 54.120200448145056, |
|
"learning_rate": 7.963859720734669e-07, |
|
"logits/chosen": -0.8626559376716614, |
|
"logits/rejected": -0.8548423051834106, |
|
"logps/chosen": -1.4451175928115845, |
|
"logps/rejected": -1.646138310432434, |
|
"loss": 4.3155, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -14.451175689697266, |
|
"rewards/margins": 2.0102078914642334, |
|
"rewards/rejected": -16.461383819580078, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.14249363867684478, |
|
"grad_norm": 48.27367689289302, |
|
"learning_rate": 7.959490272209427e-07, |
|
"logits/chosen": -0.8522219061851501, |
|
"logits/rejected": -0.8149221539497375, |
|
"logps/chosen": -1.4842830896377563, |
|
"logps/rejected": -1.741237998008728, |
|
"loss": 3.8194, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -14.842830657958984, |
|
"rewards/margins": 2.5695488452911377, |
|
"rewards/rejected": -17.41238021850586, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.14475544246536612, |
|
"grad_norm": 47.53970538406451, |
|
"learning_rate": 7.954872878460946e-07, |
|
"logits/chosen": -0.8807967901229858, |
|
"logits/rejected": -0.8453354835510254, |
|
"logps/chosen": -1.5572218894958496, |
|
"logps/rejected": -1.7726668119430542, |
|
"loss": 4.0647, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -15.572219848632812, |
|
"rewards/margins": 2.154447555541992, |
|
"rewards/rejected": -17.726669311523438, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.14701724625388748, |
|
"grad_norm": 61.35510962861716, |
|
"learning_rate": 7.950007828632691e-07, |
|
"logits/chosen": -0.8250374794006348, |
|
"logits/rejected": -0.820457935333252, |
|
"logps/chosen": -1.607496738433838, |
|
"logps/rejected": -1.8878852128982544, |
|
"loss": 4.0136, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -16.074966430664062, |
|
"rewards/margins": 2.8038859367370605, |
|
"rewards/rejected": -18.87885284423828, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14927905004240882, |
|
"grad_norm": 54.05120684424973, |
|
"learning_rate": 7.944895427376465e-07, |
|
"logits/chosen": -0.8387467861175537, |
|
"logits/rejected": -0.8197423219680786, |
|
"logps/chosen": -1.613673210144043, |
|
"logps/rejected": -1.8641467094421387, |
|
"loss": 4.1501, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -16.13673210144043, |
|
"rewards/margins": 2.5047359466552734, |
|
"rewards/rejected": -18.641468048095703, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.15154085383093016, |
|
"grad_norm": 43.4329226476985, |
|
"learning_rate": 7.939535994833345e-07, |
|
"logits/chosen": -0.80382239818573, |
|
"logits/rejected": -0.7954918742179871, |
|
"logps/chosen": -1.4918571710586548, |
|
"logps/rejected": -1.7582557201385498, |
|
"loss": 4.0382, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -14.918570518493652, |
|
"rewards/margins": 2.663985013961792, |
|
"rewards/rejected": -17.582555770874023, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.1538026576194515, |
|
"grad_norm": 58.50135003917604, |
|
"learning_rate": 7.933929866613628e-07, |
|
"logits/chosen": -0.8193422555923462, |
|
"logits/rejected": -0.8229498863220215, |
|
"logps/chosen": -1.5523847341537476, |
|
"logps/rejected": -1.6850162744522095, |
|
"loss": 4.511, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -15.523847579956055, |
|
"rewards/margins": 1.32631516456604, |
|
"rewards/rejected": -16.850162506103516, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.15606446140797287, |
|
"grad_norm": 52.388333685361864, |
|
"learning_rate": 7.928077393775808e-07, |
|
"logits/chosen": -0.8074467778205872, |
|
"logits/rejected": -0.8173753619194031, |
|
"logps/chosen": -1.5872104167938232, |
|
"logps/rejected": -1.9215919971466064, |
|
"loss": 3.605, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -15.87210464477539, |
|
"rewards/margins": 3.3438150882720947, |
|
"rewards/rejected": -19.215919494628906, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.1583262651964942, |
|
"grad_norm": 70.46015135129937, |
|
"learning_rate": 7.921978942804609e-07, |
|
"logits/chosen": -0.7921926975250244, |
|
"logits/rejected": -0.7895167469978333, |
|
"logps/chosen": -1.5997159481048584, |
|
"logps/rejected": -1.855806827545166, |
|
"loss": 3.9852, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -15.997159004211426, |
|
"rewards/margins": 2.5609097480773926, |
|
"rewards/rejected": -18.558067321777344, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16058806898501554, |
|
"grad_norm": 66.5782725292864, |
|
"learning_rate": 7.915634895588021e-07, |
|
"logits/chosen": -0.8188354969024658, |
|
"logits/rejected": -0.803663969039917, |
|
"logps/chosen": -1.694320797920227, |
|
"logps/rejected": -1.8535633087158203, |
|
"loss": 4.5753, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -16.943206787109375, |
|
"rewards/margins": 1.5924267768859863, |
|
"rewards/rejected": -18.535634994506836, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.1628498727735369, |
|
"grad_norm": 63.98536775928476, |
|
"learning_rate": 7.909045649393394e-07, |
|
"logits/chosen": -0.8593119382858276, |
|
"logits/rejected": -0.8650994896888733, |
|
"logps/chosen": -1.585839033126831, |
|
"logps/rejected": -1.7022672891616821, |
|
"loss": 4.5482, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -15.858390808105469, |
|
"rewards/margins": 1.1642816066741943, |
|
"rewards/rejected": -17.02267074584961, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.16511167656205825, |
|
"grad_norm": 47.71315634678256, |
|
"learning_rate": 7.902211616842556e-07, |
|
"logits/chosen": -0.8264446258544922, |
|
"logits/rejected": -0.8236741423606873, |
|
"logps/chosen": -1.623077154159546, |
|
"logps/rejected": -1.879746437072754, |
|
"loss": 4.1393, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -16.23077392578125, |
|
"rewards/margins": 2.566693067550659, |
|
"rewards/rejected": -18.79746437072754, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.16737348035057958, |
|
"grad_norm": 70.53123827391246, |
|
"learning_rate": 7.89513322588598e-07, |
|
"logits/chosen": -0.808039665222168, |
|
"logits/rejected": -0.7966674566268921, |
|
"logps/chosen": -1.592429757118225, |
|
"logps/rejected": -1.8032734394073486, |
|
"loss": 3.9256, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -15.924297332763672, |
|
"rewards/margins": 2.108438014984131, |
|
"rewards/rejected": -18.03273582458496, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.16963528413910092, |
|
"grad_norm": 56.68159659271728, |
|
"learning_rate": 7.887810919775976e-07, |
|
"logits/chosen": -0.7461099028587341, |
|
"logits/rejected": -0.7355799674987793, |
|
"logps/chosen": -1.6924803256988525, |
|
"logps/rejected": -1.9031829833984375, |
|
"loss": 4.0589, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -16.9248046875, |
|
"rewards/margins": 2.107023239135742, |
|
"rewards/rejected": -19.031827926635742, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1718970879276223, |
|
"grad_norm": 42.71145723908974, |
|
"learning_rate": 7.880245157038949e-07, |
|
"logits/chosen": -0.8165091276168823, |
|
"logits/rejected": -0.793809175491333, |
|
"logps/chosen": -1.688427448272705, |
|
"logps/rejected": -1.9064791202545166, |
|
"loss": 4.0899, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -16.884273529052734, |
|
"rewards/margins": 2.180520534515381, |
|
"rewards/rejected": -19.064794540405273, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.17415889171614363, |
|
"grad_norm": 80.58036409049882, |
|
"learning_rate": 7.872436411446671e-07, |
|
"logits/chosen": -0.836346447467804, |
|
"logits/rejected": -0.8506262302398682, |
|
"logps/chosen": -1.7576085329055786, |
|
"logps/rejected": -1.920924186706543, |
|
"loss": 4.5954, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -17.57608413696289, |
|
"rewards/margins": 1.6331558227539062, |
|
"rewards/rejected": -19.209239959716797, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.17642069550466496, |
|
"grad_norm": 46.381702188392666, |
|
"learning_rate": 7.86438517198662e-07, |
|
"logits/chosen": -0.780924379825592, |
|
"logits/rejected": -0.767948567867279, |
|
"logps/chosen": -1.650989294052124, |
|
"logps/rejected": -1.8504787683486938, |
|
"loss": 4.2658, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -16.5098934173584, |
|
"rewards/margins": 1.9948934316635132, |
|
"rewards/rejected": -18.50478744506836, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1786824992931863, |
|
"grad_norm": 56.20092121621141, |
|
"learning_rate": 7.856091942831366e-07, |
|
"logits/chosen": -0.7430872321128845, |
|
"logits/rejected": -0.7503747940063477, |
|
"logps/chosen": -1.644688367843628, |
|
"logps/rejected": -1.8490663766860962, |
|
"loss": 4.517, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -16.446882247924805, |
|
"rewards/margins": 2.0437800884246826, |
|
"rewards/rejected": -18.49066162109375, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.18094430308170767, |
|
"grad_norm": 52.81332760090688, |
|
"learning_rate": 7.847557243306982e-07, |
|
"logits/chosen": -0.8418252468109131, |
|
"logits/rejected": -0.8341580629348755, |
|
"logps/chosen": -1.6995246410369873, |
|
"logps/rejected": -1.9185855388641357, |
|
"loss": 4.0086, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -16.9952449798584, |
|
"rewards/margins": 2.190608024597168, |
|
"rewards/rejected": -19.18585205078125, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.183206106870229, |
|
"grad_norm": 56.02862880172386, |
|
"learning_rate": 7.838781607860541e-07, |
|
"logits/chosen": -0.8196614980697632, |
|
"logits/rejected": -0.8126786947250366, |
|
"logps/chosen": -1.7471215724945068, |
|
"logps/rejected": -1.9539873600006104, |
|
"loss": 3.7371, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -17.471214294433594, |
|
"rewards/margins": 2.068657398223877, |
|
"rewards/rejected": -19.539873123168945, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.18546791065875035, |
|
"grad_norm": 54.48859910947903, |
|
"learning_rate": 7.82976558602664e-07, |
|
"logits/chosen": -0.8580424785614014, |
|
"logits/rejected": -0.8641104102134705, |
|
"logps/chosen": -1.7102807760238647, |
|
"logps/rejected": -1.8986783027648926, |
|
"loss": 4.2118, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -17.102807998657227, |
|
"rewards/margins": 1.8839763402938843, |
|
"rewards/rejected": -18.986783981323242, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.1877297144472717, |
|
"grad_norm": 78.04437890690556, |
|
"learning_rate": 7.820509742392988e-07, |
|
"logits/chosen": -0.8468527793884277, |
|
"logits/rejected": -0.8453028202056885, |
|
"logps/chosen": -1.8543328046798706, |
|
"logps/rejected": -2.0150225162506104, |
|
"loss": 4.3218, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -18.54332733154297, |
|
"rewards/margins": 1.6068973541259766, |
|
"rewards/rejected": -20.150224685668945, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.18999151823579305, |
|
"grad_norm": 88.82175847045283, |
|
"learning_rate": 7.811014656565054e-07, |
|
"logits/chosen": -0.8449732661247253, |
|
"logits/rejected": -0.815599799156189, |
|
"logps/chosen": -1.738198161125183, |
|
"logps/rejected": -2.115600347518921, |
|
"loss": 3.5074, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -17.381982803344727, |
|
"rewards/margins": 3.7740225791931152, |
|
"rewards/rejected": -21.156005859375, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.1922533220243144, |
|
"grad_norm": 63.74399903736426, |
|
"learning_rate": 7.801280923129773e-07, |
|
"logits/chosen": -0.8337980508804321, |
|
"logits/rejected": -0.8294973969459534, |
|
"logps/chosen": -1.8116644620895386, |
|
"logps/rejected": -1.987363338470459, |
|
"loss": 4.549, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -18.11664581298828, |
|
"rewards/margins": 1.756988763809204, |
|
"rewards/rejected": -19.873632431030273, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19451512581283573, |
|
"grad_norm": 77.0826088582988, |
|
"learning_rate": 7.791309151618305e-07, |
|
"logits/chosen": -0.8380694389343262, |
|
"logits/rejected": -0.8311120271682739, |
|
"logps/chosen": -1.9478144645690918, |
|
"logps/rejected": -2.143031597137451, |
|
"loss": 4.2291, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": -19.478145599365234, |
|
"rewards/margins": 1.9521695375442505, |
|
"rewards/rejected": -21.430315017700195, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.1967769296013571, |
|
"grad_norm": 53.07834075055144, |
|
"learning_rate": 7.781099966467874e-07, |
|
"logits/chosen": -0.8639700412750244, |
|
"logits/rejected": -0.8545355200767517, |
|
"logps/chosen": -1.727626919746399, |
|
"logps/rejected": -1.8995643854141235, |
|
"loss": 3.8778, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -17.276269912719727, |
|
"rewards/margins": 1.7193742990493774, |
|
"rewards/rejected": -18.995643615722656, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.19903873338987843, |
|
"grad_norm": 82.55613961122098, |
|
"learning_rate": 7.770654006982664e-07, |
|
"logits/chosen": -0.8509809374809265, |
|
"logits/rejected": -0.8106420040130615, |
|
"logps/chosen": -2.0078237056732178, |
|
"logps/rejected": -2.231494426727295, |
|
"loss": 4.4582, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -20.078235626220703, |
|
"rewards/margins": 2.2367055416107178, |
|
"rewards/rejected": -22.31494140625, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.20130053717839977, |
|
"grad_norm": 72.58396338245271, |
|
"learning_rate": 7.759971927293781e-07, |
|
"logits/chosen": -0.8639533519744873, |
|
"logits/rejected": -0.8477087616920471, |
|
"logps/chosen": -1.8459900617599487, |
|
"logps/rejected": -2.0477120876312256, |
|
"loss": 4.1424, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -18.459901809692383, |
|
"rewards/margins": 2.0172195434570312, |
|
"rewards/rejected": -20.477121353149414, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.2035623409669211, |
|
"grad_norm": 63.790702792270665, |
|
"learning_rate": 7.749054396318297e-07, |
|
"logits/chosen": -0.839960515499115, |
|
"logits/rejected": -0.8227687478065491, |
|
"logps/chosen": -1.9486068487167358, |
|
"logps/rejected": -2.1220030784606934, |
|
"loss": 4.2943, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -19.486068725585938, |
|
"rewards/margins": 1.733961582183838, |
|
"rewards/rejected": -21.220029830932617, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20582414475544247, |
|
"grad_norm": 77.99250593212055, |
|
"learning_rate": 7.737902097717356e-07, |
|
"logits/chosen": -0.8212487101554871, |
|
"logits/rejected": -0.8338538408279419, |
|
"logps/chosen": -1.863584280014038, |
|
"logps/rejected": -2.1549441814422607, |
|
"loss": 4.1322, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -18.635841369628906, |
|
"rewards/margins": 2.913600444793701, |
|
"rewards/rejected": -21.549442291259766, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.2080859485439638, |
|
"grad_norm": 72.8959596074702, |
|
"learning_rate": 7.726515729853367e-07, |
|
"logits/chosen": -0.8232444524765015, |
|
"logits/rejected": -0.819841742515564, |
|
"logps/chosen": -1.8698346614837646, |
|
"logps/rejected": -2.029289484024048, |
|
"loss": 4.5166, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -18.698345184326172, |
|
"rewards/margins": 1.5945475101470947, |
|
"rewards/rejected": -20.29289436340332, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.21034775233248515, |
|
"grad_norm": 87.04210388064935, |
|
"learning_rate": 7.714896005746272e-07, |
|
"logits/chosen": -0.8586671948432922, |
|
"logits/rejected": -0.8418205976486206, |
|
"logps/chosen": -1.9241650104522705, |
|
"logps/rejected": -2.2153499126434326, |
|
"loss": 3.6986, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -19.24165153503418, |
|
"rewards/margins": 2.9118492603302, |
|
"rewards/rejected": -22.153499603271484, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.21260955612100652, |
|
"grad_norm": 111.68532775800007, |
|
"learning_rate": 7.703043653028896e-07, |
|
"logits/chosen": -0.8883798122406006, |
|
"logits/rejected": -0.8768050670623779, |
|
"logps/chosen": -2.1463255882263184, |
|
"logps/rejected": -2.3561160564422607, |
|
"loss": 4.2491, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -21.463254928588867, |
|
"rewards/margins": 2.097905158996582, |
|
"rewards/rejected": -23.561161041259766, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.21487135990952785, |
|
"grad_norm": 73.84778954625673, |
|
"learning_rate": 7.690959413901379e-07, |
|
"logits/chosen": -0.8396280407905579, |
|
"logits/rejected": -0.8113132119178772, |
|
"logps/chosen": -2.0055348873138428, |
|
"logps/rejected": -2.264613628387451, |
|
"loss": 4.0505, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -20.055349349975586, |
|
"rewards/margins": 2.590789318084717, |
|
"rewards/rejected": -22.646137237548828, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2171331636980492, |
|
"grad_norm": 76.75066956482912, |
|
"learning_rate": 7.678644045084704e-07, |
|
"logits/chosen": -0.7979358434677124, |
|
"logits/rejected": -0.8126614093780518, |
|
"logps/chosen": -1.901458978652954, |
|
"logps/rejected": -2.1271097660064697, |
|
"loss": 4.3121, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -19.014589309692383, |
|
"rewards/margins": 2.2565088272094727, |
|
"rewards/rejected": -21.271099090576172, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.21939496748657053, |
|
"grad_norm": 69.10947756524247, |
|
"learning_rate": 7.666098317773308e-07, |
|
"logits/chosen": -0.8482003211975098, |
|
"logits/rejected": -0.8479557037353516, |
|
"logps/chosen": -2.065002918243408, |
|
"logps/rejected": -2.2882683277130127, |
|
"loss": 3.7783, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -20.650028228759766, |
|
"rewards/margins": 2.2326550483703613, |
|
"rewards/rejected": -22.8826847076416, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.2216567712750919, |
|
"grad_norm": 71.38081973867735, |
|
"learning_rate": 7.653323017586789e-07, |
|
"logits/chosen": -0.8626989722251892, |
|
"logits/rejected": -0.8502533435821533, |
|
"logps/chosen": -1.871101975440979, |
|
"logps/rejected": -2.0896551609039307, |
|
"loss": 3.8377, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -18.711015701293945, |
|
"rewards/margins": 2.1855337619781494, |
|
"rewards/rejected": -20.89655303955078, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.22391857506361323, |
|
"grad_norm": 80.27831480952293, |
|
"learning_rate": 7.640318944520711e-07, |
|
"logits/chosen": -0.8602339029312134, |
|
"logits/rejected": -0.8497695922851562, |
|
"logps/chosen": -2.0520148277282715, |
|
"logps/rejected": -2.2807064056396484, |
|
"loss": 3.7873, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -20.52014923095703, |
|
"rewards/margins": 2.286914825439453, |
|
"rewards/rejected": -22.80706214904785, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.22618037885213457, |
|
"grad_norm": 82.83162144065287, |
|
"learning_rate": 7.627086912896511e-07, |
|
"logits/chosen": -0.755748987197876, |
|
"logits/rejected": -0.7821561098098755, |
|
"logps/chosen": -1.9375782012939453, |
|
"logps/rejected": -2.1892411708831787, |
|
"loss": 3.7342, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -19.37578010559082, |
|
"rewards/margins": 2.516632318496704, |
|
"rewards/rejected": -21.892412185668945, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2284421826406559, |
|
"grad_norm": 61.630906980874535, |
|
"learning_rate": 7.613627751310499e-07, |
|
"logits/chosen": -0.8867424726486206, |
|
"logits/rejected": -0.8885044455528259, |
|
"logps/chosen": -2.1041259765625, |
|
"logps/rejected": -2.3255014419555664, |
|
"loss": 3.658, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -21.041257858276367, |
|
"rewards/margins": 2.2137553691864014, |
|
"rewards/rejected": -23.255016326904297, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.23070398642917728, |
|
"grad_norm": 89.96142120049903, |
|
"learning_rate": 7.599942302581977e-07, |
|
"logits/chosen": -0.8578089475631714, |
|
"logits/rejected": -0.8603122234344482, |
|
"logps/chosen": -2.110222816467285, |
|
"logps/rejected": -2.432941198348999, |
|
"loss": 3.5255, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -21.10222816467285, |
|
"rewards/margins": 3.227184295654297, |
|
"rewards/rejected": -24.329410552978516, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.23296579021769862, |
|
"grad_norm": 76.3035369590703, |
|
"learning_rate": 7.586031423700457e-07, |
|
"logits/chosen": -0.8419609069824219, |
|
"logits/rejected": -0.8390515446662903, |
|
"logps/chosen": -2.08290958404541, |
|
"logps/rejected": -2.321464776992798, |
|
"loss": 3.9243, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -20.8290958404541, |
|
"rewards/margins": 2.3855514526367188, |
|
"rewards/rejected": -23.214645385742188, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.23522759400621995, |
|
"grad_norm": 73.90826312722105, |
|
"learning_rate": 7.571895985772e-07, |
|
"logits/chosen": -0.8009840846061707, |
|
"logits/rejected": -0.8087509870529175, |
|
"logps/chosen": -2.1095006465911865, |
|
"logps/rejected": -2.4837350845336914, |
|
"loss": 3.27, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -21.095006942749023, |
|
"rewards/margins": 3.7423441410064697, |
|
"rewards/rejected": -24.837350845336914, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.23748939779474132, |
|
"grad_norm": 90.91119808796685, |
|
"learning_rate": 7.557536873964661e-07, |
|
"logits/chosen": -0.8794471025466919, |
|
"logits/rejected": -0.8741526007652283, |
|
"logps/chosen": -2.432756185531616, |
|
"logps/rejected": -2.654453992843628, |
|
"loss": 4.2395, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -24.327564239501953, |
|
"rewards/margins": 2.2169761657714844, |
|
"rewards/rejected": -26.544538497924805, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23975120158326266, |
|
"grad_norm": 72.09150920561058, |
|
"learning_rate": 7.542954987453069e-07, |
|
"logits/chosen": -0.8508340716362, |
|
"logits/rejected": -0.8528013825416565, |
|
"logps/chosen": -2.29594087600708, |
|
"logps/rejected": -2.5421571731567383, |
|
"loss": 3.7295, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -22.959407806396484, |
|
"rewards/margins": 2.462160348892212, |
|
"rewards/rejected": -25.421571731567383, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.242013005371784, |
|
"grad_norm": 92.05218134624694, |
|
"learning_rate": 7.528151239362108e-07, |
|
"logits/chosen": -0.8492079377174377, |
|
"logits/rejected": -0.8603383898735046, |
|
"logps/chosen": -2.424321174621582, |
|
"logps/rejected": -2.706427812576294, |
|
"loss": 3.7107, |
|
"rewards/accuracies": 0.6484375, |
|
"rewards/chosen": -24.243213653564453, |
|
"rewards/margins": 2.8210651874542236, |
|
"rewards/rejected": -27.064279556274414, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.24427480916030533, |
|
"grad_norm": 87.963517642823, |
|
"learning_rate": 7.513126556709748e-07, |
|
"logits/chosen": -0.8548307418823242, |
|
"logits/rejected": -0.8365832567214966, |
|
"logps/chosen": -2.367842197418213, |
|
"logps/rejected": -2.776707172393799, |
|
"loss": 3.2256, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -23.678422927856445, |
|
"rewards/margins": 4.088651657104492, |
|
"rewards/rejected": -27.767070770263672, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2465366129488267, |
|
"grad_norm": 104.59173334061525, |
|
"learning_rate": 7.497881880348984e-07, |
|
"logits/chosen": -0.8096323013305664, |
|
"logits/rejected": -0.7972118854522705, |
|
"logps/chosen": -2.4743218421936035, |
|
"logps/rejected": -2.755343437194824, |
|
"loss": 3.9171, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -24.74321937561035, |
|
"rewards/margins": 2.8102121353149414, |
|
"rewards/rejected": -27.55343246459961, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.24879841673734804, |
|
"grad_norm": 164.02635203828626, |
|
"learning_rate": 7.482418164908931e-07, |
|
"logits/chosen": -0.8311317563056946, |
|
"logits/rejected": -0.8270904421806335, |
|
"logps/chosen": -2.616877794265747, |
|
"logps/rejected": -2.835850477218628, |
|
"loss": 4.1572, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -26.168777465820312, |
|
"rewards/margins": 2.1897284984588623, |
|
"rewards/rejected": -28.358505249023438, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2510602205258694, |
|
"grad_norm": 91.04185257653407, |
|
"learning_rate": 7.466736378735035e-07, |
|
"logits/chosen": -0.8114999532699585, |
|
"logits/rejected": -0.813449501991272, |
|
"logps/chosen": -2.6687309741973877, |
|
"logps/rejected": -3.0298876762390137, |
|
"loss": 3.5266, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -26.68730926513672, |
|
"rewards/margins": 3.6115658283233643, |
|
"rewards/rejected": -30.298873901367188, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.2533220243143907, |
|
"grad_norm": 103.42000228406864, |
|
"learning_rate": 7.450837503828439e-07, |
|
"logits/chosen": -0.7962609529495239, |
|
"logits/rejected": -0.7812699675559998, |
|
"logps/chosen": -2.829650640487671, |
|
"logps/rejected": -3.2509734630584717, |
|
"loss": 3.4804, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -28.296504974365234, |
|
"rewards/margins": 4.213228702545166, |
|
"rewards/rejected": -32.509735107421875, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.2555838281029121, |
|
"grad_norm": 79.3801670576343, |
|
"learning_rate": 7.43472253578449e-07, |
|
"logits/chosen": -0.792640745639801, |
|
"logits/rejected": -0.7946760654449463, |
|
"logps/chosen": -2.4934792518615723, |
|
"logps/rejected": -2.861074447631836, |
|
"loss": 3.6492, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -24.934791564941406, |
|
"rewards/margins": 3.675952434539795, |
|
"rewards/rejected": -28.61074447631836, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.2578456318914334, |
|
"grad_norm": 84.41846931366052, |
|
"learning_rate": 7.418392483730389e-07, |
|
"logits/chosen": -0.8063937425613403, |
|
"logits/rejected": -0.8131504058837891, |
|
"logps/chosen": -2.685357093811035, |
|
"logps/rejected": -3.0498905181884766, |
|
"loss": 3.312, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -26.85357093811035, |
|
"rewards/margins": 3.6453330516815186, |
|
"rewards/rejected": -30.4989013671875, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.26010743567995476, |
|
"grad_norm": 83.25446388909985, |
|
"learning_rate": 7.401848370262012e-07, |
|
"logits/chosen": -0.8394590020179749, |
|
"logits/rejected": -0.8276815414428711, |
|
"logps/chosen": -2.709625244140625, |
|
"logps/rejected": -2.9904394149780273, |
|
"loss": 3.5602, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -27.096254348754883, |
|
"rewards/margins": 2.808140754699707, |
|
"rewards/rejected": -29.904394149780273, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2623692394684761, |
|
"grad_norm": 99.34909208476857, |
|
"learning_rate": 7.385091231379856e-07, |
|
"logits/chosen": -0.8177067041397095, |
|
"logits/rejected": -0.8186391592025757, |
|
"logps/chosen": -2.9124531745910645, |
|
"logps/rejected": -3.2890923023223877, |
|
"loss": 3.7712, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -29.124530792236328, |
|
"rewards/margins": 3.7663931846618652, |
|
"rewards/rejected": -32.89092254638672, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.26463104325699743, |
|
"grad_norm": 196.05646746243912, |
|
"learning_rate": 7.368122116424182e-07, |
|
"logits/chosen": -0.7795528769493103, |
|
"logits/rejected": -0.7889488935470581, |
|
"logps/chosen": -2.8638410568237305, |
|
"logps/rejected": -3.2152295112609863, |
|
"loss": 3.873, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -28.63841438293457, |
|
"rewards/margins": 3.5138840675354004, |
|
"rewards/rejected": -32.15229797363281, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.2668928470455188, |
|
"grad_norm": 123.65267983469268, |
|
"learning_rate": 7.350942088009289e-07, |
|
"logits/chosen": -0.8420966863632202, |
|
"logits/rejected": -0.8411574363708496, |
|
"logps/chosen": -2.9619340896606445, |
|
"logps/rejected": -3.260565996170044, |
|
"loss": 3.5104, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -29.619338989257812, |
|
"rewards/margins": 2.9863200187683105, |
|
"rewards/rejected": -32.60565948486328, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.26915465083404017, |
|
"grad_norm": 124.41958542086248, |
|
"learning_rate": 7.333552221956986e-07, |
|
"logits/chosen": -0.9466845393180847, |
|
"logits/rejected": -0.9294576644897461, |
|
"logps/chosen": -3.071857213973999, |
|
"logps/rejected": -3.4685423374176025, |
|
"loss": 3.8289, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -30.718576431274414, |
|
"rewards/margins": 3.9668469429016113, |
|
"rewards/rejected": -34.6854248046875, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.2714164546225615, |
|
"grad_norm": 95.20752031494493, |
|
"learning_rate": 7.315953607229217e-07, |
|
"logits/chosen": -0.8441572785377502, |
|
"logits/rejected": -0.8446038961410522, |
|
"logps/chosen": -3.104475498199463, |
|
"logps/rejected": -3.505401611328125, |
|
"loss": 3.32, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -31.04475212097168, |
|
"rewards/margins": 4.009262561798096, |
|
"rewards/rejected": -35.05401611328125, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27367825841108284, |
|
"grad_norm": 106.09440015221676, |
|
"learning_rate": 7.298147345859869e-07, |
|
"logits/chosen": -0.8386214375495911, |
|
"logits/rejected": -0.8599450588226318, |
|
"logps/chosen": -2.900517463684082, |
|
"logps/rejected": -3.2684617042541504, |
|
"loss": 3.5845, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -29.00517463684082, |
|
"rewards/margins": 3.6794400215148926, |
|
"rewards/rejected": -32.68461608886719, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.2759400621996042, |
|
"grad_norm": 160.41836210136088, |
|
"learning_rate": 7.280134552885762e-07, |
|
"logits/chosen": -0.8167920112609863, |
|
"logits/rejected": -0.8117007613182068, |
|
"logps/chosen": -2.9862632751464844, |
|
"logps/rejected": -3.363959789276123, |
|
"loss": 3.5251, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -29.86263084411621, |
|
"rewards/margins": 3.776963710784912, |
|
"rewards/rejected": -33.63959884643555, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.2782018659881255, |
|
"grad_norm": 99.5977130216733, |
|
"learning_rate": 7.261916356276831e-07, |
|
"logits/chosen": -0.8167967796325684, |
|
"logits/rejected": -0.8092377185821533, |
|
"logps/chosen": -3.0471675395965576, |
|
"logps/rejected": -3.5443172454833984, |
|
"loss": 2.9443, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -30.47167205810547, |
|
"rewards/margins": 4.971498489379883, |
|
"rewards/rejected": -35.443172454833984, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.2804636697766469, |
|
"grad_norm": 98.66168594344816, |
|
"learning_rate": 7.243493896865486e-07, |
|
"logits/chosen": -0.8218358755111694, |
|
"logits/rejected": -0.8053916096687317, |
|
"logps/chosen": -2.7801990509033203, |
|
"logps/rejected": -3.0717597007751465, |
|
"loss": 3.5017, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -27.801990509033203, |
|
"rewards/margins": 2.9156064987182617, |
|
"rewards/rejected": -30.71759796142578, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.2827254735651682, |
|
"grad_norm": 114.74683317574737, |
|
"learning_rate": 7.224868328275169e-07, |
|
"logits/chosen": -0.8093046545982361, |
|
"logits/rejected": -0.805727481842041, |
|
"logps/chosen": -2.894709348678589, |
|
"logps/rejected": -3.254554033279419, |
|
"loss": 3.7134, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -28.947093963623047, |
|
"rewards/margins": 3.5984435081481934, |
|
"rewards/rejected": -32.54553985595703, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.28498727735368956, |
|
"grad_norm": 194.3294016359615, |
|
"learning_rate": 7.206040816848126e-07, |
|
"logits/chosen": -0.8390508890151978, |
|
"logits/rejected": -0.8263464570045471, |
|
"logps/chosen": -3.065004825592041, |
|
"logps/rejected": -3.259084939956665, |
|
"loss": 4.415, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -30.650049209594727, |
|
"rewards/margins": 1.9408013820648193, |
|
"rewards/rejected": -32.590850830078125, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.2872490811422109, |
|
"grad_norm": 120.3073948329199, |
|
"learning_rate": 7.187012541572356e-07, |
|
"logits/chosen": -0.905938446521759, |
|
"logits/rejected": -0.8901224136352539, |
|
"logps/chosen": -3.1450047492980957, |
|
"logps/rejected": -3.485504627227783, |
|
"loss": 3.7657, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -31.450042724609375, |
|
"rewards/margins": 3.4050049781799316, |
|
"rewards/rejected": -34.85504913330078, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.28951088493073224, |
|
"grad_norm": 137.9411831080582, |
|
"learning_rate": 7.167784694007791e-07, |
|
"logits/chosen": -0.8116433620452881, |
|
"logits/rejected": -0.8170086741447449, |
|
"logps/chosen": -3.0017967224121094, |
|
"logps/rejected": -3.353875160217285, |
|
"loss": 3.7386, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -30.017967224121094, |
|
"rewards/margins": 3.5207817554473877, |
|
"rewards/rejected": -33.53874969482422, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.2917726887192536, |
|
"grad_norm": 110.61290745803281, |
|
"learning_rate": 7.148358478211682e-07, |
|
"logits/chosen": -0.8747140169143677, |
|
"logits/rejected": -0.8586560487747192, |
|
"logps/chosen": -3.0916569232940674, |
|
"logps/rejected": -3.517625331878662, |
|
"loss": 2.9442, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -30.91657066345215, |
|
"rewards/margins": 4.259681701660156, |
|
"rewards/rejected": -35.17625045776367, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.29403449250777497, |
|
"grad_norm": 84.56951029170779, |
|
"learning_rate": 7.128735110663187e-07, |
|
"logits/chosen": -0.8497614860534668, |
|
"logits/rejected": -0.8194679617881775, |
|
"logps/chosen": -2.7605772018432617, |
|
"logps/rejected": -3.1635406017303467, |
|
"loss": 3.4017, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -27.605772018432617, |
|
"rewards/margins": 4.029631614685059, |
|
"rewards/rejected": -31.63540267944336, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2962962962962963, |
|
"grad_norm": 112.17591295821964, |
|
"learning_rate": 7.108915820187211e-07, |
|
"logits/chosen": -0.8097432255744934, |
|
"logits/rejected": -0.8088663816452026, |
|
"logps/chosen": -3.1781814098358154, |
|
"logps/rejected": -3.545646905899048, |
|
"loss": 3.8459, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -31.781814575195312, |
|
"rewards/margins": 3.6746530532836914, |
|
"rewards/rejected": -35.45646667480469, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.29855810008481765, |
|
"grad_norm": 120.89188876376829, |
|
"learning_rate": 7.088901847877447e-07, |
|
"logits/chosen": -0.7971144914627075, |
|
"logits/rejected": -0.7930186986923218, |
|
"logps/chosen": -3.015921115875244, |
|
"logps/rejected": -3.2458338737487793, |
|
"loss": 4.6776, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -30.159210205078125, |
|
"rewards/margins": 2.2991273403167725, |
|
"rewards/rejected": -32.458335876464844, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.300819903873339, |
|
"grad_norm": 123.91186702122735, |
|
"learning_rate": 7.068694447018658e-07, |
|
"logits/chosen": -0.8384436964988708, |
|
"logits/rejected": -0.846354603767395, |
|
"logps/chosen": -3.0088987350463867, |
|
"logps/rejected": -3.410034656524658, |
|
"loss": 3.3848, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -30.088991165161133, |
|
"rewards/margins": 4.011353492736816, |
|
"rewards/rejected": -34.100341796875, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.3030817076618603, |
|
"grad_norm": 98.50507786273067, |
|
"learning_rate": 7.048294883008199e-07, |
|
"logits/chosen": -0.8138392567634583, |
|
"logits/rejected": -0.8176507353782654, |
|
"logps/chosen": -2.9301810264587402, |
|
"logps/rejected": -3.2924013137817383, |
|
"loss": 3.3271, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -29.30181121826172, |
|
"rewards/margins": 3.6222026348114014, |
|
"rewards/rejected": -32.924015045166016, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.3053435114503817, |
|
"grad_norm": 80.23471164611576, |
|
"learning_rate": 7.027704433276776e-07, |
|
"logits/chosen": -0.7829840183258057, |
|
"logits/rejected": -0.7787750363349915, |
|
"logps/chosen": -2.9372944831848145, |
|
"logps/rejected": -3.404730796813965, |
|
"loss": 3.1137, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -29.37294578552246, |
|
"rewards/margins": 4.674362659454346, |
|
"rewards/rejected": -34.047306060791016, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.307605315238903, |
|
"grad_norm": 118.34531582043013, |
|
"learning_rate": 7.006924387208452e-07, |
|
"logits/chosen": -0.7873696088790894, |
|
"logits/rejected": -0.7685777544975281, |
|
"logps/chosen": -2.834895610809326, |
|
"logps/rejected": -3.1748013496398926, |
|
"loss": 3.3333, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -28.348957061767578, |
|
"rewards/margins": 3.3990557193756104, |
|
"rewards/rejected": -31.74801254272461, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.30986711902742436, |
|
"grad_norm": 95.16445157429489, |
|
"learning_rate": 6.985956046059904e-07, |
|
"logits/chosen": -0.7679412961006165, |
|
"logits/rejected": -0.7677736878395081, |
|
"logps/chosen": -2.7393550872802734, |
|
"logps/rejected": -3.1393771171569824, |
|
"loss": 3.5669, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -27.393552780151367, |
|
"rewards/margins": 4.000221252441406, |
|
"rewards/rejected": -31.39377212524414, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.31212892281594573, |
|
"grad_norm": 82.62714852037034, |
|
"learning_rate": 6.964800722878945e-07, |
|
"logits/chosen": -0.724460244178772, |
|
"logits/rejected": -0.721074104309082, |
|
"logps/chosen": -2.9422450065612793, |
|
"logps/rejected": -3.427375555038452, |
|
"loss": 3.0358, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -29.42245101928711, |
|
"rewards/margins": 4.85130500793457, |
|
"rewards/rejected": -34.27375411987305, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.31439072660446704, |
|
"grad_norm": 88.6570876412774, |
|
"learning_rate": 6.943459742422287e-07, |
|
"logits/chosen": -0.75481116771698, |
|
"logits/rejected": -0.7281723022460938, |
|
"logps/chosen": -2.926560401916504, |
|
"logps/rejected": -3.3593132495880127, |
|
"loss": 3.6077, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -29.265605926513672, |
|
"rewards/margins": 4.327524662017822, |
|
"rewards/rejected": -33.59313201904297, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3166525303929884, |
|
"grad_norm": 97.82195429096225, |
|
"learning_rate": 6.921934441072597e-07, |
|
"logits/chosen": -0.800703227519989, |
|
"logits/rejected": -0.8018285036087036, |
|
"logps/chosen": -3.131863594055176, |
|
"logps/rejected": -3.468980073928833, |
|
"loss": 3.8994, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -31.318635940551758, |
|
"rewards/margins": 3.3711633682250977, |
|
"rewards/rejected": -34.68980026245117, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3189143341815098, |
|
"grad_norm": 130.70910880053893, |
|
"learning_rate": 6.900226166754807e-07, |
|
"logits/chosen": -0.7732895016670227, |
|
"logits/rejected": -0.7903754115104675, |
|
"logps/chosen": -3.2161002159118652, |
|
"logps/rejected": -3.525928497314453, |
|
"loss": 3.9513, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -32.1609992980957, |
|
"rewards/margins": 3.0982844829559326, |
|
"rewards/rejected": -35.2592887878418, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.3211761379700311, |
|
"grad_norm": 94.8619719166109, |
|
"learning_rate": 6.8783362788517e-07, |
|
"logits/chosen": -0.7802690267562866, |
|
"logits/rejected": -0.781207263469696, |
|
"logps/chosen": -3.136306047439575, |
|
"logps/rejected": -3.542205333709717, |
|
"loss": 4.0634, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -31.363059997558594, |
|
"rewards/margins": 4.058990478515625, |
|
"rewards/rejected": -35.422054290771484, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.32343794175855245, |
|
"grad_norm": 96.17049011345333, |
|
"learning_rate": 6.856266148118796e-07, |
|
"logits/chosen": -0.7571829557418823, |
|
"logits/rejected": -0.7646656036376953, |
|
"logps/chosen": -2.8659865856170654, |
|
"logps/rejected": -3.3385119438171387, |
|
"loss": 3.2572, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -28.659862518310547, |
|
"rewards/margins": 4.725252151489258, |
|
"rewards/rejected": -33.38511657714844, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.3256997455470738, |
|
"grad_norm": 110.19796175719054, |
|
"learning_rate": 6.834017156598512e-07, |
|
"logits/chosen": -0.7483683228492737, |
|
"logits/rejected": -0.7414959073066711, |
|
"logps/chosen": -3.109438419342041, |
|
"logps/rejected": -3.5427985191345215, |
|
"loss": 3.4928, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -31.09438133239746, |
|
"rewards/margins": 4.333602428436279, |
|
"rewards/rejected": -35.427982330322266, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.3279615493355951, |
|
"grad_norm": 79.78726439178003, |
|
"learning_rate": 6.811590697533607e-07, |
|
"logits/chosen": -0.8195265531539917, |
|
"logits/rejected": -0.838479220867157, |
|
"logps/chosen": -3.01442813873291, |
|
"logps/rejected": -3.3907887935638428, |
|
"loss": 3.5141, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -30.144283294677734, |
|
"rewards/margins": 3.7636024951934814, |
|
"rewards/rejected": -33.90788650512695, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.3302233531241165, |
|
"grad_norm": 151.81377404607898, |
|
"learning_rate": 6.788988175279951e-07, |
|
"logits/chosen": -0.7769032120704651, |
|
"logits/rejected": -0.7602939605712891, |
|
"logps/chosen": -3.0373010635375977, |
|
"logps/rejected": -3.367269515991211, |
|
"loss": 4.0091, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -30.373010635375977, |
|
"rewards/margins": 3.299685001373291, |
|
"rewards/rejected": -33.672698974609375, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3324851569126378, |
|
"grad_norm": 93.15629599355084, |
|
"learning_rate": 6.766211005218577e-07, |
|
"logits/chosen": -0.7618966698646545, |
|
"logits/rejected": -0.7614046931266785, |
|
"logps/chosen": -3.0041072368621826, |
|
"logps/rejected": -3.5381112098693848, |
|
"loss": 3.0388, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -30.04107093811035, |
|
"rewards/margins": 5.340038299560547, |
|
"rewards/rejected": -35.38111114501953, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.33474696070115917, |
|
"grad_norm": 93.03406880519788, |
|
"learning_rate": 6.743260613667047e-07, |
|
"logits/chosen": -0.8518264889717102, |
|
"logits/rejected": -0.8462361693382263, |
|
"logps/chosen": -2.939993143081665, |
|
"logps/rejected": -3.3589367866516113, |
|
"loss": 3.4992, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -29.399932861328125, |
|
"rewards/margins": 4.18943452835083, |
|
"rewards/rejected": -33.58937072753906, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.33700876448968053, |
|
"grad_norm": 73.40361139697784, |
|
"learning_rate": 6.720138437790139e-07, |
|
"logits/chosen": -0.8052965998649597, |
|
"logits/rejected": -0.7937459945678711, |
|
"logps/chosen": -2.8842406272888184, |
|
"logps/rejected": -3.307342529296875, |
|
"loss": 3.1965, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -28.8424015045166, |
|
"rewards/margins": 4.23102331161499, |
|
"rewards/rejected": -33.07342529296875, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.33927056827820185, |
|
"grad_norm": 106.95490507848946, |
|
"learning_rate": 6.696845925509848e-07, |
|
"logits/chosen": -0.8310205936431885, |
|
"logits/rejected": -0.8272488713264465, |
|
"logps/chosen": -2.934943437576294, |
|
"logps/rejected": -3.2793498039245605, |
|
"loss": 3.6164, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -29.349435806274414, |
|
"rewards/margins": 3.444066047668457, |
|
"rewards/rejected": -32.79349899291992, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3415323720667232, |
|
"grad_norm": 100.186884927208, |
|
"learning_rate": 6.673384535414718e-07, |
|
"logits/chosen": -0.8480186462402344, |
|
"logits/rejected": -0.8310289978981018, |
|
"logps/chosen": -3.0514492988586426, |
|
"logps/rejected": -3.3322248458862305, |
|
"loss": 3.9173, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -30.514493942260742, |
|
"rewards/margins": 2.8077542781829834, |
|
"rewards/rejected": -33.32224655151367, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.3437941758552446, |
|
"grad_norm": 100.75576403172839, |
|
"learning_rate": 6.649755736668511e-07, |
|
"logits/chosen": -0.7694522738456726, |
|
"logits/rejected": -0.7615189552307129, |
|
"logps/chosen": -2.6866354942321777, |
|
"logps/rejected": -3.11234188079834, |
|
"loss": 2.8237, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -26.866352081298828, |
|
"rewards/margins": 4.257061958312988, |
|
"rewards/rejected": -31.123416900634766, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3460559796437659, |
|
"grad_norm": 117.9327947173104, |
|
"learning_rate": 6.625961008918192e-07, |
|
"logits/chosen": -0.7936750054359436, |
|
"logits/rejected": -0.7835503220558167, |
|
"logps/chosen": -2.7540676593780518, |
|
"logps/rejected": -3.2012295722961426, |
|
"loss": 2.9183, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -27.540678024291992, |
|
"rewards/margins": 4.471617221832275, |
|
"rewards/rejected": -32.01229476928711, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.34831778343228725, |
|
"grad_norm": 96.1956695260042, |
|
"learning_rate": 6.602001842201289e-07, |
|
"logits/chosen": -0.7796362042427063, |
|
"logits/rejected": -0.7905425429344177, |
|
"logps/chosen": -2.7750422954559326, |
|
"logps/rejected": -3.049340009689331, |
|
"loss": 4.0488, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -27.75042152404785, |
|
"rewards/margins": 2.74297833442688, |
|
"rewards/rejected": -30.49340057373047, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.3505795872208086, |
|
"grad_norm": 101.24392958865374, |
|
"learning_rate": 6.577879736852571e-07, |
|
"logits/chosen": -0.8088594079017639, |
|
"logits/rejected": -0.8155099749565125, |
|
"logps/chosen": -2.821500301361084, |
|
"logps/rejected": -3.0726358890533447, |
|
"loss": 3.9081, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -28.215003967285156, |
|
"rewards/margins": 2.5113601684570312, |
|
"rewards/rejected": -30.726364135742188, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.35284139100932993, |
|
"grad_norm": 73.2743068340787, |
|
"learning_rate": 6.553596203410112e-07, |
|
"logits/chosen": -0.8153470754623413, |
|
"logits/rejected": -0.8048913478851318, |
|
"logps/chosen": -2.7679576873779297, |
|
"logps/rejected": -3.2883174419403076, |
|
"loss": 2.546, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -27.679576873779297, |
|
"rewards/margins": 5.203596115112305, |
|
"rewards/rejected": -32.883174896240234, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.3551031947978513, |
|
"grad_norm": 82.3012728094315, |
|
"learning_rate": 6.529152762520688e-07, |
|
"logits/chosen": -0.8138669729232788, |
|
"logits/rejected": -0.8138793110847473, |
|
"logps/chosen": -2.864006757736206, |
|
"logps/rejected": -3.2064666748046875, |
|
"loss": 3.5646, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -28.64006805419922, |
|
"rewards/margins": 3.424598217010498, |
|
"rewards/rejected": -32.06466293334961, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.3573649985863726, |
|
"grad_norm": 100.03152867926248, |
|
"learning_rate": 6.504550944844558e-07, |
|
"logits/chosen": -0.7475910782814026, |
|
"logits/rejected": -0.7779514789581299, |
|
"logps/chosen": -2.7473607063293457, |
|
"logps/rejected": -3.1302061080932617, |
|
"loss": 3.452, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -27.47360610961914, |
|
"rewards/margins": 3.8284525871276855, |
|
"rewards/rejected": -31.302059173583984, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.359626802374894, |
|
"grad_norm": 105.22720404522045, |
|
"learning_rate": 6.479792290959613e-07, |
|
"logits/chosen": -0.7691587209701538, |
|
"logits/rejected": -0.7878850698471069, |
|
"logps/chosen": -2.8018503189086914, |
|
"logps/rejected": -3.312527656555176, |
|
"loss": 3.2183, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -28.01850128173828, |
|
"rewards/margins": 5.106773853302002, |
|
"rewards/rejected": -33.125274658203125, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.36188860616341534, |
|
"grad_norm": 89.61616128866521, |
|
"learning_rate": 6.454878351264906e-07, |
|
"logits/chosen": -0.7589330673217773, |
|
"logits/rejected": -0.745934009552002, |
|
"logps/chosen": -2.6822848320007324, |
|
"logps/rejected": -3.0995330810546875, |
|
"loss": 3.4046, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -26.822847366333008, |
|
"rewards/margins": 4.172482490539551, |
|
"rewards/rejected": -30.995328903198242, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36415040995193665, |
|
"grad_norm": 96.61923230400093, |
|
"learning_rate": 6.429810685883565e-07, |
|
"logits/chosen": -0.8186591267585754, |
|
"logits/rejected": -0.82514488697052, |
|
"logps/chosen": -2.8654110431671143, |
|
"logps/rejected": -3.2399096488952637, |
|
"loss": 3.238, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -28.65411376953125, |
|
"rewards/margins": 3.7449822425842285, |
|
"rewards/rejected": -32.39909362792969, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.366412213740458, |
|
"grad_norm": 109.68200048681109, |
|
"learning_rate": 6.404590864565088e-07, |
|
"logits/chosen": -0.7650143504142761, |
|
"logits/rejected": -0.7517848014831543, |
|
"logps/chosen": -2.817117214202881, |
|
"logps/rejected": -3.050743341445923, |
|
"loss": 3.9095, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -28.171171188354492, |
|
"rewards/margins": 2.3362598419189453, |
|
"rewards/rejected": -30.507431030273438, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.3686740175289794, |
|
"grad_norm": 103.61475684738596, |
|
"learning_rate": 6.379220466587063e-07, |
|
"logits/chosen": -0.7960351705551147, |
|
"logits/rejected": -0.7686564922332764, |
|
"logps/chosen": -2.810275077819824, |
|
"logps/rejected": -3.18802809715271, |
|
"loss": 3.246, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -28.10274887084961, |
|
"rewards/margins": 3.777529239654541, |
|
"rewards/rejected": -31.880279541015625, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.3709358213175007, |
|
"grad_norm": 119.25022849905575, |
|
"learning_rate": 6.353701080656254e-07, |
|
"logits/chosen": -0.7721018195152283, |
|
"logits/rejected": -0.7901967763900757, |
|
"logps/chosen": -2.9517931938171387, |
|
"logps/rejected": -3.250936985015869, |
|
"loss": 3.6435, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -29.517929077148438, |
|
"rewards/margins": 2.9914417266845703, |
|
"rewards/rejected": -32.50937271118164, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.37319762510602206, |
|
"grad_norm": 106.89469142388397, |
|
"learning_rate": 6.32803430480913e-07, |
|
"logits/chosen": -0.7933882474899292, |
|
"logits/rejected": -0.785070538520813, |
|
"logps/chosen": -2.898366689682007, |
|
"logps/rejected": -3.3264529705047607, |
|
"loss": 3.3983, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -28.98366928100586, |
|
"rewards/margins": 4.2808613777160645, |
|
"rewards/rejected": -33.264530181884766, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3754594288945434, |
|
"grad_norm": 90.5066177326131, |
|
"learning_rate": 6.302221746311782e-07, |
|
"logits/chosen": -0.8018909096717834, |
|
"logits/rejected": -0.7745494246482849, |
|
"logps/chosen": -2.7565348148345947, |
|
"logps/rejected": -3.1439740657806396, |
|
"loss": 3.4065, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -27.565349578857422, |
|
"rewards/margins": 3.874392032623291, |
|
"rewards/rejected": -31.439741134643555, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.37772123268306473, |
|
"grad_norm": 86.55925111562716, |
|
"learning_rate": 6.276265021559288e-07, |
|
"logits/chosen": -0.8132920861244202, |
|
"logits/rejected": -0.8016676306724548, |
|
"logps/chosen": -2.9889135360717773, |
|
"logps/rejected": -3.338178873062134, |
|
"loss": 3.5752, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -29.88913345336914, |
|
"rewards/margins": 3.4926562309265137, |
|
"rewards/rejected": -33.38179016113281, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.3799830364715861, |
|
"grad_norm": 73.35269687730333, |
|
"learning_rate": 6.250165755974487e-07, |
|
"logits/chosen": -0.757270336151123, |
|
"logits/rejected": -0.7608906626701355, |
|
"logps/chosen": -2.9331836700439453, |
|
"logps/rejected": -3.337920904159546, |
|
"loss": 3.1407, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -29.33183479309082, |
|
"rewards/margins": 4.047374725341797, |
|
"rewards/rejected": -33.37921142578125, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.3822448402601074, |
|
"grad_norm": 86.91929641048736, |
|
"learning_rate": 6.223925583906192e-07, |
|
"logits/chosen": -0.8268415331840515, |
|
"logits/rejected": -0.8238467574119568, |
|
"logps/chosen": -3.028745174407959, |
|
"logps/rejected": -3.4829823970794678, |
|
"loss": 2.8551, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -30.287450790405273, |
|
"rewards/margins": 4.5423736572265625, |
|
"rewards/rejected": -34.8298225402832, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.3845066440486288, |
|
"grad_norm": 85.76060468853915, |
|
"learning_rate": 6.19754614852685e-07, |
|
"logits/chosen": -0.8132871985435486, |
|
"logits/rejected": -0.8051560521125793, |
|
"logps/chosen": -2.942837953567505, |
|
"logps/rejected": -3.3470842838287354, |
|
"loss": 3.0876, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -29.428375244140625, |
|
"rewards/margins": 4.04246711730957, |
|
"rewards/rejected": -33.470848083496094, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38676844783715014, |
|
"grad_norm": 101.76248326578423, |
|
"learning_rate": 6.171029101729644e-07, |
|
"logits/chosen": -0.733617901802063, |
|
"logits/rejected": -0.7339813709259033, |
|
"logps/chosen": -3.1949334144592285, |
|
"logps/rejected": -3.661990165710449, |
|
"loss": 3.1671, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -31.94933319091797, |
|
"rewards/margins": 4.670570373535156, |
|
"rewards/rejected": -36.61989974975586, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.38903025162567145, |
|
"grad_norm": 113.9018020285417, |
|
"learning_rate": 6.144376104025055e-07, |
|
"logits/chosen": -0.8161033987998962, |
|
"logits/rejected": -0.8007526993751526, |
|
"logps/chosen": -3.0398004055023193, |
|
"logps/rejected": -3.4840195178985596, |
|
"loss": 3.1426, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -30.398000717163086, |
|
"rewards/margins": 4.442192554473877, |
|
"rewards/rejected": -34.84019470214844, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.3912920554141928, |
|
"grad_norm": 109.66608111755836, |
|
"learning_rate": 6.117588824436873e-07, |
|
"logits/chosen": -0.8302391171455383, |
|
"logits/rejected": -0.8384109735488892, |
|
"logps/chosen": -3.1871933937072754, |
|
"logps/rejected": -3.547971487045288, |
|
"loss": 3.7869, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -31.87193489074707, |
|
"rewards/margins": 3.607778549194336, |
|
"rewards/rejected": -35.47970962524414, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.3935538592027142, |
|
"grad_norm": 97.34693909102697, |
|
"learning_rate": 6.090668940397688e-07, |
|
"logits/chosen": -0.7868531942367554, |
|
"logits/rejected": -0.7912797331809998, |
|
"logps/chosen": -3.1741623878479004, |
|
"logps/rejected": -3.6076653003692627, |
|
"loss": 3.2467, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -31.741622924804688, |
|
"rewards/margins": 4.3350300788879395, |
|
"rewards/rejected": -36.07665252685547, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.3958156629912355, |
|
"grad_norm": 112.75426033024542, |
|
"learning_rate": 6.063618137643844e-07, |
|
"logits/chosen": -0.7921246290206909, |
|
"logits/rejected": -0.78474360704422, |
|
"logps/chosen": -3.200976610183716, |
|
"logps/rejected": -3.6109395027160645, |
|
"loss": 3.3634, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -32.009769439697266, |
|
"rewards/margins": 4.099628925323486, |
|
"rewards/rejected": -36.109397888183594, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.39807746677975686, |
|
"grad_norm": 113.32746124615062, |
|
"learning_rate": 6.03643811010988e-07, |
|
"logits/chosen": -0.8276042938232422, |
|
"logits/rejected": -0.8417137265205383, |
|
"logps/chosen": -3.3886866569519043, |
|
"logps/rejected": -3.824484348297119, |
|
"loss": 3.0549, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -33.886863708496094, |
|
"rewards/margins": 4.357980728149414, |
|
"rewards/rejected": -38.24484634399414, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.4003392705682782, |
|
"grad_norm": 115.44790456463876, |
|
"learning_rate": 6.009130559822453e-07, |
|
"logits/chosen": -0.8511748909950256, |
|
"logits/rejected": -0.8455148339271545, |
|
"logps/chosen": -3.3521997928619385, |
|
"logps/rejected": -3.621072292327881, |
|
"loss": 4.1968, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -33.521995544433594, |
|
"rewards/margins": 2.6887285709381104, |
|
"rewards/rejected": -36.210723876953125, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.40260107435679954, |
|
"grad_norm": 108.10466917985082, |
|
"learning_rate": 5.981697196793758e-07, |
|
"logits/chosen": -0.8837382793426514, |
|
"logits/rejected": -0.8865911364555359, |
|
"logps/chosen": -3.501465082168579, |
|
"logps/rejected": -3.9723386764526367, |
|
"loss": 2.5792, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -35.0146484375, |
|
"rewards/margins": 4.708735466003418, |
|
"rewards/rejected": -39.723388671875, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.4048628781453209, |
|
"grad_norm": 113.75351199007196, |
|
"learning_rate": 5.954139738914446e-07, |
|
"logits/chosen": -0.8577677607536316, |
|
"logits/rejected": -0.869698703289032, |
|
"logps/chosen": -3.4370806217193604, |
|
"logps/rejected": -3.856444835662842, |
|
"loss": 3.4991, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -34.37080764770508, |
|
"rewards/margins": 4.193644046783447, |
|
"rewards/rejected": -38.564453125, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.4071246819338422, |
|
"grad_norm": 105.83270319758616, |
|
"learning_rate": 5.92645991184605e-07, |
|
"logits/chosen": -0.8364049792289734, |
|
"logits/rejected": -0.8378995656967163, |
|
"logps/chosen": -3.5678813457489014, |
|
"logps/rejected": -4.040313720703125, |
|
"loss": 2.9427, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -35.67881393432617, |
|
"rewards/margins": 4.724322319030762, |
|
"rewards/rejected": -40.403133392333984, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4093864857223636, |
|
"grad_norm": 97.36394616930042, |
|
"learning_rate": 5.898659448912917e-07, |
|
"logits/chosen": -0.8220387697219849, |
|
"logits/rejected": -0.8456301689147949, |
|
"logps/chosen": -3.3772408962249756, |
|
"logps/rejected": -3.840843915939331, |
|
"loss": 3.2422, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -33.77241134643555, |
|
"rewards/margins": 4.636030197143555, |
|
"rewards/rejected": -38.40843963623047, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.41164828951088495, |
|
"grad_norm": 119.36449481585935, |
|
"learning_rate": 5.870740090993676e-07, |
|
"logits/chosen": -0.8707118034362793, |
|
"logits/rejected": -0.8762695789337158, |
|
"logps/chosen": -3.7325124740600586, |
|
"logps/rejected": -4.200765609741211, |
|
"loss": 3.0099, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -37.32512664794922, |
|
"rewards/margins": 4.68253755569458, |
|
"rewards/rejected": -42.007659912109375, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.41391009329940626, |
|
"grad_norm": 112.59179043910055, |
|
"learning_rate": 5.842703586412214e-07, |
|
"logits/chosen": -0.8712838888168335, |
|
"logits/rejected": -0.8757526278495789, |
|
"logps/chosen": -3.783506393432617, |
|
"logps/rejected": -4.1643571853637695, |
|
"loss": 3.9002, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -37.83506774902344, |
|
"rewards/margins": 3.8085036277770996, |
|
"rewards/rejected": -41.6435661315918, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.4161718970879276, |
|
"grad_norm": 129.66104630601248, |
|
"learning_rate": 5.814551690828203e-07, |
|
"logits/chosen": -0.8484200239181519, |
|
"logits/rejected": -0.861181378364563, |
|
"logps/chosen": -3.624147653579712, |
|
"logps/rejected": -4.078574180603027, |
|
"loss": 2.9549, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -36.241477966308594, |
|
"rewards/margins": 4.544264793395996, |
|
"rewards/rejected": -40.785743713378906, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.418433700876449, |
|
"grad_norm": 123.33358190542005, |
|
"learning_rate": 5.786286167127155e-07, |
|
"logits/chosen": -0.8848705291748047, |
|
"logits/rejected": -0.8773502111434937, |
|
"logps/chosen": -3.5523104667663574, |
|
"logps/rejected": -4.035827159881592, |
|
"loss": 3.2787, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -35.523101806640625, |
|
"rewards/margins": 4.83516788482666, |
|
"rewards/rejected": -40.35826873779297, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4206955046649703, |
|
"grad_norm": 105.18151417237235, |
|
"learning_rate": 5.757908785310031e-07, |
|
"logits/chosen": -0.812483012676239, |
|
"logits/rejected": -0.8327686786651611, |
|
"logps/chosen": -3.3677561283111572, |
|
"logps/rejected": -3.8305165767669678, |
|
"loss": 3.089, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -33.67756271362305, |
|
"rewards/margins": 4.627603530883789, |
|
"rewards/rejected": -38.3051643371582, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.42295730845349166, |
|
"grad_norm": 112.39088354266822, |
|
"learning_rate": 5.729421322382399e-07, |
|
"logits/chosen": -0.8071901202201843, |
|
"logits/rejected": -0.8371500372886658, |
|
"logps/chosen": -3.244313955307007, |
|
"logps/rejected": -3.724259376525879, |
|
"loss": 3.1866, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -32.443138122558594, |
|
"rewards/margins": 4.799454212188721, |
|
"rewards/rejected": -37.242591857910156, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.42521911224201303, |
|
"grad_norm": 99.91697305071902, |
|
"learning_rate": 5.700825562243163e-07, |
|
"logits/chosen": -0.7996731996536255, |
|
"logits/rejected": -0.8074153065681458, |
|
"logps/chosen": -3.3295788764953613, |
|
"logps/rejected": -3.8264358043670654, |
|
"loss": 3.0297, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -33.29579162597656, |
|
"rewards/margins": 4.968564510345459, |
|
"rewards/rejected": -38.26435089111328, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.42748091603053434, |
|
"grad_norm": 106.43222768263621, |
|
"learning_rate": 5.672123295572854e-07, |
|
"logits/chosen": -0.8531807661056519, |
|
"logits/rejected": -0.8710072636604309, |
|
"logps/chosen": -3.4436635971069336, |
|
"logps/rejected": -3.7774899005889893, |
|
"loss": 3.2074, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -34.43663787841797, |
|
"rewards/margins": 3.3382644653320312, |
|
"rewards/rejected": -37.774898529052734, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.4297427198190557, |
|
"grad_norm": 101.78216988587263, |
|
"learning_rate": 5.643316319721487e-07, |
|
"logits/chosen": -0.834848940372467, |
|
"logits/rejected": -0.8536701798439026, |
|
"logps/chosen": -3.5879836082458496, |
|
"logps/rejected": -3.99747896194458, |
|
"loss": 3.6465, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -35.87983703613281, |
|
"rewards/margins": 4.094951152801514, |
|
"rewards/rejected": -39.97479248046875, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.432004523607577, |
|
"grad_norm": 102.01291002558317, |
|
"learning_rate": 5.614406438596026e-07, |
|
"logits/chosen": -0.8791413307189941, |
|
"logits/rejected": -0.8761864900588989, |
|
"logps/chosen": -3.594583511352539, |
|
"logps/rejected": -4.055732727050781, |
|
"loss": 3.5126, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -35.945831298828125, |
|
"rewards/margins": 4.611495018005371, |
|
"rewards/rejected": -40.55733108520508, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.4342663273960984, |
|
"grad_norm": 112.72220193442307, |
|
"learning_rate": 5.585395462547406e-07, |
|
"logits/chosen": -0.8375272154808044, |
|
"logits/rejected": -0.8324666023254395, |
|
"logps/chosen": -3.421821117401123, |
|
"logps/rejected": -3.717869758605957, |
|
"loss": 3.8101, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -34.21821212768555, |
|
"rewards/margins": 2.9604828357696533, |
|
"rewards/rejected": -37.17869186401367, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.43652813118461975, |
|
"grad_norm": 99.3082505357167, |
|
"learning_rate": 5.55628520825718e-07, |
|
"logits/chosen": -0.908355712890625, |
|
"logits/rejected": -0.9252756237983704, |
|
"logps/chosen": -3.4431350231170654, |
|
"logps/rejected": -3.812532424926758, |
|
"loss": 3.5263, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -34.43135070800781, |
|
"rewards/margins": 3.6939783096313477, |
|
"rewards/rejected": -38.125328063964844, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.43878993497314106, |
|
"grad_norm": 107.13322204576244, |
|
"learning_rate": 5.527077498623752e-07, |
|
"logits/chosen": -0.8578076958656311, |
|
"logits/rejected": -0.8740971088409424, |
|
"logps/chosen": -3.3862037658691406, |
|
"logps/rejected": -3.792330741882324, |
|
"loss": 3.1196, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -33.862037658691406, |
|
"rewards/margins": 4.0612711906433105, |
|
"rewards/rejected": -37.92330551147461, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.4410517387616624, |
|
"grad_norm": 85.34410402793644, |
|
"learning_rate": 5.497774162648228e-07, |
|
"logits/chosen": -0.8335473537445068, |
|
"logits/rejected": -0.8551528453826904, |
|
"logps/chosen": -3.339934825897217, |
|
"logps/rejected": -3.8869519233703613, |
|
"loss": 3.1318, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -33.39934539794922, |
|
"rewards/margins": 5.470172882080078, |
|
"rewards/rejected": -38.86952209472656, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4433135425501838, |
|
"grad_norm": 112.04047145787284, |
|
"learning_rate": 5.468377035319882e-07, |
|
"logits/chosen": -0.8870958089828491, |
|
"logits/rejected": -0.8841900825500488, |
|
"logps/chosen": -3.344312906265259, |
|
"logps/rejected": -3.845787763595581, |
|
"loss": 3.2742, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -33.44313049316406, |
|
"rewards/margins": 5.014750003814697, |
|
"rewards/rejected": -38.457881927490234, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.4455753463387051, |
|
"grad_norm": 108.93166440182182, |
|
"learning_rate": 5.438887957501248e-07, |
|
"logits/chosen": -0.7933169603347778, |
|
"logits/rejected": -0.7912404537200928, |
|
"logps/chosen": -3.3394107818603516, |
|
"logps/rejected": -3.764794111251831, |
|
"loss": 3.0992, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -33.39411163330078, |
|
"rewards/margins": 4.253833293914795, |
|
"rewards/rejected": -37.64794158935547, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.44783715012722647, |
|
"grad_norm": 132.62161789111477, |
|
"learning_rate": 5.409308775812844e-07, |
|
"logits/chosen": -0.8376902341842651, |
|
"logits/rejected": -0.8406752347946167, |
|
"logps/chosen": -3.4705710411071777, |
|
"logps/rejected": -3.8878021240234375, |
|
"loss": 3.5095, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -34.705711364746094, |
|
"rewards/margins": 4.172308444976807, |
|
"rewards/rejected": -38.878021240234375, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.45009895391574783, |
|
"grad_norm": 105.66558509816933, |
|
"learning_rate": 5.379641342517541e-07, |
|
"logits/chosen": -0.8948197960853577, |
|
"logits/rejected": -0.8918160200119019, |
|
"logps/chosen": -3.276104211807251, |
|
"logps/rejected": -3.7821552753448486, |
|
"loss": 3.1998, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -32.76103973388672, |
|
"rewards/margins": 5.060507297515869, |
|
"rewards/rejected": -37.82155227661133, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.45236075770426915, |
|
"grad_norm": 100.99361157251298, |
|
"learning_rate": 5.349887515404564e-07, |
|
"logits/chosen": -0.8491485714912415, |
|
"logits/rejected": -0.8752503991127014, |
|
"logps/chosen": -3.4885029792785645, |
|
"logps/rejected": -4.05246114730835, |
|
"loss": 2.811, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -34.88502883911133, |
|
"rewards/margins": 5.639582633972168, |
|
"rewards/rejected": -40.52460479736328, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4546225614927905, |
|
"grad_norm": 113.88837192083922, |
|
"learning_rate": 5.320049157673163e-07, |
|
"logits/chosen": -0.7907375693321228, |
|
"logits/rejected": -0.7869551181793213, |
|
"logps/chosen": -3.329808235168457, |
|
"logps/rejected": -3.815051794052124, |
|
"loss": 3.0112, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -33.29808044433594, |
|
"rewards/margins": 4.8524346351623535, |
|
"rewards/rejected": -38.15052032470703, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.4568843652813118, |
|
"grad_norm": 96.00315980556027, |
|
"learning_rate": 5.290128137815938e-07, |
|
"logits/chosen": -0.8615790009498596, |
|
"logits/rejected": -0.8816788792610168, |
|
"logps/chosen": -3.5456151962280273, |
|
"logps/rejected": -4.082833766937256, |
|
"loss": 2.6221, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -35.456146240234375, |
|
"rewards/margins": 5.372189044952393, |
|
"rewards/rejected": -40.828338623046875, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.4591461690698332, |
|
"grad_norm": 88.74384836731605, |
|
"learning_rate": 5.260126329501828e-07, |
|
"logits/chosen": -0.8821161985397339, |
|
"logits/rejected": -0.8808766603469849, |
|
"logps/chosen": -3.4488883018493652, |
|
"logps/rejected": -4.065739631652832, |
|
"loss": 2.526, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -34.48888397216797, |
|
"rewards/margins": 6.168512344360352, |
|
"rewards/rejected": -40.65739440917969, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.46140797285835455, |
|
"grad_norm": 113.38423627891478, |
|
"learning_rate": 5.230045611458789e-07, |
|
"logits/chosen": -0.8067418932914734, |
|
"logits/rejected": -0.8317432403564453, |
|
"logps/chosen": -3.4061567783355713, |
|
"logps/rejected": -3.852400302886963, |
|
"loss": 3.1033, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -34.06156921386719, |
|
"rewards/margins": 4.462434768676758, |
|
"rewards/rejected": -38.52400207519531, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.46366977664687586, |
|
"grad_norm": 109.078062930805, |
|
"learning_rate": 5.199887867356143e-07, |
|
"logits/chosen": -0.8303195238113403, |
|
"logits/rejected": -0.8461140394210815, |
|
"logps/chosen": -3.646005153656006, |
|
"logps/rejected": -4.241487979888916, |
|
"loss": 2.5447, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -36.46004867553711, |
|
"rewards/margins": 5.954832077026367, |
|
"rewards/rejected": -42.41488265991211, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.46593158043539723, |
|
"grad_norm": 115.30415505519554, |
|
"learning_rate": 5.16965498568662e-07, |
|
"logits/chosen": -0.8711931705474854, |
|
"logits/rejected": -0.8695412278175354, |
|
"logps/chosen": -3.7641541957855225, |
|
"logps/rejected": -4.470314025878906, |
|
"loss": 2.7657, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -37.64154052734375, |
|
"rewards/margins": 7.061600685119629, |
|
"rewards/rejected": -44.70314025878906, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.4681933842239186, |
|
"grad_norm": 112.79356107718269, |
|
"learning_rate": 5.139348859648098e-07, |
|
"logits/chosen": -0.8668640851974487, |
|
"logits/rejected": -0.8753060698509216, |
|
"logps/chosen": -3.509500026702881, |
|
"logps/rejected": -4.0229034423828125, |
|
"loss": 2.9855, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -35.095001220703125, |
|
"rewards/margins": 5.134032249450684, |
|
"rewards/rejected": -40.229034423828125, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.4704551880124399, |
|
"grad_norm": 117.01804715220312, |
|
"learning_rate": 5.10897138702506e-07, |
|
"logits/chosen": -0.8137744665145874, |
|
"logits/rejected": -0.838422417640686, |
|
"logps/chosen": -3.5989084243774414, |
|
"logps/rejected": -4.12141227722168, |
|
"loss": 3.4055, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -35.98908615112305, |
|
"rewards/margins": 5.225040912628174, |
|
"rewards/rejected": -41.21411895751953, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.4727169918009613, |
|
"grad_norm": 101.49552741213645, |
|
"learning_rate": 5.078524470069743e-07, |
|
"logits/chosen": -0.9176779985427856, |
|
"logits/rejected": -0.9260926246643066, |
|
"logps/chosen": -3.756748914718628, |
|
"logps/rejected": -4.329287052154541, |
|
"loss": 2.4625, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -37.56748580932617, |
|
"rewards/margins": 5.725386142730713, |
|
"rewards/rejected": -43.29287338256836, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.47497879558948264, |
|
"grad_norm": 103.91381353366985, |
|
"learning_rate": 5.048010015383021e-07, |
|
"logits/chosen": -0.8263366222381592, |
|
"logits/rejected": -0.8194425106048584, |
|
"logps/chosen": -3.8313450813293457, |
|
"logps/rejected": -4.535330772399902, |
|
"loss": 2.4896, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -38.31344985961914, |
|
"rewards/margins": 7.039859294891357, |
|
"rewards/rejected": -45.35331344604492, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.47724059937800395, |
|
"grad_norm": 93.64016980030927, |
|
"learning_rate": 5.01742993379502e-07, |
|
"logits/chosen": -0.8458577990531921, |
|
"logits/rejected": -0.868080735206604, |
|
"logps/chosen": -3.8605237007141113, |
|
"logps/rejected": -4.4653801918029785, |
|
"loss": 2.6156, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -38.60523986816406, |
|
"rewards/margins": 6.04856538772583, |
|
"rewards/rejected": -44.653804779052734, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.4795024031665253, |
|
"grad_norm": 127.6076556977002, |
|
"learning_rate": 4.986786140245446e-07, |
|
"logits/chosen": -0.8188483715057373, |
|
"logits/rejected": -0.826935887336731, |
|
"logps/chosen": -3.8074846267700195, |
|
"logps/rejected": -4.2949419021606445, |
|
"loss": 3.2102, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -38.074851989746094, |
|
"rewards/margins": 4.874571323394775, |
|
"rewards/rejected": -42.94941711425781, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.4817642069550466, |
|
"grad_norm": 147.29228875677396, |
|
"learning_rate": 4.956080553663687e-07, |
|
"logits/chosen": -0.8854949474334717, |
|
"logits/rejected": -0.8917779922485352, |
|
"logps/chosen": -3.8558950424194336, |
|
"logps/rejected": -4.444454193115234, |
|
"loss": 2.994, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -38.55895233154297, |
|
"rewards/margins": 5.885589122772217, |
|
"rewards/rejected": -44.444541931152344, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.484026010743568, |
|
"grad_norm": 136.51813237025374, |
|
"learning_rate": 4.925315096848636e-07, |
|
"logits/chosen": -0.8785922527313232, |
|
"logits/rejected": -0.8924418091773987, |
|
"logps/chosen": -4.0408549308776855, |
|
"logps/rejected": -4.76306676864624, |
|
"loss": 2.8242, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -40.40855407714844, |
|
"rewards/margins": 7.2221174240112305, |
|
"rewards/rejected": -47.63066864013672, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.48628781453208936, |
|
"grad_norm": 110.42259187397326, |
|
"learning_rate": 4.894491696348293e-07, |
|
"logits/chosen": -0.8856892585754395, |
|
"logits/rejected": -0.8893029093742371, |
|
"logps/chosen": -3.8507235050201416, |
|
"logps/rejected": -4.282730579376221, |
|
"loss": 3.3235, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -38.507232666015625, |
|
"rewards/margins": 4.320071220397949, |
|
"rewards/rejected": -42.82730484008789, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.48854961832061067, |
|
"grad_norm": 112.38972321227853, |
|
"learning_rate": 4.863612282339116e-07, |
|
"logits/chosen": -0.817990779876709, |
|
"logits/rejected": -0.8263007998466492, |
|
"logps/chosen": -4.125490188598633, |
|
"logps/rejected": -4.6461663246154785, |
|
"loss": 3.3821, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -41.25490188598633, |
|
"rewards/margins": 5.206766128540039, |
|
"rewards/rejected": -46.461666107177734, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.49081142210913203, |
|
"grad_norm": 122.33140314915195, |
|
"learning_rate": 4.832678788505161e-07, |
|
"logits/chosen": -0.8691527843475342, |
|
"logits/rejected": -0.8712851405143738, |
|
"logps/chosen": -4.1400322914123535, |
|
"logps/rejected": -4.675261497497559, |
|
"loss": 3.4518, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": -41.400325775146484, |
|
"rewards/margins": 5.352287769317627, |
|
"rewards/rejected": -46.75260925292969, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.4930732258976534, |
|
"grad_norm": 153.70015543824144, |
|
"learning_rate": 4.801693151916985e-07, |
|
"logits/chosen": -0.8753068447113037, |
|
"logits/rejected": -0.9178647398948669, |
|
"logps/chosen": -4.102838516235352, |
|
"logps/rejected": -4.627809047698975, |
|
"loss": 3.1055, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -41.02838134765625, |
|
"rewards/margins": 5.249708652496338, |
|
"rewards/rejected": -46.27809143066406, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.4953350296861747, |
|
"grad_norm": 110.68812495356474, |
|
"learning_rate": 4.770657312910354e-07, |
|
"logits/chosen": -0.8981303572654724, |
|
"logits/rejected": -0.915514349937439, |
|
"logps/chosen": -4.165809631347656, |
|
"logps/rejected": -4.6610212326049805, |
|
"loss": 3.376, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -41.65810012817383, |
|
"rewards/margins": 4.952118396759033, |
|
"rewards/rejected": -46.6102180480957, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.4975968334746961, |
|
"grad_norm": 116.58607540402677, |
|
"learning_rate": 4.739573214964729e-07, |
|
"logits/chosen": -0.8669706583023071, |
|
"logits/rejected": -0.8741896748542786, |
|
"logps/chosen": -3.9599337577819824, |
|
"logps/rejected": -4.534191608428955, |
|
"loss": 2.7624, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -39.599342346191406, |
|
"rewards/margins": 5.742575645446777, |
|
"rewards/rejected": -45.341915130615234, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.49985863726321744, |
|
"grad_norm": 138.68860730884543, |
|
"learning_rate": 4.7084428045815733e-07, |
|
"logits/chosen": -0.8756369948387146, |
|
"logits/rejected": -0.8829125165939331, |
|
"logps/chosen": -4.227509498596191, |
|
"logps/rejected": -4.716983795166016, |
|
"loss": 3.2761, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -42.27509307861328, |
|
"rewards/margins": 4.894742012023926, |
|
"rewards/rejected": -47.16983413696289, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.5021204410517388, |
|
"grad_norm": 184.1037285692299, |
|
"learning_rate": 4.677268031162457e-07, |
|
"logits/chosen": -0.896783709526062, |
|
"logits/rejected": -0.9043738842010498, |
|
"logps/chosen": -4.029943943023682, |
|
"logps/rejected": -4.505390644073486, |
|
"loss": 3.5501, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": -40.299442291259766, |
|
"rewards/margins": 4.7544660568237305, |
|
"rewards/rejected": -45.05391311645508, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.5043822448402601, |
|
"grad_norm": 112.45603200436628, |
|
"learning_rate": 4.646050846886985e-07, |
|
"logits/chosen": -0.8041797876358032, |
|
"logits/rejected": -0.8242363929748535, |
|
"logps/chosen": -3.881974220275879, |
|
"logps/rejected": -4.4934492111206055, |
|
"loss": 2.6607, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -38.81974411010742, |
|
"rewards/margins": 6.114748954772949, |
|
"rewards/rejected": -44.93449401855469, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.5066440486287814, |
|
"grad_norm": 134.00364181034922, |
|
"learning_rate": 4.6147932065905494e-07, |
|
"logits/chosen": -0.867178738117218, |
|
"logits/rejected": -0.8653546571731567, |
|
"logps/chosen": -4.144719123840332, |
|
"logps/rejected": -4.623917102813721, |
|
"loss": 3.5008, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -41.44718933105469, |
|
"rewards/margins": 4.791982650756836, |
|
"rewards/rejected": -46.239173889160156, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.5089058524173028, |
|
"grad_norm": 114.91237964387022, |
|
"learning_rate": 4.5834970676419214e-07, |
|
"logits/chosen": -0.8645190000534058, |
|
"logits/rejected": -0.8773024082183838, |
|
"logps/chosen": -3.982334613800049, |
|
"logps/rejected": -4.518105983734131, |
|
"loss": 3.0231, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -39.82334899902344, |
|
"rewards/margins": 5.357712268829346, |
|
"rewards/rejected": -45.181060791015625, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5111676562058242, |
|
"grad_norm": 154.2435864953568, |
|
"learning_rate": 4.552164389820673e-07, |
|
"logits/chosen": -0.7863515615463257, |
|
"logits/rejected": -0.804935097694397, |
|
"logps/chosen": -4.0218400955200195, |
|
"logps/rejected": -4.741469383239746, |
|
"loss": 2.7939, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -40.218406677246094, |
|
"rewards/margins": 7.1962890625, |
|
"rewards/rejected": -47.414695739746094, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.5134294599943455, |
|
"grad_norm": 129.512345171623, |
|
"learning_rate": 4.5207971351944605e-07, |
|
"logits/chosen": -0.904701828956604, |
|
"logits/rejected": -0.9039019346237183, |
|
"logps/chosen": -3.990769386291504, |
|
"logps/rejected": -4.5699143409729, |
|
"loss": 3.5426, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -39.90769577026367, |
|
"rewards/margins": 5.791450500488281, |
|
"rewards/rejected": -45.69914245605469, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.5156912637828668, |
|
"grad_norm": 127.45769396653864, |
|
"learning_rate": 4.489397267996157e-07, |
|
"logits/chosen": -0.8994483351707458, |
|
"logits/rejected": -0.8912683725357056, |
|
"logps/chosen": -3.946481466293335, |
|
"logps/rejected": -4.470279693603516, |
|
"loss": 2.9926, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -39.464813232421875, |
|
"rewards/margins": 5.237981796264648, |
|
"rewards/rejected": -44.702796936035156, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.5179530675713881, |
|
"grad_norm": 111.39180557968587, |
|
"learning_rate": 4.45796675450085e-07, |
|
"logits/chosen": -0.8582149744033813, |
|
"logits/rejected": -0.8690947890281677, |
|
"logps/chosen": -3.8885929584503174, |
|
"logps/rejected": -4.494987964630127, |
|
"loss": 2.8133, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -38.88593292236328, |
|
"rewards/margins": 6.063946723937988, |
|
"rewards/rejected": -44.94988250732422, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.5202148713599095, |
|
"grad_norm": 135.02910775325827, |
|
"learning_rate": 4.4265075629027126e-07, |
|
"logits/chosen": -0.8169862031936646, |
|
"logits/rejected": -0.8348796963691711, |
|
"logps/chosen": -4.078422546386719, |
|
"logps/rejected": -4.564748764038086, |
|
"loss": 2.8027, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -40.78423309326172, |
|
"rewards/margins": 4.863255977630615, |
|
"rewards/rejected": -45.64748764038086, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5224766751484309, |
|
"grad_norm": 110.51829137339959, |
|
"learning_rate": 4.3950216631917563e-07, |
|
"logits/chosen": -0.888090193271637, |
|
"logits/rejected": -0.9090730547904968, |
|
"logps/chosen": -3.9436981678009033, |
|
"logps/rejected": -4.6157708168029785, |
|
"loss": 2.6358, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -39.43698501586914, |
|
"rewards/margins": 6.720722198486328, |
|
"rewards/rejected": -46.15770721435547, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.5247384789369522, |
|
"grad_norm": 111.61538092823348, |
|
"learning_rate": 4.3635110270304676e-07, |
|
"logits/chosen": -0.8641917109489441, |
|
"logits/rejected": -0.8716497421264648, |
|
"logps/chosen": -3.86027193069458, |
|
"logps/rejected": -4.502015113830566, |
|
"loss": 2.2291, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -38.602718353271484, |
|
"rewards/margins": 6.4174323081970215, |
|
"rewards/rejected": -45.02014923095703, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.5270002827254736, |
|
"grad_norm": 173.35008588152775, |
|
"learning_rate": 4.331977627630339e-07, |
|
"logits/chosen": -0.8097434043884277, |
|
"logits/rejected": -0.8040153980255127, |
|
"logps/chosen": -3.757528305053711, |
|
"logps/rejected": -4.439907550811768, |
|
"loss": 2.6864, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -37.57528305053711, |
|
"rewards/margins": 6.823795318603516, |
|
"rewards/rejected": -44.399078369140625, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.5292620865139949, |
|
"grad_norm": 120.90157841350384, |
|
"learning_rate": 4.300423439628313e-07, |
|
"logits/chosen": -0.8537578582763672, |
|
"logits/rejected": -0.8780308365821838, |
|
"logps/chosen": -3.8302276134490967, |
|
"logps/rejected": -4.468556880950928, |
|
"loss": 2.498, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -38.30228042602539, |
|
"rewards/margins": 6.383289813995361, |
|
"rewards/rejected": -44.685569763183594, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.5315238903025162, |
|
"grad_norm": 118.3536030698132, |
|
"learning_rate": 4.268850438963118e-07, |
|
"logits/chosen": -0.8823138475418091, |
|
"logits/rejected": -0.9043455719947815, |
|
"logps/chosen": -4.102262020111084, |
|
"logps/rejected": -4.674637794494629, |
|
"loss": 2.7324, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -41.02262496948242, |
|
"rewards/margins": 5.723756313323975, |
|
"rewards/rejected": -46.746376037597656, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5337856940910376, |
|
"grad_norm": 116.93696869949373, |
|
"learning_rate": 4.2372606027515463e-07, |
|
"logits/chosen": -0.8339194655418396, |
|
"logits/rejected": -0.8445159196853638, |
|
"logps/chosen": -3.7381174564361572, |
|
"logps/rejected": -4.291147708892822, |
|
"loss": 2.9013, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -37.38117599487305, |
|
"rewards/margins": 5.530303478240967, |
|
"rewards/rejected": -42.911476135253906, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.536047497879559, |
|
"grad_norm": 148.0693344568736, |
|
"learning_rate": 4.2056559091646387e-07, |
|
"logits/chosen": -0.8702591061592102, |
|
"logits/rejected": -0.898255467414856, |
|
"logps/chosen": -4.004217624664307, |
|
"logps/rejected": -4.487666130065918, |
|
"loss": 3.3504, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -40.042179107666016, |
|
"rewards/margins": 4.834486484527588, |
|
"rewards/rejected": -44.87666702270508, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.5383093016680803, |
|
"grad_norm": 136.83734710023003, |
|
"learning_rate": 4.1740383373038116e-07, |
|
"logits/chosen": -0.8536058664321899, |
|
"logits/rejected": -0.8870092630386353, |
|
"logps/chosen": -3.817162036895752, |
|
"logps/rejected": -4.381956100463867, |
|
"loss": 2.9761, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -38.1716194152832, |
|
"rewards/margins": 5.647944450378418, |
|
"rewards/rejected": -43.81956481933594, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.5405711054566016, |
|
"grad_norm": 116.45504096009955, |
|
"learning_rate": 4.1424098670769255e-07, |
|
"logits/chosen": -0.9009624719619751, |
|
"logits/rejected": -0.9262585639953613, |
|
"logps/chosen": -3.884793281555176, |
|
"logps/rejected": -4.33213996887207, |
|
"loss": 3.0779, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -38.84792709350586, |
|
"rewards/margins": 4.473471164703369, |
|
"rewards/rejected": -43.3213996887207, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.542832909245123, |
|
"grad_norm": 114.67603070092983, |
|
"learning_rate": 4.1107724790743007e-07, |
|
"logits/chosen": -0.8459216356277466, |
|
"logits/rejected": -0.8754346966743469, |
|
"logps/chosen": -3.9210426807403564, |
|
"logps/rejected": -4.424591064453125, |
|
"loss": 2.7487, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -39.210426330566406, |
|
"rewards/margins": 5.035484313964844, |
|
"rewards/rejected": -44.245906829833984, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5450947130336443, |
|
"grad_norm": 159.44800208061523, |
|
"learning_rate": 4.0791281544446947e-07, |
|
"logits/chosen": -0.8924515843391418, |
|
"logits/rejected": -0.8807788491249084, |
|
"logps/chosen": -3.9503896236419678, |
|
"logps/rejected": -4.500914573669434, |
|
"loss": 2.7043, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -39.50389099121094, |
|
"rewards/margins": 5.505251884460449, |
|
"rewards/rejected": -45.00914764404297, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.5473565168221657, |
|
"grad_norm": 128.53086499808066, |
|
"learning_rate": 4.0474788747712416e-07, |
|
"logits/chosen": -0.8996694684028625, |
|
"logits/rejected": -0.9028959274291992, |
|
"logps/chosen": -3.932866096496582, |
|
"logps/rejected": -4.37875509262085, |
|
"loss": 3.589, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -39.32866287231445, |
|
"rewards/margins": 4.458887100219727, |
|
"rewards/rejected": -43.78754806518555, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.549618320610687, |
|
"grad_norm": 104.57144676128537, |
|
"learning_rate": 4.0158266219473573e-07, |
|
"logits/chosen": -0.8719525933265686, |
|
"logits/rejected": -0.8880172371864319, |
|
"logps/chosen": -3.7240490913391113, |
|
"logps/rejected": -4.320034503936768, |
|
"loss": 2.5879, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -37.24049377441406, |
|
"rewards/margins": 5.959850311279297, |
|
"rewards/rejected": -43.20034408569336, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.5518801243992084, |
|
"grad_norm": 121.72780985174033, |
|
"learning_rate": 3.984173378052643e-07, |
|
"logits/chosen": -0.8488632440567017, |
|
"logits/rejected": -0.8424826264381409, |
|
"logps/chosen": -3.662327289581299, |
|
"logps/rejected": -4.250753879547119, |
|
"loss": 2.558, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -36.623268127441406, |
|
"rewards/margins": 5.884267807006836, |
|
"rewards/rejected": -42.507537841796875, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.5541419281877297, |
|
"grad_norm": 176.66908023998735, |
|
"learning_rate": 3.9525211252287585e-07, |
|
"logits/chosen": -0.9206802248954773, |
|
"logits/rejected": -0.938546895980835, |
|
"logps/chosen": -3.8519883155822754, |
|
"logps/rejected": -4.533115386962891, |
|
"loss": 3.0243, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -38.51988220214844, |
|
"rewards/margins": 6.811273574829102, |
|
"rewards/rejected": -45.331153869628906, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.556403731976251, |
|
"grad_norm": 108.79709814447062, |
|
"learning_rate": 3.920871845555305e-07, |
|
"logits/chosen": -0.8708853721618652, |
|
"logits/rejected": -0.8730578422546387, |
|
"logps/chosen": -3.832918882369995, |
|
"logps/rejected": -4.335785865783691, |
|
"loss": 2.5306, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -38.32918930053711, |
|
"rewards/margins": 5.0286712646484375, |
|
"rewards/rejected": -43.35785675048828, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.5586655357647724, |
|
"grad_norm": 127.09481932058374, |
|
"learning_rate": 3.8892275209256984e-07, |
|
"logits/chosen": -0.921829342842102, |
|
"logits/rejected": -0.9111767411231995, |
|
"logps/chosen": -3.9879932403564453, |
|
"logps/rejected": -4.458497047424316, |
|
"loss": 2.9165, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -39.87993240356445, |
|
"rewards/margins": 4.705035209655762, |
|
"rewards/rejected": -44.58496856689453, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.5609273395532938, |
|
"grad_norm": 115.9590799116695, |
|
"learning_rate": 3.8575901329230747e-07, |
|
"logits/chosen": -0.8582264184951782, |
|
"logits/rejected": -0.8617616891860962, |
|
"logps/chosen": -3.9318342208862305, |
|
"logps/rejected": -4.470717430114746, |
|
"loss": 3.0595, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -39.31834411621094, |
|
"rewards/margins": 5.388828277587891, |
|
"rewards/rejected": -44.707176208496094, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.5631891433418151, |
|
"grad_norm": 128.3493722347937, |
|
"learning_rate": 3.8259616626961886e-07, |
|
"logits/chosen": -0.8590461015701294, |
|
"logits/rejected": -0.8726236820220947, |
|
"logps/chosen": -3.7069010734558105, |
|
"logps/rejected": -4.159891128540039, |
|
"loss": 2.6582, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -37.06901550292969, |
|
"rewards/margins": 4.529898643493652, |
|
"rewards/rejected": -41.59891128540039, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.5654509471303364, |
|
"grad_norm": 114.11597318565974, |
|
"learning_rate": 3.794344090835362e-07, |
|
"logits/chosen": -0.8618912100791931, |
|
"logits/rejected": -0.8814125061035156, |
|
"logps/chosen": -4.0398969650268555, |
|
"logps/rejected": -4.569504737854004, |
|
"loss": 2.945, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -40.39897155761719, |
|
"rewards/margins": 5.2960710525512695, |
|
"rewards/rejected": -45.695045471191406, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5677127509188578, |
|
"grad_norm": 132.84283803977144, |
|
"learning_rate": 3.7627393972484534e-07, |
|
"logits/chosen": -0.9538972973823547, |
|
"logits/rejected": -0.961841881275177, |
|
"logps/chosen": -3.9805994033813477, |
|
"logps/rejected": -4.373291969299316, |
|
"loss": 3.459, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -39.805992126464844, |
|
"rewards/margins": 3.9269251823425293, |
|
"rewards/rejected": -43.73291778564453, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.5699745547073791, |
|
"grad_norm": 116.59079088621297, |
|
"learning_rate": 3.7311495610368823e-07, |
|
"logits/chosen": -0.9467366933822632, |
|
"logits/rejected": -0.9687215089797974, |
|
"logps/chosen": -4.043094158172607, |
|
"logps/rejected": -4.563295364379883, |
|
"loss": 3.0119, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -40.430938720703125, |
|
"rewards/margins": 5.202012538909912, |
|
"rewards/rejected": -45.63295364379883, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.5722363584959005, |
|
"grad_norm": 120.59136322495354, |
|
"learning_rate": 3.699576560371689e-07, |
|
"logits/chosen": -0.8889734745025635, |
|
"logits/rejected": -0.9066051244735718, |
|
"logps/chosen": -4.205962181091309, |
|
"logps/rejected": -4.996251106262207, |
|
"loss": 2.315, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -42.05961990356445, |
|
"rewards/margins": 7.902889251708984, |
|
"rewards/rejected": -49.96250915527344, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.5744981622844219, |
|
"grad_norm": 124.59506533641044, |
|
"learning_rate": 3.66802237236966e-07, |
|
"logits/chosen": -0.8749493956565857, |
|
"logits/rejected": -0.8885746002197266, |
|
"logps/chosen": -4.300434112548828, |
|
"logps/rejected": -4.866487503051758, |
|
"loss": 2.9432, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -43.00434112548828, |
|
"rewards/margins": 5.66053581237793, |
|
"rewards/rejected": -48.664878845214844, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.5767599660729432, |
|
"grad_norm": 118.16852020588865, |
|
"learning_rate": 3.636488972969532e-07, |
|
"logits/chosen": -0.9017617702484131, |
|
"logits/rejected": -0.9131591320037842, |
|
"logps/chosen": -4.101204872131348, |
|
"logps/rejected": -4.681705474853516, |
|
"loss": 2.6388, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -41.012046813964844, |
|
"rewards/margins": 5.80500602722168, |
|
"rewards/rejected": -46.817054748535156, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5790217698614645, |
|
"grad_norm": 120.89498541698326, |
|
"learning_rate": 3.604978336808244e-07, |
|
"logits/chosen": -0.9948743581771851, |
|
"logits/rejected": -1.0087530612945557, |
|
"logps/chosen": -4.0368547439575195, |
|
"logps/rejected": -4.631007194519043, |
|
"loss": 2.8043, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -40.36854934692383, |
|
"rewards/margins": 5.941521644592285, |
|
"rewards/rejected": -46.31007385253906, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.5812835736499858, |
|
"grad_norm": 124.83239622588137, |
|
"learning_rate": 3.5734924370972876e-07, |
|
"logits/chosen": -0.9356947541236877, |
|
"logits/rejected": -0.9560631513595581, |
|
"logps/chosen": -4.138307571411133, |
|
"logps/rejected": -4.669035911560059, |
|
"loss": 3.0658, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -41.38307189941406, |
|
"rewards/margins": 5.307290554046631, |
|
"rewards/rejected": -46.69036102294922, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.5835453774385072, |
|
"grad_norm": 138.50523829837766, |
|
"learning_rate": 3.5420332454991504e-07, |
|
"logits/chosen": -0.8820909261703491, |
|
"logits/rejected": -0.8897730112075806, |
|
"logps/chosen": -4.207208633422852, |
|
"logps/rejected": -4.809183597564697, |
|
"loss": 2.7913, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -42.07209014892578, |
|
"rewards/margins": 6.019748210906982, |
|
"rewards/rejected": -48.091835021972656, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.5858071812270286, |
|
"grad_norm": 110.39897739040883, |
|
"learning_rate": 3.510602732003843e-07, |
|
"logits/chosen": -0.9389081597328186, |
|
"logits/rejected": -0.9693293571472168, |
|
"logps/chosen": -4.315252780914307, |
|
"logps/rejected": -4.980816841125488, |
|
"loss": 2.5721, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -43.152530670166016, |
|
"rewards/margins": 6.655643463134766, |
|
"rewards/rejected": -49.80816650390625, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.5880689850155499, |
|
"grad_norm": 125.09334065686004, |
|
"learning_rate": 3.4792028648055396e-07, |
|
"logits/chosen": -0.8979041576385498, |
|
"logits/rejected": -0.9275961518287659, |
|
"logps/chosen": -4.101990699768066, |
|
"logps/rejected": -4.697176933288574, |
|
"loss": 2.7902, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -41.0199089050293, |
|
"rewards/margins": 5.951866149902344, |
|
"rewards/rejected": -46.971771240234375, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5903307888040712, |
|
"grad_norm": 117.68527045074552, |
|
"learning_rate": 3.447835610179327e-07, |
|
"logits/chosen": -0.8862229585647583, |
|
"logits/rejected": -0.899125337600708, |
|
"logps/chosen": -4.087460994720459, |
|
"logps/rejected": -4.825685501098633, |
|
"loss": 2.6479, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -40.874610900878906, |
|
"rewards/margins": 7.382248878479004, |
|
"rewards/rejected": -48.25685501098633, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.5925925925925926, |
|
"grad_norm": 139.3400014797519, |
|
"learning_rate": 3.416502932358079e-07, |
|
"logits/chosen": -0.9597766399383545, |
|
"logits/rejected": -0.9774207472801208, |
|
"logps/chosen": -4.404236316680908, |
|
"logps/rejected": -4.83123254776001, |
|
"loss": 3.1974, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -44.04236602783203, |
|
"rewards/margins": 4.269958019256592, |
|
"rewards/rejected": -48.31232452392578, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.5948543963811139, |
|
"grad_norm": 150.23635516979436, |
|
"learning_rate": 3.385206793409451e-07, |
|
"logits/chosen": -0.8739109039306641, |
|
"logits/rejected": -0.8933315873146057, |
|
"logps/chosen": -3.9273197650909424, |
|
"logps/rejected": -4.468226432800293, |
|
"loss": 2.7657, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -39.273197174072266, |
|
"rewards/margins": 5.4090657234191895, |
|
"rewards/rejected": -44.68226623535156, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.5971162001696353, |
|
"grad_norm": 136.57829357030124, |
|
"learning_rate": 3.3539491531130163e-07, |
|
"logits/chosen": -0.9004377722740173, |
|
"logits/rejected": -0.9124334454536438, |
|
"logps/chosen": -4.114037036895752, |
|
"logps/rejected": -4.8097944259643555, |
|
"loss": 2.6883, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -41.14036560058594, |
|
"rewards/margins": 6.95757532119751, |
|
"rewards/rejected": -48.09794235229492, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.5993780039581567, |
|
"grad_norm": 134.6857649306004, |
|
"learning_rate": 3.3227319688375426e-07, |
|
"logits/chosen": -0.9540138840675354, |
|
"logits/rejected": -0.9450901746749878, |
|
"logps/chosen": -4.0871100425720215, |
|
"logps/rejected": -4.629415035247803, |
|
"loss": 2.8724, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -40.8711051940918, |
|
"rewards/margins": 5.423047065734863, |
|
"rewards/rejected": -46.294151306152344, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.601639807746678, |
|
"grad_norm": 137.41581633964907, |
|
"learning_rate": 3.291557195418427e-07, |
|
"logits/chosen": -0.9520595073699951, |
|
"logits/rejected": -0.9495226144790649, |
|
"logps/chosen": -3.8760974407196045, |
|
"logps/rejected": -4.302947044372559, |
|
"loss": 3.2724, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -38.76097869873047, |
|
"rewards/margins": 4.268494606018066, |
|
"rewards/rejected": -43.02947235107422, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.6039016115351993, |
|
"grad_norm": 144.7995816554034, |
|
"learning_rate": 3.260426785035272e-07, |
|
"logits/chosen": -0.9223634004592896, |
|
"logits/rejected": -0.9228293895721436, |
|
"logps/chosen": -3.9227046966552734, |
|
"logps/rejected": -4.469911098480225, |
|
"loss": 3.3138, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -39.227046966552734, |
|
"rewards/margins": 5.472067356109619, |
|
"rewards/rejected": -44.6991081237793, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.6061634153237206, |
|
"grad_norm": 108.29473766754211, |
|
"learning_rate": 3.229342687089646e-07, |
|
"logits/chosen": -0.9119688272476196, |
|
"logits/rejected": -0.915400505065918, |
|
"logps/chosen": -3.8088831901550293, |
|
"logps/rejected": -4.450516223907471, |
|
"loss": 2.4994, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -38.08883285522461, |
|
"rewards/margins": 6.416332244873047, |
|
"rewards/rejected": -44.505165100097656, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.608425219112242, |
|
"grad_norm": 114.65522312241735, |
|
"learning_rate": 3.1983068480830143e-07, |
|
"logits/chosen": -0.9089800715446472, |
|
"logits/rejected": -0.9124254584312439, |
|
"logps/chosen": -3.8074193000793457, |
|
"logps/rejected": -4.448195934295654, |
|
"loss": 2.7265, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -38.074195861816406, |
|
"rewards/margins": 6.407771110534668, |
|
"rewards/rejected": -44.481964111328125, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.6106870229007634, |
|
"grad_norm": 114.37970020373666, |
|
"learning_rate": 3.1673212114948387e-07, |
|
"logits/chosen": -0.8962373733520508, |
|
"logits/rejected": -0.8947957158088684, |
|
"logps/chosen": -3.66055965423584, |
|
"logps/rejected": -4.304127216339111, |
|
"loss": 2.4988, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -36.60559844970703, |
|
"rewards/margins": 6.435674667358398, |
|
"rewards/rejected": -43.0412712097168, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6129488266892847, |
|
"grad_norm": 131.22698552322544, |
|
"learning_rate": 3.1363877176608845e-07, |
|
"logits/chosen": -0.8634111285209656, |
|
"logits/rejected": -0.8854045271873474, |
|
"logps/chosen": -3.583430290222168, |
|
"logps/rejected": -4.167083740234375, |
|
"loss": 2.9822, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -35.83430099487305, |
|
"rewards/margins": 5.836535930633545, |
|
"rewards/rejected": -41.67083740234375, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.615210630477806, |
|
"grad_norm": 118.13378200287389, |
|
"learning_rate": 3.1055083036517076e-07, |
|
"logits/chosen": -0.8832507133483887, |
|
"logits/rejected": -0.8600270748138428, |
|
"logps/chosen": -3.605767011642456, |
|
"logps/rejected": -4.242787837982178, |
|
"loss": 2.5501, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -36.05766677856445, |
|
"rewards/margins": 6.370209217071533, |
|
"rewards/rejected": -42.427879333496094, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.6174724342663274, |
|
"grad_norm": 150.22493651179883, |
|
"learning_rate": 3.074684903151364e-07, |
|
"logits/chosen": -0.7782445549964905, |
|
"logits/rejected": -0.7658709287643433, |
|
"logps/chosen": -3.3246827125549316, |
|
"logps/rejected": -3.7869513034820557, |
|
"loss": 2.6785, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -33.246826171875, |
|
"rewards/margins": 4.622686386108398, |
|
"rewards/rejected": -37.86951446533203, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.6197342380548487, |
|
"grad_norm": 124.8554523177313, |
|
"learning_rate": 3.0439194463363136e-07, |
|
"logits/chosen": -0.8569799065589905, |
|
"logits/rejected": -0.8484780192375183, |
|
"logps/chosen": -3.430828094482422, |
|
"logps/rejected": -3.913545608520508, |
|
"loss": 3.1282, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -34.30828094482422, |
|
"rewards/margins": 4.827174186706543, |
|
"rewards/rejected": -39.13545608520508, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.6219960418433701, |
|
"grad_norm": 105.37259501282527, |
|
"learning_rate": 3.0132138597545537e-07, |
|
"logits/chosen": -0.8960826396942139, |
|
"logits/rejected": -0.935992956161499, |
|
"logps/chosen": -3.6094119548797607, |
|
"logps/rejected": -4.329087734222412, |
|
"loss": 2.4253, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -36.094120025634766, |
|
"rewards/margins": 7.196761608123779, |
|
"rewards/rejected": -43.29087829589844, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.6242578456318915, |
|
"grad_norm": 102.48158570587563, |
|
"learning_rate": 2.982570066204981e-07, |
|
"logits/chosen": -0.8868385553359985, |
|
"logits/rejected": -0.8976235389709473, |
|
"logps/chosen": -3.52752685546875, |
|
"logps/rejected": -4.042696952819824, |
|
"loss": 2.8358, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -35.275264739990234, |
|
"rewards/margins": 5.151702880859375, |
|
"rewards/rejected": -40.426971435546875, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.6265196494204128, |
|
"grad_norm": 152.73891214182655, |
|
"learning_rate": 2.951989984616979e-07, |
|
"logits/chosen": -0.8241417407989502, |
|
"logits/rejected": -0.8495975136756897, |
|
"logps/chosen": -3.6896445751190186, |
|
"logps/rejected": -4.32491397857666, |
|
"loss": 3.0859, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -36.89644241333008, |
|
"rewards/margins": 6.352697849273682, |
|
"rewards/rejected": -43.249141693115234, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.6287814532089341, |
|
"grad_norm": 112.80204422900222, |
|
"learning_rate": 2.9214755299302584e-07, |
|
"logits/chosen": -0.8538424968719482, |
|
"logits/rejected": -0.8619410395622253, |
|
"logps/chosen": -3.8360202312469482, |
|
"logps/rejected": -4.50319766998291, |
|
"loss": 2.0849, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -38.36020278930664, |
|
"rewards/margins": 6.671772480010986, |
|
"rewards/rejected": -45.03197479248047, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.6310432569974554, |
|
"grad_norm": 128.85380711403621, |
|
"learning_rate": 2.89102861297494e-07, |
|
"logits/chosen": -0.8916823863983154, |
|
"logits/rejected": -0.9155115485191345, |
|
"logps/chosen": -3.7095577716827393, |
|
"logps/rejected": -4.224562168121338, |
|
"loss": 3.36, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -37.095577239990234, |
|
"rewards/margins": 5.150045394897461, |
|
"rewards/rejected": -42.24562072753906, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.6333050607859768, |
|
"grad_norm": 164.97262791305909, |
|
"learning_rate": 2.860651140351902e-07, |
|
"logits/chosen": -0.887188196182251, |
|
"logits/rejected": -0.8843110799789429, |
|
"logps/chosen": -3.7880616188049316, |
|
"logps/rejected": -4.440821647644043, |
|
"loss": 2.8817, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -37.880611419677734, |
|
"rewards/margins": 6.5276007652282715, |
|
"rewards/rejected": -44.40821838378906, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6355668645744982, |
|
"grad_norm": 120.88646729377493, |
|
"learning_rate": 2.830345014313381e-07, |
|
"logits/chosen": -0.828898549079895, |
|
"logits/rejected": -0.8654926419258118, |
|
"logps/chosen": -3.848175525665283, |
|
"logps/rejected": -4.498141288757324, |
|
"loss": 2.3091, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -38.48175811767578, |
|
"rewards/margins": 6.499655246734619, |
|
"rewards/rejected": -44.981414794921875, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.6378286683630195, |
|
"grad_norm": 138.22518292554588, |
|
"learning_rate": 2.800112132643856e-07, |
|
"logits/chosen": -0.8729172348976135, |
|
"logits/rejected": -0.8878234028816223, |
|
"logps/chosen": -3.8589096069335938, |
|
"logps/rejected": -4.514438629150391, |
|
"loss": 2.7701, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -38.58909225463867, |
|
"rewards/margins": 6.5552897453308105, |
|
"rewards/rejected": -45.144378662109375, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.6400904721515408, |
|
"grad_norm": 123.6765508241198, |
|
"learning_rate": 2.7699543885412105e-07, |
|
"logits/chosen": -0.8810731768608093, |
|
"logits/rejected": -0.8965428471565247, |
|
"logps/chosen": -3.979841947555542, |
|
"logps/rejected": -4.676267623901367, |
|
"loss": 2.5296, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -39.79841995239258, |
|
"rewards/margins": 6.96425724029541, |
|
"rewards/rejected": -46.762672424316406, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.6423522759400622, |
|
"grad_norm": 134.34352209400595, |
|
"learning_rate": 2.7398736704981725e-07, |
|
"logits/chosen": -0.8905003070831299, |
|
"logits/rejected": -0.8742426037788391, |
|
"logps/chosen": -4.015549182891846, |
|
"logps/rejected": -4.602110385894775, |
|
"loss": 2.6926, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -40.155487060546875, |
|
"rewards/margins": 5.865612030029297, |
|
"rewards/rejected": -46.0211067199707, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.6446140797285835, |
|
"grad_norm": 125.55351312091291, |
|
"learning_rate": 2.709871862184063e-07, |
|
"logits/chosen": -0.8608399629592896, |
|
"logits/rejected": -0.8779529929161072, |
|
"logps/chosen": -3.936886787414551, |
|
"logps/rejected": -4.538551330566406, |
|
"loss": 3.1564, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -39.36886215209961, |
|
"rewards/margins": 6.016650199890137, |
|
"rewards/rejected": -45.38551712036133, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6468758835171049, |
|
"grad_norm": 116.6462838115517, |
|
"learning_rate": 2.679950842326837e-07, |
|
"logits/chosen": -0.9049277901649475, |
|
"logits/rejected": -0.9126715064048767, |
|
"logps/chosen": -4.160530090332031, |
|
"logps/rejected": -4.841786861419678, |
|
"loss": 2.5103, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -41.60530471801758, |
|
"rewards/margins": 6.812563896179199, |
|
"rewards/rejected": -48.41786193847656, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.6491376873056263, |
|
"grad_norm": 125.7388754846288, |
|
"learning_rate": 2.6501124845954363e-07, |
|
"logits/chosen": -0.8767872452735901, |
|
"logits/rejected": -0.8922024965286255, |
|
"logps/chosen": -4.103570461273193, |
|
"logps/rejected": -4.829570770263672, |
|
"loss": 2.3212, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -41.03570556640625, |
|
"rewards/margins": 7.260003089904785, |
|
"rewards/rejected": -48.29570388793945, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.6513994910941476, |
|
"grad_norm": 123.45145960508628, |
|
"learning_rate": 2.62035865748246e-07, |
|
"logits/chosen": -0.8526559472084045, |
|
"logits/rejected": -0.865902304649353, |
|
"logps/chosen": -3.979776382446289, |
|
"logps/rejected": -4.576243877410889, |
|
"loss": 2.8045, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -39.797767639160156, |
|
"rewards/margins": 5.964676856994629, |
|
"rewards/rejected": -45.76243591308594, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.6536612948826689, |
|
"grad_norm": 165.09853723797895, |
|
"learning_rate": 2.5906912241871554e-07, |
|
"logits/chosen": -0.932748019695282, |
|
"logits/rejected": -0.9441463947296143, |
|
"logps/chosen": -4.250375270843506, |
|
"logps/rejected": -4.883494853973389, |
|
"loss": 2.7687, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -42.503753662109375, |
|
"rewards/margins": 6.331197738647461, |
|
"rewards/rejected": -48.83495330810547, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.6559230986711903, |
|
"grad_norm": 136.63193628571327, |
|
"learning_rate": 2.561112042498753e-07, |
|
"logits/chosen": -0.8068567514419556, |
|
"logits/rejected": -0.8377366065979004, |
|
"logps/chosen": -3.9496514797210693, |
|
"logps/rejected": -4.45440673828125, |
|
"loss": 3.4096, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -39.49651336669922, |
|
"rewards/margins": 5.047552108764648, |
|
"rewards/rejected": -44.5440673828125, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6581849024597116, |
|
"grad_norm": 143.97363440746773, |
|
"learning_rate": 2.5316229646801195e-07, |
|
"logits/chosen": -0.8525142073631287, |
|
"logits/rejected": -0.8822568655014038, |
|
"logps/chosen": -4.472620010375977, |
|
"logps/rejected": -5.035334587097168, |
|
"loss": 2.9599, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -44.726200103759766, |
|
"rewards/margins": 5.6271467208862305, |
|
"rewards/rejected": -50.35334777832031, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.660446706248233, |
|
"grad_norm": 120.93371939289545, |
|
"learning_rate": 2.5022258373517714e-07, |
|
"logits/chosen": -0.9202491044998169, |
|
"logits/rejected": -0.9317676424980164, |
|
"logps/chosen": -4.282386779785156, |
|
"logps/rejected": -4.909029483795166, |
|
"loss": 2.3016, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -42.8238639831543, |
|
"rewards/margins": 6.266423225402832, |
|
"rewards/rejected": -49.09029006958008, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.6627085100367544, |
|
"grad_norm": 149.03147323251173, |
|
"learning_rate": 2.4729225013762474e-07, |
|
"logits/chosen": -0.9682255387306213, |
|
"logits/rejected": -0.9804242253303528, |
|
"logps/chosen": -4.4975714683532715, |
|
"logps/rejected": -5.107451438903809, |
|
"loss": 3.3597, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -44.9757194519043, |
|
"rewards/margins": 6.098800182342529, |
|
"rewards/rejected": -51.07451629638672, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.6649703138252756, |
|
"grad_norm": 147.62664173767308, |
|
"learning_rate": 2.4437147917428203e-07, |
|
"logits/chosen": -0.8548184633255005, |
|
"logits/rejected": -0.8674319386482239, |
|
"logps/chosen": -4.390334129333496, |
|
"logps/rejected": -5.062026500701904, |
|
"loss": 2.7968, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -43.90333938598633, |
|
"rewards/margins": 6.716926574707031, |
|
"rewards/rejected": -50.620262145996094, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.667232117613797, |
|
"grad_norm": 145.09780607922434, |
|
"learning_rate": 2.414604537452595e-07, |
|
"logits/chosen": -0.8391546607017517, |
|
"logits/rejected": -0.8629494309425354, |
|
"logps/chosen": -4.255443096160889, |
|
"logps/rejected": -4.826220512390137, |
|
"loss": 2.7128, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -42.55442810058594, |
|
"rewards/margins": 5.707772731781006, |
|
"rewards/rejected": -48.26219940185547, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6694939214023183, |
|
"grad_norm": 141.08198535612613, |
|
"learning_rate": 2.385593561403974e-07, |
|
"logits/chosen": -0.8808133602142334, |
|
"logits/rejected": -0.9036346673965454, |
|
"logps/chosen": -4.058435440063477, |
|
"logps/rejected": -4.680113792419434, |
|
"loss": 2.5458, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -40.5843505859375, |
|
"rewards/margins": 6.216782569885254, |
|
"rewards/rejected": -46.8011360168457, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.6717557251908397, |
|
"grad_norm": 108.96502235601564, |
|
"learning_rate": 2.3566836802785119e-07, |
|
"logits/chosen": -0.8734185099601746, |
|
"logits/rejected": -0.910306453704834, |
|
"logps/chosen": -4.139810562133789, |
|
"logps/rejected": -4.895854949951172, |
|
"loss": 2.3129, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -41.398101806640625, |
|
"rewards/margins": 7.560453414916992, |
|
"rewards/rejected": -48.95855712890625, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.6740175289793611, |
|
"grad_norm": 137.86729078107237, |
|
"learning_rate": 2.327876704427146e-07, |
|
"logits/chosen": -0.8416418433189392, |
|
"logits/rejected": -0.8470006585121155, |
|
"logps/chosen": -4.119014263153076, |
|
"logps/rejected": -4.593555927276611, |
|
"loss": 3.1621, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -41.19013977050781, |
|
"rewards/margins": 4.745421886444092, |
|
"rewards/rejected": -45.93556213378906, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.6762793327678824, |
|
"grad_norm": 175.89823569204168, |
|
"learning_rate": 2.2991744377568358e-07, |
|
"logits/chosen": -0.8492337465286255, |
|
"logits/rejected": -0.8457680344581604, |
|
"logps/chosen": -4.260539531707764, |
|
"logps/rejected": -4.814329147338867, |
|
"loss": 2.9577, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -42.60539627075195, |
|
"rewards/margins": 5.537896633148193, |
|
"rewards/rejected": -48.14329528808594, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.6785411365564037, |
|
"grad_norm": 131.9500051592224, |
|
"learning_rate": 2.270578677617601e-07, |
|
"logits/chosen": -0.9049394130706787, |
|
"logits/rejected": -0.9202775955200195, |
|
"logps/chosen": -4.153038501739502, |
|
"logps/rejected": -4.798150062561035, |
|
"loss": 3.2734, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -41.53038787841797, |
|
"rewards/margins": 6.451115608215332, |
|
"rewards/rejected": -47.981502532958984, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6808029403449251, |
|
"grad_norm": 128.15329813087357, |
|
"learning_rate": 2.242091214689971e-07, |
|
"logits/chosen": -0.8781294226646423, |
|
"logits/rejected": -0.9136564135551453, |
|
"logps/chosen": -4.225987911224365, |
|
"logps/rejected": -4.948566436767578, |
|
"loss": 2.5272, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -42.2598876953125, |
|
"rewards/margins": 7.225780487060547, |
|
"rewards/rejected": -49.485660552978516, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.6830647441334464, |
|
"grad_norm": 149.91529967533276, |
|
"learning_rate": 2.2137138328728456e-07, |
|
"logits/chosen": -0.9418582916259766, |
|
"logits/rejected": -0.9293465614318848, |
|
"logps/chosen": -4.346358299255371, |
|
"logps/rejected": -4.898774147033691, |
|
"loss": 2.7408, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -43.46357727050781, |
|
"rewards/margins": 5.524153232574463, |
|
"rewards/rejected": -48.987735748291016, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.6853265479219678, |
|
"grad_norm": 108.47737776135804, |
|
"learning_rate": 2.1854483091717974e-07, |
|
"logits/chosen": -0.9234378337860107, |
|
"logits/rejected": -0.9519913792610168, |
|
"logps/chosen": -4.19830322265625, |
|
"logps/rejected": -4.844261646270752, |
|
"loss": 2.266, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -41.9830322265625, |
|
"rewards/margins": 6.459583759307861, |
|
"rewards/rejected": -48.4426155090332, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.6875883517104892, |
|
"grad_norm": 143.85162210524913, |
|
"learning_rate": 2.1572964135877863e-07, |
|
"logits/chosen": -0.9188116192817688, |
|
"logits/rejected": -0.9410698413848877, |
|
"logps/chosen": -4.3630805015563965, |
|
"logps/rejected": -4.898950099945068, |
|
"loss": 3.0578, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -43.63079833984375, |
|
"rewards/margins": 5.358698844909668, |
|
"rewards/rejected": -48.989498138427734, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.6898501554990104, |
|
"grad_norm": 122.12397072006037, |
|
"learning_rate": 2.1292599090063245e-07, |
|
"logits/chosen": -0.9438715577125549, |
|
"logits/rejected": -0.9488154053688049, |
|
"logps/chosen": -4.201948165893555, |
|
"logps/rejected": -4.896744251251221, |
|
"loss": 2.4426, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -42.01948165893555, |
|
"rewards/margins": 6.94796085357666, |
|
"rewards/rejected": -48.96744155883789, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6921119592875318, |
|
"grad_norm": 144.4237668373174, |
|
"learning_rate": 2.1013405510870824e-07, |
|
"logits/chosen": -0.8521759510040283, |
|
"logits/rejected": -0.8959603905677795, |
|
"logps/chosen": -4.268230438232422, |
|
"logps/rejected": -4.95402193069458, |
|
"loss": 2.2676, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -42.68229675292969, |
|
"rewards/margins": 6.857920169830322, |
|
"rewards/rejected": -49.540225982666016, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.6943737630760531, |
|
"grad_norm": 121.44280758498674, |
|
"learning_rate": 2.0735400881539494e-07, |
|
"logits/chosen": -0.8913055658340454, |
|
"logits/rejected": -0.9139821529388428, |
|
"logps/chosen": -4.439169406890869, |
|
"logps/rejected": -5.179335117340088, |
|
"loss": 2.3341, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -44.39169692993164, |
|
"rewards/margins": 7.4016547203063965, |
|
"rewards/rejected": -51.79335021972656, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.6966355668645745, |
|
"grad_norm": 158.3173957292418, |
|
"learning_rate": 2.0458602610855536e-07, |
|
"logits/chosen": -0.9496070742607117, |
|
"logits/rejected": -0.9575868844985962, |
|
"logps/chosen": -4.370190620422363, |
|
"logps/rejected": -4.980884552001953, |
|
"loss": 2.4398, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -43.70191192626953, |
|
"rewards/margins": 6.106935501098633, |
|
"rewards/rejected": -49.80884552001953, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.6988973706530959, |
|
"grad_norm": 148.52467272675696, |
|
"learning_rate": 2.0183028032062422e-07, |
|
"logits/chosen": -0.9165297746658325, |
|
"logits/rejected": -0.9382550120353699, |
|
"logps/chosen": -4.426529884338379, |
|
"logps/rejected": -5.0528764724731445, |
|
"loss": 2.7146, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -44.265296936035156, |
|
"rewards/margins": 6.263469219207764, |
|
"rewards/rejected": -50.52876663208008, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.7011591744416172, |
|
"grad_norm": 132.68776312794716, |
|
"learning_rate": 1.9908694401775473e-07, |
|
"logits/chosen": -0.9458051323890686, |
|
"logits/rejected": -0.9692423939704895, |
|
"logps/chosen": -4.464923858642578, |
|
"logps/rejected": -5.0837626457214355, |
|
"loss": 2.6034, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -44.64923858642578, |
|
"rewards/margins": 6.188381195068359, |
|
"rewards/rejected": -50.837623596191406, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7034209782301385, |
|
"grad_norm": 136.8201982483417, |
|
"learning_rate": 1.9635618898901196e-07, |
|
"logits/chosen": -0.921970784664154, |
|
"logits/rejected": -0.939640998840332, |
|
"logps/chosen": -4.886068820953369, |
|
"logps/rejected": -5.573887825012207, |
|
"loss": 2.8076, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -48.860687255859375, |
|
"rewards/margins": 6.878194332122803, |
|
"rewards/rejected": -55.73888397216797, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.7056827820186599, |
|
"grad_norm": 138.79561100336207, |
|
"learning_rate": 1.9363818623561565e-07, |
|
"logits/chosen": -0.8815241456031799, |
|
"logits/rejected": -0.9167051315307617, |
|
"logps/chosen": -4.46604585647583, |
|
"logps/rejected": -5.128448009490967, |
|
"loss": 2.467, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -44.66046142578125, |
|
"rewards/margins": 6.624024391174316, |
|
"rewards/rejected": -51.284481048583984, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.7079445858071812, |
|
"grad_norm": 139.63987542313842, |
|
"learning_rate": 1.9093310596023108e-07, |
|
"logits/chosen": -0.8783115148544312, |
|
"logits/rejected": -0.886088490486145, |
|
"logps/chosen": -4.325229167938232, |
|
"logps/rejected": -5.129339694976807, |
|
"loss": 2.4526, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -43.252296447753906, |
|
"rewards/margins": 8.04110336303711, |
|
"rewards/rejected": -51.29339599609375, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.7102063895957026, |
|
"grad_norm": 158.39745233921516, |
|
"learning_rate": 1.8824111755631274e-07, |
|
"logits/chosen": -0.9300839900970459, |
|
"logits/rejected": -0.9606208801269531, |
|
"logps/chosen": -4.352430820465088, |
|
"logps/rejected": -4.94814920425415, |
|
"loss": 2.7746, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -43.52430725097656, |
|
"rewards/margins": 5.957186222076416, |
|
"rewards/rejected": -49.48149490356445, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.712468193384224, |
|
"grad_norm": 175.08231727150573, |
|
"learning_rate": 1.8556238959749457e-07, |
|
"logits/chosen": -0.9153900146484375, |
|
"logits/rejected": -0.9284498691558838, |
|
"logps/chosen": -4.703487396240234, |
|
"logps/rejected": -5.153081893920898, |
|
"loss": 3.7552, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -47.034873962402344, |
|
"rewards/margins": 4.495938777923584, |
|
"rewards/rejected": -51.53081512451172, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7147299971727452, |
|
"grad_norm": 171.25427946077767, |
|
"learning_rate": 1.8289708982703562e-07, |
|
"logits/chosen": -0.8872180581092834, |
|
"logits/rejected": -0.8773900866508484, |
|
"logps/chosen": -4.5176239013671875, |
|
"logps/rejected": -5.215326309204102, |
|
"loss": 3.269, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -45.17624282836914, |
|
"rewards/margins": 6.977020740509033, |
|
"rewards/rejected": -52.153263092041016, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.7169918009612666, |
|
"grad_norm": 167.8139506813274, |
|
"learning_rate": 1.802453851473151e-07, |
|
"logits/chosen": -0.9402052164077759, |
|
"logits/rejected": -0.9389104247093201, |
|
"logps/chosen": -4.668498516082764, |
|
"logps/rejected": -5.336842060089111, |
|
"loss": 2.5691, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -46.68498229980469, |
|
"rewards/margins": 6.683432102203369, |
|
"rewards/rejected": -53.36841583251953, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.719253604749788, |
|
"grad_norm": 145.08828046519986, |
|
"learning_rate": 1.7760744160938093e-07, |
|
"logits/chosen": -0.8834313154220581, |
|
"logits/rejected": -0.899104118347168, |
|
"logps/chosen": -4.476520538330078, |
|
"logps/rejected": -5.283236980438232, |
|
"loss": 2.5195, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -44.76520538330078, |
|
"rewards/margins": 8.06716251373291, |
|
"rewards/rejected": -52.832374572753906, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.7215154085383093, |
|
"grad_norm": 139.91997046332745, |
|
"learning_rate": 1.7498342440255135e-07, |
|
"logits/chosen": -0.9341943264007568, |
|
"logits/rejected": -0.9333917498588562, |
|
"logps/chosen": -4.627048015594482, |
|
"logps/rejected": -5.251206874847412, |
|
"loss": 2.5712, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -46.27048110961914, |
|
"rewards/margins": 6.241583824157715, |
|
"rewards/rejected": -52.51206588745117, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.7237772123268307, |
|
"grad_norm": 128.75616766030222, |
|
"learning_rate": 1.7237349784407115e-07, |
|
"logits/chosen": -0.9444934725761414, |
|
"logits/rejected": -0.9463576674461365, |
|
"logps/chosen": -4.7167863845825195, |
|
"logps/rejected": -5.434706687927246, |
|
"loss": 2.4956, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -47.16786575317383, |
|
"rewards/margins": 7.179207801818848, |
|
"rewards/rejected": -54.34707260131836, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.726039016115352, |
|
"grad_norm": 152.32529171434254, |
|
"learning_rate": 1.6977782536882178e-07, |
|
"logits/chosen": -0.8644733428955078, |
|
"logits/rejected": -0.878294825553894, |
|
"logps/chosen": -4.275421142578125, |
|
"logps/rejected": -5.040489196777344, |
|
"loss": 2.7444, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -42.75421142578125, |
|
"rewards/margins": 7.650677680969238, |
|
"rewards/rejected": -50.40489196777344, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.7283008199038733, |
|
"grad_norm": 114.93649081424365, |
|
"learning_rate": 1.6719656951908708e-07, |
|
"logits/chosen": -0.8660048246383667, |
|
"logits/rejected": -0.8882208466529846, |
|
"logps/chosen": -4.067705154418945, |
|
"logps/rejected": -4.7624616622924805, |
|
"loss": 2.4388, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -40.67705535888672, |
|
"rewards/margins": 6.947561264038086, |
|
"rewards/rejected": -47.62461853027344, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.7305626236923947, |
|
"grad_norm": 141.6695023872139, |
|
"learning_rate": 1.6462989193437453e-07, |
|
"logits/chosen": -0.9560823440551758, |
|
"logits/rejected": -0.9642462730407715, |
|
"logps/chosen": -4.5176310539245605, |
|
"logps/rejected": -5.131880760192871, |
|
"loss": 2.8127, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -45.17631149291992, |
|
"rewards/margins": 6.142499923706055, |
|
"rewards/rejected": -51.318809509277344, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.732824427480916, |
|
"grad_norm": 153.0336707141252, |
|
"learning_rate": 1.6207795334129365e-07, |
|
"logits/chosen": -0.9089516997337341, |
|
"logits/rejected": -0.9075677394866943, |
|
"logps/chosen": -4.768195152282715, |
|
"logps/rejected": -5.365372180938721, |
|
"loss": 2.9092, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -47.68195343017578, |
|
"rewards/margins": 5.971770286560059, |
|
"rewards/rejected": -53.65372085571289, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.7350862312694374, |
|
"grad_norm": 175.02628532285675, |
|
"learning_rate": 1.5954091354349121e-07, |
|
"logits/chosen": -0.93093341588974, |
|
"logits/rejected": -0.9459247589111328, |
|
"logps/chosen": -4.557867050170898, |
|
"logps/rejected": -5.066596508026123, |
|
"loss": 3.4744, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -45.57866668701172, |
|
"rewards/margins": 5.087299346923828, |
|
"rewards/rejected": -50.66596603393555, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7373480350579588, |
|
"grad_norm": 268.3403033769808, |
|
"learning_rate": 1.5701893141164364e-07, |
|
"logits/chosen": -0.9369128346443176, |
|
"logits/rejected": -0.9450178742408752, |
|
"logps/chosen": -4.78832483291626, |
|
"logps/rejected": -5.510087490081787, |
|
"loss": 3.4083, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -47.88325119018555, |
|
"rewards/margins": 7.2176289558410645, |
|
"rewards/rejected": -55.10087966918945, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.73960983884648, |
|
"grad_norm": 144.09484817777712, |
|
"learning_rate": 1.545121648735093e-07, |
|
"logits/chosen": -0.9158852100372314, |
|
"logits/rejected": -0.924169659614563, |
|
"logps/chosen": -4.634927749633789, |
|
"logps/rejected": -5.217785358428955, |
|
"loss": 3.0842, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -46.349273681640625, |
|
"rewards/margins": 5.828577518463135, |
|
"rewards/rejected": -52.17784881591797, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.7418716426350014, |
|
"grad_norm": 134.18959418719092, |
|
"learning_rate": 1.5202077090403863e-07, |
|
"logits/chosen": -0.9410818815231323, |
|
"logits/rejected": -0.9246065616607666, |
|
"logps/chosen": -4.225207805633545, |
|
"logps/rejected": -4.864658355712891, |
|
"loss": 2.5935, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -42.2520751953125, |
|
"rewards/margins": 6.394504547119141, |
|
"rewards/rejected": -48.646583557128906, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.7441334464235227, |
|
"grad_norm": 157.68518289039383, |
|
"learning_rate": 1.495449055155443e-07, |
|
"logits/chosen": -0.9306075572967529, |
|
"logits/rejected": -0.942533552646637, |
|
"logps/chosen": -4.558164119720459, |
|
"logps/rejected": -5.3390398025512695, |
|
"loss": 2.4075, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -45.581642150878906, |
|
"rewards/margins": 7.808758735656738, |
|
"rewards/rejected": -53.390403747558594, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.7463952502120441, |
|
"grad_norm": 155.53781883249212, |
|
"learning_rate": 1.4708472374793112e-07, |
|
"logits/chosen": -0.9006601572036743, |
|
"logits/rejected": -0.9074862599372864, |
|
"logps/chosen": -4.6048903465271, |
|
"logps/rejected": -5.098773002624512, |
|
"loss": 3.4763, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -46.04890441894531, |
|
"rewards/margins": 4.938818454742432, |
|
"rewards/rejected": -50.98772430419922, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7486570540005655, |
|
"grad_norm": 156.32875339642223, |
|
"learning_rate": 1.4464037965898878e-07, |
|
"logits/chosen": -0.8546017408370972, |
|
"logits/rejected": -0.8647469878196716, |
|
"logps/chosen": -4.482880115509033, |
|
"logps/rejected": -5.083220481872559, |
|
"loss": 2.9646, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -44.82880401611328, |
|
"rewards/margins": 6.003401756286621, |
|
"rewards/rejected": -50.83220672607422, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.7509188577890868, |
|
"grad_norm": 131.67726299773977, |
|
"learning_rate": 1.4221202631474282e-07, |
|
"logits/chosen": -0.8612452745437622, |
|
"logits/rejected": -0.8679234385490417, |
|
"logps/chosen": -4.424932479858398, |
|
"logps/rejected": -5.060441970825195, |
|
"loss": 2.7243, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -44.249324798583984, |
|
"rewards/margins": 6.355095863342285, |
|
"rewards/rejected": -50.60442352294922, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.7531806615776081, |
|
"grad_norm": 143.74537092522394, |
|
"learning_rate": 1.3979981577987113e-07, |
|
"logits/chosen": -0.9003939628601074, |
|
"logits/rejected": -0.8934139013290405, |
|
"logps/chosen": -4.204031467437744, |
|
"logps/rejected": -4.884526252746582, |
|
"loss": 2.7886, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -42.040313720703125, |
|
"rewards/margins": 6.804945945739746, |
|
"rewards/rejected": -48.84525680541992, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.7554424653661295, |
|
"grad_norm": 122.3096159489272, |
|
"learning_rate": 1.374038991081807e-07, |
|
"logits/chosen": -0.9354572892189026, |
|
"logits/rejected": -0.9418012499809265, |
|
"logps/chosen": -4.416646957397461, |
|
"logps/rejected": -4.966562747955322, |
|
"loss": 2.9822, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -44.16646957397461, |
|
"rewards/margins": 5.49915885925293, |
|
"rewards/rejected": -49.665626525878906, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.7577042691546508, |
|
"grad_norm": 125.3467664124325, |
|
"learning_rate": 1.3502442633314882e-07, |
|
"logits/chosen": -0.8854781985282898, |
|
"logits/rejected": -0.8959544897079468, |
|
"logps/chosen": -3.8627703189849854, |
|
"logps/rejected": -4.476377010345459, |
|
"loss": 2.4221, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -38.62770462036133, |
|
"rewards/margins": 6.136070728302002, |
|
"rewards/rejected": -44.76377487182617, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7599660729431722, |
|
"grad_norm": 130.27731954527835, |
|
"learning_rate": 1.3266154645852815e-07, |
|
"logits/chosen": -0.8756478428840637, |
|
"logits/rejected": -0.8797450065612793, |
|
"logps/chosen": -4.367222785949707, |
|
"logps/rejected": -4.935546875, |
|
"loss": 2.7571, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -43.6722297668457, |
|
"rewards/margins": 5.683239936828613, |
|
"rewards/rejected": -49.35546875, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.7622278767316936, |
|
"grad_norm": 174.47595303975626, |
|
"learning_rate": 1.303154074490152e-07, |
|
"logits/chosen": -0.9251211881637573, |
|
"logits/rejected": -0.9095232486724854, |
|
"logps/chosen": -4.15577507019043, |
|
"logps/rejected": -4.788844108581543, |
|
"loss": 3.0771, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -41.55774688720703, |
|
"rewards/margins": 6.330696105957031, |
|
"rewards/rejected": -47.88844680786133, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.7644896805202148, |
|
"grad_norm": 139.20935050711142, |
|
"learning_rate": 1.2798615622098616e-07, |
|
"logits/chosen": -0.9291560649871826, |
|
"logits/rejected": -0.9244073033332825, |
|
"logps/chosen": -4.082375526428223, |
|
"logps/rejected": -4.767295837402344, |
|
"loss": 2.935, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -40.823753356933594, |
|
"rewards/margins": 6.8492045402526855, |
|
"rewards/rejected": -47.67295837402344, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.7667514843087362, |
|
"grad_norm": 125.85713373053201, |
|
"learning_rate": 1.2567393863329523e-07, |
|
"logits/chosen": -0.9064013957977295, |
|
"logits/rejected": -0.9375932216644287, |
|
"logps/chosen": -4.197393417358398, |
|
"logps/rejected": -4.932326793670654, |
|
"loss": 2.4315, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -41.973934173583984, |
|
"rewards/margins": 7.349334239959717, |
|
"rewards/rejected": -49.323272705078125, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.7690132880972576, |
|
"grad_norm": 113.88331415159809, |
|
"learning_rate": 1.233788994781423e-07, |
|
"logits/chosen": -0.9322744607925415, |
|
"logits/rejected": -0.9696506261825562, |
|
"logps/chosen": -3.992047071456909, |
|
"logps/rejected": -4.617818355560303, |
|
"loss": 2.3658, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -39.920467376708984, |
|
"rewards/margins": 6.257713317871094, |
|
"rewards/rejected": -46.178184509277344, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7712750918857789, |
|
"grad_norm": 120.76771779346096, |
|
"learning_rate": 1.2110118247200468e-07, |
|
"logits/chosen": -0.930842399597168, |
|
"logits/rejected": -0.9461864829063416, |
|
"logps/chosen": -4.024172782897949, |
|
"logps/rejected": -4.681670188903809, |
|
"loss": 2.253, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -40.24173355102539, |
|
"rewards/margins": 6.574971675872803, |
|
"rewards/rejected": -46.81669998168945, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.7735368956743003, |
|
"grad_norm": 136.22802399303367, |
|
"learning_rate": 1.1884093024663933e-07, |
|
"logits/chosen": -0.9333779811859131, |
|
"logits/rejected": -0.9390580058097839, |
|
"logps/chosen": -3.7476558685302734, |
|
"logps/rejected": -4.544672012329102, |
|
"loss": 2.7483, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -37.47655487060547, |
|
"rewards/margins": 7.970164775848389, |
|
"rewards/rejected": -45.446720123291016, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.7757986994628217, |
|
"grad_norm": 142.58791407306794, |
|
"learning_rate": 1.1659828434014886e-07, |
|
"logits/chosen": -0.9368746280670166, |
|
"logits/rejected": -0.9193394780158997, |
|
"logps/chosen": -3.7733166217803955, |
|
"logps/rejected": -4.509119033813477, |
|
"loss": 2.5863, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -37.73316955566406, |
|
"rewards/margins": 7.358019828796387, |
|
"rewards/rejected": -45.0911865234375, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.7780605032513429, |
|
"grad_norm": 149.16462513003228, |
|
"learning_rate": 1.143733851881203e-07, |
|
"logits/chosen": -0.969507098197937, |
|
"logits/rejected": -0.9765860438346863, |
|
"logps/chosen": -4.0373382568359375, |
|
"logps/rejected": -4.758094310760498, |
|
"loss": 2.5659, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -40.373382568359375, |
|
"rewards/margins": 7.207555770874023, |
|
"rewards/rejected": -47.5809440612793, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.7803223070398643, |
|
"grad_norm": 124.92145191514574, |
|
"learning_rate": 1.1216637211483005e-07, |
|
"logits/chosen": -0.9140538573265076, |
|
"logits/rejected": -0.9331907629966736, |
|
"logps/chosen": -3.9364168643951416, |
|
"logps/rejected": -4.495926856994629, |
|
"loss": 2.7829, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -39.36417007446289, |
|
"rewards/margins": 5.595102310180664, |
|
"rewards/rejected": -44.959266662597656, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7825841108283856, |
|
"grad_norm": 139.19218112831186, |
|
"learning_rate": 1.0997738332451936e-07, |
|
"logits/chosen": -0.9063421487808228, |
|
"logits/rejected": -0.9176337718963623, |
|
"logps/chosen": -4.21934175491333, |
|
"logps/rejected": -4.815896034240723, |
|
"loss": 2.6989, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -42.19342041015625, |
|
"rewards/margins": 5.965543270111084, |
|
"rewards/rejected": -48.15896224975586, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.784845914616907, |
|
"grad_norm": 135.08565556440382, |
|
"learning_rate": 1.0780655589274031e-07, |
|
"logits/chosen": -0.9613451957702637, |
|
"logits/rejected": -0.9540661573410034, |
|
"logps/chosen": -3.9844415187835693, |
|
"logps/rejected": -4.568908214569092, |
|
"loss": 2.3105, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -39.844417572021484, |
|
"rewards/margins": 5.844663619995117, |
|
"rewards/rejected": -45.68907928466797, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.7871077184054284, |
|
"grad_norm": 140.13866385079996, |
|
"learning_rate": 1.056540257577712e-07, |
|
"logits/chosen": -0.8759354948997498, |
|
"logits/rejected": -0.8963236808776855, |
|
"logps/chosen": -4.536995887756348, |
|
"logps/rejected": -5.269956111907959, |
|
"loss": 2.1934, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -45.369956970214844, |
|
"rewards/margins": 7.329606056213379, |
|
"rewards/rejected": -52.69956970214844, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.7893695221939496, |
|
"grad_norm": 137.27660659020637, |
|
"learning_rate": 1.0351992771210554e-07, |
|
"logits/chosen": -0.9132465720176697, |
|
"logits/rejected": -0.9206264615058899, |
|
"logps/chosen": -4.071958541870117, |
|
"logps/rejected": -4.736346244812012, |
|
"loss": 2.6102, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -40.719581604003906, |
|
"rewards/margins": 6.643874168395996, |
|
"rewards/rejected": -47.36345291137695, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.791631325982471, |
|
"grad_norm": 142.1622013900452, |
|
"learning_rate": 1.0140439539400953e-07, |
|
"logits/chosen": -0.8662968277931213, |
|
"logits/rejected": -0.9013144969940186, |
|
"logps/chosen": -4.007244110107422, |
|
"logps/rejected": -4.624824523925781, |
|
"loss": 3.0147, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -40.07244110107422, |
|
"rewards/margins": 6.175803184509277, |
|
"rewards/rejected": -46.24824523925781, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7938931297709924, |
|
"grad_norm": 129.3739896140384, |
|
"learning_rate": 9.930756127915488e-08, |
|
"logits/chosen": -0.9286041259765625, |
|
"logits/rejected": -0.9355084896087646, |
|
"logps/chosen": -4.044810771942139, |
|
"logps/rejected": -4.674356460571289, |
|
"loss": 2.6907, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -40.44810485839844, |
|
"rewards/margins": 6.295462131500244, |
|
"rewards/rejected": -46.743568420410156, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.7961549335595137, |
|
"grad_norm": 154.70829465365628, |
|
"learning_rate": 9.722955667232242e-08, |
|
"logits/chosen": -0.9570465683937073, |
|
"logits/rejected": -0.9680700302124023, |
|
"logps/chosen": -4.297163963317871, |
|
"logps/rejected": -4.780937194824219, |
|
"loss": 3.2322, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -42.971641540527344, |
|
"rewards/margins": 4.837734222412109, |
|
"rewards/rejected": -47.80936813354492, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.7984167373480351, |
|
"grad_norm": 140.78323504174404, |
|
"learning_rate": 9.517051169918016e-08, |
|
"logits/chosen": -0.9370065331459045, |
|
"logits/rejected": -0.9510276317596436, |
|
"logps/chosen": -4.016280651092529, |
|
"logps/rejected": -4.564591407775879, |
|
"loss": 3.2779, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -40.162811279296875, |
|
"rewards/margins": 5.483105182647705, |
|
"rewards/rejected": -45.64591979980469, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.8006785411365565, |
|
"grad_norm": 142.37033106886284, |
|
"learning_rate": 9.313055529813412e-08, |
|
"logits/chosen": -0.8857989311218262, |
|
"logits/rejected": -0.9076879620552063, |
|
"logps/chosen": -4.098158836364746, |
|
"logps/rejected": -4.799932479858398, |
|
"loss": 2.3024, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -40.98158645629883, |
|
"rewards/margins": 7.017735004425049, |
|
"rewards/rejected": -47.99932098388672, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.8029403449250777, |
|
"grad_norm": 144.22040244633794, |
|
"learning_rate": 9.110981521225532e-08, |
|
"logits/chosen": -0.9384421706199646, |
|
"logits/rejected": -0.9499157667160034, |
|
"logps/chosen": -4.076770782470703, |
|
"logps/rejected": -4.640554428100586, |
|
"loss": 2.9348, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -40.76770782470703, |
|
"rewards/margins": 5.637840747833252, |
|
"rewards/rejected": -46.40555191040039, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.8052021487135991, |
|
"grad_norm": 139.7935847003589, |
|
"learning_rate": 8.910841798127884e-08, |
|
"logits/chosen": -0.9020113945007324, |
|
"logits/rejected": -0.9301177263259888, |
|
"logps/chosen": -4.16035270690918, |
|
"logps/rejected": -4.824099063873291, |
|
"loss": 2.5111, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -41.60352325439453, |
|
"rewards/margins": 6.637460708618164, |
|
"rewards/rejected": -48.240989685058594, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.8074639525021204, |
|
"grad_norm": 166.03846448720964, |
|
"learning_rate": 8.712648893368139e-08, |
|
"logits/chosen": -0.9206175208091736, |
|
"logits/rejected": -0.9502934217453003, |
|
"logps/chosen": -4.101649761199951, |
|
"logps/rejected": -4.884461879730225, |
|
"loss": 2.5017, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -41.01649475097656, |
|
"rewards/margins": 7.828126907348633, |
|
"rewards/rejected": -48.844627380371094, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.8097257562906418, |
|
"grad_norm": 117.89459898307335, |
|
"learning_rate": 8.516415217883186e-08, |
|
"logits/chosen": -0.9100026488304138, |
|
"logits/rejected": -0.9124536514282227, |
|
"logps/chosen": -4.00990629196167, |
|
"logps/rejected": -4.74934196472168, |
|
"loss": 2.6175, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -40.099063873291016, |
|
"rewards/margins": 7.394357681274414, |
|
"rewards/rejected": -47.4934196472168, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.8119875600791632, |
|
"grad_norm": 151.06740224882444, |
|
"learning_rate": 8.32215305992209e-08, |
|
"logits/chosen": -0.9616566896438599, |
|
"logits/rejected": -0.9742845296859741, |
|
"logps/chosen": -3.94974422454834, |
|
"logps/rejected": -4.5588812828063965, |
|
"loss": 2.8848, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -39.49744415283203, |
|
"rewards/margins": 6.091368675231934, |
|
"rewards/rejected": -45.58881759643555, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.8142493638676844, |
|
"grad_norm": 116.28513294682625, |
|
"learning_rate": 8.129874584276448e-08, |
|
"logits/chosen": -0.9059348702430725, |
|
"logits/rejected": -0.9224525690078735, |
|
"logps/chosen": -4.079035758972168, |
|
"logps/rejected": -4.834011554718018, |
|
"loss": 2.1432, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -40.79035949707031, |
|
"rewards/margins": 7.549759864807129, |
|
"rewards/rejected": -48.34011459350586, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8165111676562058, |
|
"grad_norm": 143.4855751446256, |
|
"learning_rate": 7.939591831518746e-08, |
|
"logits/chosen": -0.943411648273468, |
|
"logits/rejected": -0.9577879905700684, |
|
"logps/chosen": -4.06253719329834, |
|
"logps/rejected": -4.622920513153076, |
|
"loss": 2.308, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -40.625370025634766, |
|
"rewards/margins": 5.603834629058838, |
|
"rewards/rejected": -46.22920608520508, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.8187729714447272, |
|
"grad_norm": 132.12698029254315, |
|
"learning_rate": 7.751316717248304e-08, |
|
"logits/chosen": -0.9082808494567871, |
|
"logits/rejected": -0.9305973052978516, |
|
"logps/chosen": -4.398627281188965, |
|
"logps/rejected": -5.260112285614014, |
|
"loss": 2.4971, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -43.986270904541016, |
|
"rewards/margins": 8.614850044250488, |
|
"rewards/rejected": -52.60112762451172, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.8210347752332485, |
|
"grad_norm": 158.043389904261, |
|
"learning_rate": 7.565061031345142e-08, |
|
"logits/chosen": -0.9185335040092468, |
|
"logits/rejected": -0.9299246072769165, |
|
"logps/chosen": -4.59241247177124, |
|
"logps/rejected": -5.313713550567627, |
|
"loss": 2.3348, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -45.92412185668945, |
|
"rewards/margins": 7.213016033172607, |
|
"rewards/rejected": -53.137142181396484, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.8232965790217699, |
|
"grad_norm": 137.14995559881837, |
|
"learning_rate": 7.380836437231686e-08, |
|
"logits/chosen": -0.9011315107345581, |
|
"logits/rejected": -0.9010403752326965, |
|
"logps/chosen": -4.083221435546875, |
|
"logps/rejected": -4.828488349914551, |
|
"loss": 2.3463, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -40.83221435546875, |
|
"rewards/margins": 7.452672481536865, |
|
"rewards/rejected": -48.284889221191406, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.8255583828102913, |
|
"grad_norm": 133.9434857418435, |
|
"learning_rate": 7.198654471142371e-08, |
|
"logits/chosen": -0.9325624704360962, |
|
"logits/rejected": -0.9269375205039978, |
|
"logps/chosen": -4.175022602081299, |
|
"logps/rejected": -5.013765335083008, |
|
"loss": 2.1937, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -41.75022506713867, |
|
"rewards/margins": 8.387434959411621, |
|
"rewards/rejected": -50.13766098022461, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.8278201865988125, |
|
"grad_norm": 140.3898107041387, |
|
"learning_rate": 7.01852654140132e-08, |
|
"logits/chosen": -0.954879105091095, |
|
"logits/rejected": -0.9756340980529785, |
|
"logps/chosen": -4.5630106925964355, |
|
"logps/rejected": -5.309413433074951, |
|
"loss": 2.3981, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -45.63011169433594, |
|
"rewards/margins": 7.464024066925049, |
|
"rewards/rejected": -53.0941276550293, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.8300819903873339, |
|
"grad_norm": 138.4057371685046, |
|
"learning_rate": 6.840463927707833e-08, |
|
"logits/chosen": -0.9294202923774719, |
|
"logits/rejected": -0.9419483542442322, |
|
"logps/chosen": -4.580535888671875, |
|
"logps/rejected": -5.202334880828857, |
|
"loss": 2.7198, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -45.80535888671875, |
|
"rewards/margins": 6.217983245849609, |
|
"rewards/rejected": -52.023345947265625, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.8323437941758552, |
|
"grad_norm": 128.96927985102926, |
|
"learning_rate": 6.664477780430138e-08, |
|
"logits/chosen": -0.9347717761993408, |
|
"logits/rejected": -0.945314884185791, |
|
"logps/chosen": -4.429632663726807, |
|
"logps/rejected": -4.995570659637451, |
|
"loss": 2.9752, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -44.29632568359375, |
|
"rewards/margins": 5.659379959106445, |
|
"rewards/rejected": -49.955711364746094, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.8346055979643766, |
|
"grad_norm": 152.34642547867944, |
|
"learning_rate": 6.49057911990711e-08, |
|
"logits/chosen": -0.8949201107025146, |
|
"logits/rejected": -0.9076350927352905, |
|
"logps/chosen": -4.397095680236816, |
|
"logps/rejected": -4.981942176818848, |
|
"loss": 2.9721, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -43.970951080322266, |
|
"rewards/margins": 5.848470687866211, |
|
"rewards/rejected": -49.81943130493164, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.836867401752898, |
|
"grad_norm": 132.0391102685107, |
|
"learning_rate": 6.318778835758189e-08, |
|
"logits/chosen": -0.92762690782547, |
|
"logits/rejected": -0.9354040026664734, |
|
"logps/chosen": -4.483278274536133, |
|
"logps/rejected": -5.1504974365234375, |
|
"loss": 1.8653, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": -44.83277893066406, |
|
"rewards/margins": 6.672185897827148, |
|
"rewards/rejected": -51.50497055053711, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8391292055414192, |
|
"grad_norm": 179.951771990511, |
|
"learning_rate": 6.149087686201433e-08, |
|
"logits/chosen": -0.9428873062133789, |
|
"logits/rejected": -0.9634348154067993, |
|
"logps/chosen": -4.341042518615723, |
|
"logps/rejected": -4.949177265167236, |
|
"loss": 3.3993, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -43.41042709350586, |
|
"rewards/margins": 6.081344127655029, |
|
"rewards/rejected": -49.49176788330078, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.8413910093299406, |
|
"grad_norm": 136.21427347850883, |
|
"learning_rate": 5.98151629737988e-08, |
|
"logits/chosen": -0.9433773756027222, |
|
"logits/rejected": -0.943168044090271, |
|
"logps/chosen": -4.414024829864502, |
|
"logps/rejected": -5.185835838317871, |
|
"loss": 2.3556, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -44.14024353027344, |
|
"rewards/margins": 7.718109607696533, |
|
"rewards/rejected": -51.85835647583008, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.843652813118462, |
|
"grad_norm": 127.45396317271563, |
|
"learning_rate": 5.816075162696097e-08, |
|
"logits/chosen": -0.9678685069084167, |
|
"logits/rejected": -0.9940780401229858, |
|
"logps/chosen": -4.39580774307251, |
|
"logps/rejected": -5.012912273406982, |
|
"loss": 2.2762, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -43.958072662353516, |
|
"rewards/margins": 6.171045303344727, |
|
"rewards/rejected": -50.129119873046875, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.8459146169069833, |
|
"grad_norm": 123.86661129091185, |
|
"learning_rate": 5.6527746421551046e-08, |
|
"logits/chosen": -0.9064250588417053, |
|
"logits/rejected": -0.9173108339309692, |
|
"logps/chosen": -4.327992916107178, |
|
"logps/rejected": -5.055395603179932, |
|
"loss": 2.4613, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -43.279930114746094, |
|
"rewards/margins": 7.274028778076172, |
|
"rewards/rejected": -50.553955078125, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.8481764206955047, |
|
"grad_norm": 136.01804435251455, |
|
"learning_rate": 5.4916249617156064e-08, |
|
"logits/chosen": -0.9181968569755554, |
|
"logits/rejected": -0.9360796213150024, |
|
"logps/chosen": -4.141705513000488, |
|
"logps/rejected": -4.756865978240967, |
|
"loss": 2.774, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -41.41705322265625, |
|
"rewards/margins": 6.151602745056152, |
|
"rewards/rejected": -47.568660736083984, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8504382244840261, |
|
"grad_norm": 135.3387610667954, |
|
"learning_rate": 5.332636212649646e-08, |
|
"logits/chosen": -0.8991196155548096, |
|
"logits/rejected": -0.915702223777771, |
|
"logps/chosen": -4.379838466644287, |
|
"logps/rejected": -5.094522953033447, |
|
"loss": 2.1719, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -43.79838562011719, |
|
"rewards/margins": 7.146846771240234, |
|
"rewards/rejected": -50.945228576660156, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.8527000282725473, |
|
"grad_norm": 164.19828475720155, |
|
"learning_rate": 5.17581835091069e-08, |
|
"logits/chosen": -0.9365058541297913, |
|
"logits/rejected": -0.9663807153701782, |
|
"logps/chosen": -4.514606475830078, |
|
"logps/rejected": -5.128344535827637, |
|
"loss": 3.0464, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -45.14606475830078, |
|
"rewards/margins": 6.137386322021484, |
|
"rewards/rejected": -51.28345489501953, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.8549618320610687, |
|
"grad_norm": 138.669954265479, |
|
"learning_rate": 5.02118119651016e-08, |
|
"logits/chosen": -0.9410414099693298, |
|
"logits/rejected": -0.9501762390136719, |
|
"logps/chosen": -4.367845058441162, |
|
"logps/rejected": -5.0047101974487305, |
|
"loss": 3.1673, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -43.67845153808594, |
|
"rewards/margins": 6.368653297424316, |
|
"rewards/rejected": -50.04710388183594, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.85722363584959, |
|
"grad_norm": 153.057632250685, |
|
"learning_rate": 4.868734432902526e-08, |
|
"logits/chosen": -1.0021592378616333, |
|
"logits/rejected": -0.9952703714370728, |
|
"logps/chosen": -4.49019718170166, |
|
"logps/rejected": -5.29477071762085, |
|
"loss": 3.0216, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -44.90196990966797, |
|
"rewards/margins": 8.045737266540527, |
|
"rewards/rejected": -52.94770812988281, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.8594854396381114, |
|
"grad_norm": 139.1146559906999, |
|
"learning_rate": 4.7184876063789134e-08, |
|
"logits/chosen": -0.9506573677062988, |
|
"logits/rejected": -0.9560145139694214, |
|
"logps/chosen": -3.926301956176758, |
|
"logps/rejected": -4.576600074768066, |
|
"loss": 2.563, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -39.263023376464844, |
|
"rewards/margins": 6.5029826164245605, |
|
"rewards/rejected": -45.7660026550293, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8617472434266328, |
|
"grad_norm": 136.57207110844007, |
|
"learning_rate": 4.570450125469314e-08, |
|
"logits/chosen": -0.9335479140281677, |
|
"logits/rejected": -0.9474495649337769, |
|
"logps/chosen": -4.52652645111084, |
|
"logps/rejected": -5.3495774269104, |
|
"loss": 2.4878, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -45.2652587890625, |
|
"rewards/margins": 8.230509757995605, |
|
"rewards/rejected": -53.49578094482422, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.864009047215154, |
|
"grad_norm": 149.81502524090894, |
|
"learning_rate": 4.424631260353378e-08, |
|
"logits/chosen": -0.9694351553916931, |
|
"logits/rejected": -0.9859524369239807, |
|
"logps/chosen": -4.307926654815674, |
|
"logps/rejected": -4.940521240234375, |
|
"loss": 2.7104, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -43.07926940917969, |
|
"rewards/margins": 6.325945854187012, |
|
"rewards/rejected": -49.40521240234375, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.8662708510036754, |
|
"grad_norm": 121.3426641959867, |
|
"learning_rate": 4.281040142280008e-08, |
|
"logits/chosen": -0.9893457889556885, |
|
"logits/rejected": -0.9991154670715332, |
|
"logps/chosen": -4.156393527984619, |
|
"logps/rejected": -4.968776226043701, |
|
"loss": 1.989, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -41.56393814086914, |
|
"rewards/margins": 8.123825073242188, |
|
"rewards/rejected": -49.68776321411133, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.8685326547921968, |
|
"grad_norm": 141.4337079496108, |
|
"learning_rate": 4.1396857629954286e-08, |
|
"logits/chosen": -0.9534589052200317, |
|
"logits/rejected": -0.9696213603019714, |
|
"logps/chosen": -4.799047470092773, |
|
"logps/rejected": -5.520049571990967, |
|
"loss": 2.7878, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -47.99047088623047, |
|
"rewards/margins": 7.210024833679199, |
|
"rewards/rejected": -55.20050048828125, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.8707944585807181, |
|
"grad_norm": 119.73253748242631, |
|
"learning_rate": 4.000576974180232e-08, |
|
"logits/chosen": -0.9004536271095276, |
|
"logits/rejected": -0.9263263940811157, |
|
"logps/chosen": -4.2699875831604, |
|
"logps/rejected": -4.922300338745117, |
|
"loss": 2.9088, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -42.69987487792969, |
|
"rewards/margins": 6.523127555847168, |
|
"rewards/rejected": -49.22300720214844, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8730562623692395, |
|
"grad_norm": 132.99002186990265, |
|
"learning_rate": 3.8637224868950066e-08, |
|
"logits/chosen": -0.9017341136932373, |
|
"logits/rejected": -0.9102005958557129, |
|
"logps/chosen": -4.248313903808594, |
|
"logps/rejected": -4.877220630645752, |
|
"loss": 2.8312, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -42.48313522338867, |
|
"rewards/margins": 6.2890706062316895, |
|
"rewards/rejected": -48.77220153808594, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.8753180661577609, |
|
"grad_norm": 140.2576422089798, |
|
"learning_rate": 3.729130871034885e-08, |
|
"logits/chosen": -0.9371786713600159, |
|
"logits/rejected": -0.9420756101608276, |
|
"logps/chosen": -4.37814998626709, |
|
"logps/rejected": -5.035106182098389, |
|
"loss": 2.8047, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -43.78150177001953, |
|
"rewards/margins": 6.569563865661621, |
|
"rewards/rejected": -50.3510627746582, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.8775798699462821, |
|
"grad_norm": 175.47044378770292, |
|
"learning_rate": 3.596810554792888e-08, |
|
"logits/chosen": -0.9239012598991394, |
|
"logits/rejected": -0.9475809335708618, |
|
"logps/chosen": -4.3749284744262695, |
|
"logps/rejected": -5.061702251434326, |
|
"loss": 3.1355, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -43.7492790222168, |
|
"rewards/margins": 6.867737770080566, |
|
"rewards/rejected": -50.61702346801758, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.8798416737348035, |
|
"grad_norm": 136.44044081625452, |
|
"learning_rate": 3.466769824132116e-08, |
|
"logits/chosen": -0.9199025630950928, |
|
"logits/rejected": -0.9204123020172119, |
|
"logps/chosen": -4.2924580574035645, |
|
"logps/rejected": -4.982794761657715, |
|
"loss": 2.3706, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -42.924583435058594, |
|
"rewards/margins": 6.903364181518555, |
|
"rewards/rejected": -49.82794189453125, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.8821034775233249, |
|
"grad_norm": 157.13088383312373, |
|
"learning_rate": 3.339016822266925e-08, |
|
"logits/chosen": -0.8951210975646973, |
|
"logits/rejected": -0.9262260794639587, |
|
"logps/chosen": -4.462003707885742, |
|
"logps/rejected": -5.319886684417725, |
|
"loss": 1.8385, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -44.62003707885742, |
|
"rewards/margins": 8.57883071899414, |
|
"rewards/rejected": -53.19886779785156, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8843652813118462, |
|
"grad_norm": 145.27269622289882, |
|
"learning_rate": 3.213559549152958e-08, |
|
"logits/chosen": -0.9537985920906067, |
|
"logits/rejected": -0.9690415859222412, |
|
"logps/chosen": -4.21071195602417, |
|
"logps/rejected": -4.958610534667969, |
|
"loss": 2.7635, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -42.10711669921875, |
|
"rewards/margins": 7.478985786437988, |
|
"rewards/rejected": -49.58610153198242, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.8866270851003676, |
|
"grad_norm": 152.2072238338678, |
|
"learning_rate": 3.090405860986203e-08, |
|
"logits/chosen": -0.9644224643707275, |
|
"logits/rejected": -0.9988764524459839, |
|
"logps/chosen": -4.434269428253174, |
|
"logps/rejected": -5.323245048522949, |
|
"loss": 2.3393, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -44.34269332885742, |
|
"rewards/margins": 8.889755249023438, |
|
"rewards/rejected": -53.23244857788086, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 133.85280588404478, |
|
"learning_rate": 2.9695634697110315e-08, |
|
"logits/chosen": -0.9042102694511414, |
|
"logits/rejected": -0.9273264408111572, |
|
"logps/chosen": -4.202421188354492, |
|
"logps/rejected": -5.033628463745117, |
|
"loss": 2.664, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -42.02421569824219, |
|
"rewards/margins": 8.31207275390625, |
|
"rewards/rejected": -50.33628463745117, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.8911506926774102, |
|
"grad_norm": 139.50976820633048, |
|
"learning_rate": 2.8510399425372766e-08, |
|
"logits/chosen": -0.9206915497779846, |
|
"logits/rejected": -0.9092394113540649, |
|
"logps/chosen": -4.3293986320495605, |
|
"logps/rejected": -4.970660209655762, |
|
"loss": 2.7218, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -43.293983459472656, |
|
"rewards/margins": 6.412619113922119, |
|
"rewards/rejected": -49.70660400390625, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.8934124964659316, |
|
"grad_norm": 142.6192511354523, |
|
"learning_rate": 2.734842701466329e-08, |
|
"logits/chosen": -0.9256288409233093, |
|
"logits/rejected": -0.9244977235794067, |
|
"logps/chosen": -4.661899566650391, |
|
"logps/rejected": -5.342780113220215, |
|
"loss": 2.4201, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -46.61899948120117, |
|
"rewards/margins": 6.808799743652344, |
|
"rewards/rejected": -53.42779541015625, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8956743002544529, |
|
"grad_norm": 130.63925825670492, |
|
"learning_rate": 2.6209790228264438e-08, |
|
"logits/chosen": -0.9332349300384521, |
|
"logits/rejected": -0.94581139087677, |
|
"logps/chosen": -4.036855220794678, |
|
"logps/rejected": -4.773642539978027, |
|
"loss": 2.2623, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -40.368553161621094, |
|
"rewards/margins": 7.367873191833496, |
|
"rewards/rejected": -47.736427307128906, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.8979361040429743, |
|
"grad_norm": 149.5991916133793, |
|
"learning_rate": 2.5094560368170305e-08, |
|
"logits/chosen": -0.9196925163269043, |
|
"logits/rejected": -0.9395575523376465, |
|
"logps/chosen": -4.5655694007873535, |
|
"logps/rejected": -5.217185974121094, |
|
"loss": 2.5713, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -45.65568923950195, |
|
"rewards/margins": 6.516168594360352, |
|
"rewards/rejected": -52.17185974121094, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.9001979078314957, |
|
"grad_norm": 122.6578201059457, |
|
"learning_rate": 2.4002807270621893e-08, |
|
"logits/chosen": -0.9552274942398071, |
|
"logits/rejected": -0.9657354950904846, |
|
"logps/chosen": -4.322449207305908, |
|
"logps/rejected": -4.962361812591553, |
|
"loss": 2.6024, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -43.2244987487793, |
|
"rewards/margins": 6.399123191833496, |
|
"rewards/rejected": -49.623619079589844, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.9024597116200169, |
|
"grad_norm": 135.84460405011131, |
|
"learning_rate": 2.293459930173354e-08, |
|
"logits/chosen": -0.9458591341972351, |
|
"logits/rejected": -0.9692145586013794, |
|
"logps/chosen": -4.452592849731445, |
|
"logps/rejected": -5.130153179168701, |
|
"loss": 2.783, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -44.52592468261719, |
|
"rewards/margins": 6.775611400604248, |
|
"rewards/rejected": -51.30153274536133, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.9047215154085383, |
|
"grad_norm": 171.33604881928616, |
|
"learning_rate": 2.189000335321256e-08, |
|
"logits/chosen": -0.9176933765411377, |
|
"logits/rejected": -0.9229288101196289, |
|
"logps/chosen": -4.287893295288086, |
|
"logps/rejected": -4.882048606872559, |
|
"loss": 3.0622, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -42.87893295288086, |
|
"rewards/margins": 5.941554069519043, |
|
"rewards/rejected": -48.82048797607422, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9069833191970597, |
|
"grad_norm": 131.77434980590826, |
|
"learning_rate": 2.086908483816954e-08, |
|
"logits/chosen": -0.9492596387863159, |
|
"logits/rejected": -0.9559190273284912, |
|
"logps/chosen": -4.549674987792969, |
|
"logps/rejected": -5.201348304748535, |
|
"loss": 2.4757, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -45.49674606323242, |
|
"rewards/margins": 6.516733169555664, |
|
"rewards/rejected": -52.01348114013672, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.909245122985581, |
|
"grad_norm": 132.06347666424753, |
|
"learning_rate": 1.9871907687022717e-08, |
|
"logits/chosen": -0.916560173034668, |
|
"logits/rejected": -0.9371925592422485, |
|
"logps/chosen": -4.1877241134643555, |
|
"logps/rejected": -4.8005266189575195, |
|
"loss": 2.6123, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -41.87724685668945, |
|
"rewards/margins": 6.128022193908691, |
|
"rewards/rejected": -48.005271911621094, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.9115069267741024, |
|
"grad_norm": 114.509047735518, |
|
"learning_rate": 1.889853434349451e-08, |
|
"logits/chosen": -0.9288345575332642, |
|
"logits/rejected": -0.9471941590309143, |
|
"logps/chosen": -4.192251682281494, |
|
"logps/rejected": -4.938650131225586, |
|
"loss": 2.434, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -41.922515869140625, |
|
"rewards/margins": 7.4639787673950195, |
|
"rewards/rejected": -49.386497497558594, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.9137687305626236, |
|
"grad_norm": 139.51897990590004, |
|
"learning_rate": 1.7949025760701164e-08, |
|
"logits/chosen": -0.9225287437438965, |
|
"logits/rejected": -0.9274791479110718, |
|
"logps/chosen": -4.604381561279297, |
|
"logps/rejected": -5.195613861083984, |
|
"loss": 2.6384, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -46.043819427490234, |
|
"rewards/margins": 5.912320137023926, |
|
"rewards/rejected": -51.956138610839844, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.916030534351145, |
|
"grad_norm": 128.19324678370154, |
|
"learning_rate": 1.7023441397336023e-08, |
|
"logits/chosen": -0.9489941596984863, |
|
"logits/rejected": -0.9579771757125854, |
|
"logps/chosen": -4.172736167907715, |
|
"logps/rejected": -4.906558513641357, |
|
"loss": 2.4065, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -41.727359771728516, |
|
"rewards/margins": 7.338226318359375, |
|
"rewards/rejected": -49.065582275390625, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9182923381396664, |
|
"grad_norm": 127.71243008762988, |
|
"learning_rate": 1.6121839213945854e-08, |
|
"logits/chosen": -0.9154041409492493, |
|
"logits/rejected": -0.9540258049964905, |
|
"logps/chosen": -4.24996280670166, |
|
"logps/rejected": -5.038878440856934, |
|
"loss": 2.6564, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -42.499629974365234, |
|
"rewards/margins": 7.889162063598633, |
|
"rewards/rejected": -50.3887939453125, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.9205541419281877, |
|
"grad_norm": 146.10675693844263, |
|
"learning_rate": 1.5244275669301777e-08, |
|
"logits/chosen": -0.955981969833374, |
|
"logits/rejected": -0.9593254923820496, |
|
"logps/chosen": -4.389744758605957, |
|
"logps/rejected": -5.067453384399414, |
|
"loss": 2.8744, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -43.8974494934082, |
|
"rewards/margins": 6.777082920074463, |
|
"rewards/rejected": -50.67453384399414, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.9228159457167091, |
|
"grad_norm": 132.95610789058466, |
|
"learning_rate": 1.4390805716863398e-08, |
|
"logits/chosen": -0.9074594378471375, |
|
"logits/rejected": -0.9208613634109497, |
|
"logps/chosen": -4.289839744567871, |
|
"logps/rejected": -4.873122692108154, |
|
"loss": 3.0576, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -42.898399353027344, |
|
"rewards/margins": 5.832827091217041, |
|
"rewards/rejected": -48.73122787475586, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.9250777495052305, |
|
"grad_norm": 141.9755163132043, |
|
"learning_rate": 1.3561482801337908e-08, |
|
"logits/chosen": -0.9116663336753845, |
|
"logits/rejected": -0.9385542273521423, |
|
"logps/chosen": -4.232028007507324, |
|
"logps/rejected": -4.991069793701172, |
|
"loss": 2.9291, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -42.320281982421875, |
|
"rewards/margins": 7.590411186218262, |
|
"rewards/rejected": -49.91069412231445, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.9273395532937517, |
|
"grad_norm": 133.18959451570092, |
|
"learning_rate": 1.2756358855332904e-08, |
|
"logits/chosen": -0.9445152282714844, |
|
"logits/rejected": -0.9605578184127808, |
|
"logps/chosen": -4.202373504638672, |
|
"logps/rejected": -4.8074846267700195, |
|
"loss": 3.1204, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -42.02373504638672, |
|
"rewards/margins": 6.051117897033691, |
|
"rewards/rejected": -48.074851989746094, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9296013570822731, |
|
"grad_norm": 136.12472128004111, |
|
"learning_rate": 1.1975484296105154e-08, |
|
"logits/chosen": -0.9164653420448303, |
|
"logits/rejected": -0.9311988353729248, |
|
"logps/chosen": -4.378890037536621, |
|
"logps/rejected": -5.060423851013184, |
|
"loss": 2.8484, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -43.78889846801758, |
|
"rewards/margins": 6.815339088439941, |
|
"rewards/rejected": -50.6042366027832, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.9318631608707945, |
|
"grad_norm": 137.33443947595077, |
|
"learning_rate": 1.1218908022402374e-08, |
|
"logits/chosen": -0.9297804832458496, |
|
"logits/rejected": -0.9439125061035156, |
|
"logps/chosen": -4.097784996032715, |
|
"logps/rejected": -4.829689025878906, |
|
"loss": 2.4591, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -40.977848052978516, |
|
"rewards/margins": 7.319035530090332, |
|
"rewards/rejected": -48.2968864440918, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.9341249646593158, |
|
"grad_norm": 145.19557689058706, |
|
"learning_rate": 1.0486677411402079e-08, |
|
"logits/chosen": -0.9909257888793945, |
|
"logits/rejected": -0.9965202212333679, |
|
"logps/chosen": -4.445742607116699, |
|
"logps/rejected": -5.312036037445068, |
|
"loss": 2.6238, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -44.45742416381836, |
|
"rewards/margins": 8.66294002532959, |
|
"rewards/rejected": -53.120365142822266, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.9363867684478372, |
|
"grad_norm": 135.59114017765043, |
|
"learning_rate": 9.778838315744353e-09, |
|
"logits/chosen": -0.9647377133369446, |
|
"logits/rejected": -0.9831647872924805, |
|
"logps/chosen": -4.492733478546143, |
|
"logps/rejected": -5.175955295562744, |
|
"loss": 2.6291, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -44.927330017089844, |
|
"rewards/margins": 6.832226753234863, |
|
"rewards/rejected": -51.75955581665039, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.9386485722363584, |
|
"grad_norm": 145.5793767400526, |
|
"learning_rate": 9.095435060660595e-09, |
|
"logits/chosen": -0.9024043679237366, |
|
"logits/rejected": -0.917569100856781, |
|
"logps/chosen": -4.358269691467285, |
|
"logps/rejected": -5.034271717071533, |
|
"loss": 2.8645, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -43.58269500732422, |
|
"rewards/margins": 6.760016441345215, |
|
"rewards/rejected": -50.34271240234375, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.9409103760248798, |
|
"grad_norm": 162.49589370243754, |
|
"learning_rate": 8.436510441197864e-09, |
|
"logits/chosen": -0.9422574043273926, |
|
"logits/rejected": -0.9609728455543518, |
|
"logps/chosen": -4.340670585632324, |
|
"logps/rejected": -5.023505210876465, |
|
"loss": 2.9033, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -43.406707763671875, |
|
"rewards/margins": 6.82834529876709, |
|
"rewards/rejected": -50.23505401611328, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.9431721798134012, |
|
"grad_norm": 224.81877042117685, |
|
"learning_rate": 7.802105719539076e-09, |
|
"logits/chosen": -0.9420458078384399, |
|
"logits/rejected": -0.9551193118095398, |
|
"logps/chosen": -4.549409866333008, |
|
"logps/rejected": -5.181853771209717, |
|
"loss": 3.3733, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -45.49409866333008, |
|
"rewards/margins": 6.324440002441406, |
|
"rewards/rejected": -51.818538665771484, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.9454339836019225, |
|
"grad_norm": 131.4283806000118, |
|
"learning_rate": 7.1922606224192e-09, |
|
"logits/chosen": -0.9589974880218506, |
|
"logits/rejected": -0.9696003198623657, |
|
"logps/chosen": -4.475660800933838, |
|
"logps/rejected": -5.16171407699585, |
|
"loss": 2.5232, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -44.756614685058594, |
|
"rewards/margins": 6.860527992248535, |
|
"rewards/rejected": -51.61713790893555, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.9476957873904439, |
|
"grad_norm": 148.50211503722525, |
|
"learning_rate": 6.6070133386372906e-09, |
|
"logits/chosen": -0.9348170161247253, |
|
"logits/rejected": -0.9493433237075806, |
|
"logps/chosen": -4.343303680419922, |
|
"logps/rejected": -4.941909313201904, |
|
"loss": 3.0315, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -43.43303680419922, |
|
"rewards/margins": 5.9860520362854, |
|
"rewards/rejected": -49.419090270996094, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.9499575911789653, |
|
"grad_norm": 129.14740181782477, |
|
"learning_rate": 6.046400516665384e-09, |
|
"logits/chosen": -0.957095742225647, |
|
"logits/rejected": -0.9551052451133728, |
|
"logps/chosen": -4.2565999031066895, |
|
"logps/rejected": -4.9523539543151855, |
|
"loss": 3.1614, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -42.566001892089844, |
|
"rewards/margins": 6.9575371742248535, |
|
"rewards/rejected": -49.523536682128906, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9522193949674865, |
|
"grad_norm": 122.48845625064148, |
|
"learning_rate": 5.510457262353396e-09, |
|
"logits/chosen": -0.9842012524604797, |
|
"logits/rejected": -1.0115524530410767, |
|
"logps/chosen": -4.26042366027832, |
|
"logps/rejected": -4.900167942047119, |
|
"loss": 2.4057, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -42.60423278808594, |
|
"rewards/margins": 6.397446155548096, |
|
"rewards/rejected": -49.00168228149414, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.9544811987560079, |
|
"grad_norm": 130.67374788625062, |
|
"learning_rate": 4.9992171367309265e-09, |
|
"logits/chosen": -0.9512357711791992, |
|
"logits/rejected": -0.9497030377388, |
|
"logps/chosen": -4.133967399597168, |
|
"logps/rejected": -4.79262638092041, |
|
"loss": 2.3627, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -41.33967208862305, |
|
"rewards/margins": 6.586594581604004, |
|
"rewards/rejected": -47.92626953125, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.9567430025445293, |
|
"grad_norm": 170.07916949000563, |
|
"learning_rate": 4.5127121539052955e-09, |
|
"logits/chosen": -0.9652352333068848, |
|
"logits/rejected": -0.9730237722396851, |
|
"logps/chosen": -4.610488414764404, |
|
"logps/rejected": -5.30706262588501, |
|
"loss": 2.5704, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -46.10488510131836, |
|
"rewards/margins": 6.9657416343688965, |
|
"rewards/rejected": -53.07062911987305, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.9590048063330506, |
|
"grad_norm": 148.77436953597567, |
|
"learning_rate": 4.050972779057327e-09, |
|
"logits/chosen": -0.8603891730308533, |
|
"logits/rejected": -0.883198618888855, |
|
"logps/chosen": -4.0621185302734375, |
|
"logps/rejected": -4.761756896972656, |
|
"loss": 2.5931, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -40.62118911743164, |
|
"rewards/margins": 6.996386528015137, |
|
"rewards/rejected": -47.61757278442383, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.961266610121572, |
|
"grad_norm": 150.51477842196144, |
|
"learning_rate": 3.6140279265330477e-09, |
|
"logits/chosen": -0.9070014357566833, |
|
"logits/rejected": -0.9290311336517334, |
|
"logps/chosen": -4.51793909072876, |
|
"logps/rejected": -5.157177448272705, |
|
"loss": 2.8884, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -45.17938995361328, |
|
"rewards/margins": 6.392387390136719, |
|
"rewards/rejected": -51.57177734375, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9635284139100933, |
|
"grad_norm": 136.67894069927104, |
|
"learning_rate": 3.2019049580335853e-09, |
|
"logits/chosen": -0.9470658898353577, |
|
"logits/rejected": -0.9471170902252197, |
|
"logps/chosen": -4.178645133972168, |
|
"logps/rejected": -4.754918098449707, |
|
"loss": 3.3311, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -41.78644943237305, |
|
"rewards/margins": 5.762726783752441, |
|
"rewards/rejected": -47.54917907714844, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.9657902176986146, |
|
"grad_norm": 118.06690739543247, |
|
"learning_rate": 2.814629680901337e-09, |
|
"logits/chosen": -0.9594217538833618, |
|
"logits/rejected": -0.9807270169258118, |
|
"logps/chosen": -4.4059553146362305, |
|
"logps/rejected": -5.0466628074646, |
|
"loss": 2.3717, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -44.05955505371094, |
|
"rewards/margins": 6.407071113586426, |
|
"rewards/rejected": -50.46662902832031, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.968052021487136, |
|
"grad_norm": 150.5152498317493, |
|
"learning_rate": 2.4522263465041937e-09, |
|
"logits/chosen": -0.9186062812805176, |
|
"logits/rejected": -0.9451611042022705, |
|
"logps/chosen": -4.349206447601318, |
|
"logps/rejected": -5.054396629333496, |
|
"loss": 2.2936, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -43.49205780029297, |
|
"rewards/margins": 7.051908016204834, |
|
"rewards/rejected": -50.54396438598633, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.9703138252756573, |
|
"grad_norm": 135.25076850145902, |
|
"learning_rate": 2.114717648716713e-09, |
|
"logits/chosen": -0.8935461044311523, |
|
"logits/rejected": -0.9095126986503601, |
|
"logps/chosen": -4.378687381744385, |
|
"logps/rejected": -5.177103042602539, |
|
"loss": 2.34, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -43.786869049072266, |
|
"rewards/margins": 7.984163761138916, |
|
"rewards/rejected": -51.771034240722656, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.9725756290641787, |
|
"grad_norm": 152.01906534523854, |
|
"learning_rate": 1.802124722499121e-09, |
|
"logits/chosen": -0.9317042231559753, |
|
"logits/rejected": -0.9441834688186646, |
|
"logps/chosen": -4.452950477600098, |
|
"logps/rejected": -5.174241065979004, |
|
"loss": 2.7273, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -44.529502868652344, |
|
"rewards/margins": 7.2129082679748535, |
|
"rewards/rejected": -51.742408752441406, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9748374328527001, |
|
"grad_norm": 129.07592379619018, |
|
"learning_rate": 1.5144671425737499e-09, |
|
"logits/chosen": -0.9220924377441406, |
|
"logits/rejected": -0.9323858022689819, |
|
"logps/chosen": -4.173183441162109, |
|
"logps/rejected": -4.836048603057861, |
|
"loss": 2.9168, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -41.731834411621094, |
|
"rewards/margins": 6.628646373748779, |
|
"rewards/rejected": -48.36048126220703, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.9770992366412213, |
|
"grad_norm": 122.42308357023245, |
|
"learning_rate": 1.251762922199484e-09, |
|
"logits/chosen": -0.8762988448143005, |
|
"logits/rejected": -0.9018377065658569, |
|
"logps/chosen": -4.394649982452393, |
|
"logps/rejected": -5.167266845703125, |
|
"loss": 2.0131, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -43.94649887084961, |
|
"rewards/margins": 7.726165771484375, |
|
"rewards/rejected": -51.67266845703125, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.9793610404297427, |
|
"grad_norm": 144.42537157796136, |
|
"learning_rate": 1.0140285120433744e-09, |
|
"logits/chosen": -0.9518988132476807, |
|
"logits/rejected": -0.9752581715583801, |
|
"logps/chosen": -4.419306755065918, |
|
"logps/rejected": -5.068571090698242, |
|
"loss": 2.8378, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -44.19306564331055, |
|
"rewards/margins": 6.492642402648926, |
|
"rewards/rejected": -50.685707092285156, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.9816228442182641, |
|
"grad_norm": 144.35990425477752, |
|
"learning_rate": 8.012787991508396e-10, |
|
"logits/chosen": -0.9084798693656921, |
|
"logits/rejected": -0.9402381181716919, |
|
"logps/chosen": -4.297061920166016, |
|
"logps/rejected": -5.11636209487915, |
|
"loss": 2.7992, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -42.97062301635742, |
|
"rewards/margins": 8.192997932434082, |
|
"rewards/rejected": -51.16362380981445, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.9838846480067854, |
|
"grad_norm": 134.7783472158385, |
|
"learning_rate": 6.135271060133007e-10, |
|
"logits/chosen": -0.8788937330245972, |
|
"logits/rejected": -0.8922220468521118, |
|
"logps/chosen": -4.292323112487793, |
|
"logps/rejected": -4.958512783050537, |
|
"loss": 2.7245, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -42.9232292175293, |
|
"rewards/margins": 6.6618971824646, |
|
"rewards/rejected": -49.585121154785156, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9861464517953068, |
|
"grad_norm": 131.05692298332755, |
|
"learning_rate": 4.50785189733871e-10, |
|
"logits/chosen": -0.8994375467300415, |
|
"logits/rejected": -0.9363196492195129, |
|
"logps/chosen": -4.137233257293701, |
|
"logps/rejected": -4.894649982452393, |
|
"loss": 2.0871, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -41.372337341308594, |
|
"rewards/margins": 7.574166297912598, |
|
"rewards/rejected": -48.946495056152344, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.988408255583828, |
|
"grad_norm": 141.9667964600976, |
|
"learning_rate": 3.1306324129118935e-10, |
|
"logits/chosen": -0.9034903049468994, |
|
"logits/rejected": -0.9206139445304871, |
|
"logps/chosen": -4.409141540527344, |
|
"logps/rejected": -5.078451156616211, |
|
"loss": 2.5653, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -44.09141540527344, |
|
"rewards/margins": 6.693098068237305, |
|
"rewards/rejected": -50.784515380859375, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.9906700593723494, |
|
"grad_norm": 149.43690667028594, |
|
"learning_rate": 2.003698849011748e-10, |
|
"logits/chosen": -0.9702510237693787, |
|
"logits/rejected": -0.992080807685852, |
|
"logps/chosen": -4.582041263580322, |
|
"logps/rejected": -5.16063928604126, |
|
"loss": 2.9004, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -45.82041931152344, |
|
"rewards/margins": 5.78598165512085, |
|
"rewards/rejected": -51.60639953613281, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.9929318631608708, |
|
"grad_norm": 151.45409397929544, |
|
"learning_rate": 1.1271217747714779e-10, |
|
"logits/chosen": -0.9387862086296082, |
|
"logits/rejected": -0.9742698669433594, |
|
"logps/chosen": -4.416835784912109, |
|
"logps/rejected": -5.033830165863037, |
|
"loss": 2.8039, |
|
"rewards/accuracies": 0.7421875, |
|
"rewards/chosen": -44.168357849121094, |
|
"rewards/margins": 6.169943809509277, |
|
"rewards/rejected": -50.33830642700195, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.9951936669493922, |
|
"grad_norm": 117.95433018808065, |
|
"learning_rate": 5.0095608187739055e-11, |
|
"logits/chosen": -0.9022542238235474, |
|
"logits/rejected": -0.921513557434082, |
|
"logps/chosen": -4.125314712524414, |
|
"logps/rejected": -4.81278133392334, |
|
"loss": 2.5334, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -41.25314712524414, |
|
"rewards/margins": 6.874664306640625, |
|
"rewards/rejected": -48.127811431884766, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9974554707379135, |
|
"grad_norm": 150.85789440344882, |
|
"learning_rate": 1.2524098113209092e-11, |
|
"logits/chosen": -0.9529531002044678, |
|
"logits/rejected": -0.9607404470443726, |
|
"logps/chosen": -4.348971366882324, |
|
"logps/rejected": -4.960037708282471, |
|
"loss": 3.1636, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -43.48971176147461, |
|
"rewards/margins": 6.110668182373047, |
|
"rewards/rejected": -49.600379943847656, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.9997172745264349, |
|
"grad_norm": 133.66353463529444, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.9448285698890686, |
|
"logits/rejected": -0.951061487197876, |
|
"logps/chosen": -4.208832263946533, |
|
"logps/rejected": -4.896453857421875, |
|
"loss": 2.7111, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -42.088321685791016, |
|
"rewards/margins": 6.876214027404785, |
|
"rewards/rejected": -48.96453857421875, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9997172745264349, |
|
"eval_logits/chosen": -0.9250581860542297, |
|
"eval_logits/rejected": -0.9405222535133362, |
|
"eval_logps/chosen": -4.356727600097656, |
|
"eval_logps/rejected": -5.03577995300293, |
|
"eval_loss": 2.5820231437683105, |
|
"eval_rewards/accuracies": 0.7914438843727112, |
|
"eval_rewards/chosen": -43.567283630371094, |
|
"eval_rewards/margins": 6.790517330169678, |
|
"eval_rewards/rejected": -50.35779571533203, |
|
"eval_runtime": 64.9654, |
|
"eval_samples_per_second": 45.855, |
|
"eval_steps_per_second": 2.878, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9997172745264349, |
|
"step": 442, |
|
"total_flos": 134366991482880.0, |
|
"train_loss": 3.371998559026157, |
|
"train_runtime": 3776.6556, |
|
"train_samples_per_second": 14.984, |
|
"train_steps_per_second": 0.117 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 442, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 134366991482880.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|