{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1259, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003971405877680699, "grad_norm": 1986.144287109375, "learning_rate": 2.5000000000000004e-07, "log_odds_chosen": -0.8231229782104492, "log_odds_ratio": -1.391985297203064, "logits/chosen": 102.16714477539062, "logits/rejected": -12.402770042419434, "logps/chosen": -16.669206619262695, "logps/rejected": -15.846084594726562, "loss": 14.8236, "nll_loss": 15.787309646606445, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.8334604501724243, "rewards/margins": -0.041156161576509476, "rewards/rejected": -0.7923042178153992, "step": 5 }, { "epoch": 0.007942811755361398, "grad_norm": 1124.8104248046875, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": -1.4938147068023682, "log_odds_ratio": -2.2117180824279785, "logits/chosen": 69.36089324951172, "logits/rejected": 133.50851440429688, "logps/chosen": -14.369367599487305, "logps/rejected": -12.875558853149414, "loss": 12.448, "nll_loss": 12.069561958312988, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.7184683680534363, "rewards/margins": -0.07469038665294647, "rewards/rejected": -0.6437779664993286, "step": 10 }, { "epoch": 0.011914217633042097, "grad_norm": 571.815673828125, "learning_rate": 7.5e-07, "log_odds_chosen": 0.07529473304748535, "log_odds_ratio": -0.9602964520454407, "logits/chosen": 194.84005737304688, "logits/rejected": 170.63455200195312, "logps/chosen": -8.528478622436523, "logps/rejected": -8.603917121887207, "loss": 8.6647, "nll_loss": 8.767313003540039, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.42642393708229065, "rewards/margins": 0.00377195468172431, "rewards/rejected": -0.4301958680152893, "step": 15 }, { "epoch": 0.015885623510722795, "grad_norm": 294.7610778808594, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": 1.0600534677505493, "log_odds_ratio": -0.5760771632194519, "logits/chosen": 130.64846801757812, "logits/rejected": 219.7195281982422, "logps/chosen": -5.179438591003418, "logps/rejected": -6.236131191253662, "loss": 5.951, "nll_loss": 5.380393028259277, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2589719295501709, "rewards/margins": 0.05283462256193161, "rewards/rejected": -0.3118065893650055, "step": 20 }, { "epoch": 0.019857029388403495, "grad_norm": 175.71804809570312, "learning_rate": 1.25e-06, "log_odds_chosen": -0.23735050857067108, "log_odds_ratio": -0.8487253189086914, "logits/chosen": 127.70460510253906, "logits/rejected": 241.3983154296875, "logps/chosen": -3.850130796432495, "logps/rejected": -3.6309711933135986, "loss": 4.3668, "nll_loss": 4.186649322509766, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.19250653684139252, "rewards/margins": -0.010957981459796429, "rewards/rejected": -0.18154855072498322, "step": 25 }, { "epoch": 0.023828435266084195, "grad_norm": 124.54273986816406, "learning_rate": 1.5e-06, "log_odds_chosen": 0.1647549569606781, "log_odds_ratio": -1.391825795173645, "logits/chosen": 265.0519714355469, "logits/rejected": 185.30813598632812, "logps/chosen": -3.637852430343628, "logps/rejected": -3.7852470874786377, "loss": 3.5575, "nll_loss": 3.7124857902526855, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18189263343811035, "rewards/margins": 0.007369739003479481, "rewards/rejected": -0.18926236033439636, "step": 30 }, { "epoch": 0.02779984114376489, "grad_norm": 197.60816955566406, "learning_rate": 1.75e-06, "log_odds_chosen": 0.08467637002468109, "log_odds_ratio": -0.7092531323432922, "logits/chosen": 209.2491455078125, "logits/rejected": 306.3213806152344, "logps/chosen": -2.6641006469726562, "logps/rejected": -2.7374536991119385, "loss": 3.3245, "nll_loss": 2.6615092754364014, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.13320502638816833, "rewards/margins": 0.0036676630843430758, "rewards/rejected": -0.13687269389629364, "step": 35 }, { "epoch": 0.03177124702144559, "grad_norm": 185.7123260498047, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.11347303539514542, "log_odds_ratio": -0.7381674647331238, "logits/chosen": 297.0364990234375, "logits/rejected": 257.74261474609375, "logps/chosen": -2.3121635913848877, "logps/rejected": -2.4389572143554688, "loss": 2.4557, "nll_loss": 3.096728801727295, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11560817062854767, "rewards/margins": 0.006339688785374165, "rewards/rejected": -0.12194786220788956, "step": 40 }, { "epoch": 0.035742652899126294, "grad_norm": 110.63021087646484, "learning_rate": 2.25e-06, "log_odds_chosen": -0.3320659101009369, "log_odds_ratio": -0.9123506546020508, "logits/chosen": 301.091552734375, "logits/rejected": 277.54205322265625, "logps/chosen": -1.4611170291900635, "logps/rejected": -1.2663248777389526, "loss": 2.169, "nll_loss": 1.8063217401504517, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07305584847927094, "rewards/margins": -0.009739604778587818, "rewards/rejected": -0.0633162409067154, "step": 45 }, { "epoch": 0.03971405877680699, "grad_norm": 178.21774291992188, "learning_rate": 2.5e-06, "log_odds_chosen": 0.13006296753883362, "log_odds_ratio": -0.6751025915145874, "logits/chosen": 348.32208251953125, "logits/rejected": 229.12075805664062, "logps/chosen": -1.464900255203247, "logps/rejected": -1.5294269323349, "loss": 2.2912, "nll_loss": 2.135530710220337, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07324501872062683, "rewards/margins": 0.003226341214030981, "rewards/rejected": -0.07647135108709335, "step": 50 }, { "epoch": 0.043685464654487687, "grad_norm": 245.26560974121094, "learning_rate": 2.7500000000000004e-06, "log_odds_chosen": 0.5194543600082397, "log_odds_ratio": -0.47707730531692505, "logits/chosen": 271.95819091796875, "logits/rejected": 315.78558349609375, "logps/chosen": -1.3941795825958252, "logps/rejected": -1.8157556056976318, "loss": 1.9179, "nll_loss": 1.9026470184326172, "rewards/accuracies": 1.0, "rewards/chosen": -0.06970898061990738, "rewards/margins": 0.021078798919916153, "rewards/rejected": -0.09078778326511383, "step": 55 }, { "epoch": 0.04765687053216839, "grad_norm": 50.196834564208984, "learning_rate": 3e-06, "log_odds_chosen": -0.3250153362751007, "log_odds_ratio": -1.0242193937301636, "logits/chosen": 358.44586181640625, "logits/rejected": 318.2178955078125, "logps/chosen": -1.6527748107910156, "logps/rejected": -1.3916703462600708, "loss": 2.1109, "nll_loss": 1.7649085521697998, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.08263873308897018, "rewards/margins": -0.013055220246315002, "rewards/rejected": -0.06958352029323578, "step": 60 }, { "epoch": 0.051628276409849086, "grad_norm": 108.7151107788086, "learning_rate": 3.2500000000000002e-06, "log_odds_chosen": 0.29782918095588684, "log_odds_ratio": -0.6089301705360413, "logits/chosen": 285.428466796875, "logits/rejected": 288.96624755859375, "logps/chosen": -1.7433933019638062, "logps/rejected": -2.007410764694214, "loss": 2.4942, "nll_loss": 2.303445339202881, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08716966956853867, "rewards/margins": 0.013200879096984863, "rewards/rejected": -0.10037054866552353, "step": 65 }, { "epoch": 0.05559968228752978, "grad_norm": 232.15228271484375, "learning_rate": 3.5e-06, "log_odds_chosen": -0.16374030709266663, "log_odds_ratio": -0.8438912630081177, "logits/chosen": 316.9410400390625, "logits/rejected": 329.91485595703125, "logps/chosen": -1.642735481262207, "logps/rejected": -1.506341814994812, "loss": 2.2097, "nll_loss": 1.9609638452529907, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.08213677257299423, "rewards/margins": -0.006819679401814938, "rewards/rejected": -0.07531709969043732, "step": 70 }, { "epoch": 0.059571088165210485, "grad_norm": 78.16633605957031, "learning_rate": 3.7500000000000005e-06, "log_odds_chosen": 0.08518339693546295, "log_odds_ratio": -0.6790117025375366, "logits/chosen": 318.6166076660156, "logits/rejected": 272.7696533203125, "logps/chosen": -1.5736838579177856, "logps/rejected": -1.6470115184783936, "loss": 2.0889, "nll_loss": 2.043640375137329, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07868418842554092, "rewards/margins": 0.0036663957871496677, "rewards/rejected": -0.08235058933496475, "step": 75 }, { "epoch": 0.06354249404289118, "grad_norm": 107.8170166015625, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.7262557744979858, "log_odds_ratio": -0.4617268443107605, "logits/chosen": 258.40740966796875, "logits/rejected": 339.81085205078125, "logps/chosen": -1.2588051557540894, "logps/rejected": -1.8158533573150635, "loss": 2.0223, "nll_loss": 1.9233391284942627, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06294026225805283, "rewards/margins": 0.027852404862642288, "rewards/rejected": -0.09079267084598541, "step": 80 }, { "epoch": 0.06751389992057188, "grad_norm": 94.64545440673828, "learning_rate": 4.25e-06, "log_odds_chosen": 0.24036984145641327, "log_odds_ratio": -0.6297720670700073, "logits/chosen": 322.71148681640625, "logits/rejected": 335.28228759765625, "logps/chosen": -1.28748619556427, "logps/rejected": -1.4782545566558838, "loss": 1.7358, "nll_loss": 1.7002407312393188, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06437431275844574, "rewards/margins": 0.009538417682051659, "rewards/rejected": -0.07391272485256195, "step": 85 }, { "epoch": 0.07148530579825259, "grad_norm": 133.2477264404297, "learning_rate": 4.5e-06, "log_odds_chosen": 0.11964414268732071, "log_odds_ratio": -0.6827563643455505, "logits/chosen": 325.7275695800781, "logits/rejected": 273.9080505371094, "logps/chosen": -1.321221947669983, "logps/rejected": -1.4350178241729736, "loss": 2.0792, "nll_loss": 2.0664451122283936, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0660611018538475, "rewards/margins": 0.005689795129001141, "rewards/rejected": -0.07175089418888092, "step": 90 }, { "epoch": 0.07545671167593328, "grad_norm": 75.57556915283203, "learning_rate": 4.75e-06, "log_odds_chosen": 0.1924123615026474, "log_odds_ratio": -0.7588543891906738, "logits/chosen": 268.33111572265625, "logits/rejected": 284.4769287109375, "logps/chosen": -1.2349271774291992, "logps/rejected": -1.3220834732055664, "loss": 1.8031, "nll_loss": 2.01653790473938, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06174635887145996, "rewards/margins": 0.0043578073382377625, "rewards/rejected": -0.06610416620969772, "step": 95 }, { "epoch": 0.07942811755361398, "grad_norm": 82.01164245605469, "learning_rate": 5e-06, "log_odds_chosen": 0.22679157555103302, "log_odds_ratio": -0.6229124069213867, "logits/chosen": 347.00042724609375, "logits/rejected": 280.84796142578125, "logps/chosen": -1.0023002624511719, "logps/rejected": -1.172823429107666, "loss": 1.9771, "nll_loss": 1.778070092201233, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.050115011632442474, "rewards/margins": 0.008526156656444073, "rewards/rejected": -0.05864117294549942, "step": 100 }, { "epoch": 0.08339952343129468, "grad_norm": 144.94651794433594, "learning_rate": 4.99977039769305e-06, "log_odds_chosen": -0.6747381091117859, "log_odds_ratio": -1.3916146755218506, "logits/chosen": 285.270751953125, "logits/rejected": 392.5542907714844, "logps/chosen": -2.4368515014648438, "logps/rejected": -1.8863086700439453, "loss": 2.1097, "nll_loss": 2.505847454071045, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.12184257805347443, "rewards/margins": -0.02752714790403843, "rewards/rejected": -0.09431543946266174, "step": 105 }, { "epoch": 0.08737092930897537, "grad_norm": 907.4935302734375, "learning_rate": 4.9990816329459744e-06, "log_odds_chosen": 0.6644043326377869, "log_odds_ratio": -0.6343256235122681, "logits/chosen": 355.09014892578125, "logits/rejected": 328.604736328125, "logps/chosen": -3.468595027923584, "logps/rejected": -4.0634002685546875, "loss": 2.2136, "nll_loss": 2.77046275138855, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17342975735664368, "rewards/margins": 0.02974027954041958, "rewards/rejected": -0.2031700611114502, "step": 110 }, { "epoch": 0.09134233518665608, "grad_norm": 55.67234802246094, "learning_rate": 4.997933832272354e-06, "log_odds_chosen": 0.5580138564109802, "log_odds_ratio": -0.507127046585083, "logits/chosen": 261.1326599121094, "logits/rejected": 371.37396240234375, "logps/chosen": -1.3323694467544556, "logps/rejected": -1.7812392711639404, "loss": 1.7648, "nll_loss": 1.5408798456192017, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06661847233772278, "rewards/margins": 0.02244349755346775, "rewards/rejected": -0.08906197547912598, "step": 115 }, { "epoch": 0.09531374106433678, "grad_norm": 155.13243103027344, "learning_rate": 4.996327206502335e-06, "log_odds_chosen": 0.4205778241157532, "log_odds_ratio": -0.6450524926185608, "logits/chosen": 249.2978973388672, "logits/rejected": 313.87274169921875, "logps/chosen": -1.5952913761138916, "logps/rejected": -1.9669711589813232, "loss": 2.1811, "nll_loss": 2.7610068321228027, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07976456731557846, "rewards/margins": 0.018583994358778, "rewards/rejected": -0.09834857285022736, "step": 120 }, { "epoch": 0.09928514694201747, "grad_norm": 56.40447235107422, "learning_rate": 4.994262050743902e-06, "log_odds_chosen": -0.344782292842865, "log_odds_ratio": -0.9613167643547058, "logits/chosen": 297.97796630859375, "logits/rejected": 316.97796630859375, "logps/chosen": -1.9141199588775635, "logps/rejected": -1.645132064819336, "loss": 2.097, "nll_loss": 2.657578945159912, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.09570600092411041, "rewards/margins": -0.013449391350150108, "rewards/rejected": -0.08225660771131516, "step": 125 }, { "epoch": 0.10325655281969817, "grad_norm": 63.89404296875, "learning_rate": 4.991738744328679e-06, "log_odds_chosen": -0.5421128869056702, "log_odds_ratio": -1.0545024871826172, "logits/chosen": 358.645751953125, "logits/rejected": 298.12786865234375, "logps/chosen": -1.4808099269866943, "logps/rejected": -1.1260448694229126, "loss": 2.0496, "nll_loss": 2.101855993270874, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0740404948592186, "rewards/margins": -0.017738252878189087, "rewards/rejected": -0.05630224198102951, "step": 130 }, { "epoch": 0.10722795869737888, "grad_norm": 94.53604125976562, "learning_rate": 4.988757750742243e-06, "log_odds_chosen": -0.7190758585929871, "log_odds_ratio": -1.2133655548095703, "logits/chosen": 339.0290832519531, "logits/rejected": 269.4937438964844, "logps/chosen": -1.472083568572998, "logps/rejected": -1.066962718963623, "loss": 1.8894, "nll_loss": 2.1049113273620605, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07360417395830154, "rewards/margins": -0.020256036892533302, "rewards/rejected": -0.05334814265370369, "step": 135 }, { "epoch": 0.11119936457505956, "grad_norm": 71.55030059814453, "learning_rate": 4.985319617538998e-06, "log_odds_chosen": 0.4117642343044281, "log_odds_ratio": -0.6588890552520752, "logits/chosen": 288.08966064453125, "logits/rejected": 382.9283142089844, "logps/chosen": -1.4075809717178345, "logps/rejected": -1.670000433921814, "loss": 2.1323, "nll_loss": 1.7244634628295898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07037904858589172, "rewards/margins": 0.013120980933308601, "rewards/rejected": -0.08350002765655518, "step": 140 }, { "epoch": 0.11517077045274027, "grad_norm": 34.86751174926758, "learning_rate": 4.981424976241598e-06, "log_odds_chosen": 0.4058244228363037, "log_odds_ratio": -0.6222633123397827, "logits/chosen": 308.98876953125, "logits/rejected": 273.16015625, "logps/chosen": -1.11463463306427, "logps/rejected": -1.428716778755188, "loss": 1.7015, "nll_loss": 1.501156210899353, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05573173239827156, "rewards/margins": 0.01570410653948784, "rewards/rejected": -0.0714358389377594, "step": 145 }, { "epoch": 0.11914217633042097, "grad_norm": 52.555850982666016, "learning_rate": 4.977074542224941e-06, "log_odds_chosen": -0.05959262698888779, "log_odds_ratio": -0.7522531151771545, "logits/chosen": 290.1683349609375, "logits/rejected": 313.0419616699219, "logps/chosen": -1.5048227310180664, "logps/rejected": -1.4883638620376587, "loss": 1.814, "nll_loss": 1.6905310153961182, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07524113357067108, "rewards/margins": -0.0008229411905631423, "rewards/rejected": -0.07441819459199905, "step": 150 }, { "epoch": 0.12311358220810167, "grad_norm": 108.9931869506836, "learning_rate": 4.972269114584779e-06, "log_odds_chosen": -0.10580176115036011, "log_odds_ratio": -0.794634997844696, "logits/chosen": 386.6126403808594, "logits/rejected": 284.34765625, "logps/chosen": -1.1588705778121948, "logps/rejected": -1.1435911655426025, "loss": 1.9924, "nll_loss": 2.0270378589630127, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05794353038072586, "rewards/margins": -0.0007639724644832313, "rewards/rejected": -0.05717955902218819, "step": 155 }, { "epoch": 0.12708498808578236, "grad_norm": 61.099525451660156, "learning_rate": 4.9670095759909275e-06, "log_odds_chosen": 0.3500244915485382, "log_odds_ratio": -0.577224612236023, "logits/chosen": 328.20562744140625, "logits/rejected": 250.172607421875, "logps/chosen": -1.0037747621536255, "logps/rejected": -1.1577932834625244, "loss": 1.8237, "nll_loss": 1.7924268245697021, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.050188738852739334, "rewards/margins": 0.007700921501964331, "rewards/rejected": -0.0578896589577198, "step": 160 }, { "epoch": 0.13105639396346305, "grad_norm": 50.632930755615234, "learning_rate": 4.961296892525144e-06, "log_odds_chosen": 0.3148774206638336, "log_odds_ratio": -0.7541528940200806, "logits/chosen": 326.50323486328125, "logits/rejected": 326.8573913574219, "logps/chosen": -1.0643494129180908, "logps/rejected": -1.2868483066558838, "loss": 2.0091, "nll_loss": 1.6609458923339844, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05321747064590454, "rewards/margins": 0.011124944314360619, "rewards/rejected": -0.06434241682291031, "step": 165 }, { "epoch": 0.13502779984114377, "grad_norm": 44.31376266479492, "learning_rate": 4.95513211350367e-06, "log_odds_chosen": 0.7684804797172546, "log_odds_ratio": -0.424949586391449, "logits/chosen": 315.80316162109375, "logits/rejected": 317.47760009765625, "logps/chosen": -1.0722829103469849, "logps/rejected": -1.50656259059906, "loss": 1.5922, "nll_loss": 1.819898009300232, "rewards/accuracies": 1.0, "rewards/chosen": -0.05361414700746536, "rewards/margins": 0.0217139795422554, "rewards/rejected": -0.07532812654972076, "step": 170 }, { "epoch": 0.13899920571882446, "grad_norm": 37.67052459716797, "learning_rate": 4.948516371284493e-06, "log_odds_chosen": -0.38165563344955444, "log_odds_ratio": -0.9263311624526978, "logits/chosen": 399.5979919433594, "logits/rejected": 327.8373107910156, "logps/chosen": -1.0562649965286255, "logps/rejected": -0.7994272112846375, "loss": 1.7336, "nll_loss": 1.5048203468322754, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.052813250571489334, "rewards/margins": -0.012841887772083282, "rewards/rejected": -0.03997135907411575, "step": 175 }, { "epoch": 0.14297061159650518, "grad_norm": 39.37586212158203, "learning_rate": 4.941450881059354e-06, "log_odds_chosen": 0.14676916599273682, "log_odds_ratio": -0.6898115873336792, "logits/chosen": 288.27630615234375, "logits/rejected": 291.10162353515625, "logps/chosen": -1.1020549535751343, "logps/rejected": -1.1935259103775024, "loss": 1.5256, "nll_loss": 1.1978670358657837, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05510275438427925, "rewards/margins": 0.00457354262471199, "rewards/rejected": -0.05967629700899124, "step": 180 }, { "epoch": 0.14694201747418587, "grad_norm": 57.68708419799805, "learning_rate": 4.933936940630537e-06, "log_odds_chosen": -0.36938825249671936, "log_odds_ratio": -0.9125706553459167, "logits/chosen": 301.7081298828125, "logits/rejected": 338.01690673828125, "logps/chosen": -1.5262658596038818, "logps/rejected": -1.2626394033432007, "loss": 1.7968, "nll_loss": 1.8017867803573608, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07631329447031021, "rewards/margins": -0.013181325979530811, "rewards/rejected": -0.06313197314739227, "step": 185 }, { "epoch": 0.15091342335186655, "grad_norm": 61.880126953125, "learning_rate": 4.925975930172489e-06, "log_odds_chosen": 0.2720580995082855, "log_odds_ratio": -0.5754180550575256, "logits/chosen": 240.9505157470703, "logits/rejected": 291.4943542480469, "logps/chosen": -1.128051519393921, "logps/rejected": -1.3102426528930664, "loss": 1.6575, "nll_loss": 1.4304395914077759, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.056402575224637985, "rewards/margins": 0.009109559468925, "rewards/rejected": -0.06551213562488556, "step": 190 }, { "epoch": 0.15488482922954727, "grad_norm": 128.1157684326172, "learning_rate": 4.917569311978301e-06, "log_odds_chosen": -0.16586491465568542, "log_odds_ratio": -0.8022063374519348, "logits/chosen": 339.9231872558594, "logits/rejected": 310.51458740234375, "logps/chosen": -1.4521989822387695, "logps/rejected": -1.3116356134414673, "loss": 1.8756, "nll_loss": 1.6476871967315674, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07260995358228683, "rewards/margins": -0.007028169929981232, "rewards/rejected": -0.0655817836523056, "step": 195 }, { "epoch": 0.15885623510722796, "grad_norm": 360.7728271484375, "learning_rate": 4.9087186301911196e-06, "log_odds_chosen": -0.15912006795406342, "log_odds_ratio": -0.8308361172676086, "logits/chosen": 348.33721923828125, "logits/rejected": 337.98748779296875, "logps/chosen": -1.4099957942962646, "logps/rejected": -1.3773982524871826, "loss": 1.7693, "nll_loss": 2.075568914413452, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07049979269504547, "rewards/margins": -0.001629872596822679, "rewards/rejected": -0.06886991858482361, "step": 200 }, { "epoch": 0.16282764098490865, "grad_norm": 55.327293395996094, "learning_rate": 4.89942551052051e-06, "log_odds_chosen": 0.21036644279956818, "log_odds_ratio": -0.6468175053596497, "logits/chosen": 343.9696960449219, "logits/rejected": 264.9131164550781, "logps/chosen": -1.5017131567001343, "logps/rejected": -1.672141432762146, "loss": 1.8711, "nll_loss": 1.952646255493164, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07508565485477448, "rewards/margins": 0.008521410636603832, "rewards/rejected": -0.08360707014799118, "step": 205 }, { "epoch": 0.16679904686258937, "grad_norm": 44.57875442504883, "learning_rate": 4.889691659943842e-06, "log_odds_chosen": 0.6125321388244629, "log_odds_ratio": -0.5584419965744019, "logits/chosen": 301.97369384765625, "logits/rejected": 392.7519836425781, "logps/chosen": -1.1298048496246338, "logps/rejected": -1.5633180141448975, "loss": 1.9275, "nll_loss": 1.591841220855713, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.056490249931812286, "rewards/margins": 0.021675655618309975, "rewards/rejected": -0.07816590368747711, "step": 210 }, { "epoch": 0.17077045274027006, "grad_norm": 40.897430419921875, "learning_rate": 4.879518866392757e-06, "log_odds_chosen": 0.31865325570106506, "log_odds_ratio": -0.5695816278457642, "logits/chosen": 289.89654541015625, "logits/rejected": 337.4901428222656, "logps/chosen": -1.0857855081558228, "logps/rejected": -1.3082635402679443, "loss": 1.8816, "nll_loss": 1.8894774913787842, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05428927391767502, "rewards/margins": 0.011123904958367348, "rewards/rejected": -0.06541318446397781, "step": 215 }, { "epoch": 0.17474185861795075, "grad_norm": 57.284912109375, "learning_rate": 4.868908998424749e-06, "log_odds_chosen": 1.3685444593429565, "log_odds_ratio": -0.32077115774154663, "logits/chosen": 461.255859375, "logits/rejected": 236.81936645507812, "logps/chosen": -1.125225305557251, "logps/rejected": -2.0914998054504395, "loss": 1.8968, "nll_loss": 2.5089685916900635, "rewards/accuracies": 1.0, "rewards/chosen": -0.05626126378774643, "rewards/margins": 0.04831372946500778, "rewards/rejected": -0.10457499325275421, "step": 220 }, { "epoch": 0.17871326449563146, "grad_norm": 48.706504821777344, "learning_rate": 4.8578640048799435e-06, "log_odds_chosen": 0.48141545057296753, "log_odds_ratio": -0.5349053740501404, "logits/chosen": 331.5611877441406, "logits/rejected": 310.23876953125, "logps/chosen": -1.1479623317718506, "logps/rejected": -1.398716688156128, "loss": 1.6831, "nll_loss": 2.0783326625823975, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05739812180399895, "rewards/margins": 0.012537715956568718, "rewards/rejected": -0.06993584334850311, "step": 225 }, { "epoch": 0.18268467037331215, "grad_norm": 40.74349594116211, "learning_rate": 4.846385914523143e-06, "log_odds_chosen": -0.004401213023811579, "log_odds_ratio": -0.7065997123718262, "logits/chosen": 321.564453125, "logits/rejected": 321.5721130371094, "logps/chosen": -1.444544792175293, "logps/rejected": -1.465693473815918, "loss": 1.6282, "nll_loss": 1.7196967601776123, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07222724705934525, "rewards/margins": 0.0010574304033070803, "rewards/rejected": -0.07328467816114426, "step": 230 }, { "epoch": 0.18665607625099284, "grad_norm": 124.04603576660156, "learning_rate": 4.834476835671166e-06, "log_odds_chosen": -0.40913257002830505, "log_odds_ratio": -1.0309641361236572, "logits/chosen": 303.4057312011719, "logits/rejected": 345.175048828125, "logps/chosen": -1.1636359691619873, "logps/rejected": -0.9079948663711548, "loss": 1.8382, "nll_loss": 1.716036081314087, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.058181799948215485, "rewards/margins": -0.012782062403857708, "rewards/rejected": -0.0453997440636158, "step": 235 }, { "epoch": 0.19062748212867356, "grad_norm": 354.8912048339844, "learning_rate": 4.822138955805595e-06, "log_odds_chosen": -0.03981683775782585, "log_odds_ratio": -0.7560557126998901, "logits/chosen": 365.21771240234375, "logits/rejected": 281.8612060546875, "logps/chosen": -1.5758720636367798, "logps/rejected": -1.6159578561782837, "loss": 1.7801, "nll_loss": 1.9230833053588867, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07879360020160675, "rewards/margins": 0.0020043007098138332, "rewards/rejected": -0.08079790323972702, "step": 240 }, { "epoch": 0.19459888800635425, "grad_norm": 112.47100067138672, "learning_rate": 4.809374541170974e-06, "log_odds_chosen": 0.4120238423347473, "log_odds_ratio": -0.5282896161079407, "logits/chosen": 280.23223876953125, "logits/rejected": 381.760986328125, "logps/chosen": -1.43479323387146, "logps/rejected": -1.7411903142929077, "loss": 1.705, "nll_loss": 1.610185980796814, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07173965871334076, "rewards/margins": 0.015319856815040112, "rewards/rejected": -0.08705951273441315, "step": 245 }, { "epoch": 0.19857029388403494, "grad_norm": 89.64398193359375, "learning_rate": 4.796185936358543e-06, "log_odds_chosen": -0.09865443408489227, "log_odds_ratio": -0.8241540789604187, "logits/chosen": 375.30975341796875, "logits/rejected": 277.07080078125, "logps/chosen": -1.1246061325073242, "logps/rejected": -1.0761168003082275, "loss": 1.5139, "nll_loss": 1.5120210647583008, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05623030662536621, "rewards/margins": -0.002424471778795123, "rewards/rejected": -0.05380583554506302, "step": 250 }, { "epoch": 0.20254169976171565, "grad_norm": 71.554443359375, "learning_rate": 4.78257556387557e-06, "log_odds_chosen": 0.17121019959449768, "log_odds_ratio": -0.8539883494377136, "logits/chosen": 271.27740478515625, "logits/rejected": 308.79962158203125, "logps/chosen": -1.4543489217758179, "logps/rejected": -1.6729555130004883, "loss": 1.7579, "nll_loss": 1.7364709377288818, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07271744310855865, "rewards/margins": 0.010930335149168968, "rewards/rejected": -0.08364777266979218, "step": 255 }, { "epoch": 0.20651310563939634, "grad_norm": 38.314876556396484, "learning_rate": 4.7685459237003954e-06, "log_odds_chosen": 0.2899346947669983, "log_odds_ratio": -0.614177942276001, "logits/chosen": 298.9414367675781, "logits/rejected": 384.4459228515625, "logps/chosen": -1.6951465606689453, "logps/rejected": -1.8937695026397705, "loss": 1.8883, "nll_loss": 1.8541686534881592, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08475733548402786, "rewards/margins": 0.009931142441928387, "rewards/rejected": -0.09468847513198853, "step": 260 }, { "epoch": 0.21048451151707703, "grad_norm": 72.83179473876953, "learning_rate": 4.754099592823216e-06, "log_odds_chosen": 0.8651946783065796, "log_odds_ratio": -0.5348523855209351, "logits/chosen": 358.8797912597656, "logits/rejected": 280.5845031738281, "logps/chosen": -1.027199387550354, "logps/rejected": -1.6986967325210571, "loss": 1.512, "nll_loss": 1.2993109226226807, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05135997012257576, "rewards/margins": 0.033574867993593216, "rewards/rejected": -0.08493484556674957, "step": 265 }, { "epoch": 0.21445591739475775, "grad_norm": 44.400272369384766, "learning_rate": 4.739239224772751e-06, "log_odds_chosen": -0.05065007135272026, "log_odds_ratio": -0.8082035183906555, "logits/chosen": 298.37335205078125, "logits/rejected": 344.8133239746094, "logps/chosen": -1.5913331508636475, "logps/rejected": -1.4946712255477905, "loss": 1.6712, "nll_loss": 1.572104811668396, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07956665754318237, "rewards/margins": -0.004833097103983164, "rewards/rejected": -0.07473356276750565, "step": 270 }, { "epoch": 0.21842732327243844, "grad_norm": 59.13341522216797, "learning_rate": 4.7239675491288285e-06, "log_odds_chosen": 0.4572678506374359, "log_odds_ratio": -0.5575239062309265, "logits/chosen": 366.19207763671875, "logits/rejected": 318.4181213378906, "logps/chosen": -1.4838532209396362, "logps/rejected": -1.8985198736190796, "loss": 1.6193, "nll_loss": 1.7495654821395874, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07419265806674957, "rewards/margins": 0.020733339712023735, "rewards/rejected": -0.09492600709199905, "step": 275 }, { "epoch": 0.22239872915011913, "grad_norm": 153.78656005859375, "learning_rate": 4.708287371021016e-06, "log_odds_chosen": 0.4286623001098633, "log_odds_ratio": -0.6032269597053528, "logits/chosen": 264.1981506347656, "logits/rejected": 437.3480529785156, "logps/chosen": -1.4498531818389893, "logps/rejected": -1.8169384002685547, "loss": 1.6785, "nll_loss": 1.932142972946167, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07249265164136887, "rewards/margins": 0.018354270607233047, "rewards/rejected": -0.09084691852331161, "step": 280 }, { "epoch": 0.22637013502779985, "grad_norm": 55.48463821411133, "learning_rate": 4.692201570613367e-06, "log_odds_chosen": -0.38779932260513306, "log_odds_ratio": -0.9519031643867493, "logits/chosen": 315.96405029296875, "logits/rejected": 441.1253967285156, "logps/chosen": -1.785449743270874, "logps/rejected": -1.507177472114563, "loss": 1.9603, "nll_loss": 1.9527504444122314, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.08927249163389206, "rewards/margins": -0.013913619332015514, "rewards/rejected": -0.07535887509584427, "step": 285 }, { "epoch": 0.23034154090548054, "grad_norm": 196.46524047851562, "learning_rate": 4.675713102575389e-06, "log_odds_chosen": 0.13416634500026703, "log_odds_ratio": -0.6926567554473877, "logits/chosen": 325.7991943359375, "logits/rejected": 359.30426025390625, "logps/chosen": -1.287474274635315, "logps/rejected": -1.4152565002441406, "loss": 1.8866, "nll_loss": 1.7071374654769897, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06437370926141739, "rewards/margins": 0.006389112211763859, "rewards/rejected": -0.07076282799243927, "step": 290 }, { "epoch": 0.23431294678316125, "grad_norm": 40.833988189697266, "learning_rate": 4.658824995539318e-06, "log_odds_chosen": -0.3848091959953308, "log_odds_ratio": -0.9408342242240906, "logits/chosen": 466.2362365722656, "logits/rejected": 305.64935302734375, "logps/chosen": -1.5434811115264893, "logps/rejected": -1.2438277006149292, "loss": 1.7838, "nll_loss": 1.8352413177490234, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07717405259609222, "rewards/margins": -0.01498266588896513, "rewards/rejected": -0.06219138950109482, "step": 295 }, { "epoch": 0.23828435266084194, "grad_norm": 85.50627899169922, "learning_rate": 4.641540351543826e-06, "log_odds_chosen": -0.06552493572235107, "log_odds_ratio": -0.8229795694351196, "logits/chosen": 306.17236328125, "logits/rejected": 333.0214538574219, "logps/chosen": -1.408279538154602, "logps/rejected": -1.4727718830108643, "loss": 1.7687, "nll_loss": 1.7104421854019165, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0704139694571495, "rewards/margins": 0.0032246210612356663, "rewards/rejected": -0.07363860309123993, "step": 300 }, { "epoch": 0.24225575853852263, "grad_norm": 52.15938186645508, "learning_rate": 4.6238623454642215e-06, "log_odds_chosen": -0.7005519270896912, "log_odds_ratio": -1.180597186088562, "logits/chosen": 393.58575439453125, "logits/rejected": 291.7729187011719, "logps/chosen": -1.5035789012908936, "logps/rejected": -1.0074363946914673, "loss": 1.9541, "nll_loss": 1.8733670711517334, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07517894357442856, "rewards/margins": -0.02480713650584221, "rewards/rejected": -0.050371818244457245, "step": 305 }, { "epoch": 0.24622716441620335, "grad_norm": 71.7739028930664, "learning_rate": 4.605794224429286e-06, "log_odds_chosen": 0.5956661105155945, "log_odds_ratio": -0.5032454133033752, "logits/chosen": 287.00799560546875, "logits/rejected": 278.5837707519531, "logps/chosen": -1.3277604579925537, "logps/rejected": -1.8034179210662842, "loss": 2.0805, "nll_loss": 2.333066463470459, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06638802587985992, "rewards/margins": 0.023782875388860703, "rewards/rejected": -0.09017090499401093, "step": 310 }, { "epoch": 0.25019857029388404, "grad_norm": 95.7601089477539, "learning_rate": 4.587339307224837e-06, "log_odds_chosen": 0.4633815884590149, "log_odds_ratio": -0.5474061965942383, "logits/chosen": 285.03057861328125, "logits/rejected": 313.64971923828125, "logps/chosen": -1.064753532409668, "logps/rejected": -1.4205200672149658, "loss": 1.674, "nll_loss": 1.6759742498397827, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0532376766204834, "rewards/margins": 0.01778833009302616, "rewards/rejected": -0.07102601230144501, "step": 315 }, { "epoch": 0.2541699761715647, "grad_norm": 35.280120849609375, "learning_rate": 4.568500983684127e-06, "log_odds_chosen": 1.0717216730117798, "log_odds_ratio": -0.3502858281135559, "logits/chosen": 316.9742431640625, "logits/rejected": 361.8272705078125, "logps/chosen": -1.0445092916488647, "logps/rejected": -1.8099143505096436, "loss": 1.5821, "nll_loss": 1.5527000427246094, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05222546309232712, "rewards/margins": 0.0382702499628067, "rewards/rejected": -0.09049571305513382, "step": 320 }, { "epoch": 0.2581413820492454, "grad_norm": 56.177520751953125, "learning_rate": 4.54928271406519e-06, "log_odds_chosen": 0.08797760307788849, "log_odds_ratio": -0.6692509055137634, "logits/chosen": 311.7930603027344, "logits/rejected": 325.4112854003906, "logps/chosen": -0.8228706121444702, "logps/rejected": -0.8611005544662476, "loss": 1.6019, "nll_loss": 1.4109102487564087, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.04114353656768799, "rewards/margins": 0.0019115001196041703, "rewards/rejected": -0.043055035173892975, "step": 325 }, { "epoch": 0.2621127879269261, "grad_norm": 49.081207275390625, "learning_rate": 4.529688028415259e-06, "log_odds_chosen": 0.5036702156066895, "log_odds_ratio": -0.5119932293891907, "logits/chosen": 349.2168273925781, "logits/rejected": 417.1556091308594, "logps/chosen": -1.1619529724121094, "logps/rejected": -1.4640512466430664, "loss": 1.7083, "nll_loss": 1.5884101390838623, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05809764936566353, "rewards/margins": 0.015104919672012329, "rewards/rejected": -0.07320256531238556, "step": 330 }, { "epoch": 0.26608419380460685, "grad_norm": 59.9859619140625, "learning_rate": 4.509720525922356e-06, "log_odds_chosen": -0.2589249014854431, "log_odds_ratio": -0.8613080978393555, "logits/chosen": 319.08282470703125, "logits/rejected": 374.44512939453125, "logps/chosen": -1.1872944831848145, "logps/rejected": -1.0131120681762695, "loss": 1.6996, "nll_loss": 1.4569952487945557, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05936472490429878, "rewards/margins": -0.008709125220775604, "rewards/rejected": -0.05065560340881348, "step": 335 }, { "epoch": 0.27005559968228754, "grad_norm": 34.96027755737305, "learning_rate": 4.489383874254193e-06, "log_odds_chosen": 0.19578817486763, "log_odds_ratio": -0.6897228360176086, "logits/chosen": 308.80645751953125, "logits/rejected": 363.35272216796875, "logps/chosen": -1.0954322814941406, "logps/rejected": -1.26204514503479, "loss": 1.8048, "nll_loss": 1.4526374340057373, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.05477161332964897, "rewards/margins": 0.008330638520419598, "rewards/rejected": -0.06310225278139114, "step": 340 }, { "epoch": 0.27402700555996823, "grad_norm": 88.31672668457031, "learning_rate": 4.46868180888448e-06, "log_odds_chosen": 0.5558279752731323, "log_odds_ratio": -0.6389625072479248, "logits/chosen": 343.12237548828125, "logits/rejected": 301.82843017578125, "logps/chosen": -2.0307748317718506, "logps/rejected": -2.624824047088623, "loss": 1.8654, "nll_loss": 2.4084858894348145, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1015387549996376, "rewards/margins": 0.029702449217438698, "rewards/rejected": -0.13124120235443115, "step": 345 }, { "epoch": 0.2779984114376489, "grad_norm": 36.35618209838867, "learning_rate": 4.447618132406788e-06, "log_odds_chosen": 0.05257995054125786, "log_odds_ratio": -0.7060452699661255, "logits/chosen": 395.55926513671875, "logits/rejected": 324.0714416503906, "logps/chosen": -1.2713950872421265, "logps/rejected": -1.3762328624725342, "loss": 1.807, "nll_loss": 1.739328145980835, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06356975436210632, "rewards/margins": 0.005241888575255871, "rewards/rejected": -0.06881164014339447, "step": 350 }, { "epoch": 0.2819698173153296, "grad_norm": 56.2636604309082, "learning_rate": 4.4261967138360905e-06, "log_odds_chosen": -0.501254141330719, "log_odds_ratio": -1.0499690771102905, "logits/chosen": 375.1532897949219, "logits/rejected": 253.8148193359375, "logps/chosen": -1.6551625728607178, "logps/rejected": -1.2964580059051514, "loss": 1.5264, "nll_loss": 1.8501193523406982, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.08275812864303589, "rewards/margins": -0.017935223877429962, "rewards/rejected": -0.06482290476560593, "step": 355 }, { "epoch": 0.28594122319301035, "grad_norm": 87.0551528930664, "learning_rate": 4.404421487898083e-06, "log_odds_chosen": -0.14473596215248108, "log_odds_ratio": -0.7785184383392334, "logits/chosen": 331.86968994140625, "logits/rejected": 319.239013671875, "logps/chosen": -1.191341519355774, "logps/rejected": -1.0848348140716553, "loss": 1.6036, "nll_loss": 1.8287181854248047, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.059567082673311234, "rewards/margins": -0.005325344391167164, "rewards/rejected": -0.054241735488176346, "step": 360 }, { "epoch": 0.28991262907069104, "grad_norm": 42.729244232177734, "learning_rate": 4.3822964543064536e-06, "log_odds_chosen": -0.40415963530540466, "log_odds_ratio": -0.9470674395561218, "logits/chosen": 336.14984130859375, "logits/rejected": 290.5699157714844, "logps/chosen": -1.3225958347320557, "logps/rejected": -0.9997372627258301, "loss": 1.6299, "nll_loss": 1.5339610576629639, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.06612979620695114, "rewards/margins": -0.01614293083548546, "rewards/rejected": -0.04998686537146568, "step": 365 }, { "epoch": 0.29388403494837173, "grad_norm": 46.94746780395508, "learning_rate": 4.359825677028206e-06, "log_odds_chosen": -0.8854487538337708, "log_odds_ratio": -1.2963764667510986, "logits/chosen": 375.13153076171875, "logits/rejected": 305.2864990234375, "logps/chosen": -1.3724980354309082, "logps/rejected": -0.7736121416091919, "loss": 1.7241, "nll_loss": 1.8799304962158203, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.06862489879131317, "rewards/margins": -0.029944291338324547, "rewards/rejected": -0.038680605590343475, "step": 370 }, { "epoch": 0.2978554408260524, "grad_norm": 55.18174743652344, "learning_rate": 4.337013283537182e-06, "log_odds_chosen": 0.2510035037994385, "log_odds_ratio": -0.6310834884643555, "logits/chosen": 368.63214111328125, "logits/rejected": 276.0588684082031, "logps/chosen": -1.1369847059249878, "logps/rejected": -1.3218457698822021, "loss": 1.7533, "nll_loss": 1.9112876653671265, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05684923380613327, "rewards/margins": 0.009243053384125233, "rewards/rejected": -0.06609228998422623, "step": 375 }, { "epoch": 0.3018268467037331, "grad_norm": 41.9847526550293, "learning_rate": 4.3138634640559185e-06, "log_odds_chosen": 0.3149817883968353, "log_odds_ratio": -0.5802162289619446, "logits/chosen": 320.53692626953125, "logits/rejected": 413.7608337402344, "logps/chosen": -1.536766767501831, "logps/rejected": -1.7898311614990234, "loss": 1.7084, "nll_loss": 1.9358304738998413, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07683834433555603, "rewards/margins": 0.012653221376240253, "rewards/rejected": -0.08949156850576401, "step": 380 }, { "epoch": 0.3057982525814138, "grad_norm": 36.58580780029297, "learning_rate": 4.290380470785984e-06, "log_odds_chosen": -0.44627267122268677, "log_odds_ratio": -0.9523780941963196, "logits/chosen": 384.55609130859375, "logits/rejected": 288.72259521484375, "logps/chosen": -1.3820741176605225, "logps/rejected": -1.060802698135376, "loss": 1.7563, "nll_loss": 1.8211044073104858, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.06910370290279388, "rewards/margins": -0.016063563525676727, "rewards/rejected": -0.05304013937711716, "step": 385 }, { "epoch": 0.30976965845909454, "grad_norm": 41.187171936035156, "learning_rate": 4.266568617126919e-06, "log_odds_chosen": -0.3663569390773773, "log_odds_ratio": -1.1484845876693726, "logits/chosen": 353.6982727050781, "logits/rejected": 327.0970764160156, "logps/chosen": -1.3744146823883057, "logps/rejected": -1.1000757217407227, "loss": 1.7097, "nll_loss": 1.7982555627822876, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0687207281589508, "rewards/margins": -0.013716941699385643, "rewards/rejected": -0.05500379204750061, "step": 390 }, { "epoch": 0.31374106433677523, "grad_norm": 50.873226165771484, "learning_rate": 4.2424322768839534e-06, "log_odds_chosen": 0.5074528455734253, "log_odds_ratio": -0.5399994254112244, "logits/chosen": 336.0807800292969, "logits/rejected": 341.9483947753906, "logps/chosen": -1.1645238399505615, "logps/rejected": -1.5138776302337646, "loss": 1.6867, "nll_loss": 1.4324095249176025, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05822619050741196, "rewards/margins": 0.017467692494392395, "rewards/rejected": -0.07569388300180435, "step": 395 }, { "epoch": 0.3177124702144559, "grad_norm": 31.38866424560547, "learning_rate": 4.217975883464604e-06, "log_odds_chosen": 0.4223383367061615, "log_odds_ratio": -0.557848334312439, "logits/chosen": 341.1311950683594, "logits/rejected": 330.12530517578125, "logps/chosen": -0.972659707069397, "logps/rejected": -1.1718895435333252, "loss": 1.5047, "nll_loss": 1.4151824712753296, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04863298311829567, "rewards/margins": 0.009961498901247978, "rewards/rejected": -0.0585944838821888, "step": 400 }, { "epoch": 0.3216838760921366, "grad_norm": 113.73686218261719, "learning_rate": 4.1932039290643534e-06, "log_odds_chosen": -0.6433423161506653, "log_odds_ratio": -1.1012665033340454, "logits/chosen": 340.7366638183594, "logits/rejected": 359.9513854980469, "logps/chosen": -1.2754091024398804, "logps/rejected": -0.9146528244018555, "loss": 1.5981, "nll_loss": 1.4592812061309814, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.06377045810222626, "rewards/margins": -0.018037814646959305, "rewards/rejected": -0.04573264718055725, "step": 405 }, { "epoch": 0.3256552819698173, "grad_norm": 27.97637367248535, "learning_rate": 4.168120963841501e-06, "log_odds_chosen": 0.515986979007721, "log_odds_ratio": -0.4868692457675934, "logits/chosen": 322.2940979003906, "logits/rejected": 315.605224609375, "logps/chosen": -1.0631706714630127, "logps/rejected": -1.4311316013336182, "loss": 1.5618, "nll_loss": 1.2970093488693237, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.053158532828092575, "rewards/margins": 0.018398040905594826, "rewards/rejected": -0.07155657559633255, "step": 410 }, { "epoch": 0.329626687847498, "grad_norm": 111.9078598022461, "learning_rate": 4.142731595081394e-06, "log_odds_chosen": -0.7611304521560669, "log_odds_ratio": -1.230291485786438, "logits/chosen": 472.66253662109375, "logits/rejected": 277.21435546875, "logps/chosen": -2.0160393714904785, "logps/rejected": -1.417152762413025, "loss": 1.6858, "nll_loss": 2.240736484527588, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.1008019670844078, "rewards/margins": -0.029944339767098427, "rewards/rejected": -0.07085762917995453, "step": 415 }, { "epoch": 0.33359809372517873, "grad_norm": 55.40426254272461, "learning_rate": 4.117040486350141e-06, "log_odds_chosen": -0.27019426226615906, "log_odds_ratio": -0.9864810705184937, "logits/chosen": 320.2743225097656, "logits/rejected": 390.2867736816406, "logps/chosen": -1.4704816341400146, "logps/rejected": -1.3386101722717285, "loss": 2.1069, "nll_loss": 1.8192126750946045, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07352408021688461, "rewards/margins": -0.00659357151016593, "rewards/rejected": -0.06693051010370255, "step": 420 }, { "epoch": 0.3375694996028594, "grad_norm": 80.17561340332031, "learning_rate": 4.0910523566380115e-06, "log_odds_chosen": 0.49502259492874146, "log_odds_ratio": -0.5022796392440796, "logits/chosen": 338.86505126953125, "logits/rejected": 331.8837890625, "logps/chosen": -1.1411818265914917, "logps/rejected": -1.491014003753662, "loss": 1.5704, "nll_loss": 1.140134572982788, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.057059090584516525, "rewards/margins": 0.01749161258339882, "rewards/rejected": -0.07455070316791534, "step": 425 }, { "epoch": 0.3415409054805401, "grad_norm": 40.142581939697266, "learning_rate": 4.06477197949263e-06, "log_odds_chosen": 0.31735625863075256, "log_odds_ratio": -0.6405702233314514, "logits/chosen": 370.8393859863281, "logits/rejected": 353.68585205078125, "logps/chosen": -1.5020195245742798, "logps/rejected": -1.7753969430923462, "loss": 1.6606, "nll_loss": 2.0026352405548096, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07510097324848175, "rewards/margins": 0.01366887241601944, "rewards/rejected": -0.08876985311508179, "step": 430 }, { "epoch": 0.3455123113582208, "grad_norm": 39.17948913574219, "learning_rate": 4.038204182142174e-06, "log_odds_chosen": -0.04409918934106827, "log_odds_ratio": -1.0017715692520142, "logits/chosen": 365.0375061035156, "logits/rejected": 287.4947814941406, "logps/chosen": -1.5504963397979736, "logps/rejected": -1.3872708082199097, "loss": 1.754, "nll_loss": 1.8789138793945312, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0775248184800148, "rewards/margins": -0.008161274716258049, "rewards/rejected": -0.0693635419011116, "step": 435 }, { "epoch": 0.3494837172359015, "grad_norm": 65.36585998535156, "learning_rate": 4.011353844608695e-06, "log_odds_chosen": 0.25077468156814575, "log_odds_ratio": -0.5847761034965515, "logits/chosen": 398.1842346191406, "logits/rejected": 302.77557373046875, "logps/chosen": -1.1319334506988525, "logps/rejected": -1.2919623851776123, "loss": 1.5668, "nll_loss": 1.5274730920791626, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.056596674025058746, "rewards/margins": 0.008001448586583138, "rewards/rejected": -0.06459812819957733, "step": 440 }, { "epoch": 0.3534551231135822, "grad_norm": 66.42986297607422, "learning_rate": 3.9842258988117435e-06, "log_odds_chosen": 0.39154669642448425, "log_odds_ratio": -0.5791794657707214, "logits/chosen": 292.7413635253906, "logits/rejected": 333.33306884765625, "logps/chosen": -1.3780268430709839, "logps/rejected": -1.5879228115081787, "loss": 1.6821, "nll_loss": 1.878339409828186, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06890134513378143, "rewards/margins": 0.010494804009795189, "rewards/rejected": -0.07939615100622177, "step": 445 }, { "epoch": 0.3574265289912629, "grad_norm": 41.643070220947266, "learning_rate": 3.9568253276624704e-06, "log_odds_chosen": 0.3447812795639038, "log_odds_ratio": -0.6143133044242859, "logits/chosen": 308.4149169921875, "logits/rejected": 373.98712158203125, "logps/chosen": -1.146469235420227, "logps/rejected": -1.3937455415725708, "loss": 1.7364, "nll_loss": 1.5407047271728516, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05732346326112747, "rewards/margins": 0.012363811954855919, "rewards/rejected": -0.06968727707862854, "step": 450 }, { "epoch": 0.3613979348689436, "grad_norm": 51.8876953125, "learning_rate": 3.929157164148352e-06, "log_odds_chosen": 0.25782614946365356, "log_odds_ratio": -0.6060336828231812, "logits/chosen": 289.9158020019531, "logits/rejected": 339.3231201171875, "logps/chosen": -0.9036432504653931, "logps/rejected": -1.0764741897583008, "loss": 1.831, "nll_loss": 1.5824403762817383, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.045182161033153534, "rewards/margins": 0.008641545660793781, "rewards/rejected": -0.05382370948791504, "step": 455 }, { "epoch": 0.3653693407466243, "grad_norm": 54.782958984375, "learning_rate": 3.901226490408728e-06, "log_odds_chosen": 0.2922573685646057, "log_odds_ratio": -0.5908970236778259, "logits/chosen": 307.7389221191406, "logits/rejected": 323.6591796875, "logps/chosen": -1.3589437007904053, "logps/rejected": -1.5952913761138916, "loss": 1.5749, "nll_loss": 1.6173921823501587, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06794719398021698, "rewards/margins": 0.011817372404038906, "rewards/rejected": -0.07976455986499786, "step": 460 }, { "epoch": 0.369340746624305, "grad_norm": 30.29743194580078, "learning_rate": 3.873038436801298e-06, "log_odds_chosen": 0.310823529958725, "log_odds_ratio": -0.662110447883606, "logits/chosen": 300.14251708984375, "logits/rejected": 391.355712890625, "logps/chosen": -1.3028764724731445, "logps/rejected": -1.4683092832565308, "loss": 1.714, "nll_loss": 1.549576997756958, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06514382362365723, "rewards/margins": 0.008271644823253155, "rewards/rejected": -0.07341547310352325, "step": 465 }, { "epoch": 0.3733121525019857, "grad_norm": 59.415611267089844, "learning_rate": 3.8445981809597715e-06, "log_odds_chosen": -0.1661299765110016, "log_odds_ratio": -0.8740431070327759, "logits/chosen": 272.77264404296875, "logits/rejected": 408.80975341796875, "logps/chosen": -1.0485327243804932, "logps/rejected": -1.0229226350784302, "loss": 1.4859, "nll_loss": 1.2975164651870728, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05242663621902466, "rewards/margins": -0.0012805074220523238, "rewards/rejected": -0.05114613100886345, "step": 470 }, { "epoch": 0.37728355837966643, "grad_norm": 38.312618255615234, "learning_rate": 3.815910946842828e-06, "log_odds_chosen": -0.014984751120209694, "log_odds_ratio": -0.8228279948234558, "logits/chosen": 292.1250305175781, "logits/rejected": 363.7527770996094, "logps/chosen": -1.3335845470428467, "logps/rejected": -1.5042054653167725, "loss": 1.5739, "nll_loss": 1.5051485300064087, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0666792243719101, "rewards/margins": 0.008531046099960804, "rewards/rejected": -0.07521027326583862, "step": 475 }, { "epoch": 0.3812549642573471, "grad_norm": 56.91576385498047, "learning_rate": 3.7869820037745773e-06, "log_odds_chosen": -0.030622828751802444, "log_odds_ratio": -0.7795476913452148, "logits/chosen": 385.81536865234375, "logits/rejected": 331.4559631347656, "logps/chosen": -1.2507215738296509, "logps/rejected": -1.279594898223877, "loss": 1.6582, "nll_loss": 1.5629332065582275, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0625360757112503, "rewards/margins": 0.0014436636120080948, "rewards/rejected": -0.06397974491119385, "step": 480 }, { "epoch": 0.3852263701350278, "grad_norm": 147.40060424804688, "learning_rate": 3.7578166654766695e-06, "log_odds_chosen": 0.012112426571547985, "log_odds_ratio": -0.8280073404312134, "logits/chosen": 324.9056091308594, "logits/rejected": 301.1451721191406, "logps/chosen": -1.0767914056777954, "logps/rejected": -0.936957836151123, "loss": 1.6, "nll_loss": 1.675865888595581, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05383957177400589, "rewards/margins": -0.006991674192249775, "rewards/rejected": -0.04684789478778839, "step": 485 }, { "epoch": 0.3891977760127085, "grad_norm": 62.930694580078125, "learning_rate": 3.7284202890922707e-06, "log_odds_chosen": -0.3624054789543152, "log_odds_ratio": -0.9134989976882935, "logits/chosen": 387.71923828125, "logits/rejected": 309.9226989746094, "logps/chosen": -1.203176498413086, "logps/rejected": -0.9530506134033203, "loss": 1.6178, "nll_loss": 1.5490739345550537, "rewards/accuracies": 0.0, "rewards/chosen": -0.060158826410770416, "rewards/margins": -0.012506293132901192, "rewards/rejected": -0.047652535140514374, "step": 490 }, { "epoch": 0.3931691818903892, "grad_norm": 44.98346710205078, "learning_rate": 3.698798274202048e-06, "log_odds_chosen": 1.106994867324829, "log_odds_ratio": -0.4969615340232849, "logits/chosen": 416.2843322753906, "logits/rejected": 322.34722900390625, "logps/chosen": -1.0825506448745728, "logps/rejected": -1.9832751750946045, "loss": 1.4863, "nll_loss": 1.3532053232192993, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0541275329887867, "rewards/margins": 0.045036230236291885, "rewards/rejected": -0.09916376322507858, "step": 495 }, { "epoch": 0.3971405877680699, "grad_norm": 71.20710754394531, "learning_rate": 3.668956061832365e-06, "log_odds_chosen": 0.18296189606189728, "log_odds_ratio": -0.6548603773117065, "logits/chosen": 375.1370544433594, "logits/rejected": 340.265869140625, "logps/chosen": -1.1238301992416382, "logps/rejected": -1.242456078529358, "loss": 1.7543, "nll_loss": 2.0999672412872314, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05619151517748833, "rewards/margins": 0.005931290797889233, "rewards/rejected": -0.062122803181409836, "step": 500 }, { "epoch": 0.4011119936457506, "grad_norm": 43.03315353393555, "learning_rate": 3.6388991334558665e-06, "log_odds_chosen": -0.3676120638847351, "log_odds_ratio": -0.9084262847900391, "logits/chosen": 356.4194030761719, "logits/rejected": 317.63275146484375, "logps/chosen": -1.1387929916381836, "logps/rejected": -0.8903687596321106, "loss": 1.4812, "nll_loss": 1.3678486347198486, "rewards/accuracies": 0.0, "rewards/chosen": -0.05693964287638664, "rewards/margins": -0.012421206571161747, "rewards/rejected": -0.04451843723654747, "step": 505 }, { "epoch": 0.4050833995234313, "grad_norm": 27.506656646728516, "learning_rate": 3.6086330099846274e-06, "log_odds_chosen": -0.5120627880096436, "log_odds_ratio": -1.0131856203079224, "logits/chosen": 349.39093017578125, "logits/rejected": 328.4683532714844, "logps/chosen": -1.0289686918258667, "logps/rejected": -0.724139392375946, "loss": 1.7849, "nll_loss": 1.7454910278320312, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.05144844204187393, "rewards/margins": -0.015241468325257301, "rewards/rejected": -0.03620696812868118, "step": 510 }, { "epoch": 0.409054805401112, "grad_norm": 61.25779724121094, "learning_rate": 3.578163250756065e-06, "log_odds_chosen": 0.4315710663795471, "log_odds_ratio": -0.5859761834144592, "logits/chosen": 317.2684631347656, "logits/rejected": 341.7981262207031, "logps/chosen": -1.1744743585586548, "logps/rejected": -1.5218207836151123, "loss": 1.6567, "nll_loss": 1.4415868520736694, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05872371047735214, "rewards/margins": 0.017367318272590637, "rewards/rejected": -0.07609103620052338, "step": 515 }, { "epoch": 0.4130262112787927, "grad_norm": 74.8428955078125, "learning_rate": 3.5474954525117887e-06, "log_odds_chosen": 0.05642819404602051, "log_odds_ratio": -0.8421560525894165, "logits/chosen": 321.3084716796875, "logits/rejected": 286.5706787109375, "logps/chosen": -1.519960641860962, "logps/rejected": -1.6510608196258545, "loss": 1.7797, "nll_loss": 1.7759168148040771, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07599803060293198, "rewards/margins": 0.006555011961609125, "rewards/rejected": -0.08255304396152496, "step": 520 }, { "epoch": 0.4169976171564734, "grad_norm": 50.956214904785156, "learning_rate": 3.5166352483695803e-06, "log_odds_chosen": 0.43155431747436523, "log_odds_ratio": -0.5425541400909424, "logits/chosen": 367.09210205078125, "logits/rejected": 334.6671447753906, "logps/chosen": -1.14950430393219, "logps/rejected": -1.4378228187561035, "loss": 1.7118, "nll_loss": 1.758131742477417, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05747520923614502, "rewards/margins": 0.014415934681892395, "rewards/rejected": -0.07189114391803741, "step": 525 }, { "epoch": 0.42096902303415407, "grad_norm": 91.34703826904297, "learning_rate": 3.4855883067886888e-06, "log_odds_chosen": 0.24526679515838623, "log_odds_ratio": -0.6680157780647278, "logits/chosen": 316.42547607421875, "logits/rejected": 285.8825378417969, "logps/chosen": -0.9753881692886353, "logps/rejected": -1.2148548364639282, "loss": 1.4501, "nll_loss": 1.2995529174804688, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04876940697431564, "rewards/margins": 0.011973332613706589, "rewards/rejected": -0.060742735862731934, "step": 530 }, { "epoch": 0.4249404289118348, "grad_norm": 110.02918243408203, "learning_rate": 3.4543603305286432e-06, "log_odds_chosen": 0.0072197020053863525, "log_odds_ratio": -0.7949660420417786, "logits/chosen": 320.6426696777344, "logits/rejected": 313.28411865234375, "logps/chosen": -1.0177392959594727, "logps/rejected": -0.8970460891723633, "loss": 1.6384, "nll_loss": 1.6469953060150146, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05088697001338005, "rewards/margins": -0.006034668534994125, "rewards/rejected": -0.044852301478385925, "step": 535 }, { "epoch": 0.4289118347895155, "grad_norm": 39.02376937866211, "learning_rate": 3.422957055601758e-06, "log_odds_chosen": -0.32208532094955444, "log_odds_ratio": -0.9446969032287598, "logits/chosen": 281.58331298828125, "logits/rejected": 433.8438415527344, "logps/chosen": -1.2129267454147339, "logps/rejected": -0.957917332649231, "loss": 1.5224, "nll_loss": 1.474649429321289, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.060646336525678635, "rewards/margins": -0.012750471010804176, "rewards/rejected": -0.04789586737751961, "step": 540 }, { "epoch": 0.4328832406671962, "grad_norm": 72.89534759521484, "learning_rate": 3.3913842502195256e-06, "log_odds_chosen": 0.10887251049280167, "log_odds_ratio": -0.6846013069152832, "logits/chosen": 337.80560302734375, "logits/rejected": 373.49957275390625, "logps/chosen": -1.1243635416030884, "logps/rejected": -1.1805238723754883, "loss": 1.573, "nll_loss": 1.2677637338638306, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05621817708015442, "rewards/margins": 0.0028080150950700045, "rewards/rejected": -0.059026192873716354, "step": 545 }, { "epoch": 0.4368546465448769, "grad_norm": 72.7591323852539, "learning_rate": 3.3596477137331106e-06, "log_odds_chosen": 0.3823426365852356, "log_odds_ratio": -0.5422973036766052, "logits/chosen": 345.8101806640625, "logits/rejected": 315.0772399902344, "logps/chosen": -1.0160396099090576, "logps/rejected": -1.2651389837265015, "loss": 1.6836, "nll_loss": 1.2323284149169922, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05080198124051094, "rewards/margins": 0.01245497353374958, "rewards/rejected": -0.06325694918632507, "step": 550 }, { "epoch": 0.44082605242255757, "grad_norm": 90.90532684326172, "learning_rate": 3.327753275568105e-06, "log_odds_chosen": 0.33174973726272583, "log_odds_ratio": -0.5770066380500793, "logits/chosen": 357.4688415527344, "logits/rejected": 283.86163330078125, "logps/chosen": -1.4533777236938477, "logps/rejected": -1.7138382196426392, "loss": 1.9812, "nll_loss": 1.9684550762176514, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0726688951253891, "rewards/margins": 0.013023021630942822, "rewards/rejected": -0.0856919139623642, "step": 555 }, { "epoch": 0.44479745830023826, "grad_norm": 50.66121292114258, "learning_rate": 3.2957067941537745e-06, "log_odds_chosen": -0.82317054271698, "log_odds_ratio": -1.2737700939178467, "logits/chosen": 260.4927062988281, "logits/rejected": 432.49237060546875, "logps/chosen": -1.632800817489624, "logps/rejected": -1.045261025428772, "loss": 1.8113, "nll_loss": 1.9882593154907227, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.08164004981517792, "rewards/margins": -0.02937699481844902, "rewards/rejected": -0.0522630512714386, "step": 560 }, { "epoch": 0.448768864177919, "grad_norm": 74.89939880371094, "learning_rate": 3.263514155846969e-06, "log_odds_chosen": 0.328817218542099, "log_odds_ratio": -0.5914020538330078, "logits/chosen": 342.4215393066406, "logits/rejected": 324.5187683105469, "logps/chosen": -0.9976029396057129, "logps/rejected": -1.1394035816192627, "loss": 1.5494, "nll_loss": 1.7881031036376953, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04988015443086624, "rewards/margins": 0.007090026047080755, "rewards/rejected": -0.056970179080963135, "step": 565 }, { "epoch": 0.4527402700555997, "grad_norm": 79.3864974975586, "learning_rate": 3.2311812738509062e-06, "log_odds_chosen": 0.5359792709350586, "log_odds_ratio": -0.582006573677063, "logits/chosen": 312.7106018066406, "logits/rejected": 337.06732177734375, "logps/chosen": -1.0472519397735596, "logps/rejected": -1.3570013046264648, "loss": 1.7019, "nll_loss": 1.3205798864364624, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0523625984787941, "rewards/margins": 0.015487474389374256, "rewards/rejected": -0.06785006821155548, "step": 570 }, { "epoch": 0.4567116759332804, "grad_norm": 45.589111328125, "learning_rate": 3.198714087129024e-06, "log_odds_chosen": 0.5243362188339233, "log_odds_ratio": -0.5980243682861328, "logits/chosen": 344.4964904785156, "logits/rejected": 442.112548828125, "logps/chosen": -1.115337610244751, "logps/rejected": -1.4375159740447998, "loss": 1.4739, "nll_loss": 1.505415678024292, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.055766891688108444, "rewards/margins": 0.016108911484479904, "rewards/rejected": -0.07187579572200775, "step": 575 }, { "epoch": 0.46068308181096107, "grad_norm": 82.72159576416016, "learning_rate": 3.1661185593140986e-06, "log_odds_chosen": -0.06218218803405762, "log_odds_ratio": -0.7745502591133118, "logits/chosen": 349.07562255859375, "logits/rejected": 288.5042724609375, "logps/chosen": -1.5691912174224854, "logps/rejected": -1.5143083333969116, "loss": 1.7202, "nll_loss": 1.9988605976104736, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07845956087112427, "rewards/margins": -0.0027441338170319796, "rewards/rejected": -0.07571543008089066, "step": 580 }, { "epoch": 0.46465448768864176, "grad_norm": 36.90455627441406, "learning_rate": 3.133400677612836e-06, "log_odds_chosen": -0.509355366230011, "log_odds_ratio": -1.0258004665374756, "logits/chosen": 336.6009216308594, "logits/rejected": 255.05517578125, "logps/chosen": -1.52445387840271, "logps/rejected": -1.1118301153182983, "loss": 1.7538, "nll_loss": 2.2724270820617676, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07622268795967102, "rewards/margins": -0.020631182938814163, "rewards/rejected": -0.05559150502085686, "step": 585 }, { "epoch": 0.4686258935663225, "grad_norm": 50.35420608520508, "learning_rate": 3.100566451706132e-06, "log_odds_chosen": -0.455788791179657, "log_odds_ratio": -0.9897588491439819, "logits/chosen": 316.55255126953125, "logits/rejected": 320.16485595703125, "logps/chosen": -1.5558842420578003, "logps/rejected": -1.1951056718826294, "loss": 1.8191, "nll_loss": 1.6975910663604736, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07779420912265778, "rewards/margins": -0.018038932234048843, "rewards/rejected": -0.05975528433918953, "step": 590 }, { "epoch": 0.4725972994440032, "grad_norm": 51.966094970703125, "learning_rate": 3.067621912645195e-06, "log_odds_chosen": 0.5225323438644409, "log_odds_ratio": -0.5115421414375305, "logits/chosen": 318.2131042480469, "logits/rejected": 299.44970703125, "logps/chosen": -1.1214492321014404, "logps/rejected": -1.5328994989395142, "loss": 1.609, "nll_loss": 1.4310369491577148, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05607246607542038, "rewards/margins": 0.020572511479258537, "rewards/rejected": -0.07664497196674347, "step": 595 }, { "epoch": 0.4765687053216839, "grad_norm": 43.6855354309082, "learning_rate": 3.0345731117437636e-06, "log_odds_chosen": -0.1032852903008461, "log_odds_ratio": -0.8357051610946655, "logits/chosen": 371.5491638183594, "logits/rejected": 289.3468322753906, "logps/chosen": -1.2020314931869507, "logps/rejected": -1.056931734085083, "loss": 1.8538, "nll_loss": 2.0933589935302734, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.060101576149463654, "rewards/margins": -0.007254990749061108, "rewards/rejected": -0.05284658074378967, "step": 600 }, { "epoch": 0.4805401111993646, "grad_norm": 65.06558990478516, "learning_rate": 3.001426119466581e-06, "log_odds_chosen": 0.4957882761955261, "log_odds_ratio": -0.4822470247745514, "logits/chosen": 320.2109680175781, "logits/rejected": 344.2149353027344, "logps/chosen": -1.1690635681152344, "logps/rejected": -1.5483322143554688, "loss": 1.541, "nll_loss": 1.4401990175247192, "rewards/accuracies": 1.0, "rewards/chosen": -0.05845317989587784, "rewards/margins": 0.018963433802127838, "rewards/rejected": -0.07741661369800568, "step": 605 }, { "epoch": 0.48451151707704526, "grad_norm": 42.35622024536133, "learning_rate": 2.9681870243143616e-06, "log_odds_chosen": 0.8287761807441711, "log_odds_ratio": -0.434671550989151, "logits/chosen": 405.56640625, "logits/rejected": 303.19744873046875, "logps/chosen": -1.0719507932662964, "logps/rejected": -1.627996802330017, "loss": 1.7037, "nll_loss": 1.929690957069397, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.053597547113895416, "rewards/margins": 0.027802307158708572, "rewards/rejected": -0.08139985054731369, "step": 610 }, { "epoch": 0.48848292295472595, "grad_norm": 119.18313598632812, "learning_rate": 2.9348619317054494e-06, "log_odds_chosen": 0.6101234555244446, "log_odds_ratio": -0.4588192403316498, "logits/chosen": 344.44158935546875, "logits/rejected": 381.56640625, "logps/chosen": -0.940362274646759, "logps/rejected": -1.3556063175201416, "loss": 1.7092, "nll_loss": 1.7841463088989258, "rewards/accuracies": 1.0, "rewards/chosen": -0.04701811820268631, "rewards/margins": 0.02076220139861107, "rewards/rejected": -0.06778031587600708, "step": 615 }, { "epoch": 0.4924543288324067, "grad_norm": 41.579444885253906, "learning_rate": 2.9014569628543577e-06, "log_odds_chosen": 0.6708475947380066, "log_odds_ratio": -0.4850080907344818, "logits/chosen": 271.7330017089844, "logits/rejected": 390.313232421875, "logps/chosen": -1.4829562902450562, "logps/rejected": -1.998425841331482, "loss": 1.5568, "nll_loss": 1.7184091806411743, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07414782792329788, "rewards/margins": 0.02577347680926323, "rewards/rejected": -0.09992130100727081, "step": 620 }, { "epoch": 0.4964257347100874, "grad_norm": 42.15742111206055, "learning_rate": 2.867978253647416e-06, "log_odds_chosen": 0.20527370274066925, "log_odds_ratio": -0.798372209072113, "logits/chosen": 467.48321533203125, "logits/rejected": 273.2369079589844, "logps/chosen": -1.0584286451339722, "logps/rejected": -1.0820119380950928, "loss": 1.6456, "nll_loss": 1.5411746501922607, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05292143300175667, "rewards/margins": 0.0011791624128818512, "rewards/rejected": -0.05410059541463852, "step": 625 }, { "epoch": 0.5003971405877681, "grad_norm": 27.90037727355957, "learning_rate": 2.8344319535157174e-06, "log_odds_chosen": 0.42426902055740356, "log_odds_ratio": -0.5486973524093628, "logits/chosen": 346.8106994628906, "logits/rejected": 325.4391174316406, "logps/chosen": -1.075537085533142, "logps/rejected": -1.3731486797332764, "loss": 1.4489, "nll_loss": 1.245110034942627, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.053776852786540985, "rewards/margins": 0.014880577102303505, "rewards/rejected": -0.06865743547677994, "step": 630 }, { "epoch": 0.5043685464654488, "grad_norm": 73.80306243896484, "learning_rate": 2.800824224305584e-06, "log_odds_chosen": 0.22638097405433655, "log_odds_ratio": -0.6381832361221313, "logits/chosen": 359.15911865234375, "logits/rejected": 273.5746154785156, "logps/chosen": -1.2116836309432983, "logps/rejected": -1.3346078395843506, "loss": 1.6291, "nll_loss": 1.5670521259307861, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.060584187507629395, "rewards/margins": 0.006146208383142948, "rewards/rejected": -0.06673039495944977, "step": 635 }, { "epoch": 0.5083399523431295, "grad_norm": 49.89323043823242, "learning_rate": 2.76716123914674e-06, "log_odds_chosen": 0.07012102752923965, "log_odds_ratio": -0.7102433443069458, "logits/chosen": 303.50506591796875, "logits/rejected": 324.7231750488281, "logps/chosen": -1.0607610940933228, "logps/rejected": -1.0976377725601196, "loss": 1.6129, "nll_loss": 1.4195703268051147, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05303805321455002, "rewards/margins": 0.0018438354600220919, "rewards/rejected": -0.05488189309835434, "step": 640 }, { "epoch": 0.5123113582208102, "grad_norm": 46.98134994506836, "learning_rate": 2.7334491813184276e-06, "log_odds_chosen": 0.28301459550857544, "log_odds_ratio": -0.5637701749801636, "logits/chosen": 274.5257568359375, "logits/rejected": 413.9043884277344, "logps/chosen": -0.8029153943061829, "logps/rejected": -0.9688779711723328, "loss": 1.6859, "nll_loss": 1.5517845153808594, "rewards/accuracies": 1.0, "rewards/chosen": -0.04014577344059944, "rewards/margins": 0.008298131637275219, "rewards/rejected": -0.04844390228390694, "step": 645 }, { "epoch": 0.5162827640984908, "grad_norm": 26.37677764892578, "learning_rate": 2.6996942431136466e-06, "log_odds_chosen": 0.342237263917923, "log_odds_ratio": -0.613301694393158, "logits/chosen": 273.4066162109375, "logits/rejected": 396.9400939941406, "logps/chosen": -0.9960271716117859, "logps/rejected": -1.2160321474075317, "loss": 1.3995, "nll_loss": 1.4034605026245117, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.049801357090473175, "rewards/margins": 0.011000247672200203, "rewards/rejected": -0.06080160662531853, "step": 650 }, { "epoch": 0.5202541699761716, "grad_norm": 46.33094024658203, "learning_rate": 2.6659026247017418e-06, "log_odds_chosen": 0.3311120867729187, "log_odds_ratio": -0.7188401222229004, "logits/chosen": 384.7994079589844, "logits/rejected": 300.3538818359375, "logps/chosen": -1.2231776714324951, "logps/rejected": -1.4374616146087646, "loss": 1.5018, "nll_loss": 1.7496258020401, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06115889549255371, "rewards/margins": 0.01071419008076191, "rewards/rejected": -0.07187308371067047, "step": 655 }, { "epoch": 0.5242255758538522, "grad_norm": 70.27367401123047, "learning_rate": 2.6320805329895495e-06, "log_odds_chosen": 0.4950195848941803, "log_odds_ratio": -0.6416879296302795, "logits/chosen": 338.72930908203125, "logits/rejected": 325.9527282714844, "logps/chosen": -1.1382685899734497, "logps/rejected": -1.4173352718353271, "loss": 1.6332, "nll_loss": 1.4613453149795532, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.056913428008556366, "rewards/margins": 0.01395334117114544, "rewards/rejected": -0.07086677849292755, "step": 660 }, { "epoch": 0.528196981731533, "grad_norm": 32.2733039855957, "learning_rate": 2.5982341804812946e-06, "log_odds_chosen": 0.3531644642353058, "log_odds_ratio": -0.5361741781234741, "logits/chosen": 295.8695068359375, "logits/rejected": 391.4307556152344, "logps/chosen": -1.4157756567001343, "logps/rejected": -1.6907964944839478, "loss": 1.6299, "nll_loss": 1.7296804189682007, "rewards/accuracies": 1.0, "rewards/chosen": -0.07078878581523895, "rewards/margins": 0.01375104021281004, "rewards/rejected": -0.08453982323408127, "step": 665 }, { "epoch": 0.5321683876092137, "grad_norm": 42.08566665649414, "learning_rate": 2.5643697841374722e-06, "log_odds_chosen": 0.050149548798799515, "log_odds_ratio": -0.7359659075737, "logits/chosen": 320.44049072265625, "logits/rejected": 269.22320556640625, "logps/chosen": -1.551636815071106, "logps/rejected": -1.6407535076141357, "loss": 1.8105, "nll_loss": 1.6291691064834595, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07758183777332306, "rewards/margins": 0.004455844406038523, "rewards/rejected": -0.08203768730163574, "step": 670 }, { "epoch": 0.5361397934868943, "grad_norm": 52.635318756103516, "learning_rate": 2.5304935642329e-06, "log_odds_chosen": 0.18347088992595673, "log_odds_ratio": -0.621238112449646, "logits/chosen": 344.8230895996094, "logits/rejected": 346.5325927734375, "logps/chosen": -1.3301855325698853, "logps/rejected": -1.4845483303070068, "loss": 1.7328, "nll_loss": 1.6084213256835938, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06650927662849426, "rewards/margins": 0.007718136068433523, "rewards/rejected": -0.07422741502523422, "step": 675 }, { "epoch": 0.5401111993645751, "grad_norm": 43.74726486206055, "learning_rate": 2.4966117432141726e-06, "log_odds_chosen": 0.24497541785240173, "log_odds_ratio": -0.6605706810951233, "logits/chosen": 298.27667236328125, "logits/rejected": 422.50897216796875, "logps/chosen": -0.9108420610427856, "logps/rejected": -1.1715190410614014, "loss": 1.6037, "nll_loss": 1.2029683589935303, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04554210603237152, "rewards/margins": 0.013033849187195301, "rewards/rejected": -0.05857595056295395, "step": 680 }, { "epoch": 0.5440826052422557, "grad_norm": 64.52481079101562, "learning_rate": 2.4627305445567048e-06, "log_odds_chosen": 0.09129991382360458, "log_odds_ratio": -0.6612669229507446, "logits/chosen": 390.5311279296875, "logits/rejected": 266.16888427734375, "logps/chosen": -1.044427752494812, "logps/rejected": -1.137475848197937, "loss": 1.4905, "nll_loss": 1.4350093603134155, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0522213876247406, "rewards/margins": 0.0046524060890078545, "rewards/rejected": -0.05687378719449043, "step": 685 }, { "epoch": 0.5480540111199365, "grad_norm": 50.33750534057617, "learning_rate": 2.428856191621596e-06, "log_odds_chosen": -0.4379616677761078, "log_odds_ratio": -0.9384675025939941, "logits/chosen": 295.41632080078125, "logits/rejected": 335.3627624511719, "logps/chosen": -1.5291237831115723, "logps/rejected": -1.2289823293685913, "loss": 1.5525, "nll_loss": 1.6753852367401123, "rewards/accuracies": 0.0, "rewards/chosen": -0.07645618915557861, "rewards/margins": -0.015007075853645802, "rewards/rejected": -0.061449117958545685, "step": 690 }, { "epoch": 0.5520254169976172, "grad_norm": 62.857566833496094, "learning_rate": 2.3949949065125107e-06, "log_odds_chosen": -0.1392946094274521, "log_odds_ratio": -0.8439092636108398, "logits/chosen": 298.41705322265625, "logits/rejected": 304.42584228515625, "logps/chosen": -1.1663509607315063, "logps/rejected": -1.0636799335479736, "loss": 1.3371, "nll_loss": 1.299024224281311, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05831754207611084, "rewards/margins": -0.005133545026183128, "rewards/rejected": -0.05318400263786316, "step": 695 }, { "epoch": 0.5559968228752978, "grad_norm": 36.2435417175293, "learning_rate": 2.3611529089327893e-06, "log_odds_chosen": 0.14748263359069824, "log_odds_ratio": -0.6694773435592651, "logits/chosen": 278.55145263671875, "logits/rejected": 415.44378662109375, "logps/chosen": -1.3171262741088867, "logps/rejected": -1.4007153511047363, "loss": 1.4775, "nll_loss": 1.3410961627960205, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06585632264614105, "rewards/margins": 0.004179453942924738, "rewards/rejected": -0.07003577053546906, "step": 700 }, { "epoch": 0.5599682287529786, "grad_norm": 47.188453674316406, "learning_rate": 2.327336415043006e-06, "log_odds_chosen": 0.15575894713401794, "log_odds_ratio": -0.6959660649299622, "logits/chosen": 335.40582275390625, "logits/rejected": 465.32843017578125, "logps/chosen": -1.123983383178711, "logps/rejected": -1.2077070474624634, "loss": 1.5045, "nll_loss": 1.2318366765975952, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.056199170649051666, "rewards/margins": 0.004186179488897324, "rewards/rejected": -0.06038535386323929, "step": 705 }, { "epoch": 0.5639396346306592, "grad_norm": 33.867916107177734, "learning_rate": 2.2935516363191695e-06, "log_odds_chosen": -0.3543465733528137, "log_odds_ratio": -0.9505079388618469, "logits/chosen": 280.46343994140625, "logits/rejected": 292.0199279785156, "logps/chosen": -1.207983374595642, "logps/rejected": -0.9264053106307983, "loss": 1.5108, "nll_loss": 1.4482814073562622, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.060399167239665985, "rewards/margins": -0.014078897424042225, "rewards/rejected": -0.046320270746946335, "step": 710 }, { "epoch": 0.56791104050834, "grad_norm": 71.63556671142578, "learning_rate": 2.259804778411786e-06, "log_odds_chosen": 0.571071982383728, "log_odds_ratio": -0.4987887740135193, "logits/chosen": 361.73345947265625, "logits/rejected": 394.4234313964844, "logps/chosen": -0.8434446454048157, "logps/rejected": -1.139594554901123, "loss": 1.5099, "nll_loss": 1.2623536586761475, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.042172230780124664, "rewards/margins": 0.014807499945163727, "rewards/rejected": -0.05697972699999809, "step": 715 }, { "epoch": 0.5718824463860207, "grad_norm": 31.155038833618164, "learning_rate": 2.2261020400059986e-06, "log_odds_chosen": 0.15770220756530762, "log_odds_ratio": -0.6557624340057373, "logits/chosen": 311.5298156738281, "logits/rejected": 288.30810546875, "logps/chosen": -1.1949565410614014, "logps/rejected": -1.3707664012908936, "loss": 1.4682, "nll_loss": 1.4724897146224976, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05974782258272171, "rewards/margins": 0.008790492080152035, "rewards/rejected": -0.06853832304477692, "step": 720 }, { "epoch": 0.5758538522637013, "grad_norm": 32.814144134521484, "learning_rate": 2.1924496116829996e-06, "log_odds_chosen": 0.4697895050048828, "log_odds_ratio": -0.6313791871070862, "logits/chosen": 327.9530334472656, "logits/rejected": 266.0186462402344, "logps/chosen": -1.1744751930236816, "logps/rejected": -1.497837781906128, "loss": 1.4959, "nll_loss": 1.3718366622924805, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05872376635670662, "rewards/margins": 0.016168128699064255, "rewards/rejected": -0.07489189505577087, "step": 725 }, { "epoch": 0.5798252581413821, "grad_norm": 48.00249099731445, "learning_rate": 2.158853674782928e-06, "log_odds_chosen": -0.15365850925445557, "log_odds_ratio": -1.1835048198699951, "logits/chosen": 280.81500244140625, "logits/rejected": 461.069580078125, "logps/chosen": -1.4112962484359741, "logps/rejected": -1.6498725414276123, "loss": 1.4424, "nll_loss": 1.5177103281021118, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07056482136249542, "rewards/margins": 0.011928820051252842, "rewards/rejected": -0.08249364048242569, "step": 730 }, { "epoch": 0.5837966640190627, "grad_norm": 38.61474609375, "learning_rate": 2.1253204002694777e-06, "log_odds_chosen": 0.62263023853302, "log_odds_ratio": -0.541793704032898, "logits/chosen": 304.44573974609375, "logits/rejected": 331.6595458984375, "logps/chosen": -1.1065720319747925, "logps/rejected": -1.4321861267089844, "loss": 1.5616, "nll_loss": 1.639901876449585, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.055328596383333206, "rewards/margins": 0.01628071442246437, "rewards/rejected": -0.07160931080579758, "step": 735 }, { "epoch": 0.5877680698967435, "grad_norm": 39.873226165771484, "learning_rate": 2.091855947596401e-06, "log_odds_chosen": -0.07302029430866241, "log_odds_ratio": -0.8576405644416809, "logits/chosen": 356.81842041015625, "logits/rejected": 376.3813781738281, "logps/chosen": -1.1168615818023682, "logps/rejected": -1.0692174434661865, "loss": 1.4753, "nll_loss": 1.271024465560913, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05584307760000229, "rewards/margins": -0.0023822046350687742, "rewards/rejected": -0.053460873663425446, "step": 740 }, { "epoch": 0.5917394757744241, "grad_norm": 54.21368408203125, "learning_rate": 2.058466463576124e-06, "log_odds_chosen": -0.6579837203025818, "log_odds_ratio": -1.344585657119751, "logits/chosen": 289.74786376953125, "logits/rejected": 320.3457946777344, "logps/chosen": -1.8980462551116943, "logps/rejected": -1.3352105617523193, "loss": 1.8089, "nll_loss": 1.8561862707138062, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.09490232169628143, "rewards/margins": -0.02814178541302681, "rewards/rejected": -0.06676053255796432, "step": 745 }, { "epoch": 0.5957108816521048, "grad_norm": 33.83228302001953, "learning_rate": 2.0251580812506938e-06, "log_odds_chosen": -0.6078636050224304, "log_odds_ratio": -1.1107518672943115, "logits/chosen": 339.91595458984375, "logits/rejected": 320.0901794433594, "logps/chosen": -1.4174226522445679, "logps/rejected": -1.0588816404342651, "loss": 1.4017, "nll_loss": 1.365027904510498, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07087112963199615, "rewards/margins": -0.017927050590515137, "rewards/rejected": -0.052944086492061615, "step": 750 }, { "epoch": 0.5996822875297856, "grad_norm": 37.01515579223633, "learning_rate": 1.9919369187652483e-06, "log_odds_chosen": -0.07303062826395035, "log_odds_ratio": -0.7524539232254028, "logits/chosen": 389.5413513183594, "logits/rejected": 321.3714294433594, "logps/chosen": -1.183814287185669, "logps/rejected": -1.1148512363433838, "loss": 1.619, "nll_loss": 1.339825987815857, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05919071286916733, "rewards/margins": -0.003448158036917448, "rewards/rejected": -0.05574256181716919, "step": 755 }, { "epoch": 0.6036536934074662, "grad_norm": 42.6160888671875, "learning_rate": 1.9588090782442257e-06, "log_odds_chosen": -0.02819465473294258, "log_odds_ratio": -0.7264882922172546, "logits/chosen": 313.9991760253906, "logits/rejected": 334.552978515625, "logps/chosen": -1.269689917564392, "logps/rejected": -1.2629872560501099, "loss": 1.5967, "nll_loss": 1.4182217121124268, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06348450481891632, "rewards/margins": -0.0003351382911205292, "rewards/rejected": -0.0631493628025055, "step": 760 }, { "epoch": 0.607625099285147, "grad_norm": 45.021705627441406, "learning_rate": 1.9257806446705116e-06, "log_odds_chosen": -0.48236551880836487, "log_odds_ratio": -1.207824468612671, "logits/chosen": 385.98260498046875, "logits/rejected": 320.9479675292969, "logps/chosen": -1.317742109298706, "logps/rejected": -0.8753318786621094, "loss": 1.6117, "nll_loss": 1.4493136405944824, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06588710099458694, "rewards/margins": -0.022120505571365356, "rewards/rejected": -0.04376659542322159, "step": 765 }, { "epoch": 0.6115965051628276, "grad_norm": 45.63967514038086, "learning_rate": 1.8928576847677404e-06, "log_odds_chosen": 0.15900571644306183, "log_odds_ratio": -0.71577388048172, "logits/chosen": 349.0858459472656, "logits/rejected": 374.12872314453125, "logps/chosen": -0.9837914705276489, "logps/rejected": -0.9930256009101868, "loss": 1.8407, "nll_loss": 1.8363087177276611, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.049189578741788864, "rewards/margins": 0.0004617050290107727, "rewards/rejected": -0.04965128004550934, "step": 770 }, { "epoch": 0.6155679110405083, "grad_norm": 49.38869094848633, "learning_rate": 1.8600462458859492e-06, "log_odds_chosen": 0.5004615187644958, "log_odds_ratio": -0.5722527503967285, "logits/chosen": 338.10382080078125, "logits/rejected": 324.82861328125, "logps/chosen": -0.9794312715530396, "logps/rejected": -1.2764912843704224, "loss": 1.6962, "nll_loss": 1.4810049533843994, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04897156357765198, "rewards/margins": 0.014853003434836864, "rewards/rejected": -0.06382457166910172, "step": 775 }, { "epoch": 0.6195393169181891, "grad_norm": 55.28865432739258, "learning_rate": 1.8273523548907867e-06, "log_odds_chosen": 0.8983039855957031, "log_odds_ratio": -0.7501333951950073, "logits/chosen": 323.56842041015625, "logits/rejected": 315.9183654785156, "logps/chosen": -1.0870964527130127, "logps/rejected": -1.8820436000823975, "loss": 1.7622, "nll_loss": 1.5614144802093506, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.054354824125766754, "rewards/margins": 0.03974735736846924, "rewards/rejected": -0.09410218894481659, "step": 780 }, { "epoch": 0.6235107227958697, "grad_norm": 38.31996154785156, "learning_rate": 1.7947820170564897e-06, "log_odds_chosen": 1.0626842975616455, "log_odds_ratio": -0.44282132387161255, "logits/chosen": 351.11236572265625, "logits/rejected": 300.69366455078125, "logps/chosen": -1.2781195640563965, "logps/rejected": -2.0489819049835205, "loss": 1.4668, "nll_loss": 1.7934401035308838, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0639059767127037, "rewards/margins": 0.03854311257600784, "rewards/rejected": -0.10244909673929214, "step": 785 }, { "epoch": 0.6274821286735505, "grad_norm": 49.6168212890625, "learning_rate": 1.7623412149628216e-06, "log_odds_chosen": -0.2784636616706848, "log_odds_ratio": -0.9712227582931519, "logits/chosen": 294.8133239746094, "logits/rejected": 370.743896484375, "logps/chosen": -1.2229773998260498, "logps/rejected": -1.0270025730133057, "loss": 1.6546, "nll_loss": 1.4282915592193604, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06114886328577995, "rewards/margins": -0.009798737242817879, "rewards/rejected": -0.051350127905607224, "step": 790 }, { "epoch": 0.6314535345512311, "grad_norm": 28.367610931396484, "learning_rate": 1.7300359073961834e-06, "log_odds_chosen": 0.42548590898513794, "log_odds_ratio": -0.5335083603858948, "logits/chosen": 337.03436279296875, "logits/rejected": 383.9117736816406, "logps/chosen": -1.062105655670166, "logps/rejected": -1.415236473083496, "loss": 1.3773, "nll_loss": 1.3322608470916748, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05310528352856636, "rewards/margins": 0.017656544223427773, "rewards/rejected": -0.07076182961463928, "step": 795 }, { "epoch": 0.6354249404289118, "grad_norm": 98.34535217285156, "learning_rate": 1.6978720282550897e-06, "log_odds_chosen": 0.07821528613567352, "log_odds_ratio": -0.665870726108551, "logits/chosen": 305.55938720703125, "logits/rejected": 395.2098388671875, "logps/chosen": -0.9890663027763367, "logps/rejected": -1.0237197875976562, "loss": 1.6046, "nll_loss": 1.4329578876495361, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.049453310668468475, "rewards/margins": 0.0017326741944998503, "rewards/rejected": -0.051185984164476395, "step": 800 }, { "epoch": 0.6393963463065926, "grad_norm": 34.472469329833984, "learning_rate": 1.6658554854602222e-06, "log_odds_chosen": 0.6897698640823364, "log_odds_ratio": -0.41707152128219604, "logits/chosen": 309.0857849121094, "logits/rejected": 351.38153076171875, "logps/chosen": -1.0435580015182495, "logps/rejected": -1.5444471836090088, "loss": 1.5348, "nll_loss": 1.5355165004730225, "rewards/accuracies": 1.0, "rewards/chosen": -0.052177898585796356, "rewards/margins": 0.025044452399015427, "rewards/rejected": -0.07722235471010208, "step": 805 }, { "epoch": 0.6433677521842732, "grad_norm": 48.47563171386719, "learning_rate": 1.6339921598692476e-06, "log_odds_chosen": 0.11802919209003448, "log_odds_ratio": -0.7510842084884644, "logits/chosen": 293.94256591796875, "logits/rejected": 311.3507995605469, "logps/chosen": -1.730385184288025, "logps/rejected": -1.8177807331085205, "loss": 1.7041, "nll_loss": 1.842095136642456, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.086519256234169, "rewards/margins": 0.0043697720393538475, "rewards/rejected": -0.09088902175426483, "step": 810 }, { "epoch": 0.647339158061954, "grad_norm": 31.41144371032715, "learning_rate": 1.6022879041966188e-06, "log_odds_chosen": 0.3533809185028076, "log_odds_ratio": -0.6280057430267334, "logits/chosen": 335.3319396972656, "logits/rejected": 264.1844787597656, "logps/chosen": -1.0379221439361572, "logps/rejected": -1.2540075778961182, "loss": 1.3573, "nll_loss": 1.4223954677581787, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0518961064517498, "rewards/margins": 0.01080426573753357, "rewards/rejected": -0.06270037591457367, "step": 815 }, { "epoch": 0.6513105639396346, "grad_norm": 44.874664306640625, "learning_rate": 1.5707485419385293e-06, "log_odds_chosen": -0.14202973246574402, "log_odds_ratio": -0.8004404306411743, "logits/chosen": 360.9006042480469, "logits/rejected": 264.46337890625, "logps/chosen": -1.2262499332427979, "logps/rejected": -1.134313941001892, "loss": 1.496, "nll_loss": 1.3937432765960693, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06131250411272049, "rewards/margins": -0.00459679588675499, "rewards/rejected": -0.0567157045006752, "step": 820 }, { "epoch": 0.6552819698173153, "grad_norm": 36.3102912902832, "learning_rate": 1.539379866303245e-06, "log_odds_chosen": 0.1242959052324295, "log_odds_ratio": -0.6936241984367371, "logits/chosen": 281.64874267578125, "logits/rejected": 311.4866943359375, "logps/chosen": -1.1951546669006348, "logps/rejected": -1.2739152908325195, "loss": 1.5673, "nll_loss": 1.4124051332473755, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05975773185491562, "rewards/margins": 0.003938031382858753, "rewards/rejected": -0.0636957660317421, "step": 825 }, { "epoch": 0.659253375694996, "grad_norm": 58.44258499145508, "learning_rate": 1.508187639147001e-06, "log_odds_chosen": 0.12791205942630768, "log_odds_ratio": -0.6473852396011353, "logits/chosen": 344.5693664550781, "logits/rejected": 367.2503662109375, "logps/chosen": -1.0457253456115723, "logps/rejected": -1.142913579940796, "loss": 1.5898, "nll_loss": 1.3428993225097656, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05228627473115921, "rewards/margins": 0.004859411157667637, "rewards/rejected": -0.05714568495750427, "step": 830 }, { "epoch": 0.6632247815726767, "grad_norm": 33.92708206176758, "learning_rate": 1.4771775899156487e-06, "log_odds_chosen": 0.5014594793319702, "log_odds_ratio": -0.5115988254547119, "logits/chosen": 318.1674499511719, "logits/rejected": 471.2735290527344, "logps/chosen": -1.0235928297042847, "logps/rejected": -1.36759614944458, "loss": 1.4523, "nll_loss": 1.3124425411224365, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05117964744567871, "rewards/margins": 0.017200157046318054, "rewards/rejected": -0.06837980449199677, "step": 835 }, { "epoch": 0.6671961874503575, "grad_norm": 27.84844207763672, "learning_rate": 1.4463554145922603e-06, "log_odds_chosen": -0.31632497906684875, "log_odds_ratio": -0.9493728876113892, "logits/chosen": 424.86517333984375, "logits/rejected": 282.5466613769531, "logps/chosen": -1.1070573329925537, "logps/rejected": -0.9589263796806335, "loss": 1.4065, "nll_loss": 1.1693588495254517, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.055352866649627686, "rewards/margins": -0.007406541611999273, "rewards/rejected": -0.047946326434612274, "step": 840 }, { "epoch": 0.6711675933280381, "grad_norm": 51.31157302856445, "learning_rate": 1.4157267746508834e-06, "log_odds_chosen": 0.07127873599529266, "log_odds_ratio": -0.7540755867958069, "logits/chosen": 357.64971923828125, "logits/rejected": 300.25811767578125, "logps/chosen": -1.264819860458374, "logps/rejected": -1.2909610271453857, "loss": 1.2753, "nll_loss": 1.4475698471069336, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06324099004268646, "rewards/margins": 0.0013070597779005766, "rewards/rejected": -0.0645480528473854, "step": 845 }, { "epoch": 0.6751389992057188, "grad_norm": 47.41500473022461, "learning_rate": 1.385297296016631e-06, "log_odds_chosen": 0.2415420562028885, "log_odds_ratio": -0.5941085815429688, "logits/chosen": 419.50408935546875, "logits/rejected": 281.9603576660156, "logps/chosen": -0.820796012878418, "logps/rejected": -0.9756487011909485, "loss": 1.5865, "nll_loss": 1.0082799196243286, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.041039805859327316, "rewards/margins": 0.007742627058178186, "rewards/rejected": -0.048782430589199066, "step": 850 }, { "epoch": 0.6791104050833995, "grad_norm": 42.014591217041016, "learning_rate": 1.3550725680322973e-06, "log_odds_chosen": 1.7263425588607788, "log_odds_ratio": -0.2393535077571869, "logits/chosen": 317.6227111816406, "logits/rejected": 302.24859619140625, "logps/chosen": -0.7767224311828613, "logps/rejected": -2.0884933471679688, "loss": 1.5027, "nll_loss": 1.3212028741836548, "rewards/accuracies": 1.0, "rewards/chosen": -0.038836125284433365, "rewards/margins": 0.06558854132890701, "rewards/rejected": -0.10442467033863068, "step": 855 }, { "epoch": 0.6830818109610802, "grad_norm": 32.96746063232422, "learning_rate": 1.3250581424317012e-06, "log_odds_chosen": -0.05438203737139702, "log_odds_ratio": -0.8108514547348022, "logits/chosen": 328.84979248046875, "logits/rejected": 378.38507080078125, "logps/chosen": -1.1635057926177979, "logps/rejected": -1.1345611810684204, "loss": 1.456, "nll_loss": 1.441853642463684, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05817528814077377, "rewards/margins": -0.0014472283655777574, "rewards/rejected": -0.0567280575633049, "step": 860 }, { "epoch": 0.687053216838761, "grad_norm": 25.131879806518555, "learning_rate": 1.295259532319927e-06, "log_odds_chosen": 1.491008996963501, "log_odds_ratio": -0.33983761072158813, "logits/chosen": 305.7498474121094, "logits/rejected": 395.27630615234375, "logps/chosen": -0.9085075259208679, "logps/rejected": -1.8494819402694702, "loss": 1.5229, "nll_loss": 1.9765069484710693, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.045425377786159515, "rewards/margins": 0.04704872891306877, "rewards/rejected": -0.09247410297393799, "step": 865 }, { "epoch": 0.6910246227164416, "grad_norm": 37.967525482177734, "learning_rate": 1.265682211160678e-06, "log_odds_chosen": -0.6112550497055054, "log_odds_ratio": -1.088639497756958, "logits/chosen": 310.4002685546875, "logits/rejected": 406.2005615234375, "logps/chosen": -1.192030906677246, "logps/rejected": -0.812456488609314, "loss": 1.6589, "nll_loss": 1.2748486995697021, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.059601545333862305, "rewards/margins": -0.01897871494293213, "rewards/rejected": -0.040622830390930176, "step": 870 }, { "epoch": 0.6949960285941224, "grad_norm": 75.36614227294922, "learning_rate": 1.2363316117708912e-06, "log_odds_chosen": 0.7237299680709839, "log_odds_ratio": -0.6293952465057373, "logits/chosen": 310.7578430175781, "logits/rejected": 359.1610412597656, "logps/chosen": -1.019719123840332, "logps/rejected": -1.6588159799575806, "loss": 1.5328, "nll_loss": 1.2692029476165771, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05098595470190048, "rewards/margins": 0.031954843550920486, "rewards/rejected": -0.08294080197811127, "step": 875 }, { "epoch": 0.698967434471803, "grad_norm": 36.7714958190918, "learning_rate": 1.2072131253228292e-06, "log_odds_chosen": -0.17302027344703674, "log_odds_ratio": -0.8415447473526001, "logits/chosen": 327.3581237792969, "logits/rejected": 300.59027099609375, "logps/chosen": -1.4463417530059814, "logps/rejected": -1.3052947521209717, "loss": 1.5306, "nll_loss": 1.9659268856048584, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07231709361076355, "rewards/margins": -0.007052358239889145, "rewards/rejected": -0.0652647316455841, "step": 880 }, { "epoch": 0.7029388403494837, "grad_norm": 39.60774612426758, "learning_rate": 1.1783321003538262e-06, "log_odds_chosen": 0.14880752563476562, "log_odds_ratio": -0.6801349520683289, "logits/chosen": 326.66217041015625, "logits/rejected": 288.07977294921875, "logps/chosen": -1.3033047914505005, "logps/rejected": -1.410351037979126, "loss": 1.5044, "nll_loss": 1.411853551864624, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06516523659229279, "rewards/margins": 0.005352319683879614, "rewards/rejected": -0.07051756232976913, "step": 885 }, { "epoch": 0.7069102462271644, "grad_norm": 49.855010986328125, "learning_rate": 1.1496938417838466e-06, "log_odds_chosen": -0.6302076578140259, "log_odds_ratio": -1.108147382736206, "logits/chosen": 303.8426208496094, "logits/rejected": 299.96282958984375, "logps/chosen": -1.1776927709579468, "logps/rejected": -0.8100347518920898, "loss": 1.6152, "nll_loss": 1.3265931606292725, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0588846430182457, "rewards/margins": -0.018382901325821877, "rewards/rejected": -0.04050173982977867, "step": 890 }, { "epoch": 0.7108816521048451, "grad_norm": 65.90828704833984, "learning_rate": 1.1213036099410799e-06, "log_odds_chosen": 0.3805133104324341, "log_odds_ratio": -0.5482473373413086, "logits/chosen": 334.39703369140625, "logits/rejected": 280.8984680175781, "logps/chosen": -1.1065565347671509, "logps/rejected": -1.3260886669158936, "loss": 1.5504, "nll_loss": 1.5361745357513428, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.055327825248241425, "rewards/margins": 0.010976609773933887, "rewards/rejected": -0.06630443036556244, "step": 895 }, { "epoch": 0.7148530579825259, "grad_norm": 44.0485725402832, "learning_rate": 1.0931666195957053e-06, "log_odds_chosen": 0.4383140504360199, "log_odds_ratio": -0.5253010392189026, "logits/chosen": 337.86090087890625, "logits/rejected": 394.5022888183594, "logps/chosen": -0.7901648283004761, "logps/rejected": -1.034003496170044, "loss": 1.4815, "nll_loss": 1.1732326745986938, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03950824216008186, "rewards/margins": 0.012191934511065483, "rewards/rejected": -0.0517001748085022, "step": 900 }, { "epoch": 0.7188244638602065, "grad_norm": 40.06319046020508, "learning_rate": 1.0652880390020398e-06, "log_odds_chosen": 2.0690839290618896, "log_odds_ratio": -0.2347683608531952, "logits/chosen": 423.53961181640625, "logits/rejected": 276.1210021972656, "logps/chosen": -0.6150510311126709, "logps/rejected": -1.5576661825180054, "loss": 1.4353, "nll_loss": 1.4714704751968384, "rewards/accuracies": 1.0, "rewards/chosen": -0.030752548947930336, "rewards/margins": 0.0471307598054409, "rewards/rejected": -0.07788331806659698, "step": 905 }, { "epoch": 0.7227958697378872, "grad_norm": 29.090539932250977, "learning_rate": 1.0376729889492178e-06, "log_odds_chosen": -0.0568159744143486, "log_odds_ratio": -0.7396942377090454, "logits/chosen": 287.9375915527344, "logits/rejected": 300.72698974609375, "logps/chosen": -1.0032362937927246, "logps/rejected": -0.9725478887557983, "loss": 1.5766, "nll_loss": 1.5186008214950562, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05016181990504265, "rewards/margins": -0.0015344202984124422, "rewards/rejected": -0.04862739518284798, "step": 910 }, { "epoch": 0.7267672756155679, "grad_norm": 43.51217269897461, "learning_rate": 1.0103265418205984e-06, "log_odds_chosen": 0.6717264652252197, "log_odds_ratio": -0.5441080927848816, "logits/chosen": 299.4734802246094, "logits/rejected": 346.14617919921875, "logps/chosen": -0.7290914058685303, "logps/rejected": -1.1217130422592163, "loss": 1.5629, "nll_loss": 1.4440972805023193, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03645457327365875, "rewards/margins": 0.01963108405470848, "rewards/rejected": -0.05608565732836723, "step": 915 }, { "epoch": 0.7307386814932486, "grad_norm": 43.0380859375, "learning_rate": 9.832537206620594e-07, "log_odds_chosen": 0.04898405075073242, "log_odds_ratio": -0.8131389617919922, "logits/chosen": 331.40570068359375, "logits/rejected": 315.16229248046875, "logps/chosen": -0.9992687106132507, "logps/rejected": -1.0054936408996582, "loss": 1.5654, "nll_loss": 1.4301942586898804, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.049963437020778656, "rewards/margins": 0.00031124576344154775, "rewards/rejected": -0.05027468129992485, "step": 920 }, { "epoch": 0.7347100873709294, "grad_norm": 37.88023376464844, "learning_rate": 9.564594982593559e-07, "log_odds_chosen": 0.1898471564054489, "log_odds_ratio": -0.6842805743217468, "logits/chosen": 346.6091613769531, "logits/rejected": 315.18408203125, "logps/chosen": -0.9871004819869995, "logps/rejected": -1.0915769338607788, "loss": 1.439, "nll_loss": 1.487571358680725, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.049355026334524155, "rewards/margins": 0.005223819054663181, "rewards/rejected": -0.05457884818315506, "step": 925 }, { "epoch": 0.73868149324861, "grad_norm": 54.20380783081055, "learning_rate": 9.299487962247089e-07, "log_odds_chosen": 0.022501707077026367, "log_odds_ratio": -0.8337327837944031, "logits/chosen": 302.9130554199219, "logits/rejected": 279.71685791015625, "logps/chosen": -1.2005774974822998, "logps/rejected": -1.1395219564437866, "loss": 1.6788, "nll_loss": 1.584804654121399, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06002888083457947, "rewards/margins": -0.0030527785420417786, "rewards/rejected": -0.05697610229253769, "step": 930 }, { "epoch": 0.7426528991262907, "grad_norm": 47.39323425292969, "learning_rate": 9.037264840927945e-07, "log_odds_chosen": -0.10585136711597443, "log_odds_ratio": -0.8327314257621765, "logits/chosen": 284.0058898925781, "logits/rejected": 345.33587646484375, "logps/chosen": -1.1005109548568726, "logps/rejected": -1.0587420463562012, "loss": 1.3425, "nll_loss": 1.6860910654067993, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05502554774284363, "rewards/margins": -0.002088439418002963, "rewards/rejected": -0.052937109023332596, "step": 935 }, { "epoch": 0.7466243050039714, "grad_norm": 38.21221923828125, "learning_rate": 8.777973784263016e-07, "log_odds_chosen": -0.43362635374069214, "log_odds_ratio": -0.9653336405754089, "logits/chosen": 285.54193115234375, "logits/rejected": 329.3501892089844, "logps/chosen": -1.3442434072494507, "logps/rejected": -1.126430869102478, "loss": 1.4812, "nll_loss": 1.4815846681594849, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06721217930316925, "rewards/margins": -0.01089063473045826, "rewards/rejected": -0.05632154271006584, "step": 940 }, { "epoch": 0.7505957108816521, "grad_norm": 33.12229919433594, "learning_rate": 8.521662419312124e-07, "log_odds_chosen": 0.15825173258781433, "log_odds_ratio": -0.7438204884529114, "logits/chosen": 337.7537841796875, "logits/rejected": 271.5417175292969, "logps/chosen": -1.0434350967407227, "logps/rejected": -1.243912696838379, "loss": 1.5446, "nll_loss": 1.3849962949752808, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05217175558209419, "rewards/margins": 0.010023881681263447, "rewards/rejected": -0.062195636332035065, "step": 945 }, { "epoch": 0.7545671167593329, "grad_norm": 48.55085372924805, "learning_rate": 8.268377825819821e-07, "log_odds_chosen": -0.014403104782104492, "log_odds_ratio": -0.7055137157440186, "logits/chosen": 371.1424865722656, "logits/rejected": 286.8011779785156, "logps/chosen": -1.4028490781784058, "logps/rejected": -1.39651620388031, "loss": 1.5356, "nll_loss": 1.620958924293518, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07014245539903641, "rewards/margins": -0.00031664298148825765, "rewards/rejected": -0.06982581317424774, "step": 950 }, { "epoch": 0.7585385226370135, "grad_norm": 36.20191192626953, "learning_rate": 8.018166527567672e-07, "log_odds_chosen": 1.0636457204818726, "log_odds_ratio": -0.43774813413619995, "logits/chosen": 259.2046813964844, "logits/rejected": 375.35198974609375, "logps/chosen": -0.868695080280304, "logps/rejected": -1.445894479751587, "loss": 1.4218, "nll_loss": 1.3990987539291382, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0434347540140152, "rewards/margins": 0.02885996736586094, "rewards/rejected": -0.07229472696781158, "step": 955 }, { "epoch": 0.7625099285146942, "grad_norm": 29.8554744720459, "learning_rate": 7.771074483828747e-07, "log_odds_chosen": -0.15444841980934143, "log_odds_ratio": -0.8071505427360535, "logits/chosen": 290.59674072265625, "logits/rejected": 347.6033630371094, "logps/chosen": -1.40049147605896, "logps/rejected": -1.3093515634536743, "loss": 1.3432, "nll_loss": 1.450141191482544, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07002457231283188, "rewards/margins": -0.0045569948852062225, "rewards/rejected": -0.06546757370233536, "step": 960 }, { "epoch": 0.7664813343923749, "grad_norm": 39.03826904296875, "learning_rate": 7.52714708092565e-07, "log_odds_chosen": 0.04803264141082764, "log_odds_ratio": -0.7985066175460815, "logits/chosen": 321.512939453125, "logits/rejected": 325.80853271484375, "logps/chosen": -1.1335227489471436, "logps/rejected": -1.0179483890533447, "loss": 1.5082, "nll_loss": 1.2471481561660767, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05667613074183464, "rewards/margins": -0.005778718274086714, "rewards/rejected": -0.050897419452667236, "step": 965 }, { "epoch": 0.7704527402700556, "grad_norm": 43.7759895324707, "learning_rate": 7.286429123893931e-07, "log_odds_chosen": 0.35435453057289124, "log_odds_ratio": -0.6048498153686523, "logits/chosen": 308.8065490722656, "logits/rejected": 375.87982177734375, "logps/chosen": -1.6246349811553955, "logps/rejected": -1.9498169422149658, "loss": 1.6459, "nll_loss": 1.7503254413604736, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0812317505478859, "rewards/margins": 0.016259105876088142, "rewards/rejected": -0.09749085456132889, "step": 970 }, { "epoch": 0.7744241461477362, "grad_norm": 54.47321319580078, "learning_rate": 7.048964828252188e-07, "log_odds_chosen": 0.9730485081672668, "log_odds_ratio": -0.47064799070358276, "logits/chosen": 364.9896545410156, "logits/rejected": 361.5113830566406, "logps/chosen": -0.9137029647827148, "logps/rejected": -1.395819067955017, "loss": 1.3761, "nll_loss": 1.2809137105941772, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04568514600396156, "rewards/margins": 0.02410580962896347, "rewards/rejected": -0.06979095190763474, "step": 975 }, { "epoch": 0.778395552025417, "grad_norm": 46.42416000366211, "learning_rate": 6.814797811880525e-07, "log_odds_chosen": 0.18214160203933716, "log_odds_ratio": -0.714030921459198, "logits/chosen": 386.58782958984375, "logits/rejected": 275.394775390625, "logps/chosen": -1.3727108240127563, "logps/rejected": -1.474959373474121, "loss": 1.4134, "nll_loss": 1.827099084854126, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06863553822040558, "rewards/margins": 0.005112423561513424, "rewards/rejected": -0.07374797016382217, "step": 980 }, { "epoch": 0.7823669579030977, "grad_norm": 53.367584228515625, "learning_rate": 6.583971087008654e-07, "log_odds_chosen": 0.1975199282169342, "log_odds_ratio": -0.6967960596084595, "logits/chosen": 292.4072265625, "logits/rejected": 433.8272399902344, "logps/chosen": -0.9060415029525757, "logps/rejected": -0.9691941142082214, "loss": 1.3796, "nll_loss": 1.407875657081604, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04530208185315132, "rewards/margins": 0.0031576238106936216, "rewards/rejected": -0.04845970869064331, "step": 985 }, { "epoch": 0.7863383637807784, "grad_norm": 50.85255813598633, "learning_rate": 6.356527052315403e-07, "log_odds_chosen": -0.2651984691619873, "log_odds_ratio": -0.8623536825180054, "logits/chosen": 372.86358642578125, "logits/rejected": 285.08172607421875, "logps/chosen": -1.2994807958602905, "logps/rejected": -1.1402140855789185, "loss": 1.5812, "nll_loss": 1.4798548221588135, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06497403979301453, "rewards/margins": -0.007963338866829872, "rewards/rejected": -0.0570107102394104, "step": 990 }, { "epoch": 0.7903097696584591, "grad_norm": 79.17274475097656, "learning_rate": 6.132507485140843e-07, "log_odds_chosen": 1.5265482664108276, "log_odds_ratio": -0.2821381688117981, "logits/chosen": 388.9686279296875, "logits/rejected": 307.1571350097656, "logps/chosen": -0.7675724625587463, "logps/rejected": -1.6737315654754639, "loss": 1.4555, "nll_loss": 1.2786924839019775, "rewards/accuracies": 1.0, "rewards/chosen": -0.03837861865758896, "rewards/margins": 0.045307956635951996, "rewards/rejected": -0.08368657529354095, "step": 995 }, { "epoch": 0.7942811755361397, "grad_norm": 41.524253845214844, "learning_rate": 5.911953533812506e-07, "log_odds_chosen": -0.3479143977165222, "log_odds_ratio": -0.9029040336608887, "logits/chosen": 338.65325927734375, "logits/rejected": 374.2637939453125, "logps/chosen": -0.9410643577575684, "logps/rejected": -0.7491464614868164, "loss": 1.655, "nll_loss": 1.3791284561157227, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.047053221613168716, "rewards/margins": -0.009595893323421478, "rewards/rejected": -0.03745732456445694, "step": 1000 }, { "epoch": 0.7982525814138205, "grad_norm": 47.42415237426758, "learning_rate": 5.694905710087217e-07, "log_odds_chosen": 0.20326891541481018, "log_odds_ratio": -0.6220360398292542, "logits/chosen": 397.6610107421875, "logits/rejected": 313.22088623046875, "logps/chosen": -0.8882730603218079, "logps/rejected": -1.0516657829284668, "loss": 1.6773, "nll_loss": 1.243209719657898, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04441365599632263, "rewards/margins": 0.008169631473720074, "rewards/rejected": -0.05258328840136528, "step": 1005 }, { "epoch": 0.8022239872915012, "grad_norm": 45.351253509521484, "learning_rate": 5.481403881709815e-07, "log_odds_chosen": 0.5789100527763367, "log_odds_ratio": -0.886620044708252, "logits/chosen": 345.7013854980469, "logits/rejected": 279.64801025390625, "logps/chosen": -0.9864595532417297, "logps/rejected": -1.3232471942901611, "loss": 1.6636, "nll_loss": 1.7329130172729492, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04932297766208649, "rewards/margins": 0.01683938130736351, "rewards/rejected": -0.0661623626947403, "step": 1010 }, { "epoch": 0.8061953931691819, "grad_norm": 45.76054382324219, "learning_rate": 5.271487265090163e-07, "log_odds_chosen": 0.1340581774711609, "log_odds_ratio": -0.6442204117774963, "logits/chosen": 253.32131958007812, "logits/rejected": 293.978759765625, "logps/chosen": -0.9866166114807129, "logps/rejected": -1.0805059671401978, "loss": 1.3506, "nll_loss": 1.1058270931243896, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04933083802461624, "rewards/margins": 0.004694463685154915, "rewards/rejected": -0.05402529984712601, "step": 1015 }, { "epoch": 0.8101667990468626, "grad_norm": 30.294597625732422, "learning_rate": 5.06519441809982e-07, "log_odds_chosen": 0.6967889666557312, "log_odds_ratio": -0.5373490452766418, "logits/chosen": 318.8917541503906, "logits/rejected": 270.747802734375, "logps/chosen": -1.074588418006897, "logps/rejected": -1.4418222904205322, "loss": 1.4542, "nll_loss": 1.5221776962280273, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05372941493988037, "rewards/margins": 0.01836169883608818, "rewards/rejected": -0.07209111005067825, "step": 1020 }, { "epoch": 0.8141382049245433, "grad_norm": 53.206661224365234, "learning_rate": 4.862563232989643e-07, "log_odds_chosen": 0.20271643996238708, "log_odds_ratio": -0.6173663139343262, "logits/chosen": 485.3902893066406, "logits/rejected": 313.65185546875, "logps/chosen": -1.0637105703353882, "logps/rejected": -1.2300751209259033, "loss": 1.4923, "nll_loss": 1.1044560670852661, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05318553373217583, "rewards/margins": 0.008318223990499973, "rewards/rejected": -0.061503756791353226, "step": 1025 }, { "epoch": 0.818109610802224, "grad_norm": 78.89067077636719, "learning_rate": 4.663630929429674e-07, "log_odds_chosen": 0.3912231922149658, "log_odds_ratio": -0.5344873666763306, "logits/chosen": 278.98651123046875, "logits/rejected": 279.85418701171875, "logps/chosen": -0.7328051924705505, "logps/rejected": -0.9687238931655884, "loss": 1.497, "nll_loss": 1.046706199645996, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.036640264093875885, "rewards/margins": 0.011795936152338982, "rewards/rejected": -0.048436202108860016, "step": 1030 }, { "epoch": 0.8220810166799047, "grad_norm": 51.44734573364258, "learning_rate": 4.46843404767259e-07, "log_odds_chosen": 0.3833610415458679, "log_odds_ratio": -0.8149446249008179, "logits/chosen": 314.2817077636719, "logits/rejected": 307.8243103027344, "logps/chosen": -1.357617735862732, "logps/rejected": -1.7892076969146729, "loss": 1.459, "nll_loss": 1.4600521326065063, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06788089126348495, "rewards/margins": 0.021579492837190628, "rewards/rejected": -0.08946038782596588, "step": 1035 }, { "epoch": 0.8260524225575854, "grad_norm": 39.02664566040039, "learning_rate": 4.2770084418418736e-07, "log_odds_chosen": 0.27395057678222656, "log_odds_ratio": -0.7102149128913879, "logits/chosen": 414.571533203125, "logits/rejected": 287.80877685546875, "logps/chosen": -0.8411673307418823, "logps/rejected": -0.9671209454536438, "loss": 1.5205, "nll_loss": 1.5072977542877197, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.042058371007442474, "rewards/margins": 0.006297673098742962, "rewards/rejected": -0.04835604503750801, "step": 1040 }, { "epoch": 0.8300238284352661, "grad_norm": 44.10149383544922, "learning_rate": 4.089389273346084e-07, "log_odds_chosen": 1.4467592239379883, "log_odds_ratio": -0.36090224981307983, "logits/chosen": 336.5260009765625, "logits/rejected": 268.7255859375, "logps/chosen": -0.579176664352417, "logps/rejected": -1.433186650276184, "loss": 1.4451, "nll_loss": 2.0459094047546387, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.028958836570382118, "rewards/margins": 0.042700495570898056, "rewards/rejected": -0.07165933400392532, "step": 1045 }, { "epoch": 0.8339952343129468, "grad_norm": 29.071208953857422, "learning_rate": 3.9056110044203594e-07, "log_odds_chosen": 0.3594241142272949, "log_odds_ratio": -0.6073828935623169, "logits/chosen": 340.73431396484375, "logits/rejected": 293.41363525390625, "logps/chosen": -1.039194107055664, "logps/rejected": -1.395516037940979, "loss": 1.3767, "nll_loss": 1.1890077590942383, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05195971205830574, "rewards/margins": 0.017816094681620598, "rewards/rejected": -0.06977580487728119, "step": 1050 }, { "epoch": 0.8379666401906275, "grad_norm": 50.16731643676758, "learning_rate": 3.72570739179631e-07, "log_odds_chosen": 0.49487370252609253, "log_odds_ratio": -0.49297910928726196, "logits/chosen": 280.0902404785156, "logits/rejected": 468.6788024902344, "logps/chosen": -1.0811553001403809, "logps/rejected": -1.4258912801742554, "loss": 1.453, "nll_loss": 1.3579655885696411, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0540577657520771, "rewards/margins": 0.017236804589629173, "rewards/rejected": -0.07129456847906113, "step": 1055 }, { "epoch": 0.8419380460683081, "grad_norm": 33.406272888183594, "learning_rate": 3.5497114805015223e-07, "log_odds_chosen": 0.14397627115249634, "log_odds_ratio": -0.6715080142021179, "logits/chosen": 373.4464416503906, "logits/rejected": 325.99163818359375, "logps/chosen": -0.9457298517227173, "logps/rejected": -1.0310219526290894, "loss": 1.4311, "nll_loss": 1.5625425577163696, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.047286491841077805, "rewards/margins": 0.004264607094228268, "rewards/rejected": -0.051551103591918945, "step": 1060 }, { "epoch": 0.8459094519459889, "grad_norm": 50.40607833862305, "learning_rate": 3.377655597789789e-07, "log_odds_chosen": -0.3970710337162018, "log_odds_ratio": -1.0087158679962158, "logits/chosen": 305.99798583984375, "logits/rejected": 319.93890380859375, "logps/chosen": -1.3028199672698975, "logps/rejected": -1.107723593711853, "loss": 1.5254, "nll_loss": 1.708296537399292, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.06514099985361099, "rewards/margins": -0.009754816070199013, "rewards/rejected": -0.05538617819547653, "step": 1065 }, { "epoch": 0.8498808578236696, "grad_norm": 32.866085052490234, "learning_rate": 3.209571347203197e-07, "log_odds_chosen": -0.3324670195579529, "log_odds_ratio": -0.9437648057937622, "logits/chosen": 288.600830078125, "logits/rejected": 395.1521911621094, "logps/chosen": -0.9841065406799316, "logps/rejected": -0.9132230877876282, "loss": 1.2591, "nll_loss": 1.0712697505950928, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04920532926917076, "rewards/margins": -0.0035441755317151546, "rewards/rejected": -0.04566115140914917, "step": 1070 }, { "epoch": 0.8538522637013503, "grad_norm": 63.01152038574219, "learning_rate": 3.0454896027671073e-07, "log_odds_chosen": -0.15130704641342163, "log_odds_ratio": -0.8452315330505371, "logits/chosen": 408.84552001953125, "logits/rejected": 319.49493408203125, "logps/chosen": -1.28704035282135, "logps/rejected": -1.2764626741409302, "loss": 1.6339, "nll_loss": 1.682464361190796, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06435202062129974, "rewards/margins": -0.000528886157553643, "rewards/rejected": -0.06382313370704651, "step": 1075 }, { "epoch": 0.857823669579031, "grad_norm": 32.351932525634766, "learning_rate": 2.885440503319145e-07, "log_odds_chosen": -0.4667375087738037, "log_odds_ratio": -1.0818705558776855, "logits/chosen": 459.02410888671875, "logits/rejected": 257.15716552734375, "logps/chosen": -1.1935930252075195, "logps/rejected": -0.7796539068222046, "loss": 1.548, "nll_loss": 1.7794479131698608, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.05967964977025986, "rewards/margins": -0.020696954801678658, "rewards/rejected": -0.03898269310593605, "step": 1080 }, { "epoch": 0.8617950754567116, "grad_norm": 36.15253829956055, "learning_rate": 2.7294534469732794e-07, "log_odds_chosen": 1.6522690057754517, "log_odds_ratio": -0.4097142219543457, "logits/chosen": 344.11962890625, "logits/rejected": 319.623046875, "logps/chosen": -0.894513726234436, "logps/rejected": -2.088923692703247, "loss": 1.4534, "nll_loss": 1.7392299175262451, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0447256863117218, "rewards/margins": 0.05972049757838249, "rewards/rejected": -0.10444619506597519, "step": 1085 }, { "epoch": 0.8657664813343924, "grad_norm": 47.798858642578125, "learning_rate": 2.5775570857199144e-07, "log_odds_chosen": -0.20097847282886505, "log_odds_ratio": -0.8294545412063599, "logits/chosen": 281.59942626953125, "logits/rejected": 324.57403564453125, "logps/chosen": -1.1493308544158936, "logps/rejected": -0.9629890322685242, "loss": 1.3747, "nll_loss": 1.3210439682006836, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0574665442109108, "rewards/margins": -0.009317094460129738, "rewards/rejected": -0.04814944788813591, "step": 1090 }, { "epoch": 0.8697378872120731, "grad_norm": 29.671043395996094, "learning_rate": 2.4297793201630113e-07, "log_odds_chosen": 0.020139653235673904, "log_odds_ratio": -0.7046443819999695, "logits/chosen": 300.97998046875, "logits/rejected": 333.9520568847656, "logps/chosen": -0.972399115562439, "logps/rejected": -0.9247520565986633, "loss": 1.4489, "nll_loss": 1.3582823276519775, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04861995577812195, "rewards/margins": -0.0023823559749871492, "rewards/rejected": -0.04623759910464287, "step": 1095 }, { "epoch": 0.8737092930897538, "grad_norm": 30.31666374206543, "learning_rate": 2.286147294395283e-07, "log_odds_chosen": 0.29012542963027954, "log_odds_ratio": -0.7615915536880493, "logits/chosen": 315.9916687011719, "logits/rejected": 422.93017578125, "logps/chosen": -1.0184037685394287, "logps/rejected": -1.0094739198684692, "loss": 1.5729, "nll_loss": 1.4798800945281982, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05092019587755203, "rewards/margins": -0.00044649915071204305, "rewards/rejected": -0.05047369748353958, "step": 1100 }, { "epoch": 0.8776806989674345, "grad_norm": 32.19295120239258, "learning_rate": 2.1466873910123058e-07, "log_odds_chosen": -0.07446761429309845, "log_odds_ratio": -0.8099315762519836, "logits/chosen": 334.15740966796875, "logits/rejected": 331.76031494140625, "logps/chosen": -1.026379942893982, "logps/rejected": -0.9773386120796204, "loss": 1.4675, "nll_loss": 1.4952067136764526, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.05131899565458298, "rewards/margins": -0.002452067332342267, "rewards/rejected": -0.048866935074329376, "step": 1105 }, { "epoch": 0.8816521048451151, "grad_norm": 34.077449798583984, "learning_rate": 2.0114252262665086e-07, "log_odds_chosen": 0.30518868565559387, "log_odds_ratio": -0.6247283220291138, "logits/chosen": 313.799560546875, "logits/rejected": 278.57293701171875, "logps/chosen": -1.2211542129516602, "logps/rejected": -1.4464585781097412, "loss": 1.4043, "nll_loss": 1.2219561338424683, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06105770915746689, "rewards/margins": 0.011265222914516926, "rewards/rejected": -0.07232292741537094, "step": 1110 }, { "epoch": 0.8856235107227959, "grad_norm": 33.00147247314453, "learning_rate": 1.880385645361951e-07, "log_odds_chosen": 2.0590970516204834, "log_odds_ratio": -0.4269631803035736, "logits/chosen": 304.67572021484375, "logits/rejected": 437.95751953125, "logps/chosen": -0.8812467455863953, "logps/rejected": -2.7326838970184326, "loss": 1.5588, "nll_loss": 1.2233951091766357, "rewards/accuracies": 1.0, "rewards/chosen": -0.04406233876943588, "rewards/margins": 0.09257186949253082, "rewards/rejected": -0.1366342008113861, "step": 1115 }, { "epoch": 0.8895949166004765, "grad_norm": 29.0517578125, "learning_rate": 1.7535927178906898e-07, "log_odds_chosen": 0.6221317648887634, "log_odds_ratio": -0.5823326110839844, "logits/chosen": 288.1285705566406, "logits/rejected": 357.75701904296875, "logps/chosen": -1.0755956172943115, "logps/rejected": -1.5605504512786865, "loss": 1.4892, "nll_loss": 1.3548178672790527, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.053779780864715576, "rewards/margins": 0.024247746914625168, "rewards/rejected": -0.07802753150463104, "step": 1120 }, { "epoch": 0.8935663224781573, "grad_norm": 37.735111236572266, "learning_rate": 1.6310697334116583e-07, "log_odds_chosen": -0.2765834629535675, "log_odds_ratio": -0.8877069354057312, "logits/chosen": 357.4658203125, "logits/rejected": 302.4728088378906, "logps/chosen": -1.121751070022583, "logps/rejected": -0.9259787797927856, "loss": 1.4662, "nll_loss": 1.56728196144104, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.05608755350112915, "rewards/margins": -0.009788615629076958, "rewards/rejected": -0.04629894345998764, "step": 1125 }, { "epoch": 0.897537728355838, "grad_norm": 39.21086120605469, "learning_rate": 1.512839197172758e-07, "log_odds_chosen": 0.1607791781425476, "log_odds_ratio": -0.7086135745048523, "logits/chosen": 431.327880859375, "logits/rejected": 309.34210205078125, "logps/chosen": -1.1171777248382568, "logps/rejected": -1.3307039737701416, "loss": 1.3869, "nll_loss": 1.7646287679672241, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05585888773202896, "rewards/margins": 0.010676311329007149, "rewards/rejected": -0.06653519719839096, "step": 1130 }, { "epoch": 0.9015091342335186, "grad_norm": 49.552589416503906, "learning_rate": 1.398922825977092e-07, "log_odds_chosen": 1.1862103939056396, "log_odds_ratio": -0.36970359086990356, "logits/chosen": 287.95318603515625, "logits/rejected": 344.77764892578125, "logps/chosen": -1.1711792945861816, "logps/rejected": -2.0627903938293457, "loss": 1.4555, "nll_loss": 1.4977771043777466, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05855896323919296, "rewards/margins": 0.044580571353435516, "rewards/rejected": -0.10313953459262848, "step": 1135 }, { "epoch": 0.9054805401111994, "grad_norm": 34.04672622680664, "learning_rate": 1.2893415441939588e-07, "log_odds_chosen": 0.042095281183719635, "log_odds_ratio": -0.7100853323936462, "logits/chosen": 319.3915710449219, "logits/rejected": 372.4937438964844, "logps/chosen": -0.8466545343399048, "logps/rejected": -0.8484467267990112, "loss": 1.3861, "nll_loss": 1.0329768657684326, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0423327274620533, "rewards/margins": 8.96111159818247e-05, "rewards/rejected": -0.0424223393201828, "step": 1140 }, { "epoch": 0.90945194598888, "grad_norm": 43.70923614501953, "learning_rate": 1.1841154799154376e-07, "log_odds_chosen": 0.09307994693517685, "log_odds_ratio": -0.8356366157531738, "logits/chosen": 383.5526428222656, "logits/rejected": 280.0104064941406, "logps/chosen": -1.0310574769973755, "logps/rejected": -1.0444084405899048, "loss": 1.4938, "nll_loss": 1.3081772327423096, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.051552869379520416, "rewards/margins": 0.0006675481563434005, "rewards/rejected": -0.0522204227745533, "step": 1145 }, { "epoch": 0.9134233518665608, "grad_norm": 42.19453048706055, "learning_rate": 1.083263961259215e-07, "log_odds_chosen": 0.5610286593437195, "log_odds_ratio": -0.5318921804428101, "logits/chosen": 313.7818298339844, "logits/rejected": 281.55303955078125, "logps/chosen": -0.8616586923599243, "logps/rejected": -1.13971745967865, "loss": 1.3678, "nll_loss": 1.5508089065551758, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.043082933872938156, "rewards/margins": 0.01390293799340725, "rewards/rejected": -0.056985873728990555, "step": 1150 }, { "epoch": 0.9173947577442415, "grad_norm": 62.5867805480957, "learning_rate": 9.86805512818359e-08, "log_odds_chosen": 0.5819055438041687, "log_odds_ratio": -0.5563138723373413, "logits/chosen": 308.6764221191406, "logits/rejected": 280.77093505859375, "logps/chosen": -0.7943710684776306, "logps/rejected": -1.1860836744308472, "loss": 1.5596, "nll_loss": 1.2872906923294067, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03971855714917183, "rewards/margins": 0.019585633650422096, "rewards/rejected": -0.059304188936948776, "step": 1155 }, { "epoch": 0.9213661636219221, "grad_norm": 39.823516845703125, "learning_rate": 8.947578522587097e-08, "log_odds_chosen": 0.5100874304771423, "log_odds_ratio": -0.7220025062561035, "logits/chosen": 421.84722900390625, "logits/rejected": 306.4421081542969, "logps/chosen": -0.7023354768753052, "logps/rejected": -0.9484280347824097, "loss": 1.2855, "nll_loss": 0.8443538546562195, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0351167730987072, "rewards/margins": 0.012304631993174553, "rewards/rejected": -0.0474214032292366, "step": 1160 }, { "epoch": 0.9253375694996029, "grad_norm": 34.763492584228516, "learning_rate": 8.071378870644381e-08, "log_odds_chosen": 0.068526491522789, "log_odds_ratio": -0.7172547578811646, "logits/chosen": 306.42303466796875, "logits/rejected": 306.2086486816406, "logps/chosen": -1.1717783212661743, "logps/rejected": -1.2519972324371338, "loss": 1.4611, "nll_loss": 1.6088817119598389, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05858892202377319, "rewards/margins": 0.004010946489870548, "rewards/rejected": -0.06259986758232117, "step": 1165 }, { "epoch": 0.9293089753772835, "grad_norm": 57.37542724609375, "learning_rate": 7.239617114324499e-08, "log_odds_chosen": -0.01497584581375122, "log_odds_ratio": -0.7752578854560852, "logits/chosen": 278.559326171875, "logits/rejected": 374.15869140625, "logps/chosen": -1.2931535243988037, "logps/rejected": -1.2771342992782593, "loss": 1.5557, "nll_loss": 1.4230643510818481, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06465767323970795, "rewards/margins": -0.0008009634912014008, "rewards/rejected": -0.06385671347379684, "step": 1170 }, { "epoch": 0.9332803812549643, "grad_norm": 54.0869026184082, "learning_rate": 6.452446033161946e-08, "log_odds_chosen": -0.19031484425067902, "log_odds_ratio": -0.8042638897895813, "logits/chosen": 299.294921875, "logits/rejected": 398.8330078125, "logps/chosen": -1.0967943668365479, "logps/rejected": -1.014966607093811, "loss": 1.5468, "nll_loss": 1.4041001796722412, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.05483972281217575, "rewards/margins": -0.0040913899429142475, "rewards/rejected": -0.05074832960963249, "step": 1175 }, { "epoch": 0.937251787132645, "grad_norm": 30.728069305419922, "learning_rate": 5.7100102161937176e-08, "log_odds_chosen": 0.08359535038471222, "log_odds_ratio": -0.6820527911186218, "logits/chosen": 354.07806396484375, "logits/rejected": 264.95947265625, "logps/chosen": -0.9352580904960632, "logps/rejected": -0.9271550178527832, "loss": 1.3888, "nll_loss": 1.2889841794967651, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04676290601491928, "rewards/margins": -0.0004051584692206234, "rewards/rejected": -0.04635775089263916, "step": 1180 }, { "epoch": 0.9412231930103256, "grad_norm": 30.694597244262695, "learning_rate": 5.012446035400881e-08, "log_odds_chosen": -0.18491533398628235, "log_odds_ratio": -0.8581596612930298, "logits/chosen": 431.51483154296875, "logits/rejected": 256.1930236816406, "logps/chosen": -1.3761582374572754, "logps/rejected": -1.2526451349258423, "loss": 1.5465, "nll_loss": 1.7214624881744385, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06880791485309601, "rewards/margins": -0.006175657268613577, "rewards/rejected": -0.062632255256176, "step": 1185 }, { "epoch": 0.9451945988880064, "grad_norm": 52.927608489990234, "learning_rate": 4.359881620659534e-08, "log_odds_chosen": 0.6782919764518738, "log_odds_ratio": -0.6247768998146057, "logits/chosen": 436.1754455566406, "logits/rejected": 285.18365478515625, "logps/chosen": -0.985084056854248, "logps/rejected": -1.5693267583847046, "loss": 1.4816, "nll_loss": 1.398045301437378, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04925420135259628, "rewards/margins": 0.029212135821580887, "rewards/rejected": -0.07846634089946747, "step": 1190 }, { "epoch": 0.949166004765687, "grad_norm": 48.66614532470703, "learning_rate": 3.7524368362057415e-08, "log_odds_chosen": -0.1631685197353363, "log_odds_ratio": -0.8233796954154968, "logits/chosen": 418.45928955078125, "logits/rejected": 318.4512939453125, "logps/chosen": -0.8457492589950562, "logps/rejected": -0.7526192665100098, "loss": 1.4522, "nll_loss": 1.3443362712860107, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04228746145963669, "rewards/margins": -0.004656502045691013, "rewards/rejected": -0.03763096407055855, "step": 1195 }, { "epoch": 0.9531374106433678, "grad_norm": 29.535236358642578, "learning_rate": 3.1902232586185635e-08, "log_odds_chosen": 0.14746162295341492, "log_odds_ratio": -0.7185263633728027, "logits/chosen": 346.12823486328125, "logits/rejected": 365.977783203125, "logps/chosen": -0.9943090677261353, "logps/rejected": -0.9647199511528015, "loss": 1.5476, "nll_loss": 1.3550399541854858, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04971545934677124, "rewards/margins": -0.001479454687796533, "rewards/rejected": -0.048236001282930374, "step": 1200 }, { "epoch": 0.9571088165210484, "grad_norm": 46.18953323364258, "learning_rate": 2.673344156325558e-08, "log_odds_chosen": 0.25677961111068726, "log_odds_ratio": -0.6748029589653015, "logits/chosen": 321.4141845703125, "logits/rejected": 390.1539611816406, "logps/chosen": -1.306235432624817, "logps/rejected": -1.5235410928726196, "loss": 1.4511, "nll_loss": 1.348838448524475, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06531177461147308, "rewards/margins": 0.010865284129977226, "rewards/rejected": -0.07617706060409546, "step": 1205 }, { "epoch": 0.9610802223987291, "grad_norm": 48.14284133911133, "learning_rate": 2.2018944706341528e-08, "log_odds_chosen": 0.25043779611587524, "log_odds_ratio": -0.6027860045433044, "logits/chosen": 383.79913330078125, "logits/rejected": 336.7133483886719, "logps/chosen": -1.0423628091812134, "logps/rejected": -1.1916873455047607, "loss": 1.4619, "nll_loss": 1.6256072521209717, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05211814120411873, "rewards/margins": 0.007466231472790241, "rewards/rejected": -0.059584371745586395, "step": 1210 }, { "epoch": 0.9650516282764099, "grad_norm": 32.60062026977539, "learning_rate": 1.775960798292731e-08, "log_odds_chosen": -0.04052457585930824, "log_odds_ratio": -0.8082249760627747, "logits/chosen": 331.26397705078125, "logits/rejected": 329.55096435546875, "logps/chosen": -1.347312092781067, "logps/rejected": -1.3537461757659912, "loss": 1.5079, "nll_loss": 1.5286957025527954, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.0673656016588211, "rewards/margins": 0.00032170413760468364, "rewards/rejected": -0.06768731772899628, "step": 1215 }, { "epoch": 0.9690230341540905, "grad_norm": 43.27484893798828, "learning_rate": 1.3956213755842718e-08, "log_odds_chosen": -0.26221469044685364, "log_odds_ratio": -0.9925212860107422, "logits/chosen": 304.05120849609375, "logits/rejected": 393.05517578125, "logps/chosen": -1.015187382698059, "logps/rejected": -0.9859923124313354, "loss": 1.3977, "nll_loss": 1.1624058485031128, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.050759367644786835, "rewards/margins": -0.0014597497647628188, "rewards/rejected": -0.04929962009191513, "step": 1220 }, { "epoch": 0.9729944400317713, "grad_norm": 43.98046875, "learning_rate": 1.0609460639559033e-08, "log_odds_chosen": 0.6014237999916077, "log_odds_ratio": -0.5421421527862549, "logits/chosen": 339.303955078125, "logits/rejected": 391.6441345214844, "logps/chosen": -1.0844001770019531, "logps/rejected": -1.4398688077926636, "loss": 1.3258, "nll_loss": 1.4467300176620483, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.054220009595155716, "rewards/margins": 0.017773432657122612, "rewards/rejected": -0.07199344038963318, "step": 1225 }, { "epoch": 0.9769658459094519, "grad_norm": 34.32419967651367, "learning_rate": 7.719963371865259e-09, "log_odds_chosen": 0.7910041809082031, "log_odds_ratio": -0.4584100842475891, "logits/chosen": 316.9669494628906, "logits/rejected": 363.3733825683594, "logps/chosen": -0.8123126029968262, "logps/rejected": -1.2943851947784424, "loss": 1.3404, "nll_loss": 1.313084363937378, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04061562940478325, "rewards/margins": 0.02410362847149372, "rewards/rejected": -0.06471925973892212, "step": 1230 }, { "epoch": 0.9809372517871326, "grad_norm": 42.33149337768555, "learning_rate": 5.288252700952068e-09, "log_odds_chosen": 0.8038623929023743, "log_odds_ratio": -0.43806830048561096, "logits/chosen": 310.6473083496094, "logits/rejected": 314.3547668457031, "logps/chosen": -0.8453804850578308, "logps/rejected": -1.4276442527770996, "loss": 1.4819, "nll_loss": 1.6422332525253296, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0422690249979496, "rewards/margins": 0.02911318838596344, "rewards/rejected": -0.07138221710920334, "step": 1235 }, { "epoch": 0.9849086576648134, "grad_norm": 45.44175720214844, "learning_rate": 3.3147752879236773e-09, "log_odds_chosen": 0.02175927720963955, "log_odds_ratio": -0.8097355961799622, "logits/chosen": 277.007080078125, "logits/rejected": 415.473388671875, "logps/chosen": -1.0552597045898438, "logps/rejected": -1.2169201374053955, "loss": 1.5311, "nll_loss": 1.5463203191757202, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05276298522949219, "rewards/margins": 0.008083020336925983, "rewards/rejected": -0.060846008360385895, "step": 1240 }, { "epoch": 0.988880063542494, "grad_norm": 62.612850189208984, "learning_rate": 1.7998936247534681e-09, "log_odds_chosen": 0.2898419499397278, "log_odds_ratio": -0.5893855094909668, "logits/chosen": 399.6653747558594, "logits/rejected": 274.2057800292969, "logps/chosen": -1.0792999267578125, "logps/rejected": -1.2781049013137817, "loss": 1.4564, "nll_loss": 1.5806140899658203, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.053964994847774506, "rewards/margins": 0.009940249845385551, "rewards/rejected": -0.0639052465558052, "step": 1245 }, { "epoch": 0.9928514694201748, "grad_norm": 51.08999252319336, "learning_rate": 7.438859677008636e-10, "log_odds_chosen": 0.09502691775560379, "log_odds_ratio": -0.7220104336738586, "logits/chosen": 325.39263916015625, "logits/rejected": 361.7767639160156, "logps/chosen": -1.1207284927368164, "logps/rejected": -1.173607587814331, "loss": 1.3621, "nll_loss": 1.241156816482544, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05603642389178276, "rewards/margins": 0.0026439554058015347, "rewards/rejected": -0.05868038535118103, "step": 1250 }, { "epoch": 0.9968228752978554, "grad_norm": 44.36956787109375, "learning_rate": 1.4694628620137708e-10, "log_odds_chosen": -0.33857935667037964, "log_odds_ratio": -0.9029590487480164, "logits/chosen": 293.49420166015625, "logits/rejected": 431.3218688964844, "logps/chosen": -0.9575881958007812, "logps/rejected": -0.7752519845962524, "loss": 1.3423, "nll_loss": 1.1781036853790283, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.0478794127702713, "rewards/margins": -0.009116815403103828, "rewards/rejected": -0.038762595504522324, "step": 1255 }, { "epoch": 1.0, "eval_log_odds_chosen": 0.23976314067840576, "eval_log_odds_ratio": -0.6928443908691406, "eval_logits/chosen": 340.5321350097656, "eval_logits/rejected": 312.9670104980469, "eval_logps/chosen": -1.0267834663391113, "eval_logps/rejected": -1.1786600351333618, "eval_loss": 1.455647587776184, "eval_nll_loss": 1.4095592498779297, "eval_rewards/accuracies": 0.5107913613319397, "eval_rewards/chosen": -0.051339175552129745, "eval_rewards/margins": 0.0075938161462545395, "eval_rewards/rejected": -0.058932989835739136, "eval_runtime": 91.5981, "eval_samples_per_second": 6.037, "eval_steps_per_second": 1.517, "step": 1259 }, { "epoch": 1.0, "step": 1259, "total_flos": 0.0, "train_loss": 1.8019611810861456, "train_runtime": 4470.8327, "train_samples_per_second": 1.126, "train_steps_per_second": 0.282 } ], "logging_steps": 5, "max_steps": 1259, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }