diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,4579 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 1259, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.003971405877680699, + "grad_norm": 1986.144287109375, + "learning_rate": 2.5000000000000004e-07, + "log_odds_chosen": -0.8231229782104492, + "log_odds_ratio": -1.391985297203064, + "logits/chosen": 102.16714477539062, + "logits/rejected": -12.402770042419434, + "logps/chosen": -16.669206619262695, + "logps/rejected": -15.846084594726562, + "loss": 14.8236, + "nll_loss": 15.787309646606445, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.8334604501724243, + "rewards/margins": -0.041156161576509476, + "rewards/rejected": -0.7923042178153992, + "step": 5 + }, + { + "epoch": 0.007942811755361398, + "grad_norm": 1124.8104248046875, + "learning_rate": 5.000000000000001e-07, + "log_odds_chosen": -1.4938147068023682, + "log_odds_ratio": -2.2117180824279785, + "logits/chosen": 69.36089324951172, + "logits/rejected": 133.50851440429688, + "logps/chosen": -14.369367599487305, + "logps/rejected": -12.875558853149414, + "loss": 12.448, + "nll_loss": 12.069561958312988, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.7184683680534363, + "rewards/margins": -0.07469038665294647, + "rewards/rejected": -0.6437779664993286, + "step": 10 + }, + { + "epoch": 0.011914217633042097, + "grad_norm": 571.815673828125, + "learning_rate": 7.5e-07, + "log_odds_chosen": 0.07529473304748535, + "log_odds_ratio": -0.9602964520454407, + "logits/chosen": 194.84005737304688, + "logits/rejected": 170.63455200195312, + "logps/chosen": -8.528478622436523, + "logps/rejected": -8.603917121887207, + "loss": 8.6647, + "nll_loss": 8.767313003540039, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.42642393708229065, + "rewards/margins": 0.00377195468172431, + "rewards/rejected": -0.4301958680152893, + "step": 15 + }, + { + "epoch": 0.015885623510722795, + "grad_norm": 294.7610778808594, + "learning_rate": 1.0000000000000002e-06, + "log_odds_chosen": 1.0600534677505493, + "log_odds_ratio": -0.5760771632194519, + "logits/chosen": 130.64846801757812, + "logits/rejected": 219.7195281982422, + "logps/chosen": -5.179438591003418, + "logps/rejected": -6.236131191253662, + "loss": 5.951, + "nll_loss": 5.380393028259277, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2589719295501709, + "rewards/margins": 0.05283462256193161, + "rewards/rejected": -0.3118065893650055, + "step": 20 + }, + { + "epoch": 0.019857029388403495, + "grad_norm": 175.71804809570312, + "learning_rate": 1.25e-06, + "log_odds_chosen": -0.23735050857067108, + "log_odds_ratio": -0.8487253189086914, + "logits/chosen": 127.70460510253906, + "logits/rejected": 241.3983154296875, + "logps/chosen": -3.850130796432495, + "logps/rejected": -3.6309711933135986, + "loss": 4.3668, + "nll_loss": 4.186649322509766, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.19250653684139252, + "rewards/margins": -0.010957981459796429, + "rewards/rejected": -0.18154855072498322, + "step": 25 + }, + { + "epoch": 0.023828435266084195, + "grad_norm": 124.54273986816406, + "learning_rate": 1.5e-06, + "log_odds_chosen": 0.1647549569606781, + "log_odds_ratio": -1.391825795173645, + "logits/chosen": 265.0519714355469, + "logits/rejected": 185.30813598632812, + "logps/chosen": -3.637852430343628, + "logps/rejected": -3.7852470874786377, + "loss": 3.5575, + "nll_loss": 3.7124857902526855, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.18189263343811035, + "rewards/margins": 0.007369739003479481, + "rewards/rejected": -0.18926236033439636, + "step": 30 + }, + { + "epoch": 0.02779984114376489, + "grad_norm": 197.60816955566406, + "learning_rate": 1.75e-06, + "log_odds_chosen": 0.08467637002468109, + "log_odds_ratio": -0.7092531323432922, + "logits/chosen": 209.2491455078125, + "logits/rejected": 306.3213806152344, + "logps/chosen": -2.6641006469726562, + "logps/rejected": -2.7374536991119385, + "loss": 3.3245, + "nll_loss": 2.6615092754364014, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.13320502638816833, + "rewards/margins": 0.0036676630843430758, + "rewards/rejected": -0.13687269389629364, + "step": 35 + }, + { + "epoch": 0.03177124702144559, + "grad_norm": 185.7123260498047, + "learning_rate": 2.0000000000000003e-06, + "log_odds_chosen": 0.11347303539514542, + "log_odds_ratio": -0.7381674647331238, + "logits/chosen": 297.0364990234375, + "logits/rejected": 257.74261474609375, + "logps/chosen": -2.3121635913848877, + "logps/rejected": -2.4389572143554688, + "loss": 2.4557, + "nll_loss": 3.096728801727295, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.11560817062854767, + "rewards/margins": 0.006339688785374165, + "rewards/rejected": -0.12194786220788956, + "step": 40 + }, + { + "epoch": 0.035742652899126294, + "grad_norm": 110.63021087646484, + "learning_rate": 2.25e-06, + "log_odds_chosen": -0.3320659101009369, + "log_odds_ratio": -0.9123506546020508, + "logits/chosen": 301.091552734375, + "logits/rejected": 277.54205322265625, + "logps/chosen": -1.4611170291900635, + "logps/rejected": -1.2663248777389526, + "loss": 2.169, + "nll_loss": 1.8063217401504517, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.07305584847927094, + "rewards/margins": -0.009739604778587818, + "rewards/rejected": -0.0633162409067154, + "step": 45 + }, + { + "epoch": 0.03971405877680699, + "grad_norm": 178.21774291992188, + "learning_rate": 2.5e-06, + "log_odds_chosen": 0.13006296753883362, + "log_odds_ratio": -0.6751025915145874, + "logits/chosen": 348.32208251953125, + "logits/rejected": 229.12075805664062, + "logps/chosen": -1.464900255203247, + "logps/rejected": -1.5294269323349, + "loss": 2.2912, + "nll_loss": 2.135530710220337, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07324501872062683, + "rewards/margins": 0.003226341214030981, + "rewards/rejected": -0.07647135108709335, + "step": 50 + }, + { + "epoch": 0.043685464654487687, + "grad_norm": 245.26560974121094, + "learning_rate": 2.7500000000000004e-06, + "log_odds_chosen": 0.5194543600082397, + "log_odds_ratio": -0.47707730531692505, + "logits/chosen": 271.95819091796875, + "logits/rejected": 315.78558349609375, + "logps/chosen": -1.3941795825958252, + "logps/rejected": -1.8157556056976318, + "loss": 1.9179, + "nll_loss": 1.9026470184326172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06970898061990738, + "rewards/margins": 0.021078798919916153, + "rewards/rejected": -0.09078778326511383, + "step": 55 + }, + { + "epoch": 0.04765687053216839, + "grad_norm": 50.196834564208984, + "learning_rate": 3e-06, + "log_odds_chosen": -0.3250153362751007, + "log_odds_ratio": -1.0242193937301636, + "logits/chosen": 358.44586181640625, + "logits/rejected": 318.2178955078125, + "logps/chosen": -1.6527748107910156, + "logps/rejected": -1.3916703462600708, + "loss": 2.1109, + "nll_loss": 1.7649085521697998, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.08263873308897018, + "rewards/margins": -0.013055220246315002, + "rewards/rejected": -0.06958352029323578, + "step": 60 + }, + { + "epoch": 0.051628276409849086, + "grad_norm": 108.7151107788086, + "learning_rate": 3.2500000000000002e-06, + "log_odds_chosen": 0.29782918095588684, + "log_odds_ratio": -0.6089301705360413, + "logits/chosen": 285.428466796875, + "logits/rejected": 288.96624755859375, + "logps/chosen": -1.7433933019638062, + "logps/rejected": -2.007410764694214, + "loss": 2.4942, + "nll_loss": 2.303445339202881, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08716966956853867, + "rewards/margins": 0.013200879096984863, + "rewards/rejected": -0.10037054866552353, + "step": 65 + }, + { + "epoch": 0.05559968228752978, + "grad_norm": 232.15228271484375, + "learning_rate": 3.5e-06, + "log_odds_chosen": -0.16374030709266663, + "log_odds_ratio": -0.8438912630081177, + "logits/chosen": 316.9410400390625, + "logits/rejected": 329.91485595703125, + "logps/chosen": -1.642735481262207, + "logps/rejected": -1.506341814994812, + "loss": 2.2097, + "nll_loss": 1.9609638452529907, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.08213677257299423, + "rewards/margins": -0.006819679401814938, + "rewards/rejected": -0.07531709969043732, + "step": 70 + }, + { + "epoch": 0.059571088165210485, + "grad_norm": 78.16633605957031, + "learning_rate": 3.7500000000000005e-06, + "log_odds_chosen": 0.08518339693546295, + "log_odds_ratio": -0.6790117025375366, + "logits/chosen": 318.6166076660156, + "logits/rejected": 272.7696533203125, + "logps/chosen": -1.5736838579177856, + "logps/rejected": -1.6470115184783936, + "loss": 2.0889, + "nll_loss": 2.043640375137329, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07868418842554092, + "rewards/margins": 0.0036663957871496677, + "rewards/rejected": -0.08235058933496475, + "step": 75 + }, + { + "epoch": 0.06354249404289118, + "grad_norm": 107.8170166015625, + "learning_rate": 4.000000000000001e-06, + "log_odds_chosen": 0.7262557744979858, + "log_odds_ratio": -0.4617268443107605, + "logits/chosen": 258.40740966796875, + "logits/rejected": 339.81085205078125, + "logps/chosen": -1.2588051557540894, + "logps/rejected": -1.8158533573150635, + "loss": 2.0223, + "nll_loss": 1.9233391284942627, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.06294026225805283, + "rewards/margins": 0.027852404862642288, + "rewards/rejected": -0.09079267084598541, + "step": 80 + }, + { + "epoch": 0.06751389992057188, + "grad_norm": 94.64545440673828, + "learning_rate": 4.25e-06, + "log_odds_chosen": 0.24036984145641327, + "log_odds_ratio": -0.6297720670700073, + "logits/chosen": 322.71148681640625, + "logits/rejected": 335.28228759765625, + "logps/chosen": -1.28748619556427, + "logps/rejected": -1.4782545566558838, + "loss": 1.7358, + "nll_loss": 1.7002407312393188, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.06437431275844574, + "rewards/margins": 0.009538417682051659, + "rewards/rejected": -0.07391272485256195, + "step": 85 + }, + { + "epoch": 0.07148530579825259, + "grad_norm": 133.2477264404297, + "learning_rate": 4.5e-06, + "log_odds_chosen": 0.11964414268732071, + "log_odds_ratio": -0.6827563643455505, + "logits/chosen": 325.7275695800781, + "logits/rejected": 273.9080505371094, + "logps/chosen": -1.321221947669983, + "logps/rejected": -1.4350178241729736, + "loss": 2.0792, + "nll_loss": 2.0664451122283936, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0660611018538475, + "rewards/margins": 0.005689795129001141, + "rewards/rejected": -0.07175089418888092, + "step": 90 + }, + { + "epoch": 0.07545671167593328, + "grad_norm": 75.57556915283203, + "learning_rate": 4.75e-06, + "log_odds_chosen": 0.1924123615026474, + "log_odds_ratio": -0.7588543891906738, + "logits/chosen": 268.33111572265625, + "logits/rejected": 284.4769287109375, + "logps/chosen": -1.2349271774291992, + "logps/rejected": -1.3220834732055664, + "loss": 1.8031, + "nll_loss": 2.01653790473938, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06174635887145996, + "rewards/margins": 0.0043578073382377625, + "rewards/rejected": -0.06610416620969772, + "step": 95 + }, + { + "epoch": 0.07942811755361398, + "grad_norm": 82.01164245605469, + "learning_rate": 5e-06, + "log_odds_chosen": 0.22679157555103302, + "log_odds_ratio": -0.6229124069213867, + "logits/chosen": 347.00042724609375, + "logits/rejected": 280.84796142578125, + "logps/chosen": -1.0023002624511719, + "logps/rejected": -1.172823429107666, + "loss": 1.9771, + "nll_loss": 1.778070092201233, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.050115011632442474, + "rewards/margins": 0.008526156656444073, + "rewards/rejected": -0.05864117294549942, + "step": 100 + }, + { + "epoch": 0.08339952343129468, + "grad_norm": 144.94651794433594, + "learning_rate": 4.99977039769305e-06, + "log_odds_chosen": -0.6747381091117859, + "log_odds_ratio": -1.3916146755218506, + "logits/chosen": 285.270751953125, + "logits/rejected": 392.5542907714844, + "logps/chosen": -2.4368515014648438, + "logps/rejected": -1.8863086700439453, + "loss": 2.1097, + "nll_loss": 2.505847454071045, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.12184257805347443, + "rewards/margins": -0.02752714790403843, + "rewards/rejected": -0.09431543946266174, + "step": 105 + }, + { + "epoch": 0.08737092930897537, + "grad_norm": 907.4935302734375, + "learning_rate": 4.9990816329459744e-06, + "log_odds_chosen": 0.6644043326377869, + "log_odds_ratio": -0.6343256235122681, + "logits/chosen": 355.09014892578125, + "logits/rejected": 328.604736328125, + "logps/chosen": -3.468595027923584, + "logps/rejected": -4.0634002685546875, + "loss": 2.2136, + "nll_loss": 2.77046275138855, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.17342975735664368, + "rewards/margins": 0.02974027954041958, + "rewards/rejected": -0.2031700611114502, + "step": 110 + }, + { + "epoch": 0.09134233518665608, + "grad_norm": 55.67234802246094, + "learning_rate": 4.997933832272354e-06, + "log_odds_chosen": 0.5580138564109802, + "log_odds_ratio": -0.507127046585083, + "logits/chosen": 261.1326599121094, + "logits/rejected": 371.37396240234375, + "logps/chosen": -1.3323694467544556, + "logps/rejected": -1.7812392711639404, + "loss": 1.7648, + "nll_loss": 1.5408798456192017, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.06661847233772278, + "rewards/margins": 0.02244349755346775, + "rewards/rejected": -0.08906197547912598, + "step": 115 + }, + { + "epoch": 0.09531374106433678, + "grad_norm": 155.13243103027344, + "learning_rate": 4.996327206502335e-06, + "log_odds_chosen": 0.4205778241157532, + "log_odds_ratio": -0.6450524926185608, + "logits/chosen": 249.2978973388672, + "logits/rejected": 313.87274169921875, + "logps/chosen": -1.5952913761138916, + "logps/rejected": -1.9669711589813232, + "loss": 2.1811, + "nll_loss": 2.7610068321228027, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.07976456731557846, + "rewards/margins": 0.018583994358778, + "rewards/rejected": -0.09834857285022736, + "step": 120 + }, + { + "epoch": 0.09928514694201747, + "grad_norm": 56.40447235107422, + "learning_rate": 4.994262050743902e-06, + "log_odds_chosen": -0.344782292842865, + "log_odds_ratio": -0.9613167643547058, + "logits/chosen": 297.97796630859375, + "logits/rejected": 316.97796630859375, + "logps/chosen": -1.9141199588775635, + "logps/rejected": -1.645132064819336, + "loss": 2.097, + "nll_loss": 2.657578945159912, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.09570600092411041, + "rewards/margins": -0.013449391350150108, + "rewards/rejected": -0.08225660771131516, + "step": 125 + }, + { + "epoch": 0.10325655281969817, + "grad_norm": 63.89404296875, + "learning_rate": 4.991738744328679e-06, + "log_odds_chosen": -0.5421128869056702, + "log_odds_ratio": -1.0545024871826172, + "logits/chosen": 358.645751953125, + "logits/rejected": 298.12786865234375, + "logps/chosen": -1.4808099269866943, + "logps/rejected": -1.1260448694229126, + "loss": 2.0496, + "nll_loss": 2.101855993270874, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.0740404948592186, + "rewards/margins": -0.017738252878189087, + "rewards/rejected": -0.05630224198102951, + "step": 130 + }, + { + "epoch": 0.10722795869737888, + "grad_norm": 94.53604125976562, + "learning_rate": 4.988757750742243e-06, + "log_odds_chosen": -0.7190758585929871, + "log_odds_ratio": -1.2133655548095703, + "logits/chosen": 339.0290832519531, + "logits/rejected": 269.4937438964844, + "logps/chosen": -1.472083568572998, + "logps/rejected": -1.066962718963623, + "loss": 1.8894, + "nll_loss": 2.1049113273620605, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.07360417395830154, + "rewards/margins": -0.020256036892533302, + "rewards/rejected": -0.05334814265370369, + "step": 135 + }, + { + "epoch": 0.11119936457505956, + "grad_norm": 71.55030059814453, + "learning_rate": 4.985319617538998e-06, + "log_odds_chosen": 0.4117642343044281, + "log_odds_ratio": -0.6588890552520752, + "logits/chosen": 288.08966064453125, + "logits/rejected": 382.9283142089844, + "logps/chosen": -1.4075809717178345, + "logps/rejected": -1.670000433921814, + "loss": 2.1323, + "nll_loss": 1.7244634628295898, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07037904858589172, + "rewards/margins": 0.013120980933308601, + "rewards/rejected": -0.08350002765655518, + "step": 140 + }, + { + "epoch": 0.11517077045274027, + "grad_norm": 34.86751174926758, + "learning_rate": 4.981424976241598e-06, + "log_odds_chosen": 0.4058244228363037, + "log_odds_ratio": -0.6222633123397827, + "logits/chosen": 308.98876953125, + "logits/rejected": 273.16015625, + "logps/chosen": -1.11463463306427, + "logps/rejected": -1.428716778755188, + "loss": 1.7015, + "nll_loss": 1.501156210899353, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05573173239827156, + "rewards/margins": 0.01570410653948784, + "rewards/rejected": -0.0714358389377594, + "step": 145 + }, + { + "epoch": 0.11914217633042097, + "grad_norm": 52.555850982666016, + "learning_rate": 4.977074542224941e-06, + "log_odds_chosen": -0.05959262698888779, + "log_odds_ratio": -0.7522531151771545, + "logits/chosen": 290.1683349609375, + "logits/rejected": 313.0419616699219, + "logps/chosen": -1.5048227310180664, + "logps/rejected": -1.4883638620376587, + "loss": 1.814, + "nll_loss": 1.6905310153961182, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07524113357067108, + "rewards/margins": -0.0008229411905631423, + "rewards/rejected": -0.07441819459199905, + "step": 150 + }, + { + "epoch": 0.12311358220810167, + "grad_norm": 108.9931869506836, + "learning_rate": 4.972269114584779e-06, + "log_odds_chosen": -0.10580176115036011, + "log_odds_ratio": -0.794634997844696, + "logits/chosen": 386.6126403808594, + "logits/rejected": 284.34765625, + "logps/chosen": -1.1588705778121948, + "logps/rejected": -1.1435911655426025, + "loss": 1.9924, + "nll_loss": 2.0270378589630127, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05794353038072586, + "rewards/margins": -0.0007639724644832313, + "rewards/rejected": -0.05717955902218819, + "step": 155 + }, + { + "epoch": 0.12708498808578236, + "grad_norm": 61.099525451660156, + "learning_rate": 4.9670095759909275e-06, + "log_odds_chosen": 0.3500244915485382, + "log_odds_ratio": -0.577224612236023, + "logits/chosen": 328.20562744140625, + "logits/rejected": 250.172607421875, + "logps/chosen": -1.0037747621536255, + "logps/rejected": -1.1577932834625244, + "loss": 1.8237, + "nll_loss": 1.7924268245697021, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.050188738852739334, + "rewards/margins": 0.007700921501964331, + "rewards/rejected": -0.0578896589577198, + "step": 160 + }, + { + "epoch": 0.13105639396346305, + "grad_norm": 50.632930755615234, + "learning_rate": 4.961296892525144e-06, + "log_odds_chosen": 0.3148774206638336, + "log_odds_ratio": -0.7541528940200806, + "logits/chosen": 326.50323486328125, + "logits/rejected": 326.8573913574219, + "logps/chosen": -1.0643494129180908, + "logps/rejected": -1.2868483066558838, + "loss": 2.0091, + "nll_loss": 1.6609458923339844, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05321747064590454, + "rewards/margins": 0.011124944314360619, + "rewards/rejected": -0.06434241682291031, + "step": 165 + }, + { + "epoch": 0.13502779984114377, + "grad_norm": 44.31376266479492, + "learning_rate": 4.95513211350367e-06, + "log_odds_chosen": 0.7684804797172546, + "log_odds_ratio": -0.424949586391449, + "logits/chosen": 315.80316162109375, + "logits/rejected": 317.47760009765625, + "logps/chosen": -1.0722829103469849, + "logps/rejected": -1.50656259059906, + "loss": 1.5922, + "nll_loss": 1.819898009300232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05361414700746536, + "rewards/margins": 0.0217139795422554, + "rewards/rejected": -0.07532812654972076, + "step": 170 + }, + { + "epoch": 0.13899920571882446, + "grad_norm": 37.67052459716797, + "learning_rate": 4.948516371284493e-06, + "log_odds_chosen": -0.38165563344955444, + "log_odds_ratio": -0.9263311624526978, + "logits/chosen": 399.5979919433594, + "logits/rejected": 327.8373107910156, + "logps/chosen": -1.0562649965286255, + "logps/rejected": -0.7994272112846375, + "loss": 1.7336, + "nll_loss": 1.5048203468322754, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.052813250571489334, + "rewards/margins": -0.012841887772083282, + "rewards/rejected": -0.03997135907411575, + "step": 175 + }, + { + "epoch": 0.14297061159650518, + "grad_norm": 39.37586212158203, + "learning_rate": 4.941450881059354e-06, + "log_odds_chosen": 0.14676916599273682, + "log_odds_ratio": -0.6898115873336792, + "logits/chosen": 288.27630615234375, + "logits/rejected": 291.10162353515625, + "logps/chosen": -1.1020549535751343, + "logps/rejected": -1.1935259103775024, + "loss": 1.5256, + "nll_loss": 1.1978670358657837, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05510275438427925, + "rewards/margins": 0.00457354262471199, + "rewards/rejected": -0.05967629700899124, + "step": 180 + }, + { + "epoch": 0.14694201747418587, + "grad_norm": 57.68708419799805, + "learning_rate": 4.933936940630537e-06, + "log_odds_chosen": -0.36938825249671936, + "log_odds_ratio": -0.9125706553459167, + "logits/chosen": 301.7081298828125, + "logits/rejected": 338.01690673828125, + "logps/chosen": -1.5262658596038818, + "logps/rejected": -1.2626394033432007, + "loss": 1.7968, + "nll_loss": 1.8017867803573608, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.07631329447031021, + "rewards/margins": -0.013181325979530811, + "rewards/rejected": -0.06313197314739227, + "step": 185 + }, + { + "epoch": 0.15091342335186655, + "grad_norm": 61.880126953125, + "learning_rate": 4.925975930172489e-06, + "log_odds_chosen": 0.2720580995082855, + "log_odds_ratio": -0.5754180550575256, + "logits/chosen": 240.9505157470703, + "logits/rejected": 291.4943542480469, + "logps/chosen": -1.128051519393921, + "logps/rejected": -1.3102426528930664, + "loss": 1.6575, + "nll_loss": 1.4304395914077759, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.056402575224637985, + "rewards/margins": 0.009109559468925, + "rewards/rejected": -0.06551213562488556, + "step": 190 + }, + { + "epoch": 0.15488482922954727, + "grad_norm": 128.1157684326172, + "learning_rate": 4.917569311978301e-06, + "log_odds_chosen": -0.16586491465568542, + "log_odds_ratio": -0.8022063374519348, + "logits/chosen": 339.9231872558594, + "logits/rejected": 310.51458740234375, + "logps/chosen": -1.4521989822387695, + "logps/rejected": -1.3116356134414673, + "loss": 1.8756, + "nll_loss": 1.6476871967315674, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07260995358228683, + "rewards/margins": -0.007028169929981232, + "rewards/rejected": -0.0655817836523056, + "step": 195 + }, + { + "epoch": 0.15885623510722796, + "grad_norm": 360.7728271484375, + "learning_rate": 4.9087186301911196e-06, + "log_odds_chosen": -0.15912006795406342, + "log_odds_ratio": -0.8308361172676086, + "logits/chosen": 348.33721923828125, + "logits/rejected": 337.98748779296875, + "logps/chosen": -1.4099957942962646, + "logps/rejected": -1.3773982524871826, + "loss": 1.7693, + "nll_loss": 2.075568914413452, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.07049979269504547, + "rewards/margins": -0.001629872596822679, + "rewards/rejected": -0.06886991858482361, + "step": 200 + }, + { + "epoch": 0.16282764098490865, + "grad_norm": 55.327293395996094, + "learning_rate": 4.89942551052051e-06, + "log_odds_chosen": 0.21036644279956818, + "log_odds_ratio": -0.6468175053596497, + "logits/chosen": 343.9696960449219, + "logits/rejected": 264.9131164550781, + "logps/chosen": -1.5017131567001343, + "logps/rejected": -1.672141432762146, + "loss": 1.8711, + "nll_loss": 1.952646255493164, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07508565485477448, + "rewards/margins": 0.008521410636603832, + "rewards/rejected": -0.08360707014799118, + "step": 205 + }, + { + "epoch": 0.16679904686258937, + "grad_norm": 44.57875442504883, + "learning_rate": 4.889691659943842e-06, + "log_odds_chosen": 0.6125321388244629, + "log_odds_ratio": -0.5584419965744019, + "logits/chosen": 301.97369384765625, + "logits/rejected": 392.7519836425781, + "logps/chosen": -1.1298048496246338, + "logps/rejected": -1.5633180141448975, + "loss": 1.9275, + "nll_loss": 1.591841220855713, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.056490249931812286, + "rewards/margins": 0.021675655618309975, + "rewards/rejected": -0.07816590368747711, + "step": 210 + }, + { + "epoch": 0.17077045274027006, + "grad_norm": 40.897430419921875, + "learning_rate": 4.879518866392757e-06, + "log_odds_chosen": 0.31865325570106506, + "log_odds_ratio": -0.5695816278457642, + "logits/chosen": 289.89654541015625, + "logits/rejected": 337.4901428222656, + "logps/chosen": -1.0857855081558228, + "logps/rejected": -1.3082635402679443, + "loss": 1.8816, + "nll_loss": 1.8894774913787842, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05428927391767502, + "rewards/margins": 0.011123904958367348, + "rewards/rejected": -0.06541318446397781, + "step": 215 + }, + { + "epoch": 0.17474185861795075, + "grad_norm": 57.284912109375, + "learning_rate": 4.868908998424749e-06, + "log_odds_chosen": 1.3685444593429565, + "log_odds_ratio": -0.32077115774154663, + "logits/chosen": 461.255859375, + "logits/rejected": 236.81936645507812, + "logps/chosen": -1.125225305557251, + "logps/rejected": -2.0914998054504395, + "loss": 1.8968, + "nll_loss": 2.5089685916900635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05626126378774643, + "rewards/margins": 0.04831372946500778, + "rewards/rejected": -0.10457499325275421, + "step": 220 + }, + { + "epoch": 0.17871326449563146, + "grad_norm": 48.706504821777344, + "learning_rate": 4.8578640048799435e-06, + "log_odds_chosen": 0.48141545057296753, + "log_odds_ratio": -0.5349053740501404, + "logits/chosen": 331.5611877441406, + "logits/rejected": 310.23876953125, + "logps/chosen": -1.1479623317718506, + "logps/rejected": -1.398716688156128, + "loss": 1.6831, + "nll_loss": 2.0783326625823975, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05739812180399895, + "rewards/margins": 0.012537715956568718, + "rewards/rejected": -0.06993584334850311, + "step": 225 + }, + { + "epoch": 0.18268467037331215, + "grad_norm": 40.74349594116211, + "learning_rate": 4.846385914523143e-06, + "log_odds_chosen": -0.004401213023811579, + "log_odds_ratio": -0.7065997123718262, + "logits/chosen": 321.564453125, + "logits/rejected": 321.5721130371094, + "logps/chosen": -1.444544792175293, + "logps/rejected": -1.465693473815918, + "loss": 1.6282, + "nll_loss": 1.7196967601776123, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07222724705934525, + "rewards/margins": 0.0010574304033070803, + "rewards/rejected": -0.07328467816114426, + "step": 230 + }, + { + "epoch": 0.18665607625099284, + "grad_norm": 124.04603576660156, + "learning_rate": 4.834476835671166e-06, + "log_odds_chosen": -0.40913257002830505, + "log_odds_ratio": -1.0309641361236572, + "logits/chosen": 303.4057312011719, + "logits/rejected": 345.175048828125, + "logps/chosen": -1.1636359691619873, + "logps/rejected": -0.9079948663711548, + "loss": 1.8382, + "nll_loss": 1.716036081314087, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.058181799948215485, + "rewards/margins": -0.012782062403857708, + "rewards/rejected": -0.0453997440636158, + "step": 235 + }, + { + "epoch": 0.19062748212867356, + "grad_norm": 354.8912048339844, + "learning_rate": 4.822138955805595e-06, + "log_odds_chosen": -0.03981683775782585, + "log_odds_ratio": -0.7560557126998901, + "logits/chosen": 365.21771240234375, + "logits/rejected": 281.8612060546875, + "logps/chosen": -1.5758720636367798, + "logps/rejected": -1.6159578561782837, + "loss": 1.7801, + "nll_loss": 1.9230833053588867, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.07879360020160675, + "rewards/margins": 0.0020043007098138332, + "rewards/rejected": -0.08079790323972702, + "step": 240 + }, + { + "epoch": 0.19459888800635425, + "grad_norm": 112.47100067138672, + "learning_rate": 4.809374541170974e-06, + "log_odds_chosen": 0.4120238423347473, + "log_odds_ratio": -0.5282896161079407, + "logits/chosen": 280.23223876953125, + "logits/rejected": 381.760986328125, + "logps/chosen": -1.43479323387146, + "logps/rejected": -1.7411903142929077, + "loss": 1.705, + "nll_loss": 1.610185980796814, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.07173965871334076, + "rewards/margins": 0.015319856815040112, + "rewards/rejected": -0.08705951273441315, + "step": 245 + }, + { + "epoch": 0.19857029388403494, + "grad_norm": 89.64398193359375, + "learning_rate": 4.796185936358543e-06, + "log_odds_chosen": -0.09865443408489227, + "log_odds_ratio": -0.8241540789604187, + "logits/chosen": 375.30975341796875, + "logits/rejected": 277.07080078125, + "logps/chosen": -1.1246061325073242, + "logps/rejected": -1.0761168003082275, + "loss": 1.5139, + "nll_loss": 1.5120210647583008, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05623030662536621, + "rewards/margins": -0.002424471778795123, + "rewards/rejected": -0.05380583554506302, + "step": 250 + }, + { + "epoch": 0.20254169976171565, + "grad_norm": 71.554443359375, + "learning_rate": 4.78257556387557e-06, + "log_odds_chosen": 0.17121019959449768, + "log_odds_ratio": -0.8539883494377136, + "logits/chosen": 271.27740478515625, + "logits/rejected": 308.79962158203125, + "logps/chosen": -1.4543489217758179, + "logps/rejected": -1.6729555130004883, + "loss": 1.7579, + "nll_loss": 1.7364709377288818, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.07271744310855865, + "rewards/margins": 0.010930335149168968, + "rewards/rejected": -0.08364777266979218, + "step": 255 + }, + { + "epoch": 0.20651310563939634, + "grad_norm": 38.314876556396484, + "learning_rate": 4.7685459237003954e-06, + "log_odds_chosen": 0.2899346947669983, + "log_odds_ratio": -0.614177942276001, + "logits/chosen": 298.9414367675781, + "logits/rejected": 384.4459228515625, + "logps/chosen": -1.6951465606689453, + "logps/rejected": -1.8937695026397705, + "loss": 1.8883, + "nll_loss": 1.8541686534881592, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08475733548402786, + "rewards/margins": 0.009931142441928387, + "rewards/rejected": -0.09468847513198853, + "step": 260 + }, + { + "epoch": 0.21048451151707703, + "grad_norm": 72.83179473876953, + "learning_rate": 4.754099592823216e-06, + "log_odds_chosen": 0.8651946783065796, + "log_odds_ratio": -0.5348523855209351, + "logits/chosen": 358.8797912597656, + "logits/rejected": 280.5845031738281, + "logps/chosen": -1.027199387550354, + "logps/rejected": -1.6986967325210571, + "loss": 1.512, + "nll_loss": 1.2993109226226807, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05135997012257576, + "rewards/margins": 0.033574867993593216, + "rewards/rejected": -0.08493484556674957, + "step": 265 + }, + { + "epoch": 0.21445591739475775, + "grad_norm": 44.400272369384766, + "learning_rate": 4.739239224772751e-06, + "log_odds_chosen": -0.05065007135272026, + "log_odds_ratio": -0.8082035183906555, + "logits/chosen": 298.37335205078125, + "logits/rejected": 344.8133239746094, + "logps/chosen": -1.5913331508636475, + "logps/rejected": -1.4946712255477905, + "loss": 1.6712, + "nll_loss": 1.572104811668396, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07956665754318237, + "rewards/margins": -0.004833097103983164, + "rewards/rejected": -0.07473356276750565, + "step": 270 + }, + { + "epoch": 0.21842732327243844, + "grad_norm": 59.13341522216797, + "learning_rate": 4.7239675491288285e-06, + "log_odds_chosen": 0.4572678506374359, + "log_odds_ratio": -0.5575239062309265, + "logits/chosen": 366.19207763671875, + "logits/rejected": 318.4181213378906, + "logps/chosen": -1.4838532209396362, + "logps/rejected": -1.8985198736190796, + "loss": 1.6193, + "nll_loss": 1.7495654821395874, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07419265806674957, + "rewards/margins": 0.020733339712023735, + "rewards/rejected": -0.09492600709199905, + "step": 275 + }, + { + "epoch": 0.22239872915011913, + "grad_norm": 153.78656005859375, + "learning_rate": 4.708287371021016e-06, + "log_odds_chosen": 0.4286623001098633, + "log_odds_ratio": -0.6032269597053528, + "logits/chosen": 264.1981506347656, + "logits/rejected": 437.3480529785156, + "logps/chosen": -1.4498531818389893, + "logps/rejected": -1.8169384002685547, + "loss": 1.6785, + "nll_loss": 1.932142972946167, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07249265164136887, + "rewards/margins": 0.018354270607233047, + "rewards/rejected": -0.09084691852331161, + "step": 280 + }, + { + "epoch": 0.22637013502779985, + "grad_norm": 55.48463821411133, + "learning_rate": 4.692201570613367e-06, + "log_odds_chosen": -0.38779932260513306, + "log_odds_ratio": -0.9519031643867493, + "logits/chosen": 315.96405029296875, + "logits/rejected": 441.1253967285156, + "logps/chosen": -1.785449743270874, + "logps/rejected": -1.507177472114563, + "loss": 1.9603, + "nll_loss": 1.9527504444122314, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.08927249163389206, + "rewards/margins": -0.013913619332015514, + "rewards/rejected": -0.07535887509584427, + "step": 285 + }, + { + "epoch": 0.23034154090548054, + "grad_norm": 196.46524047851562, + "learning_rate": 4.675713102575389e-06, + "log_odds_chosen": 0.13416634500026703, + "log_odds_ratio": -0.6926567554473877, + "logits/chosen": 325.7991943359375, + "logits/rejected": 359.30426025390625, + "logps/chosen": -1.287474274635315, + "logps/rejected": -1.4152565002441406, + "loss": 1.8866, + "nll_loss": 1.7071374654769897, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06437370926141739, + "rewards/margins": 0.006389112211763859, + "rewards/rejected": -0.07076282799243927, + "step": 290 + }, + { + "epoch": 0.23431294678316125, + "grad_norm": 40.833988189697266, + "learning_rate": 4.658824995539318e-06, + "log_odds_chosen": -0.3848091959953308, + "log_odds_ratio": -0.9408342242240906, + "logits/chosen": 466.2362365722656, + "logits/rejected": 305.64935302734375, + "logps/chosen": -1.5434811115264893, + "logps/rejected": -1.2438277006149292, + "loss": 1.7838, + "nll_loss": 1.8352413177490234, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.07717405259609222, + "rewards/margins": -0.01498266588896513, + "rewards/rejected": -0.06219138950109482, + "step": 295 + }, + { + "epoch": 0.23828435266084194, + "grad_norm": 85.50627899169922, + "learning_rate": 4.641540351543826e-06, + "log_odds_chosen": -0.06552493572235107, + "log_odds_ratio": -0.8229795694351196, + "logits/chosen": 306.17236328125, + "logits/rejected": 333.0214538574219, + "logps/chosen": -1.408279538154602, + "logps/rejected": -1.4727718830108643, + "loss": 1.7687, + "nll_loss": 1.7104421854019165, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0704139694571495, + "rewards/margins": 0.0032246210612356663, + "rewards/rejected": -0.07363860309123993, + "step": 300 + }, + { + "epoch": 0.24225575853852263, + "grad_norm": 52.15938186645508, + "learning_rate": 4.6238623454642215e-06, + "log_odds_chosen": -0.7005519270896912, + "log_odds_ratio": -1.180597186088562, + "logits/chosen": 393.58575439453125, + "logits/rejected": 291.7729187011719, + "logps/chosen": -1.5035789012908936, + "logps/rejected": -1.0074363946914673, + "loss": 1.9541, + "nll_loss": 1.8733670711517334, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.07517894357442856, + "rewards/margins": -0.02480713650584221, + "rewards/rejected": -0.050371818244457245, + "step": 305 + }, + { + "epoch": 0.24622716441620335, + "grad_norm": 71.7739028930664, + "learning_rate": 4.605794224429286e-06, + "log_odds_chosen": 0.5956661105155945, + "log_odds_ratio": -0.5032454133033752, + "logits/chosen": 287.00799560546875, + "logits/rejected": 278.5837707519531, + "logps/chosen": -1.3277604579925537, + "logps/rejected": -1.8034179210662842, + "loss": 2.0805, + "nll_loss": 2.333066463470459, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06638802587985992, + "rewards/margins": 0.023782875388860703, + "rewards/rejected": -0.09017090499401093, + "step": 310 + }, + { + "epoch": 0.25019857029388404, + "grad_norm": 95.7601089477539, + "learning_rate": 4.587339307224837e-06, + "log_odds_chosen": 0.4633815884590149, + "log_odds_ratio": -0.5474061965942383, + "logits/chosen": 285.03057861328125, + "logits/rejected": 313.64971923828125, + "logps/chosen": -1.064753532409668, + "logps/rejected": -1.4205200672149658, + "loss": 1.674, + "nll_loss": 1.6759742498397827, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0532376766204834, + "rewards/margins": 0.01778833009302616, + "rewards/rejected": -0.07102601230144501, + "step": 315 + }, + { + "epoch": 0.2541699761715647, + "grad_norm": 35.280120849609375, + "learning_rate": 4.568500983684127e-06, + "log_odds_chosen": 1.0717216730117798, + "log_odds_ratio": -0.3502858281135559, + "logits/chosen": 316.9742431640625, + "logits/rejected": 361.8272705078125, + "logps/chosen": -1.0445092916488647, + "logps/rejected": -1.8099143505096436, + "loss": 1.5821, + "nll_loss": 1.5527000427246094, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05222546309232712, + "rewards/margins": 0.0382702499628067, + "rewards/rejected": -0.09049571305513382, + "step": 320 + }, + { + "epoch": 0.2581413820492454, + "grad_norm": 56.177520751953125, + "learning_rate": 4.54928271406519e-06, + "log_odds_chosen": 0.08797760307788849, + "log_odds_ratio": -0.6692509055137634, + "logits/chosen": 311.7930603027344, + "logits/rejected": 325.4112854003906, + "logps/chosen": -0.8228706121444702, + "logps/rejected": -0.8611005544662476, + "loss": 1.6019, + "nll_loss": 1.4109102487564087, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.04114353656768799, + "rewards/margins": 0.0019115001196041703, + "rewards/rejected": -0.043055035173892975, + "step": 325 + }, + { + "epoch": 0.2621127879269261, + "grad_norm": 49.081207275390625, + "learning_rate": 4.529688028415259e-06, + "log_odds_chosen": 0.5036702156066895, + "log_odds_ratio": -0.5119932293891907, + "logits/chosen": 349.2168273925781, + "logits/rejected": 417.1556091308594, + "logps/chosen": -1.1619529724121094, + "logps/rejected": -1.4640512466430664, + "loss": 1.7083, + "nll_loss": 1.5884101390838623, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05809764936566353, + "rewards/margins": 0.015104919672012329, + "rewards/rejected": -0.07320256531238556, + "step": 330 + }, + { + "epoch": 0.26608419380460685, + "grad_norm": 59.9859619140625, + "learning_rate": 4.509720525922356e-06, + "log_odds_chosen": -0.2589249014854431, + "log_odds_ratio": -0.8613080978393555, + "logits/chosen": 319.08282470703125, + "logits/rejected": 374.44512939453125, + "logps/chosen": -1.1872944831848145, + "logps/rejected": -1.0131120681762695, + "loss": 1.6996, + "nll_loss": 1.4569952487945557, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05936472490429878, + "rewards/margins": -0.008709125220775604, + "rewards/rejected": -0.05065560340881348, + "step": 335 + }, + { + "epoch": 0.27005559968228754, + "grad_norm": 34.96027755737305, + "learning_rate": 4.489383874254193e-06, + "log_odds_chosen": 0.19578817486763, + "log_odds_ratio": -0.6897228360176086, + "logits/chosen": 308.80645751953125, + "logits/rejected": 363.35272216796875, + "logps/chosen": -1.0954322814941406, + "logps/rejected": -1.26204514503479, + "loss": 1.8048, + "nll_loss": 1.4526374340057373, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.05477161332964897, + "rewards/margins": 0.008330638520419598, + "rewards/rejected": -0.06310225278139114, + "step": 340 + }, + { + "epoch": 0.27402700555996823, + "grad_norm": 88.31672668457031, + "learning_rate": 4.46868180888448e-06, + "log_odds_chosen": 0.5558279752731323, + "log_odds_ratio": -0.6389625072479248, + "logits/chosen": 343.12237548828125, + "logits/rejected": 301.82843017578125, + "logps/chosen": -2.0307748317718506, + "logps/rejected": -2.624824047088623, + "loss": 1.8654, + "nll_loss": 2.4084858894348145, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1015387549996376, + "rewards/margins": 0.029702449217438698, + "rewards/rejected": -0.13124120235443115, + "step": 345 + }, + { + "epoch": 0.2779984114376489, + "grad_norm": 36.35618209838867, + "learning_rate": 4.447618132406788e-06, + "log_odds_chosen": 0.05257995054125786, + "log_odds_ratio": -0.7060452699661255, + "logits/chosen": 395.55926513671875, + "logits/rejected": 324.0714416503906, + "logps/chosen": -1.2713950872421265, + "logps/rejected": -1.3762328624725342, + "loss": 1.807, + "nll_loss": 1.739328145980835, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06356975436210632, + "rewards/margins": 0.005241888575255871, + "rewards/rejected": -0.06881164014339447, + "step": 350 + }, + { + "epoch": 0.2819698173153296, + "grad_norm": 56.2636604309082, + "learning_rate": 4.4261967138360905e-06, + "log_odds_chosen": -0.501254141330719, + "log_odds_ratio": -1.0499690771102905, + "logits/chosen": 375.1532897949219, + "logits/rejected": 253.8148193359375, + "logps/chosen": -1.6551625728607178, + "logps/rejected": -1.2964580059051514, + "loss": 1.5264, + "nll_loss": 1.8501193523406982, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.08275812864303589, + "rewards/margins": -0.017935223877429962, + "rewards/rejected": -0.06482290476560593, + "step": 355 + }, + { + "epoch": 0.28594122319301035, + "grad_norm": 87.0551528930664, + "learning_rate": 4.404421487898083e-06, + "log_odds_chosen": -0.14473596215248108, + "log_odds_ratio": -0.7785184383392334, + "logits/chosen": 331.86968994140625, + "logits/rejected": 319.239013671875, + "logps/chosen": -1.191341519355774, + "logps/rejected": -1.0848348140716553, + "loss": 1.6036, + "nll_loss": 1.8287181854248047, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.059567082673311234, + "rewards/margins": -0.005325344391167164, + "rewards/rejected": -0.054241735488176346, + "step": 360 + }, + { + "epoch": 0.28991262907069104, + "grad_norm": 42.729244232177734, + "learning_rate": 4.3822964543064536e-06, + "log_odds_chosen": -0.40415963530540466, + "log_odds_ratio": -0.9470674395561218, + "logits/chosen": 336.14984130859375, + "logits/rejected": 290.5699157714844, + "logps/chosen": -1.3225958347320557, + "logps/rejected": -0.9997372627258301, + "loss": 1.6299, + "nll_loss": 1.5339610576629639, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.06612979620695114, + "rewards/margins": -0.01614293083548546, + "rewards/rejected": -0.04998686537146568, + "step": 365 + }, + { + "epoch": 0.29388403494837173, + "grad_norm": 46.94746780395508, + "learning_rate": 4.359825677028206e-06, + "log_odds_chosen": -0.8854487538337708, + "log_odds_ratio": -1.2963764667510986, + "logits/chosen": 375.13153076171875, + "logits/rejected": 305.2864990234375, + "logps/chosen": -1.3724980354309082, + "logps/rejected": -0.7736121416091919, + "loss": 1.7241, + "nll_loss": 1.8799304962158203, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.06862489879131317, + "rewards/margins": -0.029944291338324547, + "rewards/rejected": -0.038680605590343475, + "step": 370 + }, + { + "epoch": 0.2978554408260524, + "grad_norm": 55.18174743652344, + "learning_rate": 4.337013283537182e-06, + "log_odds_chosen": 0.2510035037994385, + "log_odds_ratio": -0.6310834884643555, + "logits/chosen": 368.63214111328125, + "logits/rejected": 276.0588684082031, + "logps/chosen": -1.1369847059249878, + "logps/rejected": -1.3218457698822021, + "loss": 1.7533, + "nll_loss": 1.9112876653671265, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05684923380613327, + "rewards/margins": 0.009243053384125233, + "rewards/rejected": -0.06609228998422623, + "step": 375 + }, + { + "epoch": 0.3018268467037331, + "grad_norm": 41.9847526550293, + "learning_rate": 4.3138634640559185e-06, + "log_odds_chosen": 0.3149817883968353, + "log_odds_ratio": -0.5802162289619446, + "logits/chosen": 320.53692626953125, + "logits/rejected": 413.7608337402344, + "logps/chosen": -1.536766767501831, + "logps/rejected": -1.7898311614990234, + "loss": 1.7084, + "nll_loss": 1.9358304738998413, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.07683834433555603, + "rewards/margins": 0.012653221376240253, + "rewards/rejected": -0.08949156850576401, + "step": 380 + }, + { + "epoch": 0.3057982525814138, + "grad_norm": 36.58580780029297, + "learning_rate": 4.290380470785984e-06, + "log_odds_chosen": -0.44627267122268677, + "log_odds_ratio": -0.9523780941963196, + "logits/chosen": 384.55609130859375, + "logits/rejected": 288.72259521484375, + "logps/chosen": -1.3820741176605225, + "logps/rejected": -1.060802698135376, + "loss": 1.7563, + "nll_loss": 1.8211044073104858, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.06910370290279388, + "rewards/margins": -0.016063563525676727, + "rewards/rejected": -0.05304013937711716, + "step": 385 + }, + { + "epoch": 0.30976965845909454, + "grad_norm": 41.187171936035156, + "learning_rate": 4.266568617126919e-06, + "log_odds_chosen": -0.3663569390773773, + "log_odds_ratio": -1.1484845876693726, + "logits/chosen": 353.6982727050781, + "logits/rejected": 327.0970764160156, + "logps/chosen": -1.3744146823883057, + "logps/rejected": -1.1000757217407227, + "loss": 1.7097, + "nll_loss": 1.7982555627822876, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.0687207281589508, + "rewards/margins": -0.013716941699385643, + "rewards/rejected": -0.05500379204750061, + "step": 390 + }, + { + "epoch": 0.31374106433677523, + "grad_norm": 50.873226165771484, + "learning_rate": 4.2424322768839534e-06, + "log_odds_chosen": 0.5074528455734253, + "log_odds_ratio": -0.5399994254112244, + "logits/chosen": 336.0807800292969, + "logits/rejected": 341.9483947753906, + "logps/chosen": -1.1645238399505615, + "logps/rejected": -1.5138776302337646, + "loss": 1.6867, + "nll_loss": 1.4324095249176025, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05822619050741196, + "rewards/margins": 0.017467692494392395, + "rewards/rejected": -0.07569388300180435, + "step": 395 + }, + { + "epoch": 0.3177124702144559, + "grad_norm": 31.38866424560547, + "learning_rate": 4.217975883464604e-06, + "log_odds_chosen": 0.4223383367061615, + "log_odds_ratio": -0.557848334312439, + "logits/chosen": 341.1311950683594, + "logits/rejected": 330.12530517578125, + "logps/chosen": -0.972659707069397, + "logps/rejected": -1.1718895435333252, + "loss": 1.5047, + "nll_loss": 1.4151824712753296, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04863298311829567, + "rewards/margins": 0.009961498901247978, + "rewards/rejected": -0.0585944838821888, + "step": 400 + }, + { + "epoch": 0.3216838760921366, + "grad_norm": 113.73686218261719, + "learning_rate": 4.1932039290643534e-06, + "log_odds_chosen": -0.6433423161506653, + "log_odds_ratio": -1.1012665033340454, + "logits/chosen": 340.7366638183594, + "logits/rejected": 359.9513854980469, + "logps/chosen": -1.2754091024398804, + "logps/rejected": -0.9146528244018555, + "loss": 1.5981, + "nll_loss": 1.4592812061309814, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.06377045810222626, + "rewards/margins": -0.018037814646959305, + "rewards/rejected": -0.04573264718055725, + "step": 405 + }, + { + "epoch": 0.3256552819698173, + "grad_norm": 27.97637367248535, + "learning_rate": 4.168120963841501e-06, + "log_odds_chosen": 0.515986979007721, + "log_odds_ratio": -0.4868692457675934, + "logits/chosen": 322.2940979003906, + "logits/rejected": 315.605224609375, + "logps/chosen": -1.0631706714630127, + "logps/rejected": -1.4311316013336182, + "loss": 1.5618, + "nll_loss": 1.2970093488693237, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.053158532828092575, + "rewards/margins": 0.018398040905594826, + "rewards/rejected": -0.07155657559633255, + "step": 410 + }, + { + "epoch": 0.329626687847498, + "grad_norm": 111.9078598022461, + "learning_rate": 4.142731595081394e-06, + "log_odds_chosen": -0.7611304521560669, + "log_odds_ratio": -1.230291485786438, + "logits/chosen": 472.66253662109375, + "logits/rejected": 277.21435546875, + "logps/chosen": -2.0160393714904785, + "logps/rejected": -1.417152762413025, + "loss": 1.6858, + "nll_loss": 2.240736484527588, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.1008019670844078, + "rewards/margins": -0.029944339767098427, + "rewards/rejected": -0.07085762917995453, + "step": 415 + }, + { + "epoch": 0.33359809372517873, + "grad_norm": 55.40426254272461, + "learning_rate": 4.117040486350141e-06, + "log_odds_chosen": -0.27019426226615906, + "log_odds_ratio": -0.9864810705184937, + "logits/chosen": 320.2743225097656, + "logits/rejected": 390.2867736816406, + "logps/chosen": -1.4704816341400146, + "logps/rejected": -1.3386101722717285, + "loss": 2.1069, + "nll_loss": 1.8192126750946045, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.07352408021688461, + "rewards/margins": -0.00659357151016593, + "rewards/rejected": -0.06693051010370255, + "step": 420 + }, + { + "epoch": 0.3375694996028594, + "grad_norm": 80.17561340332031, + "learning_rate": 4.0910523566380115e-06, + "log_odds_chosen": 0.49502259492874146, + "log_odds_ratio": -0.5022796392440796, + "logits/chosen": 338.86505126953125, + "logits/rejected": 331.8837890625, + "logps/chosen": -1.1411818265914917, + "logps/rejected": -1.491014003753662, + "loss": 1.5704, + "nll_loss": 1.140134572982788, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.057059090584516525, + "rewards/margins": 0.01749161258339882, + "rewards/rejected": -0.07455070316791534, + "step": 425 + }, + { + "epoch": 0.3415409054805401, + "grad_norm": 40.142581939697266, + "learning_rate": 4.06477197949263e-06, + "log_odds_chosen": 0.31735625863075256, + "log_odds_ratio": -0.6405702233314514, + "logits/chosen": 370.8393859863281, + "logits/rejected": 353.68585205078125, + "logps/chosen": -1.5020195245742798, + "logps/rejected": -1.7753969430923462, + "loss": 1.6606, + "nll_loss": 2.0026352405548096, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07510097324848175, + "rewards/margins": 0.01366887241601944, + "rewards/rejected": -0.08876985311508179, + "step": 430 + }, + { + "epoch": 0.3455123113582208, + "grad_norm": 39.17948913574219, + "learning_rate": 4.038204182142174e-06, + "log_odds_chosen": -0.04409918934106827, + "log_odds_ratio": -1.0017715692520142, + "logits/chosen": 365.0375061035156, + "logits/rejected": 287.4947814941406, + "logps/chosen": -1.5504963397979736, + "logps/rejected": -1.3872708082199097, + "loss": 1.754, + "nll_loss": 1.8789138793945312, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0775248184800148, + "rewards/margins": -0.008161274716258049, + "rewards/rejected": -0.0693635419011116, + "step": 435 + }, + { + "epoch": 0.3494837172359015, + "grad_norm": 65.36585998535156, + "learning_rate": 4.011353844608695e-06, + "log_odds_chosen": 0.25077468156814575, + "log_odds_ratio": -0.5847761034965515, + "logits/chosen": 398.1842346191406, + "logits/rejected": 302.77557373046875, + "logps/chosen": -1.1319334506988525, + "logps/rejected": -1.2919623851776123, + "loss": 1.5668, + "nll_loss": 1.5274730920791626, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.056596674025058746, + "rewards/margins": 0.008001448586583138, + "rewards/rejected": -0.06459812819957733, + "step": 440 + }, + { + "epoch": 0.3534551231135822, + "grad_norm": 66.42986297607422, + "learning_rate": 3.9842258988117435e-06, + "log_odds_chosen": 0.39154669642448425, + "log_odds_ratio": -0.5791794657707214, + "logits/chosen": 292.7413635253906, + "logits/rejected": 333.33306884765625, + "logps/chosen": -1.3780268430709839, + "logps/rejected": -1.5879228115081787, + "loss": 1.6821, + "nll_loss": 1.878339409828186, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06890134513378143, + "rewards/margins": 0.010494804009795189, + "rewards/rejected": -0.07939615100622177, + "step": 445 + }, + { + "epoch": 0.3574265289912629, + "grad_norm": 41.643070220947266, + "learning_rate": 3.9568253276624704e-06, + "log_odds_chosen": 0.3447812795639038, + "log_odds_ratio": -0.6143133044242859, + "logits/chosen": 308.4149169921875, + "logits/rejected": 373.98712158203125, + "logps/chosen": -1.146469235420227, + "logps/rejected": -1.3937455415725708, + "loss": 1.7364, + "nll_loss": 1.5407047271728516, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05732346326112747, + "rewards/margins": 0.012363811954855919, + "rewards/rejected": -0.06968727707862854, + "step": 450 + }, + { + "epoch": 0.3613979348689436, + "grad_norm": 51.8876953125, + "learning_rate": 3.929157164148352e-06, + "log_odds_chosen": 0.25782614946365356, + "log_odds_ratio": -0.6060336828231812, + "logits/chosen": 289.9158020019531, + "logits/rejected": 339.3231201171875, + "logps/chosen": -0.9036432504653931, + "logps/rejected": -1.0764741897583008, + "loss": 1.831, + "nll_loss": 1.5824403762817383, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.045182161033153534, + "rewards/margins": 0.008641545660793781, + "rewards/rejected": -0.05382370948791504, + "step": 455 + }, + { + "epoch": 0.3653693407466243, + "grad_norm": 54.782958984375, + "learning_rate": 3.901226490408728e-06, + "log_odds_chosen": 0.2922573685646057, + "log_odds_ratio": -0.5908970236778259, + "logits/chosen": 307.7389221191406, + "logits/rejected": 323.6591796875, + "logps/chosen": -1.3589437007904053, + "logps/rejected": -1.5952913761138916, + "loss": 1.5749, + "nll_loss": 1.6173921823501587, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.06794719398021698, + "rewards/margins": 0.011817372404038906, + "rewards/rejected": -0.07976455986499786, + "step": 460 + }, + { + "epoch": 0.369340746624305, + "grad_norm": 30.29743194580078, + "learning_rate": 3.873038436801298e-06, + "log_odds_chosen": 0.310823529958725, + "log_odds_ratio": -0.662110447883606, + "logits/chosen": 300.14251708984375, + "logits/rejected": 391.355712890625, + "logps/chosen": -1.3028764724731445, + "logps/rejected": -1.4683092832565308, + "loss": 1.714, + "nll_loss": 1.549576997756958, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.06514382362365723, + "rewards/margins": 0.008271644823253155, + "rewards/rejected": -0.07341547310352325, + "step": 465 + }, + { + "epoch": 0.3733121525019857, + "grad_norm": 59.415611267089844, + "learning_rate": 3.8445981809597715e-06, + "log_odds_chosen": -0.1661299765110016, + "log_odds_ratio": -0.8740431070327759, + "logits/chosen": 272.77264404296875, + "logits/rejected": 408.80975341796875, + "logps/chosen": -1.0485327243804932, + "logps/rejected": -1.0229226350784302, + "loss": 1.4859, + "nll_loss": 1.2975164651870728, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05242663621902466, + "rewards/margins": -0.0012805074220523238, + "rewards/rejected": -0.05114613100886345, + "step": 470 + }, + { + "epoch": 0.37728355837966643, + "grad_norm": 38.312618255615234, + "learning_rate": 3.815910946842828e-06, + "log_odds_chosen": -0.014984751120209694, + "log_odds_ratio": -0.8228279948234558, + "logits/chosen": 292.1250305175781, + "logits/rejected": 363.7527770996094, + "logps/chosen": -1.3335845470428467, + "logps/rejected": -1.5042054653167725, + "loss": 1.5739, + "nll_loss": 1.5051485300064087, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0666792243719101, + "rewards/margins": 0.008531046099960804, + "rewards/rejected": -0.07521027326583862, + "step": 475 + }, + { + "epoch": 0.3812549642573471, + "grad_norm": 56.91576385498047, + "learning_rate": 3.7869820037745773e-06, + "log_odds_chosen": -0.030622828751802444, + "log_odds_ratio": -0.7795476913452148, + "logits/chosen": 385.81536865234375, + "logits/rejected": 331.4559631347656, + "logps/chosen": -1.2507215738296509, + "logps/rejected": -1.279594898223877, + "loss": 1.6582, + "nll_loss": 1.5629332065582275, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0625360757112503, + "rewards/margins": 0.0014436636120080948, + "rewards/rejected": -0.06397974491119385, + "step": 480 + }, + { + "epoch": 0.3852263701350278, + "grad_norm": 147.40060424804688, + "learning_rate": 3.7578166654766695e-06, + "log_odds_chosen": 0.012112426571547985, + "log_odds_ratio": -0.8280073404312134, + "logits/chosen": 324.9056091308594, + "logits/rejected": 301.1451721191406, + "logps/chosen": -1.0767914056777954, + "logps/rejected": -0.936957836151123, + "loss": 1.6, + "nll_loss": 1.675865888595581, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05383957177400589, + "rewards/margins": -0.006991674192249775, + "rewards/rejected": -0.04684789478778839, + "step": 485 + }, + { + "epoch": 0.3891977760127085, + "grad_norm": 62.930694580078125, + "learning_rate": 3.7284202890922707e-06, + "log_odds_chosen": -0.3624054789543152, + "log_odds_ratio": -0.9134989976882935, + "logits/chosen": 387.71923828125, + "logits/rejected": 309.9226989746094, + "logps/chosen": -1.203176498413086, + "logps/rejected": -0.9530506134033203, + "loss": 1.6178, + "nll_loss": 1.5490739345550537, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.060158826410770416, + "rewards/margins": -0.012506293132901192, + "rewards/rejected": -0.047652535140514374, + "step": 490 + }, + { + "epoch": 0.3931691818903892, + "grad_norm": 44.98346710205078, + "learning_rate": 3.698798274202048e-06, + "log_odds_chosen": 1.106994867324829, + "log_odds_ratio": -0.4969615340232849, + "logits/chosen": 416.2843322753906, + "logits/rejected": 322.34722900390625, + "logps/chosen": -1.0825506448745728, + "logps/rejected": -1.9832751750946045, + "loss": 1.4863, + "nll_loss": 1.3532053232192993, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0541275329887867, + "rewards/margins": 0.045036230236291885, + "rewards/rejected": -0.09916376322507858, + "step": 495 + }, + { + "epoch": 0.3971405877680699, + "grad_norm": 71.20710754394531, + "learning_rate": 3.668956061832365e-06, + "log_odds_chosen": 0.18296189606189728, + "log_odds_ratio": -0.6548603773117065, + "logits/chosen": 375.1370544433594, + "logits/rejected": 340.265869140625, + "logps/chosen": -1.1238301992416382, + "logps/rejected": -1.242456078529358, + "loss": 1.7543, + "nll_loss": 2.0999672412872314, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05619151517748833, + "rewards/margins": 0.005931290797889233, + "rewards/rejected": -0.062122803181409836, + "step": 500 + }, + { + "epoch": 0.4011119936457506, + "grad_norm": 43.03315353393555, + "learning_rate": 3.6388991334558665e-06, + "log_odds_chosen": -0.3676120638847351, + "log_odds_ratio": -0.9084262847900391, + "logits/chosen": 356.4194030761719, + "logits/rejected": 317.63275146484375, + "logps/chosen": -1.1387929916381836, + "logps/rejected": -0.8903687596321106, + "loss": 1.4812, + "nll_loss": 1.3678486347198486, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.05693964287638664, + "rewards/margins": -0.012421206571161747, + "rewards/rejected": -0.04451843723654747, + "step": 505 + }, + { + "epoch": 0.4050833995234313, + "grad_norm": 27.506656646728516, + "learning_rate": 3.6086330099846274e-06, + "log_odds_chosen": -0.5120627880096436, + "log_odds_ratio": -1.0131856203079224, + "logits/chosen": 349.39093017578125, + "logits/rejected": 328.4683532714844, + "logps/chosen": -1.0289686918258667, + "logps/rejected": -0.724139392375946, + "loss": 1.7849, + "nll_loss": 1.7454910278320312, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.05144844204187393, + "rewards/margins": -0.015241468325257301, + "rewards/rejected": -0.03620696812868118, + "step": 510 + }, + { + "epoch": 0.409054805401112, + "grad_norm": 61.25779724121094, + "learning_rate": 3.578163250756065e-06, + "log_odds_chosen": 0.4315710663795471, + "log_odds_ratio": -0.5859761834144592, + "logits/chosen": 317.2684631347656, + "logits/rejected": 341.7981262207031, + "logps/chosen": -1.1744743585586548, + "logps/rejected": -1.5218207836151123, + "loss": 1.6567, + "nll_loss": 1.4415868520736694, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05872371047735214, + "rewards/margins": 0.017367318272590637, + "rewards/rejected": -0.07609103620052338, + "step": 515 + }, + { + "epoch": 0.4130262112787927, + "grad_norm": 74.8428955078125, + "learning_rate": 3.5474954525117887e-06, + "log_odds_chosen": 0.05642819404602051, + "log_odds_ratio": -0.8421560525894165, + "logits/chosen": 321.3084716796875, + "logits/rejected": 286.5706787109375, + "logps/chosen": -1.519960641860962, + "logps/rejected": -1.6510608196258545, + "loss": 1.7797, + "nll_loss": 1.7759168148040771, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.07599803060293198, + "rewards/margins": 0.006555011961609125, + "rewards/rejected": -0.08255304396152496, + "step": 520 + }, + { + "epoch": 0.4169976171564734, + "grad_norm": 50.956214904785156, + "learning_rate": 3.5166352483695803e-06, + "log_odds_chosen": 0.43155431747436523, + "log_odds_ratio": -0.5425541400909424, + "logits/chosen": 367.09210205078125, + "logits/rejected": 334.6671447753906, + "logps/chosen": -1.14950430393219, + "logps/rejected": -1.4378228187561035, + "loss": 1.7118, + "nll_loss": 1.758131742477417, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05747520923614502, + "rewards/margins": 0.014415934681892395, + "rewards/rejected": -0.07189114391803741, + "step": 525 + }, + { + "epoch": 0.42096902303415407, + "grad_norm": 91.34703826904297, + "learning_rate": 3.4855883067886888e-06, + "log_odds_chosen": 0.24526679515838623, + "log_odds_ratio": -0.6680157780647278, + "logits/chosen": 316.42547607421875, + "logits/rejected": 285.8825378417969, + "logps/chosen": -0.9753881692886353, + "logps/rejected": -1.2148548364639282, + "loss": 1.4501, + "nll_loss": 1.2995529174804688, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.04876940697431564, + "rewards/margins": 0.011973332613706589, + "rewards/rejected": -0.060742735862731934, + "step": 530 + }, + { + "epoch": 0.4249404289118348, + "grad_norm": 110.02918243408203, + "learning_rate": 3.4543603305286432e-06, + "log_odds_chosen": 0.0072197020053863525, + "log_odds_ratio": -0.7949660420417786, + "logits/chosen": 320.6426696777344, + "logits/rejected": 313.28411865234375, + "logps/chosen": -1.0177392959594727, + "logps/rejected": -0.8970460891723633, + "loss": 1.6384, + "nll_loss": 1.6469953060150146, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05088697001338005, + "rewards/margins": -0.006034668534994125, + "rewards/rejected": -0.044852301478385925, + "step": 535 + }, + { + "epoch": 0.4289118347895155, + "grad_norm": 39.02376937866211, + "learning_rate": 3.422957055601758e-06, + "log_odds_chosen": -0.32208532094955444, + "log_odds_ratio": -0.9446969032287598, + "logits/chosen": 281.58331298828125, + "logits/rejected": 433.8438415527344, + "logps/chosen": -1.2129267454147339, + "logps/rejected": -0.957917332649231, + "loss": 1.5224, + "nll_loss": 1.474649429321289, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.060646336525678635, + "rewards/margins": -0.012750471010804176, + "rewards/rejected": -0.04789586737751961, + "step": 540 + }, + { + "epoch": 0.4328832406671962, + "grad_norm": 72.89534759521484, + "learning_rate": 3.3913842502195256e-06, + "log_odds_chosen": 0.10887251049280167, + "log_odds_ratio": -0.6846013069152832, + "logits/chosen": 337.80560302734375, + "logits/rejected": 373.49957275390625, + "logps/chosen": -1.1243635416030884, + "logps/rejected": -1.1805238723754883, + "loss": 1.573, + "nll_loss": 1.2677637338638306, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05621817708015442, + "rewards/margins": 0.0028080150950700045, + "rewards/rejected": -0.059026192873716354, + "step": 545 + }, + { + "epoch": 0.4368546465448769, + "grad_norm": 72.7591323852539, + "learning_rate": 3.3596477137331106e-06, + "log_odds_chosen": 0.3823426365852356, + "log_odds_ratio": -0.5422973036766052, + "logits/chosen": 345.8101806640625, + "logits/rejected": 315.0772399902344, + "logps/chosen": -1.0160396099090576, + "logps/rejected": -1.2651389837265015, + "loss": 1.6836, + "nll_loss": 1.2323284149169922, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05080198124051094, + "rewards/margins": 0.01245497353374958, + "rewards/rejected": -0.06325694918632507, + "step": 550 + }, + { + "epoch": 0.44082605242255757, + "grad_norm": 90.90532684326172, + "learning_rate": 3.327753275568105e-06, + "log_odds_chosen": 0.33174973726272583, + "log_odds_ratio": -0.5770066380500793, + "logits/chosen": 357.4688415527344, + "logits/rejected": 283.86163330078125, + "logps/chosen": -1.4533777236938477, + "logps/rejected": -1.7138382196426392, + "loss": 1.9812, + "nll_loss": 1.9684550762176514, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0726688951253891, + "rewards/margins": 0.013023021630942822, + "rewards/rejected": -0.0856919139623642, + "step": 555 + }, + { + "epoch": 0.44479745830023826, + "grad_norm": 50.66121292114258, + "learning_rate": 3.2957067941537745e-06, + "log_odds_chosen": -0.82317054271698, + "log_odds_ratio": -1.2737700939178467, + "logits/chosen": 260.4927062988281, + "logits/rejected": 432.49237060546875, + "logps/chosen": -1.632800817489624, + "logps/rejected": -1.045261025428772, + "loss": 1.8113, + "nll_loss": 1.9882593154907227, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.08164004981517792, + "rewards/margins": -0.02937699481844902, + "rewards/rejected": -0.0522630512714386, + "step": 560 + }, + { + "epoch": 0.448768864177919, + "grad_norm": 74.89939880371094, + "learning_rate": 3.263514155846969e-06, + "log_odds_chosen": 0.328817218542099, + "log_odds_ratio": -0.5914020538330078, + "logits/chosen": 342.4215393066406, + "logits/rejected": 324.5187683105469, + "logps/chosen": -0.9976029396057129, + "logps/rejected": -1.1394035816192627, + "loss": 1.5494, + "nll_loss": 1.7881031036376953, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04988015443086624, + "rewards/margins": 0.007090026047080755, + "rewards/rejected": -0.056970179080963135, + "step": 565 + }, + { + "epoch": 0.4527402700555997, + "grad_norm": 79.3864974975586, + "learning_rate": 3.2311812738509062e-06, + "log_odds_chosen": 0.5359792709350586, + "log_odds_ratio": -0.582006573677063, + "logits/chosen": 312.7106018066406, + "logits/rejected": 337.06732177734375, + "logps/chosen": -1.0472519397735596, + "logps/rejected": -1.3570013046264648, + "loss": 1.7019, + "nll_loss": 1.3205798864364624, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0523625984787941, + "rewards/margins": 0.015487474389374256, + "rewards/rejected": -0.06785006821155548, + "step": 570 + }, + { + "epoch": 0.4567116759332804, + "grad_norm": 45.589111328125, + "learning_rate": 3.198714087129024e-06, + "log_odds_chosen": 0.5243362188339233, + "log_odds_ratio": -0.5980243682861328, + "logits/chosen": 344.4964904785156, + "logits/rejected": 442.112548828125, + "logps/chosen": -1.115337610244751, + "logps/rejected": -1.4375159740447998, + "loss": 1.4739, + "nll_loss": 1.505415678024292, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.055766891688108444, + "rewards/margins": 0.016108911484479904, + "rewards/rejected": -0.07187579572200775, + "step": 575 + }, + { + "epoch": 0.46068308181096107, + "grad_norm": 82.72159576416016, + "learning_rate": 3.1661185593140986e-06, + "log_odds_chosen": -0.06218218803405762, + "log_odds_ratio": -0.7745502591133118, + "logits/chosen": 349.07562255859375, + "logits/rejected": 288.5042724609375, + "logps/chosen": -1.5691912174224854, + "logps/rejected": -1.5143083333969116, + "loss": 1.7202, + "nll_loss": 1.9988605976104736, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.07845956087112427, + "rewards/margins": -0.0027441338170319796, + "rewards/rejected": -0.07571543008089066, + "step": 580 + }, + { + "epoch": 0.46465448768864176, + "grad_norm": 36.90455627441406, + "learning_rate": 3.133400677612836e-06, + "log_odds_chosen": -0.509355366230011, + "log_odds_ratio": -1.0258004665374756, + "logits/chosen": 336.6009216308594, + "logits/rejected": 255.05517578125, + "logps/chosen": -1.52445387840271, + "logps/rejected": -1.1118301153182983, + "loss": 1.7538, + "nll_loss": 2.2724270820617676, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.07622268795967102, + "rewards/margins": -0.020631182938814163, + "rewards/rejected": -0.05559150502085686, + "step": 585 + }, + { + "epoch": 0.4686258935663225, + "grad_norm": 50.35420608520508, + "learning_rate": 3.100566451706132e-06, + "log_odds_chosen": -0.455788791179657, + "log_odds_ratio": -0.9897588491439819, + "logits/chosen": 316.55255126953125, + "logits/rejected": 320.16485595703125, + "logps/chosen": -1.5558842420578003, + "logps/rejected": -1.1951056718826294, + "loss": 1.8191, + "nll_loss": 1.6975910663604736, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.07779420912265778, + "rewards/margins": -0.018038932234048843, + "rewards/rejected": -0.05975528433918953, + "step": 590 + }, + { + "epoch": 0.4725972994440032, + "grad_norm": 51.966094970703125, + "learning_rate": 3.067621912645195e-06, + "log_odds_chosen": 0.5225323438644409, + "log_odds_ratio": -0.5115421414375305, + "logits/chosen": 318.2131042480469, + "logits/rejected": 299.44970703125, + "logps/chosen": -1.1214492321014404, + "logps/rejected": -1.5328994989395142, + "loss": 1.609, + "nll_loss": 1.4310369491577148, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05607246607542038, + "rewards/margins": 0.020572511479258537, + "rewards/rejected": -0.07664497196674347, + "step": 595 + }, + { + "epoch": 0.4765687053216839, + "grad_norm": 43.6855354309082, + "learning_rate": 3.0345731117437636e-06, + "log_odds_chosen": -0.1032852903008461, + "log_odds_ratio": -0.8357051610946655, + "logits/chosen": 371.5491638183594, + "logits/rejected": 289.3468322753906, + "logps/chosen": -1.2020314931869507, + "logps/rejected": -1.056931734085083, + "loss": 1.8538, + "nll_loss": 2.0933589935302734, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.060101576149463654, + "rewards/margins": -0.007254990749061108, + "rewards/rejected": -0.05284658074378967, + "step": 600 + }, + { + "epoch": 0.4805401111993646, + "grad_norm": 65.06558990478516, + "learning_rate": 3.001426119466581e-06, + "log_odds_chosen": 0.4957882761955261, + "log_odds_ratio": -0.4822470247745514, + "logits/chosen": 320.2109680175781, + "logits/rejected": 344.2149353027344, + "logps/chosen": -1.1690635681152344, + "logps/rejected": -1.5483322143554688, + "loss": 1.541, + "nll_loss": 1.4401990175247192, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05845317989587784, + "rewards/margins": 0.018963433802127838, + "rewards/rejected": -0.07741661369800568, + "step": 605 + }, + { + "epoch": 0.48451151707704526, + "grad_norm": 42.35622024536133, + "learning_rate": 2.9681870243143616e-06, + "log_odds_chosen": 0.8287761807441711, + "log_odds_ratio": -0.434671550989151, + "logits/chosen": 405.56640625, + "logits/rejected": 303.19744873046875, + "logps/chosen": -1.0719507932662964, + "logps/rejected": -1.627996802330017, + "loss": 1.7037, + "nll_loss": 1.929690957069397, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.053597547113895416, + "rewards/margins": 0.027802307158708572, + "rewards/rejected": -0.08139985054731369, + "step": 610 + }, + { + "epoch": 0.48848292295472595, + "grad_norm": 119.18313598632812, + "learning_rate": 2.9348619317054494e-06, + "log_odds_chosen": 0.6101234555244446, + "log_odds_ratio": -0.4588192403316498, + "logits/chosen": 344.44158935546875, + "logits/rejected": 381.56640625, + "logps/chosen": -0.940362274646759, + "logps/rejected": -1.3556063175201416, + "loss": 1.7092, + "nll_loss": 1.7841463088989258, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04701811820268631, + "rewards/margins": 0.02076220139861107, + "rewards/rejected": -0.06778031587600708, + "step": 615 + }, + { + "epoch": 0.4924543288324067, + "grad_norm": 41.579444885253906, + "learning_rate": 2.9014569628543577e-06, + "log_odds_chosen": 0.6708475947380066, + "log_odds_ratio": -0.4850080907344818, + "logits/chosen": 271.7330017089844, + "logits/rejected": 390.313232421875, + "logps/chosen": -1.4829562902450562, + "logps/rejected": -1.998425841331482, + "loss": 1.5568, + "nll_loss": 1.7184091806411743, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07414782792329788, + "rewards/margins": 0.02577347680926323, + "rewards/rejected": -0.09992130100727081, + "step": 620 + }, + { + "epoch": 0.4964257347100874, + "grad_norm": 42.15742111206055, + "learning_rate": 2.867978253647416e-06, + "log_odds_chosen": 0.20527370274066925, + "log_odds_ratio": -0.798372209072113, + "logits/chosen": 467.48321533203125, + "logits/rejected": 273.2369079589844, + "logps/chosen": -1.0584286451339722, + "logps/rejected": -1.0820119380950928, + "loss": 1.6456, + "nll_loss": 1.5411746501922607, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05292143300175667, + "rewards/margins": 0.0011791624128818512, + "rewards/rejected": -0.05410059541463852, + "step": 625 + }, + { + "epoch": 0.5003971405877681, + "grad_norm": 27.90037727355957, + "learning_rate": 2.8344319535157174e-06, + "log_odds_chosen": 0.42426902055740356, + "log_odds_ratio": -0.5486973524093628, + "logits/chosen": 346.8106994628906, + "logits/rejected": 325.4391174316406, + "logps/chosen": -1.075537085533142, + "logps/rejected": -1.3731486797332764, + "loss": 1.4489, + "nll_loss": 1.245110034942627, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.053776852786540985, + "rewards/margins": 0.014880577102303505, + "rewards/rejected": -0.06865743547677994, + "step": 630 + }, + { + "epoch": 0.5043685464654488, + "grad_norm": 73.80306243896484, + "learning_rate": 2.800824224305584e-06, + "log_odds_chosen": 0.22638097405433655, + "log_odds_ratio": -0.6381832361221313, + "logits/chosen": 359.15911865234375, + "logits/rejected": 273.5746154785156, + "logps/chosen": -1.2116836309432983, + "logps/rejected": -1.3346078395843506, + "loss": 1.6291, + "nll_loss": 1.5670521259307861, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.060584187507629395, + "rewards/margins": 0.006146208383142948, + "rewards/rejected": -0.06673039495944977, + "step": 635 + }, + { + "epoch": 0.5083399523431295, + "grad_norm": 49.89323043823242, + "learning_rate": 2.76716123914674e-06, + "log_odds_chosen": 0.07012102752923965, + "log_odds_ratio": -0.7102433443069458, + "logits/chosen": 303.50506591796875, + "logits/rejected": 324.7231750488281, + "logps/chosen": -1.0607610940933228, + "logps/rejected": -1.0976377725601196, + "loss": 1.6129, + "nll_loss": 1.4195703268051147, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05303805321455002, + "rewards/margins": 0.0018438354600220919, + "rewards/rejected": -0.05488189309835434, + "step": 640 + }, + { + "epoch": 0.5123113582208102, + "grad_norm": 46.98134994506836, + "learning_rate": 2.7334491813184276e-06, + "log_odds_chosen": 0.28301459550857544, + "log_odds_ratio": -0.5637701749801636, + "logits/chosen": 274.5257568359375, + "logits/rejected": 413.9043884277344, + "logps/chosen": -0.8029153943061829, + "logps/rejected": -0.9688779711723328, + "loss": 1.6859, + "nll_loss": 1.5517845153808594, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04014577344059944, + "rewards/margins": 0.008298131637275219, + "rewards/rejected": -0.04844390228390694, + "step": 645 + }, + { + "epoch": 0.5162827640984908, + "grad_norm": 26.37677764892578, + "learning_rate": 2.6996942431136466e-06, + "log_odds_chosen": 0.342237263917923, + "log_odds_ratio": -0.613301694393158, + "logits/chosen": 273.4066162109375, + "logits/rejected": 396.9400939941406, + "logps/chosen": -0.9960271716117859, + "logps/rejected": -1.2160321474075317, + "loss": 1.3995, + "nll_loss": 1.4034605026245117, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.049801357090473175, + "rewards/margins": 0.011000247672200203, + "rewards/rejected": -0.06080160662531853, + "step": 650 + }, + { + "epoch": 0.5202541699761716, + "grad_norm": 46.33094024658203, + "learning_rate": 2.6659026247017418e-06, + "log_odds_chosen": 0.3311120867729187, + "log_odds_ratio": -0.7188401222229004, + "logits/chosen": 384.7994079589844, + "logits/rejected": 300.3538818359375, + "logps/chosen": -1.2231776714324951, + "logps/rejected": -1.4374616146087646, + "loss": 1.5018, + "nll_loss": 1.7496258020401, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06115889549255371, + "rewards/margins": 0.01071419008076191, + "rewards/rejected": -0.07187308371067047, + "step": 655 + }, + { + "epoch": 0.5242255758538522, + "grad_norm": 70.27367401123047, + "learning_rate": 2.6320805329895495e-06, + "log_odds_chosen": 0.4950195848941803, + "log_odds_ratio": -0.6416879296302795, + "logits/chosen": 338.72930908203125, + "logits/rejected": 325.9527282714844, + "logps/chosen": -1.1382685899734497, + "logps/rejected": -1.4173352718353271, + "loss": 1.6332, + "nll_loss": 1.4613453149795532, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.056913428008556366, + "rewards/margins": 0.01395334117114544, + "rewards/rejected": -0.07086677849292755, + "step": 660 + }, + { + "epoch": 0.528196981731533, + "grad_norm": 32.2733039855957, + "learning_rate": 2.5982341804812946e-06, + "log_odds_chosen": 0.3531644642353058, + "log_odds_ratio": -0.5361741781234741, + "logits/chosen": 295.8695068359375, + "logits/rejected": 391.4307556152344, + "logps/chosen": -1.4157756567001343, + "logps/rejected": -1.6907964944839478, + "loss": 1.6299, + "nll_loss": 1.7296804189682007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07078878581523895, + "rewards/margins": 0.01375104021281004, + "rewards/rejected": -0.08453982323408127, + "step": 665 + }, + { + "epoch": 0.5321683876092137, + "grad_norm": 42.08566665649414, + "learning_rate": 2.5643697841374722e-06, + "log_odds_chosen": 0.050149548798799515, + "log_odds_ratio": -0.7359659075737, + "logits/chosen": 320.44049072265625, + "logits/rejected": 269.22320556640625, + "logps/chosen": -1.551636815071106, + "logps/rejected": -1.6407535076141357, + "loss": 1.8105, + "nll_loss": 1.6291691064834595, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.07758183777332306, + "rewards/margins": 0.004455844406038523, + "rewards/rejected": -0.08203768730163574, + "step": 670 + }, + { + "epoch": 0.5361397934868943, + "grad_norm": 52.635318756103516, + "learning_rate": 2.5304935642329e-06, + "log_odds_chosen": 0.18347088992595673, + "log_odds_ratio": -0.621238112449646, + "logits/chosen": 344.8230895996094, + "logits/rejected": 346.5325927734375, + "logps/chosen": -1.3301855325698853, + "logps/rejected": -1.4845483303070068, + "loss": 1.7328, + "nll_loss": 1.6084213256835938, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.06650927662849426, + "rewards/margins": 0.007718136068433523, + "rewards/rejected": -0.07422741502523422, + "step": 675 + }, + { + "epoch": 0.5401111993645751, + "grad_norm": 43.74726486206055, + "learning_rate": 2.4966117432141726e-06, + "log_odds_chosen": 0.24497541785240173, + "log_odds_ratio": -0.6605706810951233, + "logits/chosen": 298.27667236328125, + "logits/rejected": 422.50897216796875, + "logps/chosen": -0.9108420610427856, + "logps/rejected": -1.1715190410614014, + "loss": 1.6037, + "nll_loss": 1.2029683589935303, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.04554210603237152, + "rewards/margins": 0.013033849187195301, + "rewards/rejected": -0.05857595056295395, + "step": 680 + }, + { + "epoch": 0.5440826052422557, + "grad_norm": 64.52481079101562, + "learning_rate": 2.4627305445567048e-06, + "log_odds_chosen": 0.09129991382360458, + "log_odds_ratio": -0.6612669229507446, + "logits/chosen": 390.5311279296875, + "logits/rejected": 266.16888427734375, + "logps/chosen": -1.044427752494812, + "logps/rejected": -1.137475848197937, + "loss": 1.4905, + "nll_loss": 1.4350093603134155, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0522213876247406, + "rewards/margins": 0.0046524060890078545, + "rewards/rejected": -0.05687378719449043, + "step": 685 + }, + { + "epoch": 0.5480540111199365, + "grad_norm": 50.33750534057617, + "learning_rate": 2.428856191621596e-06, + "log_odds_chosen": -0.4379616677761078, + "log_odds_ratio": -0.9384675025939941, + "logits/chosen": 295.41632080078125, + "logits/rejected": 335.3627624511719, + "logps/chosen": -1.5291237831115723, + "logps/rejected": -1.2289823293685913, + "loss": 1.5525, + "nll_loss": 1.6753852367401123, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.07645618915557861, + "rewards/margins": -0.015007075853645802, + "rewards/rejected": -0.061449117958545685, + "step": 690 + }, + { + "epoch": 0.5520254169976172, + "grad_norm": 62.857566833496094, + "learning_rate": 2.3949949065125107e-06, + "log_odds_chosen": -0.1392946094274521, + "log_odds_ratio": -0.8439092636108398, + "logits/chosen": 298.41705322265625, + "logits/rejected": 304.42584228515625, + "logps/chosen": -1.1663509607315063, + "logps/rejected": -1.0636799335479736, + "loss": 1.3371, + "nll_loss": 1.299024224281311, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05831754207611084, + "rewards/margins": -0.005133545026183128, + "rewards/rejected": -0.05318400263786316, + "step": 695 + }, + { + "epoch": 0.5559968228752978, + "grad_norm": 36.2435417175293, + "learning_rate": 2.3611529089327893e-06, + "log_odds_chosen": 0.14748263359069824, + "log_odds_ratio": -0.6694773435592651, + "logits/chosen": 278.55145263671875, + "logits/rejected": 415.44378662109375, + "logps/chosen": -1.3171262741088867, + "logps/rejected": -1.4007153511047363, + "loss": 1.4775, + "nll_loss": 1.3410961627960205, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06585632264614105, + "rewards/margins": 0.004179453942924738, + "rewards/rejected": -0.07003577053546906, + "step": 700 + }, + { + "epoch": 0.5599682287529786, + "grad_norm": 47.188453674316406, + "learning_rate": 2.327336415043006e-06, + "log_odds_chosen": 0.15575894713401794, + "log_odds_ratio": -0.6959660649299622, + "logits/chosen": 335.40582275390625, + "logits/rejected": 465.32843017578125, + "logps/chosen": -1.123983383178711, + "logps/rejected": -1.2077070474624634, + "loss": 1.5045, + "nll_loss": 1.2318366765975952, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.056199170649051666, + "rewards/margins": 0.004186179488897324, + "rewards/rejected": -0.06038535386323929, + "step": 705 + }, + { + "epoch": 0.5639396346306592, + "grad_norm": 33.867916107177734, + "learning_rate": 2.2935516363191695e-06, + "log_odds_chosen": -0.3543465733528137, + "log_odds_ratio": -0.9505079388618469, + "logits/chosen": 280.46343994140625, + "logits/rejected": 292.0199279785156, + "logps/chosen": -1.207983374595642, + "logps/rejected": -0.9264053106307983, + "loss": 1.5108, + "nll_loss": 1.4482814073562622, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.060399167239665985, + "rewards/margins": -0.014078897424042225, + "rewards/rejected": -0.046320270746946335, + "step": 710 + }, + { + "epoch": 0.56791104050834, + "grad_norm": 71.63556671142578, + "learning_rate": 2.259804778411786e-06, + "log_odds_chosen": 0.571071982383728, + "log_odds_ratio": -0.4987887740135193, + "logits/chosen": 361.73345947265625, + "logits/rejected": 394.4234313964844, + "logps/chosen": -0.8434446454048157, + "logps/rejected": -1.139594554901123, + "loss": 1.5099, + "nll_loss": 1.2623536586761475, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.042172230780124664, + "rewards/margins": 0.014807499945163727, + "rewards/rejected": -0.05697972699999809, + "step": 715 + }, + { + "epoch": 0.5718824463860207, + "grad_norm": 31.155038833618164, + "learning_rate": 2.2261020400059986e-06, + "log_odds_chosen": 0.15770220756530762, + "log_odds_ratio": -0.6557624340057373, + "logits/chosen": 311.5298156738281, + "logits/rejected": 288.30810546875, + "logps/chosen": -1.1949565410614014, + "logps/rejected": -1.3707664012908936, + "loss": 1.4682, + "nll_loss": 1.4724897146224976, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05974782258272171, + "rewards/margins": 0.008790492080152035, + "rewards/rejected": -0.06853832304477692, + "step": 720 + }, + { + "epoch": 0.5758538522637013, + "grad_norm": 32.814144134521484, + "learning_rate": 2.1924496116829996e-06, + "log_odds_chosen": 0.4697895050048828, + "log_odds_ratio": -0.6313791871070862, + "logits/chosen": 327.9530334472656, + "logits/rejected": 266.0186462402344, + "logps/chosen": -1.1744751930236816, + "logps/rejected": -1.497837781906128, + "loss": 1.4959, + "nll_loss": 1.3718366622924805, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05872376635670662, + "rewards/margins": 0.016168128699064255, + "rewards/rejected": -0.07489189505577087, + "step": 725 + }, + { + "epoch": 0.5798252581413821, + "grad_norm": 48.00249099731445, + "learning_rate": 2.158853674782928e-06, + "log_odds_chosen": -0.15365850925445557, + "log_odds_ratio": -1.1835048198699951, + "logits/chosen": 280.81500244140625, + "logits/rejected": 461.069580078125, + "logps/chosen": -1.4112962484359741, + "logps/rejected": -1.6498725414276123, + "loss": 1.4424, + "nll_loss": 1.5177103281021118, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.07056482136249542, + "rewards/margins": 0.011928820051252842, + "rewards/rejected": -0.08249364048242569, + "step": 730 + }, + { + "epoch": 0.5837966640190627, + "grad_norm": 38.61474609375, + "learning_rate": 2.1253204002694777e-06, + "log_odds_chosen": 0.62263023853302, + "log_odds_ratio": -0.541793704032898, + "logits/chosen": 304.44573974609375, + "logits/rejected": 331.6595458984375, + "logps/chosen": -1.1065720319747925, + "logps/rejected": -1.4321861267089844, + "loss": 1.5616, + "nll_loss": 1.639901876449585, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.055328596383333206, + "rewards/margins": 0.01628071442246437, + "rewards/rejected": -0.07160931080579758, + "step": 735 + }, + { + "epoch": 0.5877680698967435, + "grad_norm": 39.873226165771484, + "learning_rate": 2.091855947596401e-06, + "log_odds_chosen": -0.07302029430866241, + "log_odds_ratio": -0.8576405644416809, + "logits/chosen": 356.81842041015625, + "logits/rejected": 376.3813781738281, + "logps/chosen": -1.1168615818023682, + "logps/rejected": -1.0692174434661865, + "loss": 1.4753, + "nll_loss": 1.271024465560913, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05584307760000229, + "rewards/margins": -0.0023822046350687742, + "rewards/rejected": -0.053460873663425446, + "step": 740 + }, + { + "epoch": 0.5917394757744241, + "grad_norm": 54.21368408203125, + "learning_rate": 2.058466463576124e-06, + "log_odds_chosen": -0.6579837203025818, + "log_odds_ratio": -1.344585657119751, + "logits/chosen": 289.74786376953125, + "logits/rejected": 320.3457946777344, + "logps/chosen": -1.8980462551116943, + "logps/rejected": -1.3352105617523193, + "loss": 1.8089, + "nll_loss": 1.8561862707138062, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.09490232169628143, + "rewards/margins": -0.02814178541302681, + "rewards/rejected": -0.06676053255796432, + "step": 745 + }, + { + "epoch": 0.5957108816521048, + "grad_norm": 33.83228302001953, + "learning_rate": 2.0251580812506938e-06, + "log_odds_chosen": -0.6078636050224304, + "log_odds_ratio": -1.1107518672943115, + "logits/chosen": 339.91595458984375, + "logits/rejected": 320.0901794433594, + "logps/chosen": -1.4174226522445679, + "logps/rejected": -1.0588816404342651, + "loss": 1.4017, + "nll_loss": 1.365027904510498, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.07087112963199615, + "rewards/margins": -0.017927050590515137, + "rewards/rejected": -0.052944086492061615, + "step": 750 + }, + { + "epoch": 0.5996822875297856, + "grad_norm": 37.01515579223633, + "learning_rate": 1.9919369187652483e-06, + "log_odds_chosen": -0.07303062826395035, + "log_odds_ratio": -0.7524539232254028, + "logits/chosen": 389.5413513183594, + "logits/rejected": 321.3714294433594, + "logps/chosen": -1.183814287185669, + "logps/rejected": -1.1148512363433838, + "loss": 1.619, + "nll_loss": 1.339825987815857, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05919071286916733, + "rewards/margins": -0.003448158036917448, + "rewards/rejected": -0.05574256181716919, + "step": 755 + }, + { + "epoch": 0.6036536934074662, + "grad_norm": 42.6160888671875, + "learning_rate": 1.9588090782442257e-06, + "log_odds_chosen": -0.02819465473294258, + "log_odds_ratio": -0.7264882922172546, + "logits/chosen": 313.9991760253906, + "logits/rejected": 334.552978515625, + "logps/chosen": -1.269689917564392, + "logps/rejected": -1.2629872560501099, + "loss": 1.5967, + "nll_loss": 1.4182217121124268, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06348450481891632, + "rewards/margins": -0.0003351382911205292, + "rewards/rejected": -0.0631493628025055, + "step": 760 + }, + { + "epoch": 0.607625099285147, + "grad_norm": 45.021705627441406, + "learning_rate": 1.9257806446705116e-06, + "log_odds_chosen": -0.48236551880836487, + "log_odds_ratio": -1.207824468612671, + "logits/chosen": 385.98260498046875, + "logits/rejected": 320.9479675292969, + "logps/chosen": -1.317742109298706, + "logps/rejected": -0.8753318786621094, + "loss": 1.6117, + "nll_loss": 1.4493136405944824, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06588710099458694, + "rewards/margins": -0.022120505571365356, + "rewards/rejected": -0.04376659542322159, + "step": 765 + }, + { + "epoch": 0.6115965051628276, + "grad_norm": 45.63967514038086, + "learning_rate": 1.8928576847677404e-06, + "log_odds_chosen": 0.15900571644306183, + "log_odds_ratio": -0.71577388048172, + "logits/chosen": 349.0858459472656, + "logits/rejected": 374.12872314453125, + "logps/chosen": -0.9837914705276489, + "logps/rejected": -0.9930256009101868, + "loss": 1.8407, + "nll_loss": 1.8363087177276611, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.049189578741788864, + "rewards/margins": 0.0004617050290107727, + "rewards/rejected": -0.04965128004550934, + "step": 770 + }, + { + "epoch": 0.6155679110405083, + "grad_norm": 49.38869094848633, + "learning_rate": 1.8600462458859492e-06, + "log_odds_chosen": 0.5004615187644958, + "log_odds_ratio": -0.5722527503967285, + "logits/chosen": 338.10382080078125, + "logits/rejected": 324.82861328125, + "logps/chosen": -0.9794312715530396, + "logps/rejected": -1.2764912843704224, + "loss": 1.6962, + "nll_loss": 1.4810049533843994, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.04897156357765198, + "rewards/margins": 0.014853003434836864, + "rewards/rejected": -0.06382457166910172, + "step": 775 + }, + { + "epoch": 0.6195393169181891, + "grad_norm": 55.28865432739258, + "learning_rate": 1.8273523548907867e-06, + "log_odds_chosen": 0.8983039855957031, + "log_odds_ratio": -0.7501333951950073, + "logits/chosen": 323.56842041015625, + "logits/rejected": 315.9183654785156, + "logps/chosen": -1.0870964527130127, + "logps/rejected": -1.8820436000823975, + "loss": 1.7622, + "nll_loss": 1.5614144802093506, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.054354824125766754, + "rewards/margins": 0.03974735736846924, + "rewards/rejected": -0.09410218894481659, + "step": 780 + }, + { + "epoch": 0.6235107227958697, + "grad_norm": 38.31996154785156, + "learning_rate": 1.7947820170564897e-06, + "log_odds_chosen": 1.0626842975616455, + "log_odds_ratio": -0.44282132387161255, + "logits/chosen": 351.11236572265625, + "logits/rejected": 300.69366455078125, + "logps/chosen": -1.2781195640563965, + "logps/rejected": -2.0489819049835205, + "loss": 1.4668, + "nll_loss": 1.7934401035308838, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0639059767127037, + "rewards/margins": 0.03854311257600784, + "rewards/rejected": -0.10244909673929214, + "step": 785 + }, + { + "epoch": 0.6274821286735505, + "grad_norm": 49.6168212890625, + "learning_rate": 1.7623412149628216e-06, + "log_odds_chosen": -0.2784636616706848, + "log_odds_ratio": -0.9712227582931519, + "logits/chosen": 294.8133239746094, + "logits/rejected": 370.743896484375, + "logps/chosen": -1.2229773998260498, + "logps/rejected": -1.0270025730133057, + "loss": 1.6546, + "nll_loss": 1.4282915592193604, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06114886328577995, + "rewards/margins": -0.009798737242817879, + "rewards/rejected": -0.051350127905607224, + "step": 790 + }, + { + "epoch": 0.6314535345512311, + "grad_norm": 28.367610931396484, + "learning_rate": 1.7300359073961834e-06, + "log_odds_chosen": 0.42548590898513794, + "log_odds_ratio": -0.5335083603858948, + "logits/chosen": 337.03436279296875, + "logits/rejected": 383.9117736816406, + "logps/chosen": -1.062105655670166, + "logps/rejected": -1.415236473083496, + "loss": 1.3773, + "nll_loss": 1.3322608470916748, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05310528352856636, + "rewards/margins": 0.017656544223427773, + "rewards/rejected": -0.07076182961463928, + "step": 795 + }, + { + "epoch": 0.6354249404289118, + "grad_norm": 98.34535217285156, + "learning_rate": 1.6978720282550897e-06, + "log_odds_chosen": 0.07821528613567352, + "log_odds_ratio": -0.665870726108551, + "logits/chosen": 305.55938720703125, + "logits/rejected": 395.2098388671875, + "logps/chosen": -0.9890663027763367, + "logps/rejected": -1.0237197875976562, + "loss": 1.6046, + "nll_loss": 1.4329578876495361, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.049453310668468475, + "rewards/margins": 0.0017326741944998503, + "rewards/rejected": -0.051185984164476395, + "step": 800 + }, + { + "epoch": 0.6393963463065926, + "grad_norm": 34.472469329833984, + "learning_rate": 1.6658554854602222e-06, + "log_odds_chosen": 0.6897698640823364, + "log_odds_ratio": -0.41707152128219604, + "logits/chosen": 309.0857849121094, + "logits/rejected": 351.38153076171875, + "logps/chosen": -1.0435580015182495, + "logps/rejected": -1.5444471836090088, + "loss": 1.5348, + "nll_loss": 1.5355165004730225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.052177898585796356, + "rewards/margins": 0.025044452399015427, + "rewards/rejected": -0.07722235471010208, + "step": 805 + }, + { + "epoch": 0.6433677521842732, + "grad_norm": 48.47563171386719, + "learning_rate": 1.6339921598692476e-06, + "log_odds_chosen": 0.11802919209003448, + "log_odds_ratio": -0.7510842084884644, + "logits/chosen": 293.94256591796875, + "logits/rejected": 311.3507995605469, + "logps/chosen": -1.730385184288025, + "logps/rejected": -1.8177807331085205, + "loss": 1.7041, + "nll_loss": 1.842095136642456, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.086519256234169, + "rewards/margins": 0.0043697720393538475, + "rewards/rejected": -0.09088902175426483, + "step": 810 + }, + { + "epoch": 0.647339158061954, + "grad_norm": 31.41144371032715, + "learning_rate": 1.6022879041966188e-06, + "log_odds_chosen": 0.3533809185028076, + "log_odds_ratio": -0.6280057430267334, + "logits/chosen": 335.3319396972656, + "logits/rejected": 264.1844787597656, + "logps/chosen": -1.0379221439361572, + "logps/rejected": -1.2540075778961182, + "loss": 1.3573, + "nll_loss": 1.4223954677581787, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0518961064517498, + "rewards/margins": 0.01080426573753357, + "rewards/rejected": -0.06270037591457367, + "step": 815 + }, + { + "epoch": 0.6513105639396346, + "grad_norm": 44.874664306640625, + "learning_rate": 1.5707485419385293e-06, + "log_odds_chosen": -0.14202973246574402, + "log_odds_ratio": -0.8004404306411743, + "logits/chosen": 360.9006042480469, + "logits/rejected": 264.46337890625, + "logps/chosen": -1.2262499332427979, + "logps/rejected": -1.134313941001892, + "loss": 1.496, + "nll_loss": 1.3937432765960693, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06131250411272049, + "rewards/margins": -0.00459679588675499, + "rewards/rejected": -0.0567157045006752, + "step": 820 + }, + { + "epoch": 0.6552819698173153, + "grad_norm": 36.3102912902832, + "learning_rate": 1.539379866303245e-06, + "log_odds_chosen": 0.1242959052324295, + "log_odds_ratio": -0.6936241984367371, + "logits/chosen": 281.64874267578125, + "logits/rejected": 311.4866943359375, + "logps/chosen": -1.1951546669006348, + "logps/rejected": -1.2739152908325195, + "loss": 1.5673, + "nll_loss": 1.4124051332473755, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05975773185491562, + "rewards/margins": 0.003938031382858753, + "rewards/rejected": -0.0636957660317421, + "step": 825 + }, + { + "epoch": 0.659253375694996, + "grad_norm": 58.44258499145508, + "learning_rate": 1.508187639147001e-06, + "log_odds_chosen": 0.12791205942630768, + "log_odds_ratio": -0.6473852396011353, + "logits/chosen": 344.5693664550781, + "logits/rejected": 367.2503662109375, + "logps/chosen": -1.0457253456115723, + "logps/rejected": -1.142913579940796, + "loss": 1.5898, + "nll_loss": 1.3428993225097656, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05228627473115921, + "rewards/margins": 0.004859411157667637, + "rewards/rejected": -0.05714568495750427, + "step": 830 + }, + { + "epoch": 0.6632247815726767, + "grad_norm": 33.92708206176758, + "learning_rate": 1.4771775899156487e-06, + "log_odds_chosen": 0.5014594793319702, + "log_odds_ratio": -0.5115988254547119, + "logits/chosen": 318.1674499511719, + "logits/rejected": 471.2735290527344, + "logps/chosen": -1.0235928297042847, + "logps/rejected": -1.36759614944458, + "loss": 1.4523, + "nll_loss": 1.3124425411224365, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05117964744567871, + "rewards/margins": 0.017200157046318054, + "rewards/rejected": -0.06837980449199677, + "step": 835 + }, + { + "epoch": 0.6671961874503575, + "grad_norm": 27.84844207763672, + "learning_rate": 1.4463554145922603e-06, + "log_odds_chosen": -0.31632497906684875, + "log_odds_ratio": -0.9493728876113892, + "logits/chosen": 424.86517333984375, + "logits/rejected": 282.5466613769531, + "logps/chosen": -1.1070573329925537, + "logps/rejected": -0.9589263796806335, + "loss": 1.4065, + "nll_loss": 1.1693588495254517, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.055352866649627686, + "rewards/margins": -0.007406541611999273, + "rewards/rejected": -0.047946326434612274, + "step": 840 + }, + { + "epoch": 0.6711675933280381, + "grad_norm": 51.31157302856445, + "learning_rate": 1.4157267746508834e-06, + "log_odds_chosen": 0.07127873599529266, + "log_odds_ratio": -0.7540755867958069, + "logits/chosen": 357.64971923828125, + "logits/rejected": 300.25811767578125, + "logps/chosen": -1.264819860458374, + "logps/rejected": -1.2909610271453857, + "loss": 1.2753, + "nll_loss": 1.4475698471069336, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06324099004268646, + "rewards/margins": 0.0013070597779005766, + "rewards/rejected": -0.0645480528473854, + "step": 845 + }, + { + "epoch": 0.6751389992057188, + "grad_norm": 47.41500473022461, + "learning_rate": 1.385297296016631e-06, + "log_odds_chosen": 0.2415420562028885, + "log_odds_ratio": -0.5941085815429688, + "logits/chosen": 419.50408935546875, + "logits/rejected": 281.9603576660156, + "logps/chosen": -0.820796012878418, + "logps/rejected": -0.9756487011909485, + "loss": 1.5865, + "nll_loss": 1.0082799196243286, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.041039805859327316, + "rewards/margins": 0.007742627058178186, + "rewards/rejected": -0.048782430589199066, + "step": 850 + }, + { + "epoch": 0.6791104050833995, + "grad_norm": 42.014591217041016, + "learning_rate": 1.3550725680322973e-06, + "log_odds_chosen": 1.7263425588607788, + "log_odds_ratio": -0.2393535077571869, + "logits/chosen": 317.6227111816406, + "logits/rejected": 302.24859619140625, + "logps/chosen": -0.7767224311828613, + "logps/rejected": -2.0884933471679688, + "loss": 1.5027, + "nll_loss": 1.3212028741836548, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.038836125284433365, + "rewards/margins": 0.06558854132890701, + "rewards/rejected": -0.10442467033863068, + "step": 855 + }, + { + "epoch": 0.6830818109610802, + "grad_norm": 32.96746063232422, + "learning_rate": 1.3250581424317012e-06, + "log_odds_chosen": -0.05438203737139702, + "log_odds_ratio": -0.8108514547348022, + "logits/chosen": 328.84979248046875, + "logits/rejected": 378.38507080078125, + "logps/chosen": -1.1635057926177979, + "logps/rejected": -1.1345611810684204, + "loss": 1.456, + "nll_loss": 1.441853642463684, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05817528814077377, + "rewards/margins": -0.0014472283655777574, + "rewards/rejected": -0.0567280575633049, + "step": 860 + }, + { + "epoch": 0.687053216838761, + "grad_norm": 25.131879806518555, + "learning_rate": 1.295259532319927e-06, + "log_odds_chosen": 1.491008996963501, + "log_odds_ratio": -0.33983761072158813, + "logits/chosen": 305.7498474121094, + "logits/rejected": 395.27630615234375, + "logps/chosen": -0.9085075259208679, + "logps/rejected": -1.8494819402694702, + "loss": 1.5229, + "nll_loss": 1.9765069484710693, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.045425377786159515, + "rewards/margins": 0.04704872891306877, + "rewards/rejected": -0.09247410297393799, + "step": 865 + }, + { + "epoch": 0.6910246227164416, + "grad_norm": 37.967525482177734, + "learning_rate": 1.265682211160678e-06, + "log_odds_chosen": -0.6112550497055054, + "log_odds_ratio": -1.088639497756958, + "logits/chosen": 310.4002685546875, + "logits/rejected": 406.2005615234375, + "logps/chosen": -1.192030906677246, + "logps/rejected": -0.812456488609314, + "loss": 1.6589, + "nll_loss": 1.2748486995697021, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.059601545333862305, + "rewards/margins": -0.01897871494293213, + "rewards/rejected": -0.040622830390930176, + "step": 870 + }, + { + "epoch": 0.6949960285941224, + "grad_norm": 75.36614227294922, + "learning_rate": 1.2363316117708912e-06, + "log_odds_chosen": 0.7237299680709839, + "log_odds_ratio": -0.6293952465057373, + "logits/chosen": 310.7578430175781, + "logits/rejected": 359.1610412597656, + "logps/chosen": -1.019719123840332, + "logps/rejected": -1.6588159799575806, + "loss": 1.5328, + "nll_loss": 1.2692029476165771, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05098595470190048, + "rewards/margins": 0.031954843550920486, + "rewards/rejected": -0.08294080197811127, + "step": 875 + }, + { + "epoch": 0.698967434471803, + "grad_norm": 36.7714958190918, + "learning_rate": 1.2072131253228292e-06, + "log_odds_chosen": -0.17302027344703674, + "log_odds_ratio": -0.8415447473526001, + "logits/chosen": 327.3581237792969, + "logits/rejected": 300.59027099609375, + "logps/chosen": -1.4463417530059814, + "logps/rejected": -1.3052947521209717, + "loss": 1.5306, + "nll_loss": 1.9659268856048584, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.07231709361076355, + "rewards/margins": -0.007052358239889145, + "rewards/rejected": -0.0652647316455841, + "step": 880 + }, + { + "epoch": 0.7029388403494837, + "grad_norm": 39.60774612426758, + "learning_rate": 1.1783321003538262e-06, + "log_odds_chosen": 0.14880752563476562, + "log_odds_ratio": -0.6801349520683289, + "logits/chosen": 326.66217041015625, + "logits/rejected": 288.07977294921875, + "logps/chosen": -1.3033047914505005, + "logps/rejected": -1.410351037979126, + "loss": 1.5044, + "nll_loss": 1.411853551864624, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06516523659229279, + "rewards/margins": 0.005352319683879614, + "rewards/rejected": -0.07051756232976913, + "step": 885 + }, + { + "epoch": 0.7069102462271644, + "grad_norm": 49.855010986328125, + "learning_rate": 1.1496938417838466e-06, + "log_odds_chosen": -0.6302076578140259, + "log_odds_ratio": -1.108147382736206, + "logits/chosen": 303.8426208496094, + "logits/rejected": 299.96282958984375, + "logps/chosen": -1.1776927709579468, + "logps/rejected": -0.8100347518920898, + "loss": 1.6152, + "nll_loss": 1.3265931606292725, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.0588846430182457, + "rewards/margins": -0.018382901325821877, + "rewards/rejected": -0.04050173982977867, + "step": 890 + }, + { + "epoch": 0.7108816521048451, + "grad_norm": 65.90828704833984, + "learning_rate": 1.1213036099410799e-06, + "log_odds_chosen": 0.3805133104324341, + "log_odds_ratio": -0.5482473373413086, + "logits/chosen": 334.39703369140625, + "logits/rejected": 280.8984680175781, + "logps/chosen": -1.1065565347671509, + "logps/rejected": -1.3260886669158936, + "loss": 1.5504, + "nll_loss": 1.5361745357513428, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.055327825248241425, + "rewards/margins": 0.010976609773933887, + "rewards/rejected": -0.06630443036556244, + "step": 895 + }, + { + "epoch": 0.7148530579825259, + "grad_norm": 44.0485725402832, + "learning_rate": 1.0931666195957053e-06, + "log_odds_chosen": 0.4383140504360199, + "log_odds_ratio": -0.5253010392189026, + "logits/chosen": 337.86090087890625, + "logits/rejected": 394.5022888183594, + "logps/chosen": -0.7901648283004761, + "logps/rejected": -1.034003496170044, + "loss": 1.4815, + "nll_loss": 1.1732326745986938, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03950824216008186, + "rewards/margins": 0.012191934511065483, + "rewards/rejected": -0.0517001748085022, + "step": 900 + }, + { + "epoch": 0.7188244638602065, + "grad_norm": 40.06319046020508, + "learning_rate": 1.0652880390020398e-06, + "log_odds_chosen": 2.0690839290618896, + "log_odds_ratio": -0.2347683608531952, + "logits/chosen": 423.53961181640625, + "logits/rejected": 276.1210021972656, + "logps/chosen": -0.6150510311126709, + "logps/rejected": -1.5576661825180054, + "loss": 1.4353, + "nll_loss": 1.4714704751968384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030752548947930336, + "rewards/margins": 0.0471307598054409, + "rewards/rejected": -0.07788331806659698, + "step": 905 + }, + { + "epoch": 0.7227958697378872, + "grad_norm": 29.090539932250977, + "learning_rate": 1.0376729889492178e-06, + "log_odds_chosen": -0.0568159744143486, + "log_odds_ratio": -0.7396942377090454, + "logits/chosen": 287.9375915527344, + "logits/rejected": 300.72698974609375, + "logps/chosen": -1.0032362937927246, + "logps/rejected": -0.9725478887557983, + "loss": 1.5766, + "nll_loss": 1.5186008214950562, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05016181990504265, + "rewards/margins": -0.0015344202984124422, + "rewards/rejected": -0.04862739518284798, + "step": 910 + }, + { + "epoch": 0.7267672756155679, + "grad_norm": 43.51217269897461, + "learning_rate": 1.0103265418205984e-06, + "log_odds_chosen": 0.6717264652252197, + "log_odds_ratio": -0.5441080927848816, + "logits/chosen": 299.4734802246094, + "logits/rejected": 346.14617919921875, + "logps/chosen": -0.7290914058685303, + "logps/rejected": -1.1217130422592163, + "loss": 1.5629, + "nll_loss": 1.4440972805023193, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03645457327365875, + "rewards/margins": 0.01963108405470848, + "rewards/rejected": -0.05608565732836723, + "step": 915 + }, + { + "epoch": 0.7307386814932486, + "grad_norm": 43.0380859375, + "learning_rate": 9.832537206620594e-07, + "log_odds_chosen": 0.04898405075073242, + "log_odds_ratio": -0.8131389617919922, + "logits/chosen": 331.40570068359375, + "logits/rejected": 315.16229248046875, + "logps/chosen": -0.9992687106132507, + "logps/rejected": -1.0054936408996582, + "loss": 1.5654, + "nll_loss": 1.4301942586898804, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.049963437020778656, + "rewards/margins": 0.00031124576344154775, + "rewards/rejected": -0.05027468129992485, + "step": 920 + }, + { + "epoch": 0.7347100873709294, + "grad_norm": 37.88023376464844, + "learning_rate": 9.564594982593559e-07, + "log_odds_chosen": 0.1898471564054489, + "log_odds_ratio": -0.6842805743217468, + "logits/chosen": 346.6091613769531, + "logits/rejected": 315.18408203125, + "logps/chosen": -0.9871004819869995, + "logps/rejected": -1.0915769338607788, + "loss": 1.439, + "nll_loss": 1.487571358680725, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.049355026334524155, + "rewards/margins": 0.005223819054663181, + "rewards/rejected": -0.05457884818315506, + "step": 925 + }, + { + "epoch": 0.73868149324861, + "grad_norm": 54.20380783081055, + "learning_rate": 9.299487962247089e-07, + "log_odds_chosen": 0.022501707077026367, + "log_odds_ratio": -0.8337327837944031, + "logits/chosen": 302.9130554199219, + "logits/rejected": 279.71685791015625, + "logps/chosen": -1.2005774974822998, + "logps/rejected": -1.1395219564437866, + "loss": 1.6788, + "nll_loss": 1.584804654121399, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06002888083457947, + "rewards/margins": -0.0030527785420417786, + "rewards/rejected": -0.05697610229253769, + "step": 930 + }, + { + "epoch": 0.7426528991262907, + "grad_norm": 47.39323425292969, + "learning_rate": 9.037264840927945e-07, + "log_odds_chosen": -0.10585136711597443, + "log_odds_ratio": -0.8327314257621765, + "logits/chosen": 284.0058898925781, + "logits/rejected": 345.33587646484375, + "logps/chosen": -1.1005109548568726, + "logps/rejected": -1.0587420463562012, + "loss": 1.3425, + "nll_loss": 1.6860910654067993, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05502554774284363, + "rewards/margins": -0.002088439418002963, + "rewards/rejected": -0.052937109023332596, + "step": 935 + }, + { + "epoch": 0.7466243050039714, + "grad_norm": 38.21221923828125, + "learning_rate": 8.777973784263016e-07, + "log_odds_chosen": -0.43362635374069214, + "log_odds_ratio": -0.9653336405754089, + "logits/chosen": 285.54193115234375, + "logits/rejected": 329.3501892089844, + "logps/chosen": -1.3442434072494507, + "logps/rejected": -1.126430869102478, + "loss": 1.4812, + "nll_loss": 1.4815846681594849, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06721217930316925, + "rewards/margins": -0.01089063473045826, + "rewards/rejected": -0.05632154271006584, + "step": 940 + }, + { + "epoch": 0.7505957108816521, + "grad_norm": 33.12229919433594, + "learning_rate": 8.521662419312124e-07, + "log_odds_chosen": 0.15825173258781433, + "log_odds_ratio": -0.7438204884529114, + "logits/chosen": 337.7537841796875, + "logits/rejected": 271.5417175292969, + "logps/chosen": -1.0434350967407227, + "logps/rejected": -1.243912696838379, + "loss": 1.5446, + "nll_loss": 1.3849962949752808, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05217175558209419, + "rewards/margins": 0.010023881681263447, + "rewards/rejected": -0.062195636332035065, + "step": 945 + }, + { + "epoch": 0.7545671167593329, + "grad_norm": 48.55085372924805, + "learning_rate": 8.268377825819821e-07, + "log_odds_chosen": -0.014403104782104492, + "log_odds_ratio": -0.7055137157440186, + "logits/chosen": 371.1424865722656, + "logits/rejected": 286.8011779785156, + "logps/chosen": -1.4028490781784058, + "logps/rejected": -1.39651620388031, + "loss": 1.5356, + "nll_loss": 1.620958924293518, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.07014245539903641, + "rewards/margins": -0.00031664298148825765, + "rewards/rejected": -0.06982581317424774, + "step": 950 + }, + { + "epoch": 0.7585385226370135, + "grad_norm": 36.20191192626953, + "learning_rate": 8.018166527567672e-07, + "log_odds_chosen": 1.0636457204818726, + "log_odds_ratio": -0.43774813413619995, + "logits/chosen": 259.2046813964844, + "logits/rejected": 375.35198974609375, + "logps/chosen": -0.868695080280304, + "logps/rejected": -1.445894479751587, + "loss": 1.4218, + "nll_loss": 1.3990987539291382, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0434347540140152, + "rewards/margins": 0.02885996736586094, + "rewards/rejected": -0.07229472696781158, + "step": 955 + }, + { + "epoch": 0.7625099285146942, + "grad_norm": 29.8554744720459, + "learning_rate": 7.771074483828747e-07, + "log_odds_chosen": -0.15444841980934143, + "log_odds_ratio": -0.8071505427360535, + "logits/chosen": 290.59674072265625, + "logits/rejected": 347.6033630371094, + "logps/chosen": -1.40049147605896, + "logps/rejected": -1.3093515634536743, + "loss": 1.3432, + "nll_loss": 1.450141191482544, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.07002457231283188, + "rewards/margins": -0.0045569948852062225, + "rewards/rejected": -0.06546757370233536, + "step": 960 + }, + { + "epoch": 0.7664813343923749, + "grad_norm": 39.03826904296875, + "learning_rate": 7.52714708092565e-07, + "log_odds_chosen": 0.04803264141082764, + "log_odds_ratio": -0.7985066175460815, + "logits/chosen": 321.512939453125, + "logits/rejected": 325.80853271484375, + "logps/chosen": -1.1335227489471436, + "logps/rejected": -1.0179483890533447, + "loss": 1.5082, + "nll_loss": 1.2471481561660767, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05667613074183464, + "rewards/margins": -0.005778718274086714, + "rewards/rejected": -0.050897419452667236, + "step": 965 + }, + { + "epoch": 0.7704527402700556, + "grad_norm": 43.7759895324707, + "learning_rate": 7.286429123893931e-07, + "log_odds_chosen": 0.35435453057289124, + "log_odds_ratio": -0.6048498153686523, + "logits/chosen": 308.8065490722656, + "logits/rejected": 375.87982177734375, + "logps/chosen": -1.6246349811553955, + "logps/rejected": -1.9498169422149658, + "loss": 1.6459, + "nll_loss": 1.7503254413604736, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0812317505478859, + "rewards/margins": 0.016259105876088142, + "rewards/rejected": -0.09749085456132889, + "step": 970 + }, + { + "epoch": 0.7744241461477362, + "grad_norm": 54.47321319580078, + "learning_rate": 7.048964828252188e-07, + "log_odds_chosen": 0.9730485081672668, + "log_odds_ratio": -0.47064799070358276, + "logits/chosen": 364.9896545410156, + "logits/rejected": 361.5113830566406, + "logps/chosen": -0.9137029647827148, + "logps/rejected": -1.395819067955017, + "loss": 1.3761, + "nll_loss": 1.2809137105941772, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04568514600396156, + "rewards/margins": 0.02410580962896347, + "rewards/rejected": -0.06979095190763474, + "step": 975 + }, + { + "epoch": 0.778395552025417, + "grad_norm": 46.42416000366211, + "learning_rate": 6.814797811880525e-07, + "log_odds_chosen": 0.18214160203933716, + "log_odds_ratio": -0.714030921459198, + "logits/chosen": 386.58782958984375, + "logits/rejected": 275.394775390625, + "logps/chosen": -1.3727108240127563, + "logps/rejected": -1.474959373474121, + "loss": 1.4134, + "nll_loss": 1.827099084854126, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06863553822040558, + "rewards/margins": 0.005112423561513424, + "rewards/rejected": -0.07374797016382217, + "step": 980 + }, + { + "epoch": 0.7823669579030977, + "grad_norm": 53.367584228515625, + "learning_rate": 6.583971087008654e-07, + "log_odds_chosen": 0.1975199282169342, + "log_odds_ratio": -0.6967960596084595, + "logits/chosen": 292.4072265625, + "logits/rejected": 433.8272399902344, + "logps/chosen": -0.9060415029525757, + "logps/rejected": -0.9691941142082214, + "loss": 1.3796, + "nll_loss": 1.407875657081604, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04530208185315132, + "rewards/margins": 0.0031576238106936216, + "rewards/rejected": -0.04845970869064331, + "step": 985 + }, + { + "epoch": 0.7863383637807784, + "grad_norm": 50.85255813598633, + "learning_rate": 6.356527052315403e-07, + "log_odds_chosen": -0.2651984691619873, + "log_odds_ratio": -0.8623536825180054, + "logits/chosen": 372.86358642578125, + "logits/rejected": 285.08172607421875, + "logps/chosen": -1.2994807958602905, + "logps/rejected": -1.1402140855789185, + "loss": 1.5812, + "nll_loss": 1.4798548221588135, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06497403979301453, + "rewards/margins": -0.007963338866829872, + "rewards/rejected": -0.0570107102394104, + "step": 990 + }, + { + "epoch": 0.7903097696584591, + "grad_norm": 79.17274475097656, + "learning_rate": 6.132507485140843e-07, + "log_odds_chosen": 1.5265482664108276, + "log_odds_ratio": -0.2821381688117981, + "logits/chosen": 388.9686279296875, + "logits/rejected": 307.1571350097656, + "logps/chosen": -0.7675724625587463, + "logps/rejected": -1.6737315654754639, + "loss": 1.4555, + "nll_loss": 1.2786924839019775, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03837861865758896, + "rewards/margins": 0.045307956635951996, + "rewards/rejected": -0.08368657529354095, + "step": 995 + }, + { + "epoch": 0.7942811755361397, + "grad_norm": 41.524253845214844, + "learning_rate": 5.911953533812506e-07, + "log_odds_chosen": -0.3479143977165222, + "log_odds_ratio": -0.9029040336608887, + "logits/chosen": 338.65325927734375, + "logits/rejected": 374.2637939453125, + "logps/chosen": -0.9410643577575684, + "logps/rejected": -0.7491464614868164, + "loss": 1.655, + "nll_loss": 1.3791284561157227, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.047053221613168716, + "rewards/margins": -0.009595893323421478, + "rewards/rejected": -0.03745732456445694, + "step": 1000 + }, + { + "epoch": 0.7982525814138205, + "grad_norm": 47.42415237426758, + "learning_rate": 5.694905710087217e-07, + "log_odds_chosen": 0.20326891541481018, + "log_odds_ratio": -0.6220360398292542, + "logits/chosen": 397.6610107421875, + "logits/rejected": 313.22088623046875, + "logps/chosen": -0.8882730603218079, + "logps/rejected": -1.0516657829284668, + "loss": 1.6773, + "nll_loss": 1.243209719657898, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.04441365599632263, + "rewards/margins": 0.008169631473720074, + "rewards/rejected": -0.05258328840136528, + "step": 1005 + }, + { + "epoch": 0.8022239872915012, + "grad_norm": 45.351253509521484, + "learning_rate": 5.481403881709815e-07, + "log_odds_chosen": 0.5789100527763367, + "log_odds_ratio": -0.886620044708252, + "logits/chosen": 345.7013854980469, + "logits/rejected": 279.64801025390625, + "logps/chosen": -0.9864595532417297, + "logps/rejected": -1.3232471942901611, + "loss": 1.6636, + "nll_loss": 1.7329130172729492, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.04932297766208649, + "rewards/margins": 0.01683938130736351, + "rewards/rejected": -0.0661623626947403, + "step": 1010 + }, + { + "epoch": 0.8061953931691819, + "grad_norm": 45.76054382324219, + "learning_rate": 5.271487265090163e-07, + "log_odds_chosen": 0.1340581774711609, + "log_odds_ratio": -0.6442204117774963, + "logits/chosen": 253.32131958007812, + "logits/rejected": 293.978759765625, + "logps/chosen": -0.9866166114807129, + "logps/rejected": -1.0805059671401978, + "loss": 1.3506, + "nll_loss": 1.1058270931243896, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04933083802461624, + "rewards/margins": 0.004694463685154915, + "rewards/rejected": -0.05402529984712601, + "step": 1015 + }, + { + "epoch": 0.8101667990468626, + "grad_norm": 30.294597625732422, + "learning_rate": 5.06519441809982e-07, + "log_odds_chosen": 0.6967889666557312, + "log_odds_ratio": -0.5373490452766418, + "logits/chosen": 318.8917541503906, + "logits/rejected": 270.747802734375, + "logps/chosen": -1.074588418006897, + "logps/rejected": -1.4418222904205322, + "loss": 1.4542, + "nll_loss": 1.5221776962280273, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05372941493988037, + "rewards/margins": 0.01836169883608818, + "rewards/rejected": -0.07209111005067825, + "step": 1020 + }, + { + "epoch": 0.8141382049245433, + "grad_norm": 53.206661224365234, + "learning_rate": 4.862563232989643e-07, + "log_odds_chosen": 0.20271643996238708, + "log_odds_ratio": -0.6173663139343262, + "logits/chosen": 485.3902893066406, + "logits/rejected": 313.65185546875, + "logps/chosen": -1.0637105703353882, + "logps/rejected": -1.2300751209259033, + "loss": 1.4923, + "nll_loss": 1.1044560670852661, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05318553373217583, + "rewards/margins": 0.008318223990499973, + "rewards/rejected": -0.061503756791353226, + "step": 1025 + }, + { + "epoch": 0.818109610802224, + "grad_norm": 78.89067077636719, + "learning_rate": 4.663630929429674e-07, + "log_odds_chosen": 0.3912231922149658, + "log_odds_ratio": -0.5344873666763306, + "logits/chosen": 278.98651123046875, + "logits/rejected": 279.85418701171875, + "logps/chosen": -0.7328051924705505, + "logps/rejected": -0.9687238931655884, + "loss": 1.497, + "nll_loss": 1.046706199645996, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.036640264093875885, + "rewards/margins": 0.011795936152338982, + "rewards/rejected": -0.048436202108860016, + "step": 1030 + }, + { + "epoch": 0.8220810166799047, + "grad_norm": 51.44734573364258, + "learning_rate": 4.46843404767259e-07, + "log_odds_chosen": 0.3833610415458679, + "log_odds_ratio": -0.8149446249008179, + "logits/chosen": 314.2817077636719, + "logits/rejected": 307.8243103027344, + "logps/chosen": -1.357617735862732, + "logps/rejected": -1.7892076969146729, + "loss": 1.459, + "nll_loss": 1.4600521326065063, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06788089126348495, + "rewards/margins": 0.021579492837190628, + "rewards/rejected": -0.08946038782596588, + "step": 1035 + }, + { + "epoch": 0.8260524225575854, + "grad_norm": 39.02664566040039, + "learning_rate": 4.2770084418418736e-07, + "log_odds_chosen": 0.27395057678222656, + "log_odds_ratio": -0.7102149128913879, + "logits/chosen": 414.571533203125, + "logits/rejected": 287.80877685546875, + "logps/chosen": -0.8411673307418823, + "logps/rejected": -0.9671209454536438, + "loss": 1.5205, + "nll_loss": 1.5072977542877197, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.042058371007442474, + "rewards/margins": 0.006297673098742962, + "rewards/rejected": -0.04835604503750801, + "step": 1040 + }, + { + "epoch": 0.8300238284352661, + "grad_norm": 44.10149383544922, + "learning_rate": 4.089389273346084e-07, + "log_odds_chosen": 1.4467592239379883, + "log_odds_ratio": -0.36090224981307983, + "logits/chosen": 336.5260009765625, + "logits/rejected": 268.7255859375, + "logps/chosen": -0.579176664352417, + "logps/rejected": -1.433186650276184, + "loss": 1.4451, + "nll_loss": 2.0459094047546387, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.028958836570382118, + "rewards/margins": 0.042700495570898056, + "rewards/rejected": -0.07165933400392532, + "step": 1045 + }, + { + "epoch": 0.8339952343129468, + "grad_norm": 29.071208953857422, + "learning_rate": 3.9056110044203594e-07, + "log_odds_chosen": 0.3594241142272949, + "log_odds_ratio": -0.6073828935623169, + "logits/chosen": 340.73431396484375, + "logits/rejected": 293.41363525390625, + "logps/chosen": -1.039194107055664, + "logps/rejected": -1.395516037940979, + "loss": 1.3767, + "nll_loss": 1.1890077590942383, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05195971205830574, + "rewards/margins": 0.017816094681620598, + "rewards/rejected": -0.06977580487728119, + "step": 1050 + }, + { + "epoch": 0.8379666401906275, + "grad_norm": 50.16731643676758, + "learning_rate": 3.72570739179631e-07, + "log_odds_chosen": 0.49487370252609253, + "log_odds_ratio": -0.49297910928726196, + "logits/chosen": 280.0902404785156, + "logits/rejected": 468.6788024902344, + "logps/chosen": -1.0811553001403809, + "logps/rejected": -1.4258912801742554, + "loss": 1.453, + "nll_loss": 1.3579655885696411, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0540577657520771, + "rewards/margins": 0.017236804589629173, + "rewards/rejected": -0.07129456847906113, + "step": 1055 + }, + { + "epoch": 0.8419380460683081, + "grad_norm": 33.406272888183594, + "learning_rate": 3.5497114805015223e-07, + "log_odds_chosen": 0.14397627115249634, + "log_odds_ratio": -0.6715080142021179, + "logits/chosen": 373.4464416503906, + "logits/rejected": 325.99163818359375, + "logps/chosen": -0.9457298517227173, + "logps/rejected": -1.0310219526290894, + "loss": 1.4311, + "nll_loss": 1.5625425577163696, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.047286491841077805, + "rewards/margins": 0.004264607094228268, + "rewards/rejected": -0.051551103591918945, + "step": 1060 + }, + { + "epoch": 0.8459094519459889, + "grad_norm": 50.40607833862305, + "learning_rate": 3.377655597789789e-07, + "log_odds_chosen": -0.3970710337162018, + "log_odds_ratio": -1.0087158679962158, + "logits/chosen": 305.99798583984375, + "logits/rejected": 319.93890380859375, + "logps/chosen": -1.3028199672698975, + "logps/rejected": -1.107723593711853, + "loss": 1.5254, + "nll_loss": 1.708296537399292, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.06514099985361099, + "rewards/margins": -0.009754816070199013, + "rewards/rejected": -0.05538617819547653, + "step": 1065 + }, + { + "epoch": 0.8498808578236696, + "grad_norm": 32.866085052490234, + "learning_rate": 3.209571347203197e-07, + "log_odds_chosen": -0.3324670195579529, + "log_odds_ratio": -0.9437648057937622, + "logits/chosen": 288.600830078125, + "logits/rejected": 395.1521911621094, + "logps/chosen": -0.9841065406799316, + "logps/rejected": -0.9132230877876282, + "loss": 1.2591, + "nll_loss": 1.0712697505950928, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.04920532926917076, + "rewards/margins": -0.0035441755317151546, + "rewards/rejected": -0.04566115140914917, + "step": 1070 + }, + { + "epoch": 0.8538522637013503, + "grad_norm": 63.01152038574219, + "learning_rate": 3.0454896027671073e-07, + "log_odds_chosen": -0.15130704641342163, + "log_odds_ratio": -0.8452315330505371, + "logits/chosen": 408.84552001953125, + "logits/rejected": 319.49493408203125, + "logps/chosen": -1.28704035282135, + "logps/rejected": -1.2764626741409302, + "loss": 1.6339, + "nll_loss": 1.682464361190796, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06435202062129974, + "rewards/margins": -0.000528886157553643, + "rewards/rejected": -0.06382313370704651, + "step": 1075 + }, + { + "epoch": 0.857823669579031, + "grad_norm": 32.351932525634766, + "learning_rate": 2.885440503319145e-07, + "log_odds_chosen": -0.4667375087738037, + "log_odds_ratio": -1.0818705558776855, + "logits/chosen": 459.02410888671875, + "logits/rejected": 257.15716552734375, + "logps/chosen": -1.1935930252075195, + "logps/rejected": -0.7796539068222046, + "loss": 1.548, + "nll_loss": 1.7794479131698608, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.05967964977025986, + "rewards/margins": -0.020696954801678658, + "rewards/rejected": -0.03898269310593605, + "step": 1080 + }, + { + "epoch": 0.8617950754567116, + "grad_norm": 36.15253829956055, + "learning_rate": 2.7294534469732794e-07, + "log_odds_chosen": 1.6522690057754517, + "log_odds_ratio": -0.4097142219543457, + "logits/chosen": 344.11962890625, + "logits/rejected": 319.623046875, + "logps/chosen": -0.894513726234436, + "logps/rejected": -2.088923692703247, + "loss": 1.4534, + "nll_loss": 1.7392299175262451, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0447256863117218, + "rewards/margins": 0.05972049757838249, + "rewards/rejected": -0.10444619506597519, + "step": 1085 + }, + { + "epoch": 0.8657664813343924, + "grad_norm": 47.798858642578125, + "learning_rate": 2.5775570857199144e-07, + "log_odds_chosen": -0.20097847282886505, + "log_odds_ratio": -0.8294545412063599, + "logits/chosen": 281.59942626953125, + "logits/rejected": 324.57403564453125, + "logps/chosen": -1.1493308544158936, + "logps/rejected": -0.9629890322685242, + "loss": 1.3747, + "nll_loss": 1.3210439682006836, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.0574665442109108, + "rewards/margins": -0.009317094460129738, + "rewards/rejected": -0.04814944788813591, + "step": 1090 + }, + { + "epoch": 0.8697378872120731, + "grad_norm": 29.671043395996094, + "learning_rate": 2.4297793201630113e-07, + "log_odds_chosen": 0.020139653235673904, + "log_odds_ratio": -0.7046443819999695, + "logits/chosen": 300.97998046875, + "logits/rejected": 333.9520568847656, + "logps/chosen": -0.972399115562439, + "logps/rejected": -0.9247520565986633, + "loss": 1.4489, + "nll_loss": 1.3582823276519775, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04861995577812195, + "rewards/margins": -0.0023823559749871492, + "rewards/rejected": -0.04623759910464287, + "step": 1095 + }, + { + "epoch": 0.8737092930897538, + "grad_norm": 30.31666374206543, + "learning_rate": 2.286147294395283e-07, + "log_odds_chosen": 0.29012542963027954, + "log_odds_ratio": -0.7615915536880493, + "logits/chosen": 315.9916687011719, + "logits/rejected": 422.93017578125, + "logps/chosen": -1.0184037685394287, + "logps/rejected": -1.0094739198684692, + "loss": 1.5729, + "nll_loss": 1.4798800945281982, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05092019587755203, + "rewards/margins": -0.00044649915071204305, + "rewards/rejected": -0.05047369748353958, + "step": 1100 + }, + { + "epoch": 0.8776806989674345, + "grad_norm": 32.19295120239258, + "learning_rate": 2.1466873910123058e-07, + "log_odds_chosen": -0.07446761429309845, + "log_odds_ratio": -0.8099315762519836, + "logits/chosen": 334.15740966796875, + "logits/rejected": 331.76031494140625, + "logps/chosen": -1.026379942893982, + "logps/rejected": -0.9773386120796204, + "loss": 1.4675, + "nll_loss": 1.4952067136764526, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.05131899565458298, + "rewards/margins": -0.002452067332342267, + "rewards/rejected": -0.048866935074329376, + "step": 1105 + }, + { + "epoch": 0.8816521048451151, + "grad_norm": 34.077449798583984, + "learning_rate": 2.0114252262665086e-07, + "log_odds_chosen": 0.30518868565559387, + "log_odds_ratio": -0.6247283220291138, + "logits/chosen": 313.799560546875, + "logits/rejected": 278.57293701171875, + "logps/chosen": -1.2211542129516602, + "logps/rejected": -1.4464585781097412, + "loss": 1.4043, + "nll_loss": 1.2219561338424683, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.06105770915746689, + "rewards/margins": 0.011265222914516926, + "rewards/rejected": -0.07232292741537094, + "step": 1110 + }, + { + "epoch": 0.8856235107227959, + "grad_norm": 33.00147247314453, + "learning_rate": 1.880385645361951e-07, + "log_odds_chosen": 2.0590970516204834, + "log_odds_ratio": -0.4269631803035736, + "logits/chosen": 304.67572021484375, + "logits/rejected": 437.95751953125, + "logps/chosen": -0.8812467455863953, + "logps/rejected": -2.7326838970184326, + "loss": 1.5588, + "nll_loss": 1.2233951091766357, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04406233876943588, + "rewards/margins": 0.09257186949253082, + "rewards/rejected": -0.1366342008113861, + "step": 1115 + }, + { + "epoch": 0.8895949166004765, + "grad_norm": 29.0517578125, + "learning_rate": 1.7535927178906898e-07, + "log_odds_chosen": 0.6221317648887634, + "log_odds_ratio": -0.5823326110839844, + "logits/chosen": 288.1285705566406, + "logits/rejected": 357.75701904296875, + "logps/chosen": -1.0755956172943115, + "logps/rejected": -1.5605504512786865, + "loss": 1.4892, + "nll_loss": 1.3548178672790527, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.053779780864715576, + "rewards/margins": 0.024247746914625168, + "rewards/rejected": -0.07802753150463104, + "step": 1120 + }, + { + "epoch": 0.8935663224781573, + "grad_norm": 37.735111236572266, + "learning_rate": 1.6310697334116583e-07, + "log_odds_chosen": -0.2765834629535675, + "log_odds_ratio": -0.8877069354057312, + "logits/chosen": 357.4658203125, + "logits/rejected": 302.4728088378906, + "logps/chosen": -1.121751070022583, + "logps/rejected": -0.9259787797927856, + "loss": 1.4662, + "nll_loss": 1.56728196144104, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.05608755350112915, + "rewards/margins": -0.009788615629076958, + "rewards/rejected": -0.04629894345998764, + "step": 1125 + }, + { + "epoch": 0.897537728355838, + "grad_norm": 39.21086120605469, + "learning_rate": 1.512839197172758e-07, + "log_odds_chosen": 0.1607791781425476, + "log_odds_ratio": -0.7086135745048523, + "logits/chosen": 431.327880859375, + "logits/rejected": 309.34210205078125, + "logps/chosen": -1.1171777248382568, + "logps/rejected": -1.3307039737701416, + "loss": 1.3869, + "nll_loss": 1.7646287679672241, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05585888773202896, + "rewards/margins": 0.010676311329007149, + "rewards/rejected": -0.06653519719839096, + "step": 1130 + }, + { + "epoch": 0.9015091342335186, + "grad_norm": 49.552589416503906, + "learning_rate": 1.398922825977092e-07, + "log_odds_chosen": 1.1862103939056396, + "log_odds_ratio": -0.36970359086990356, + "logits/chosen": 287.95318603515625, + "logits/rejected": 344.77764892578125, + "logps/chosen": -1.1711792945861816, + "logps/rejected": -2.0627903938293457, + "loss": 1.4555, + "nll_loss": 1.4977771043777466, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05855896323919296, + "rewards/margins": 0.044580571353435516, + "rewards/rejected": -0.10313953459262848, + "step": 1135 + }, + { + "epoch": 0.9054805401111994, + "grad_norm": 34.04672622680664, + "learning_rate": 1.2893415441939588e-07, + "log_odds_chosen": 0.042095281183719635, + "log_odds_ratio": -0.7100853323936462, + "logits/chosen": 319.3915710449219, + "logits/rejected": 372.4937438964844, + "logps/chosen": -0.8466545343399048, + "logps/rejected": -0.8484467267990112, + "loss": 1.3861, + "nll_loss": 1.0329768657684326, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.0423327274620533, + "rewards/margins": 8.96111159818247e-05, + "rewards/rejected": -0.0424223393201828, + "step": 1140 + }, + { + "epoch": 0.90945194598888, + "grad_norm": 43.70923614501953, + "learning_rate": 1.1841154799154376e-07, + "log_odds_chosen": 0.09307994693517685, + "log_odds_ratio": -0.8356366157531738, + "logits/chosen": 383.5526428222656, + "logits/rejected": 280.0104064941406, + "logps/chosen": -1.0310574769973755, + "logps/rejected": -1.0444084405899048, + "loss": 1.4938, + "nll_loss": 1.3081772327423096, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.051552869379520416, + "rewards/margins": 0.0006675481563434005, + "rewards/rejected": -0.0522204227745533, + "step": 1145 + }, + { + "epoch": 0.9134233518665608, + "grad_norm": 42.19453048706055, + "learning_rate": 1.083263961259215e-07, + "log_odds_chosen": 0.5610286593437195, + "log_odds_ratio": -0.5318921804428101, + "logits/chosen": 313.7818298339844, + "logits/rejected": 281.55303955078125, + "logps/chosen": -0.8616586923599243, + "logps/rejected": -1.13971745967865, + "loss": 1.3678, + "nll_loss": 1.5508089065551758, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.043082933872938156, + "rewards/margins": 0.01390293799340725, + "rewards/rejected": -0.056985873728990555, + "step": 1150 + }, + { + "epoch": 0.9173947577442415, + "grad_norm": 62.5867805480957, + "learning_rate": 9.86805512818359e-08, + "log_odds_chosen": 0.5819055438041687, + "log_odds_ratio": -0.5563138723373413, + "logits/chosen": 308.6764221191406, + "logits/rejected": 280.77093505859375, + "logps/chosen": -0.7943710684776306, + "logps/rejected": -1.1860836744308472, + "loss": 1.5596, + "nll_loss": 1.2872906923294067, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.03971855714917183, + "rewards/margins": 0.019585633650422096, + "rewards/rejected": -0.059304188936948776, + "step": 1155 + }, + { + "epoch": 0.9213661636219221, + "grad_norm": 39.823516845703125, + "learning_rate": 8.947578522587097e-08, + "log_odds_chosen": 0.5100874304771423, + "log_odds_ratio": -0.7220025062561035, + "logits/chosen": 421.84722900390625, + "logits/rejected": 306.4421081542969, + "logps/chosen": -0.7023354768753052, + "logps/rejected": -0.9484280347824097, + "loss": 1.2855, + "nll_loss": 0.8443538546562195, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.0351167730987072, + "rewards/margins": 0.012304631993174553, + "rewards/rejected": -0.0474214032292366, + "step": 1160 + }, + { + "epoch": 0.9253375694996029, + "grad_norm": 34.763492584228516, + "learning_rate": 8.071378870644381e-08, + "log_odds_chosen": 0.068526491522789, + "log_odds_ratio": -0.7172547578811646, + "logits/chosen": 306.42303466796875, + "logits/rejected": 306.2086486816406, + "logps/chosen": -1.1717783212661743, + "logps/rejected": -1.2519972324371338, + "loss": 1.4611, + "nll_loss": 1.6088817119598389, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05858892202377319, + "rewards/margins": 0.004010946489870548, + "rewards/rejected": -0.06259986758232117, + "step": 1165 + }, + { + "epoch": 0.9293089753772835, + "grad_norm": 57.37542724609375, + "learning_rate": 7.239617114324499e-08, + "log_odds_chosen": -0.01497584581375122, + "log_odds_ratio": -0.7752578854560852, + "logits/chosen": 278.559326171875, + "logits/rejected": 374.15869140625, + "logps/chosen": -1.2931535243988037, + "logps/rejected": -1.2771342992782593, + "loss": 1.5557, + "nll_loss": 1.4230643510818481, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06465767323970795, + "rewards/margins": -0.0008009634912014008, + "rewards/rejected": -0.06385671347379684, + "step": 1170 + }, + { + "epoch": 0.9332803812549643, + "grad_norm": 54.0869026184082, + "learning_rate": 6.452446033161946e-08, + "log_odds_chosen": -0.19031484425067902, + "log_odds_ratio": -0.8042638897895813, + "logits/chosen": 299.294921875, + "logits/rejected": 398.8330078125, + "logps/chosen": -1.0967943668365479, + "logps/rejected": -1.014966607093811, + "loss": 1.5468, + "nll_loss": 1.4041001796722412, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.05483972281217575, + "rewards/margins": -0.0040913899429142475, + "rewards/rejected": -0.05074832960963249, + "step": 1175 + }, + { + "epoch": 0.937251787132645, + "grad_norm": 30.728069305419922, + "learning_rate": 5.7100102161937176e-08, + "log_odds_chosen": 0.08359535038471222, + "log_odds_ratio": -0.6820527911186218, + "logits/chosen": 354.07806396484375, + "logits/rejected": 264.95947265625, + "logps/chosen": -0.9352580904960632, + "logps/rejected": -0.9271550178527832, + "loss": 1.3888, + "nll_loss": 1.2889841794967651, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04676290601491928, + "rewards/margins": -0.0004051584692206234, + "rewards/rejected": -0.04635775089263916, + "step": 1180 + }, + { + "epoch": 0.9412231930103256, + "grad_norm": 30.694597244262695, + "learning_rate": 5.012446035400881e-08, + "log_odds_chosen": -0.18491533398628235, + "log_odds_ratio": -0.8581596612930298, + "logits/chosen": 431.51483154296875, + "logits/rejected": 256.1930236816406, + "logps/chosen": -1.3761582374572754, + "logps/rejected": -1.2526451349258423, + "loss": 1.5465, + "nll_loss": 1.7214624881744385, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06880791485309601, + "rewards/margins": -0.006175657268613577, + "rewards/rejected": -0.062632255256176, + "step": 1185 + }, + { + "epoch": 0.9451945988880064, + "grad_norm": 52.927608489990234, + "learning_rate": 4.359881620659534e-08, + "log_odds_chosen": 0.6782919764518738, + "log_odds_ratio": -0.6247768998146057, + "logits/chosen": 436.1754455566406, + "logits/rejected": 285.18365478515625, + "logps/chosen": -0.985084056854248, + "logps/rejected": -1.5693267583847046, + "loss": 1.4816, + "nll_loss": 1.398045301437378, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.04925420135259628, + "rewards/margins": 0.029212135821580887, + "rewards/rejected": -0.07846634089946747, + "step": 1190 + }, + { + "epoch": 0.949166004765687, + "grad_norm": 48.66614532470703, + "learning_rate": 3.7524368362057415e-08, + "log_odds_chosen": -0.1631685197353363, + "log_odds_ratio": -0.8233796954154968, + "logits/chosen": 418.45928955078125, + "logits/rejected": 318.4512939453125, + "logps/chosen": -0.8457492589950562, + "logps/rejected": -0.7526192665100098, + "loss": 1.4522, + "nll_loss": 1.3443362712860107, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.04228746145963669, + "rewards/margins": -0.004656502045691013, + "rewards/rejected": -0.03763096407055855, + "step": 1195 + }, + { + "epoch": 0.9531374106433678, + "grad_norm": 29.535236358642578, + "learning_rate": 3.1902232586185635e-08, + "log_odds_chosen": 0.14746162295341492, + "log_odds_ratio": -0.7185263633728027, + "logits/chosen": 346.12823486328125, + "logits/rejected": 365.977783203125, + "logps/chosen": -0.9943090677261353, + "logps/rejected": -0.9647199511528015, + "loss": 1.5476, + "nll_loss": 1.3550399541854858, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.04971545934677124, + "rewards/margins": -0.001479454687796533, + "rewards/rejected": -0.048236001282930374, + "step": 1200 + }, + { + "epoch": 0.9571088165210484, + "grad_norm": 46.18953323364258, + "learning_rate": 2.673344156325558e-08, + "log_odds_chosen": 0.25677961111068726, + "log_odds_ratio": -0.6748029589653015, + "logits/chosen": 321.4141845703125, + "logits/rejected": 390.1539611816406, + "logps/chosen": -1.306235432624817, + "logps/rejected": -1.5235410928726196, + "loss": 1.4511, + "nll_loss": 1.348838448524475, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06531177461147308, + "rewards/margins": 0.010865284129977226, + "rewards/rejected": -0.07617706060409546, + "step": 1205 + }, + { + "epoch": 0.9610802223987291, + "grad_norm": 48.14284133911133, + "learning_rate": 2.2018944706341528e-08, + "log_odds_chosen": 0.25043779611587524, + "log_odds_ratio": -0.6027860045433044, + "logits/chosen": 383.79913330078125, + "logits/rejected": 336.7133483886719, + "logps/chosen": -1.0423628091812134, + "logps/rejected": -1.1916873455047607, + "loss": 1.4619, + "nll_loss": 1.6256072521209717, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.05211814120411873, + "rewards/margins": 0.007466231472790241, + "rewards/rejected": -0.059584371745586395, + "step": 1210 + }, + { + "epoch": 0.9650516282764099, + "grad_norm": 32.60062026977539, + "learning_rate": 1.775960798292731e-08, + "log_odds_chosen": -0.04052457585930824, + "log_odds_ratio": -0.8082249760627747, + "logits/chosen": 331.26397705078125, + "logits/rejected": 329.55096435546875, + "logps/chosen": -1.347312092781067, + "logps/rejected": -1.3537461757659912, + "loss": 1.5079, + "nll_loss": 1.5286957025527954, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.0673656016588211, + "rewards/margins": 0.00032170413760468364, + "rewards/rejected": -0.06768731772899628, + "step": 1215 + }, + { + "epoch": 0.9690230341540905, + "grad_norm": 43.27484893798828, + "learning_rate": 1.3956213755842718e-08, + "log_odds_chosen": -0.26221469044685364, + "log_odds_ratio": -0.9925212860107422, + "logits/chosen": 304.05120849609375, + "logits/rejected": 393.05517578125, + "logps/chosen": -1.015187382698059, + "logps/rejected": -0.9859923124313354, + "loss": 1.3977, + "nll_loss": 1.1624058485031128, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.050759367644786835, + "rewards/margins": -0.0014597497647628188, + "rewards/rejected": -0.04929962009191513, + "step": 1220 + }, + { + "epoch": 0.9729944400317713, + "grad_norm": 43.98046875, + "learning_rate": 1.0609460639559033e-08, + "log_odds_chosen": 0.6014237999916077, + "log_odds_ratio": -0.5421421527862549, + "logits/chosen": 339.303955078125, + "logits/rejected": 391.6441345214844, + "logps/chosen": -1.0844001770019531, + "logps/rejected": -1.4398688077926636, + "loss": 1.3258, + "nll_loss": 1.4467300176620483, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.054220009595155716, + "rewards/margins": 0.017773432657122612, + "rewards/rejected": -0.07199344038963318, + "step": 1225 + }, + { + "epoch": 0.9769658459094519, + "grad_norm": 34.32419967651367, + "learning_rate": 7.719963371865259e-09, + "log_odds_chosen": 0.7910041809082031, + "log_odds_ratio": -0.4584100842475891, + "logits/chosen": 316.9669494628906, + "logits/rejected": 363.3733825683594, + "logps/chosen": -0.8123126029968262, + "logps/rejected": -1.2943851947784424, + "loss": 1.3404, + "nll_loss": 1.313084363937378, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.04061562940478325, + "rewards/margins": 0.02410362847149372, + "rewards/rejected": -0.06471925973892212, + "step": 1230 + }, + { + "epoch": 0.9809372517871326, + "grad_norm": 42.33149337768555, + "learning_rate": 5.288252700952068e-09, + "log_odds_chosen": 0.8038623929023743, + "log_odds_ratio": -0.43806830048561096, + "logits/chosen": 310.6473083496094, + "logits/rejected": 314.3547668457031, + "logps/chosen": -0.8453804850578308, + "logps/rejected": -1.4276442527770996, + "loss": 1.4819, + "nll_loss": 1.6422332525253296, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.0422690249979496, + "rewards/margins": 0.02911318838596344, + "rewards/rejected": -0.07138221710920334, + "step": 1235 + }, + { + "epoch": 0.9849086576648134, + "grad_norm": 45.44175720214844, + "learning_rate": 3.3147752879236773e-09, + "log_odds_chosen": 0.02175927720963955, + "log_odds_ratio": -0.8097355961799622, + "logits/chosen": 277.007080078125, + "logits/rejected": 415.473388671875, + "logps/chosen": -1.0552597045898438, + "logps/rejected": -1.2169201374053955, + "loss": 1.5311, + "nll_loss": 1.5463203191757202, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05276298522949219, + "rewards/margins": 0.008083020336925983, + "rewards/rejected": -0.060846008360385895, + "step": 1240 + }, + { + "epoch": 0.988880063542494, + "grad_norm": 62.612850189208984, + "learning_rate": 1.7998936247534681e-09, + "log_odds_chosen": 0.2898419499397278, + "log_odds_ratio": -0.5893855094909668, + "logits/chosen": 399.6653747558594, + "logits/rejected": 274.2057800292969, + "logps/chosen": -1.0792999267578125, + "logps/rejected": -1.2781049013137817, + "loss": 1.4564, + "nll_loss": 1.5806140899658203, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.053964994847774506, + "rewards/margins": 0.009940249845385551, + "rewards/rejected": -0.0639052465558052, + "step": 1245 + }, + { + "epoch": 0.9928514694201748, + "grad_norm": 51.08999252319336, + "learning_rate": 7.438859677008636e-10, + "log_odds_chosen": 0.09502691775560379, + "log_odds_ratio": -0.7220104336738586, + "logits/chosen": 325.39263916015625, + "logits/rejected": 361.7767639160156, + "logps/chosen": -1.1207284927368164, + "logps/rejected": -1.173607587814331, + "loss": 1.3621, + "nll_loss": 1.241156816482544, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.05603642389178276, + "rewards/margins": 0.0026439554058015347, + "rewards/rejected": -0.05868038535118103, + "step": 1250 + }, + { + "epoch": 0.9968228752978554, + "grad_norm": 44.36956787109375, + "learning_rate": 1.4694628620137708e-10, + "log_odds_chosen": -0.33857935667037964, + "log_odds_ratio": -0.9029590487480164, + "logits/chosen": 293.49420166015625, + "logits/rejected": 431.3218688964844, + "logps/chosen": -0.9575881958007812, + "logps/rejected": -0.7752519845962524, + "loss": 1.3423, + "nll_loss": 1.1781036853790283, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": -0.0478794127702713, + "rewards/margins": -0.009116815403103828, + "rewards/rejected": -0.038762595504522324, + "step": 1255 + }, + { + "epoch": 1.0, + "eval_log_odds_chosen": 0.23976314067840576, + "eval_log_odds_ratio": -0.6928443908691406, + "eval_logits/chosen": 340.5321350097656, + "eval_logits/rejected": 312.9670104980469, + "eval_logps/chosen": -1.0267834663391113, + "eval_logps/rejected": -1.1786600351333618, + "eval_loss": 1.455647587776184, + "eval_nll_loss": 1.4095592498779297, + "eval_rewards/accuracies": 0.5107913613319397, + "eval_rewards/chosen": -0.051339175552129745, + "eval_rewards/margins": 0.0075938161462545395, + "eval_rewards/rejected": -0.058932989835739136, + "eval_runtime": 91.5981, + "eval_samples_per_second": 6.037, + "eval_steps_per_second": 1.517, + "step": 1259 + }, + { + "epoch": 1.0, + "step": 1259, + "total_flos": 0.0, + "train_loss": 1.8019611810861456, + "train_runtime": 4470.8327, + "train_samples_per_second": 1.126, + "train_steps_per_second": 0.282 + } + ], + "logging_steps": 5, + "max_steps": 1259, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}