{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 3777, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003971405877680699, "grad_norm": 1984.91259765625, "learning_rate": 2.5000000000000004e-07, "log_odds_chosen": -0.822909951210022, "log_odds_ratio": -1.3946492671966553, "logits/chosen": 102.17945098876953, "logits/rejected": -12.376755714416504, "logps/chosen": -16.667949676513672, "logps/rejected": -15.845039367675781, "loss": 14.8233, "nll_loss": 15.784014701843262, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.8333975076675415, "rewards/margins": -0.041145503520965576, "rewards/rejected": -0.7922519445419312, "step": 5 }, { "epoch": 0.007942811755361398, "grad_norm": 1128.173583984375, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": -1.4871519804000854, "log_odds_ratio": -2.2126498222351074, "logits/chosen": 69.30543518066406, "logits/rejected": 133.51295471191406, "logps/chosen": -14.377180099487305, "logps/rejected": -12.890034675598145, "loss": 12.4575, "nll_loss": 12.073002815246582, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.7188590168952942, "rewards/margins": -0.0743572935461998, "rewards/rejected": -0.6445017457008362, "step": 10 }, { "epoch": 0.011914217633042097, "grad_norm": 578.59765625, "learning_rate": 7.5e-07, "log_odds_chosen": 0.08573625236749649, "log_odds_ratio": -0.9523025751113892, "logits/chosen": 194.7311248779297, "logits/rejected": 170.49374389648438, "logps/chosen": -8.521177291870117, "logps/rejected": -8.607057571411133, "loss": 8.662, "nll_loss": 8.76197624206543, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.4260588586330414, "rewards/margins": 0.004294055514037609, "rewards/rejected": -0.43035292625427246, "step": 15 }, { "epoch": 0.015885623510722795, "grad_norm": 292.1365051269531, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": 1.116430640220642, "log_odds_ratio": -0.5740691423416138, "logits/chosen": 130.79287719726562, "logits/rejected": 219.90774536132812, "logps/chosen": -5.188860893249512, "logps/rejected": -6.301913261413574, "loss": 5.9474, "nll_loss": 5.384028434753418, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2594430446624756, "rewards/margins": 0.055652640759944916, "rewards/rejected": -0.3150956928730011, "step": 20 }, { "epoch": 0.019857029388403495, "grad_norm": 176.4138641357422, "learning_rate": 1.25e-06, "log_odds_chosen": -0.2425965815782547, "log_odds_ratio": -0.8498791456222534, "logits/chosen": 127.62091064453125, "logits/rejected": 241.3440399169922, "logps/chosen": -3.852045774459839, "logps/rejected": -3.6273605823516846, "loss": 4.3717, "nll_loss": 4.1882829666137695, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.19260229170322418, "rewards/margins": -0.01123427040874958, "rewards/rejected": -0.18136802315711975, "step": 25 }, { "epoch": 0.023828435266084195, "grad_norm": 122.2364501953125, "learning_rate": 1.5e-06, "log_odds_chosen": 0.13310351967811584, "log_odds_ratio": -1.3691762685775757, "logits/chosen": 266.5719299316406, "logits/rejected": 187.1367645263672, "logps/chosen": -3.6415443420410156, "logps/rejected": -3.757603883743286, "loss": 3.5507, "nll_loss": 3.7023651599884033, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.18207721412181854, "rewards/margins": 0.0058029768988490105, "rewards/rejected": -0.18788018822669983, "step": 30 }, { "epoch": 0.02779984114376489, "grad_norm": 193.59271240234375, "learning_rate": 1.75e-06, "log_odds_chosen": 0.06719346344470978, "log_odds_ratio": -0.7199904918670654, "logits/chosen": 215.12673950195312, "logits/rejected": 310.61846923828125, "logps/chosen": -2.6558592319488525, "logps/rejected": -2.712460994720459, "loss": 3.333, "nll_loss": 2.6609179973602295, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.13279296457767487, "rewards/margins": 0.002830089535564184, "rewards/rejected": -0.13562306761741638, "step": 35 }, { "epoch": 0.03177124702144559, "grad_norm": 147.1302032470703, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.12157557159662247, "log_odds_ratio": -0.7308082580566406, "logits/chosen": 302.55181884765625, "logits/rejected": 263.9683532714844, "logps/chosen": -2.3353283405303955, "logps/rejected": -2.4736487865448, "loss": 2.4602, "nll_loss": 3.1235008239746094, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11676641553640366, "rewards/margins": 0.006916025187820196, "rewards/rejected": -0.12368245422840118, "step": 40 }, { "epoch": 0.035742652899126294, "grad_norm": 110.2761459350586, "learning_rate": 2.25e-06, "log_odds_chosen": -0.2882576584815979, "log_odds_ratio": -0.8804551362991333, "logits/chosen": 304.38250732421875, "logits/rejected": 283.35162353515625, "logps/chosen": -1.4691202640533447, "logps/rejected": -1.2973356246948242, "loss": 2.1893, "nll_loss": 1.8311408758163452, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07345602661371231, "rewards/margins": -0.008589239791035652, "rewards/rejected": -0.06486678123474121, "step": 45 }, { "epoch": 0.03971405877680699, "grad_norm": 157.4852752685547, "learning_rate": 2.5e-06, "log_odds_chosen": 0.11816178262233734, "log_odds_ratio": -0.6793395280838013, "logits/chosen": 351.5061950683594, "logits/rejected": 229.8159637451172, "logps/chosen": -1.4661238193511963, "logps/rejected": -1.5235395431518555, "loss": 2.2969, "nll_loss": 2.131767988204956, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07330618798732758, "rewards/margins": 0.0028707936871796846, "rewards/rejected": -0.07617697864770889, "step": 50 }, { "epoch": 0.043685464654487687, "grad_norm": 152.07127380371094, "learning_rate": 2.7500000000000004e-06, "log_odds_chosen": 0.4677702784538269, "log_odds_ratio": -0.4963720738887787, "logits/chosen": 274.1103515625, "logits/rejected": 317.81048583984375, "logps/chosen": -1.4359080791473389, "logps/rejected": -1.8219692707061768, "loss": 1.9312, "nll_loss": 1.9257862567901611, "rewards/accuracies": 1.0, "rewards/chosen": -0.07179541140794754, "rewards/margins": 0.019303051754832268, "rewards/rejected": -0.09109846502542496, "step": 55 }, { "epoch": 0.04765687053216839, "grad_norm": 70.87137603759766, "learning_rate": 3e-06, "log_odds_chosen": -0.36796286702156067, "log_odds_ratio": -1.036171317100525, "logits/chosen": 356.27490234375, "logits/rejected": 317.17694091796875, "logps/chosen": -1.6219791173934937, "logps/rejected": -1.34583580493927, "loss": 2.1117, "nll_loss": 1.755855917930603, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.08109895884990692, "rewards/margins": -0.01380716823041439, "rewards/rejected": -0.06729178875684738, "step": 60 }, { "epoch": 0.051628276409849086, "grad_norm": 58.58616638183594, "learning_rate": 3.2500000000000002e-06, "log_odds_chosen": 0.2776266038417816, "log_odds_ratio": -0.6214284300804138, "logits/chosen": 280.74383544921875, "logits/rejected": 284.61981201171875, "logps/chosen": -1.7956883907318115, "logps/rejected": -2.0456883907318115, "loss": 2.5287, "nll_loss": 2.3265061378479004, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0897844210267067, "rewards/margins": 0.012500002980232239, "rewards/rejected": -0.10228443145751953, "step": 65 }, { "epoch": 0.05559968228752978, "grad_norm": 208.72171020507812, "learning_rate": 3.5e-06, "log_odds_chosen": -0.17022135853767395, "log_odds_ratio": -0.8455036282539368, "logits/chosen": 307.35565185546875, "logits/rejected": 320.88824462890625, "logps/chosen": -1.625649094581604, "logps/rejected": -1.4863574504852295, "loss": 2.1645, "nll_loss": 1.922053575515747, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.08128245174884796, "rewards/margins": -0.006964580621570349, "rewards/rejected": -0.07431787252426147, "step": 70 }, { "epoch": 0.059571088165210485, "grad_norm": 69.00164031982422, "learning_rate": 3.7500000000000005e-06, "log_odds_chosen": 0.1811346560716629, "log_odds_ratio": -0.6363757252693176, "logits/chosen": 317.77398681640625, "logits/rejected": 273.17938232421875, "logps/chosen": -1.5566846132278442, "logps/rejected": -1.7085206508636475, "loss": 2.0867, "nll_loss": 2.0406994819641113, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07783423364162445, "rewards/margins": 0.0075918035581707954, "rewards/rejected": -0.08542603254318237, "step": 75 }, { "epoch": 0.06354249404289118, "grad_norm": 77.00801086425781, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.6646592617034912, "log_odds_ratio": -0.48417338728904724, "logits/chosen": 260.798583984375, "logits/rejected": 340.7353820800781, "logps/chosen": -1.2510426044464111, "logps/rejected": -1.7401701211929321, "loss": 2.0187, "nll_loss": 1.9357578754425049, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06255212426185608, "rewards/margins": 0.02445637807250023, "rewards/rejected": -0.0870085060596466, "step": 80 }, { "epoch": 0.06751389992057188, "grad_norm": 208.50125122070312, "learning_rate": 4.25e-06, "log_odds_chosen": 0.2738969027996063, "log_odds_ratio": -0.6175040006637573, "logits/chosen": 320.6893005371094, "logits/rejected": 330.1410827636719, "logps/chosen": -1.2499741315841675, "logps/rejected": -1.4630674123764038, "loss": 1.7159, "nll_loss": 1.6920101642608643, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.062498707324266434, "rewards/margins": 0.010654664598405361, "rewards/rejected": -0.07315336912870407, "step": 85 }, { "epoch": 0.07148530579825259, "grad_norm": 103.35198211669922, "learning_rate": 4.5e-06, "log_odds_chosen": 0.12765750288963318, "log_odds_ratio": -0.6823771595954895, "logits/chosen": 321.28680419921875, "logits/rejected": 270.31292724609375, "logps/chosen": -1.3744524717330933, "logps/rejected": -1.5009243488311768, "loss": 2.1276, "nll_loss": 2.185715436935425, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06872262060642242, "rewards/margins": 0.006323590874671936, "rewards/rejected": -0.07504621893167496, "step": 90 }, { "epoch": 0.07545671167593328, "grad_norm": 61.852169036865234, "learning_rate": 4.75e-06, "log_odds_chosen": 0.06399938464164734, "log_odds_ratio": -0.8128548860549927, "logits/chosen": 269.13970947265625, "logits/rejected": 286.59197998046875, "logps/chosen": -1.3123642206192017, "logps/rejected": -1.3269928693771362, "loss": 1.8274, "nll_loss": 2.0496463775634766, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06561820954084396, "rewards/margins": 0.0007314354297704995, "rewards/rejected": -0.06634964793920517, "step": 95 }, { "epoch": 0.07942811755361398, "grad_norm": 54.209163665771484, "learning_rate": 5e-06, "log_odds_chosen": 0.27285704016685486, "log_odds_ratio": -0.5980533957481384, "logits/chosen": 343.4212951660156, "logits/rejected": 275.14459228515625, "logps/chosen": -0.9581457376480103, "logps/rejected": -1.1551190614700317, "loss": 1.9599, "nll_loss": 1.7298065423965454, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04790728539228439, "rewards/margins": 0.009848668240010738, "rewards/rejected": -0.057755958288908005, "step": 100 }, { "epoch": 0.08339952343129468, "grad_norm": 92.10086822509766, "learning_rate": 4.8795003647426654e-06, "log_odds_chosen": -0.664605975151062, "log_odds_ratio": -1.3894308805465698, "logits/chosen": 278.6569519042969, "logits/rejected": 385.9412841796875, "logps/chosen": -2.4318549633026123, "logps/rejected": -1.900923728942871, "loss": 2.1305, "nll_loss": 2.418598175048828, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.12159274518489838, "rewards/margins": -0.02654656209051609, "rewards/rejected": -0.09504619240760803, "step": 105 }, { "epoch": 0.08737092930897537, "grad_norm": 392.0915832519531, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 0.9900819659233093, "log_odds_ratio": -0.4791165292263031, "logits/chosen": 352.7217102050781, "logits/rejected": 326.9925537109375, "logps/chosen": -2.6203694343566895, "logps/rejected": -3.5170578956604004, "loss": 1.9908, "nll_loss": 2.289768934249878, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1310184746980667, "rewards/margins": 0.04483442381024361, "rewards/rejected": -0.17585287988185883, "step": 110 }, { "epoch": 0.09134233518665608, "grad_norm": 65.87200164794922, "learning_rate": 4.662524041201569e-06, "log_odds_chosen": 0.6037150621414185, "log_odds_ratio": -0.4995183050632477, "logits/chosen": 271.54107666015625, "logits/rejected": 379.6575012207031, "logps/chosen": -1.2679884433746338, "logps/rejected": -1.7445560693740845, "loss": 1.7958, "nll_loss": 1.523012399673462, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06339941918849945, "rewards/margins": 0.023828381672501564, "rewards/rejected": -0.08722780644893646, "step": 115 }, { "epoch": 0.09531374106433678, "grad_norm": 101.74378967285156, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 0.4342077672481537, "log_odds_ratio": -0.6743711233139038, "logits/chosen": 263.05096435546875, "logits/rejected": 325.2860412597656, "logps/chosen": -1.5464773178100586, "logps/rejected": -1.930596947669983, "loss": 2.0518, "nll_loss": 2.4592247009277344, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07732386887073517, "rewards/margins": 0.019205976277589798, "rewards/rejected": -0.09652984142303467, "step": 120 }, { "epoch": 0.09928514694201747, "grad_norm": 48.3521614074707, "learning_rate": 4.47213595499958e-06, "log_odds_chosen": -0.35852494835853577, "log_odds_ratio": -0.9780646562576294, "logits/chosen": 302.3356628417969, "logits/rejected": 319.9010009765625, "logps/chosen": -1.8093980550765991, "logps/rejected": -1.538129210472107, "loss": 1.965, "nll_loss": 2.3710861206054688, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.09046990424394608, "rewards/margins": -0.01356343924999237, "rewards/rejected": -0.0769064649939537, "step": 125 }, { "epoch": 0.10325655281969817, "grad_norm": 62.85472869873047, "learning_rate": 4.385290096535147e-06, "log_odds_chosen": -0.6488355398178101, "log_odds_ratio": -1.1188563108444214, "logits/chosen": 354.04156494140625, "logits/rejected": 294.17169189453125, "logps/chosen": -1.5017735958099365, "logps/rejected": -1.0746318101882935, "loss": 1.965, "nll_loss": 2.148465871810913, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07508867979049683, "rewards/margins": -0.021357093006372452, "rewards/rejected": -0.05373159795999527, "step": 130 }, { "epoch": 0.10722795869737888, "grad_norm": 80.7550277709961, "learning_rate": 4.303314829119352e-06, "log_odds_chosen": -0.6391368508338928, "log_odds_ratio": -1.1500171422958374, "logits/chosen": 333.481689453125, "logits/rejected": 268.5390625, "logps/chosen": -1.366288185119629, "logps/rejected": -1.029240369796753, "loss": 1.8453, "nll_loss": 2.0549352169036865, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.06831441074609756, "rewards/margins": -0.01685238815844059, "rewards/rejected": -0.05146201699972153, "step": 135 }, { "epoch": 0.11119936457505956, "grad_norm": 52.68488693237305, "learning_rate": 4.2257712736425835e-06, "log_odds_chosen": 0.44900092482566833, "log_odds_ratio": -0.6255335211753845, "logits/chosen": 285.18841552734375, "logits/rejected": 379.58984375, "logps/chosen": -1.3383129835128784, "logps/rejected": -1.6278247833251953, "loss": 1.9521, "nll_loss": 1.6224981546401978, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06691565364599228, "rewards/margins": 0.014475582167506218, "rewards/rejected": -0.08139123767614365, "step": 140 }, { "epoch": 0.11517077045274027, "grad_norm": 32.54029083251953, "learning_rate": 4.1522739926869985e-06, "log_odds_chosen": 0.2789516746997833, "log_odds_ratio": -0.6979798078536987, "logits/chosen": 308.1046142578125, "logits/rejected": 268.2256774902344, "logps/chosen": -1.107441782951355, "logps/rejected": -1.3338348865509033, "loss": 1.6567, "nll_loss": 1.4684410095214844, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.055372096598148346, "rewards/margins": 0.011319654062390327, "rewards/rejected": -0.06669174879789352, "step": 145 }, { "epoch": 0.11914217633042097, "grad_norm": 52.103843688964844, "learning_rate": 4.082482904638631e-06, "log_odds_chosen": -0.07480888068675995, "log_odds_ratio": -0.7555229663848877, "logits/chosen": 287.1299743652344, "logits/rejected": 311.5876770019531, "logps/chosen": -1.454633355140686, "logps/rejected": -1.4153658151626587, "loss": 1.7879, "nll_loss": 1.646153450012207, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07273166626691818, "rewards/margins": -0.0019633763004094362, "rewards/rejected": -0.07076828926801682, "step": 150 }, { "epoch": 0.12311358220810167, "grad_norm": 76.20840454101562, "learning_rate": 4.016096644512495e-06, "log_odds_chosen": 0.01139686070382595, "log_odds_ratio": -0.7648938894271851, "logits/chosen": 377.6351013183594, "logits/rejected": 283.956298828125, "logps/chosen": -1.144536018371582, "logps/rejected": -1.1955327987670898, "loss": 1.8622, "nll_loss": 1.9163175821304321, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05722679942846298, "rewards/margins": 0.0025498413015156984, "rewards/rejected": -0.05977664515376091, "step": 155 }, { "epoch": 0.12708498808578236, "grad_norm": 64.51053619384766, "learning_rate": 3.952847075210474e-06, "log_odds_chosen": 0.34641021490097046, "log_odds_ratio": -0.6011036038398743, "logits/chosen": 324.39093017578125, "logits/rejected": 246.60037231445312, "logps/chosen": -0.9995762705802917, "logps/rejected": -1.1426242589950562, "loss": 1.7913, "nll_loss": 1.83511483669281, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04997881501913071, "rewards/margins": 0.007152394857257605, "rewards/rejected": -0.05713121220469475, "step": 160 }, { "epoch": 0.13105639396346305, "grad_norm": 75.09971618652344, "learning_rate": 3.892494720807615e-06, "log_odds_chosen": 0.2190161645412445, "log_odds_ratio": -0.7696462273597717, "logits/chosen": 316.7698059082031, "logits/rejected": 318.6285095214844, "logps/chosen": -1.039623498916626, "logps/rejected": -1.2176158428192139, "loss": 1.9486, "nll_loss": 1.644431710243225, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05198118835687637, "rewards/margins": 0.008899608626961708, "rewards/rejected": -0.060880791395902634, "step": 165 }, { "epoch": 0.13502779984114377, "grad_norm": 42.6450309753418, "learning_rate": 3.834824944236852e-06, "log_odds_chosen": 0.8528574705123901, "log_odds_ratio": -0.38301119208335876, "logits/chosen": 305.4313049316406, "logits/rejected": 304.91485595703125, "logps/chosen": -1.0219231843948364, "logps/rejected": -1.5175138711929321, "loss": 1.5835, "nll_loss": 1.799435019493103, "rewards/accuracies": 1.0, "rewards/chosen": -0.05109615996479988, "rewards/margins": 0.024779539555311203, "rewards/rejected": -0.07587569952011108, "step": 170 }, { "epoch": 0.13899920571882446, "grad_norm": 60.28545379638672, "learning_rate": 3.7796447300922724e-06, "log_odds_chosen": -0.31903964281082153, "log_odds_ratio": -0.9109575152397156, "logits/chosen": 390.44110107421875, "logits/rejected": 319.32049560546875, "logps/chosen": -1.0434072017669678, "logps/rejected": -0.8091991543769836, "loss": 1.657, "nll_loss": 1.5240576267242432, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05217035859823227, "rewards/margins": -0.0117103960365057, "rewards/rejected": -0.04045996814966202, "step": 175 }, { "epoch": 0.14297061159650518, "grad_norm": 65.73664855957031, "learning_rate": 3.72677996249965e-06, "log_odds_chosen": 0.1744231879711151, "log_odds_ratio": -0.6777268648147583, "logits/chosen": 293.218505859375, "logits/rejected": 292.60076904296875, "logps/chosen": -1.0469298362731934, "logps/rejected": -1.1555253267288208, "loss": 1.4845, "nll_loss": 1.1594089269638062, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.052346497774124146, "rewards/margins": 0.0054297661408782005, "rewards/rejected": -0.05777626112103462, "step": 180 }, { "epoch": 0.14694201747418587, "grad_norm": 50.266204833984375, "learning_rate": 3.6760731104690393e-06, "log_odds_chosen": -0.4040209650993347, "log_odds_ratio": -0.9406334161758423, "logits/chosen": 303.67889404296875, "logits/rejected": 342.15863037109375, "logps/chosen": -1.4420201778411865, "logps/rejected": -1.1751186847686768, "loss": 1.7518, "nll_loss": 1.7355467081069946, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07210101187229156, "rewards/margins": -0.013345075771212578, "rewards/rejected": -0.05875593423843384, "step": 185 }, { "epoch": 0.15091342335186655, "grad_norm": 45.1833381652832, "learning_rate": 3.6273812505500587e-06, "log_odds_chosen": 0.25176072120666504, "log_odds_ratio": -0.5851801037788391, "logits/chosen": 251.08883666992188, "logits/rejected": 298.2301330566406, "logps/chosen": -1.0510302782058716, "logps/rejected": -1.2064368724822998, "loss": 1.6223, "nll_loss": 1.3497669696807861, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0525515154004097, "rewards/margins": 0.007770332042127848, "rewards/rejected": -0.06032184511423111, "step": 190 }, { "epoch": 0.15488482922954727, "grad_norm": 133.46376037597656, "learning_rate": 3.5805743701971648e-06, "log_odds_chosen": -0.13634520769119263, "log_odds_ratio": -0.7839670181274414, "logits/chosen": 341.7732849121094, "logits/rejected": 311.25469970703125, "logps/chosen": -1.3579070568084717, "logps/rejected": -1.2352676391601562, "loss": 1.7763, "nll_loss": 1.5284233093261719, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06789536029100418, "rewards/margins": -0.0061319745145738125, "rewards/rejected": -0.06176338344812393, "step": 195 }, { "epoch": 0.15885623510722796, "grad_norm": 212.89317321777344, "learning_rate": 3.5355339059327378e-06, "log_odds_chosen": -0.16400772333145142, "log_odds_ratio": -0.8339014053344727, "logits/chosen": 351.3518981933594, "logits/rejected": 341.30303955078125, "logps/chosen": -1.33816659450531, "logps/rejected": -1.29983651638031, "loss": 1.6261, "nll_loss": 1.852270483970642, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0669083371758461, "rewards/margins": -0.0019165098201483488, "rewards/rejected": -0.06499183177947998, "step": 200 }, { "epoch": 0.16282764098490865, "grad_norm": 70.86261749267578, "learning_rate": 3.4921514788478916e-06, "log_odds_chosen": 0.25013333559036255, "log_odds_ratio": -0.636869490146637, "logits/chosen": 347.8689270019531, "logits/rejected": 272.3687438964844, "logps/chosen": -1.3360661268234253, "logps/rejected": -1.536778450012207, "loss": 1.7544, "nll_loss": 1.8276907205581665, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06680331379175186, "rewards/margins": 0.010035613551735878, "rewards/rejected": -0.07683892548084259, "step": 205 }, { "epoch": 0.16679904686258937, "grad_norm": 34.17070770263672, "learning_rate": 3.450327796711771e-06, "log_odds_chosen": 0.3506450951099396, "log_odds_ratio": -0.6737692952156067, "logits/chosen": 300.19873046875, "logits/rejected": 388.3397521972656, "logps/chosen": -1.0396573543548584, "logps/rejected": -1.299134612083435, "loss": 1.7484, "nll_loss": 1.4944199323654175, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05198286846280098, "rewards/margins": 0.012973867356777191, "rewards/rejected": -0.06495673954486847, "step": 210 }, { "epoch": 0.17077045274027006, "grad_norm": 39.18354797363281, "learning_rate": 3.409971697352368e-06, "log_odds_chosen": 0.39639392495155334, "log_odds_ratio": -0.5673670768737793, "logits/chosen": 289.6415710449219, "logits/rejected": 342.2761535644531, "logps/chosen": -0.9784858822822571, "logps/rejected": -1.2464014291763306, "loss": 1.8046, "nll_loss": 1.8137214183807373, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.048924293369054794, "rewards/margins": 0.013395780697464943, "rewards/rejected": -0.06232007220387459, "step": 215 }, { "epoch": 0.17474185861795075, "grad_norm": 66.22753143310547, "learning_rate": 3.3709993123162106e-06, "log_odds_chosen": 0.9709181785583496, "log_odds_ratio": -0.44296368956565857, "logits/chosen": 459.4443359375, "logits/rejected": 243.2751007080078, "logps/chosen": -1.1406949758529663, "logps/rejected": -1.8125925064086914, "loss": 1.8881, "nll_loss": 2.4477667808532715, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.057034749537706375, "rewards/margins": 0.03359488397836685, "rewards/rejected": -0.09062962979078293, "step": 220 }, { "epoch": 0.17871326449563146, "grad_norm": 40.38410949707031, "learning_rate": 3.3333333333333333e-06, "log_odds_chosen": 0.5472304821014404, "log_odds_ratio": -0.5069239735603333, "logits/chosen": 337.7701721191406, "logits/rejected": 314.196044921875, "logps/chosen": -1.0212123394012451, "logps/rejected": -1.2714849710464478, "loss": 1.6271, "nll_loss": 1.9993699789047241, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05106062442064285, "rewards/margins": 0.012513632886111736, "rewards/rejected": -0.06357425451278687, "step": 225 }, { "epoch": 0.18268467037331215, "grad_norm": 36.757057189941406, "learning_rate": 3.296902366978936e-06, "log_odds_chosen": 0.027332711964845657, "log_odds_ratio": -0.6871450543403625, "logits/chosen": 314.82354736328125, "logits/rejected": 310.2427062988281, "logps/chosen": -1.3505454063415527, "logps/rejected": -1.3915187120437622, "loss": 1.565, "nll_loss": 1.6233704090118408, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06752727180719376, "rewards/margins": 0.0020486623980104923, "rewards/rejected": -0.06957593560218811, "step": 230 }, { "epoch": 0.18665607625099284, "grad_norm": 61.87923049926758, "learning_rate": 3.2616403652672114e-06, "log_odds_chosen": -0.4674092233181, "log_odds_ratio": -1.0502631664276123, "logits/chosen": 289.9769592285156, "logits/rejected": 327.74395751953125, "logps/chosen": -1.076569676399231, "logps/rejected": -0.7748829126358032, "loss": 1.7465, "nll_loss": 1.6123956441879272, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05382848531007767, "rewards/margins": -0.015084335580468178, "rewards/rejected": -0.03874415159225464, "step": 235 }, { "epoch": 0.19062748212867356, "grad_norm": 73.88203430175781, "learning_rate": 3.2274861218395142e-06, "log_odds_chosen": 0.07915325462818146, "log_odds_ratio": -0.7744172215461731, "logits/chosen": 361.3428039550781, "logits/rejected": 279.46966552734375, "logps/chosen": -1.4194855690002441, "logps/rejected": -1.5974981784820557, "loss": 1.6476, "nll_loss": 1.8600105047225952, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07097427546977997, "rewards/margins": 0.008900630287826061, "rewards/rejected": -0.0798749104142189, "step": 240 }, { "epoch": 0.19459888800635425, "grad_norm": 97.79542541503906, "learning_rate": 3.1943828249997e-06, "log_odds_chosen": 0.41313672065734863, "log_odds_ratio": -0.5386548638343811, "logits/chosen": 274.5640563964844, "logits/rejected": 378.27227783203125, "logps/chosen": -1.3742090463638306, "logps/rejected": -1.6753637790679932, "loss": 1.6432, "nll_loss": 1.562549114227295, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06871045380830765, "rewards/margins": 0.01505773700773716, "rewards/rejected": -0.08376819640398026, "step": 245 }, { "epoch": 0.19857029388403494, "grad_norm": 52.48481369018555, "learning_rate": 3.1622776601683796e-06, "log_odds_chosen": -0.16644199192523956, "log_odds_ratio": -0.8439895510673523, "logits/chosen": 360.90277099609375, "logits/rejected": 261.315185546875, "logps/chosen": -1.0284861326217651, "logps/rejected": -0.9436852335929871, "loss": 1.3865, "nll_loss": 1.3669432401657104, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.051424313336610794, "rewards/margins": -0.004240049980580807, "rewards/rejected": -0.04718426242470741, "step": 250 }, { "epoch": 0.20254169976171565, "grad_norm": 51.58882522583008, "learning_rate": 3.131121455425748e-06, "log_odds_chosen": 0.5241218209266663, "log_odds_ratio": -0.8181684613227844, "logits/chosen": 271.86822509765625, "logits/rejected": 310.2635803222656, "logps/chosen": -1.439292550086975, "logps/rejected": -2.013617515563965, "loss": 1.673, "nll_loss": 1.6913448572158813, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07196462154388428, "rewards/margins": 0.028716260567307472, "rewards/rejected": -0.100680872797966, "step": 255 }, { "epoch": 0.20651310563939634, "grad_norm": 35.36348342895508, "learning_rate": 3.1008683647302113e-06, "log_odds_chosen": 0.6499348878860474, "log_odds_ratio": -0.49271130561828613, "logits/chosen": 296.9714050292969, "logits/rejected": 382.0646057128906, "logps/chosen": -1.4200375080108643, "logps/rejected": -1.984840750694275, "loss": 1.9446, "nll_loss": 2.310304880142212, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07100187987089157, "rewards/margins": 0.02824016846716404, "rewards/rejected": -0.09924204647541046, "step": 260 }, { "epoch": 0.21048451151707703, "grad_norm": 60.83711242675781, "learning_rate": 3.0714755841697565e-06, "log_odds_chosen": 0.645357608795166, "log_odds_ratio": -0.5926202535629272, "logits/chosen": 350.14630126953125, "logits/rejected": 271.0173034667969, "logps/chosen": -1.0696051120758057, "logps/rejected": -1.5853424072265625, "loss": 1.4227, "nll_loss": 1.2928531169891357, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05348025634884834, "rewards/margins": 0.025786861777305603, "rewards/rejected": -0.07926712185144424, "step": 265 }, { "epoch": 0.21445591739475775, "grad_norm": 36.92619323730469, "learning_rate": 3.0429030972509227e-06, "log_odds_chosen": -0.053741950541734695, "log_odds_ratio": -0.8099346160888672, "logits/chosen": 294.5556945800781, "logits/rejected": 340.336669921875, "logps/chosen": -1.5328369140625, "logps/rejected": -1.4363396167755127, "loss": 1.585, "nll_loss": 1.5067962408065796, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07664184272289276, "rewards/margins": -0.0048248679377138615, "rewards/rejected": -0.07181697338819504, "step": 270 }, { "epoch": 0.21842732327243844, "grad_norm": 59.67924499511719, "learning_rate": 3.0151134457776365e-06, "log_odds_chosen": 0.39039263129234314, "log_odds_ratio": -0.5856078863143921, "logits/chosen": 359.80999755859375, "logits/rejected": 312.9378967285156, "logps/chosen": -1.3910114765167236, "logps/rejected": -1.746132254600525, "loss": 1.5416, "nll_loss": 1.6771767139434814, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06955057382583618, "rewards/margins": 0.01775604858994484, "rewards/rejected": -0.08730661869049072, "step": 275 }, { "epoch": 0.22239872915011913, "grad_norm": 89.38143920898438, "learning_rate": 2.988071523335984e-06, "log_odds_chosen": 0.49174147844314575, "log_odds_ratio": -0.6083627939224243, "logits/chosen": 258.7726135253906, "logits/rejected": 430.52197265625, "logps/chosen": -1.3561815023422241, "logps/rejected": -1.7862621545791626, "loss": 1.6064, "nll_loss": 1.7953475713729858, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0678090825676918, "rewards/margins": 0.021504027768969536, "rewards/rejected": -0.08931310474872589, "step": 280 }, { "epoch": 0.22637013502779985, "grad_norm": 43.1147575378418, "learning_rate": 2.961744388795462e-06, "log_odds_chosen": -0.42655545473098755, "log_odds_ratio": -0.9692665934562683, "logits/chosen": 313.4588317871094, "logits/rejected": 437.51580810546875, "logps/chosen": -1.7302316427230835, "logps/rejected": -1.4267915487289429, "loss": 1.8931, "nll_loss": 1.8868739604949951, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.08651158958673477, "rewards/margins": -0.015172007493674755, "rewards/rejected": -0.07133957743644714, "step": 285 }, { "epoch": 0.23034154090548054, "grad_norm": 111.950439453125, "learning_rate": 2.9361010975735177e-06, "log_odds_chosen": 0.18716028332710266, "log_odds_ratio": -0.6489423513412476, "logits/chosen": 320.8119201660156, "logits/rejected": 353.88568115234375, "logps/chosen": -1.203453779220581, "logps/rejected": -1.3500152826309204, "loss": 1.6784, "nll_loss": 1.6349010467529297, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06017268821597099, "rewards/margins": 0.007328073028475046, "rewards/rejected": -0.0675007551908493, "step": 290 }, { "epoch": 0.23431294678316125, "grad_norm": 45.6556282043457, "learning_rate": 2.9111125486979104e-06, "log_odds_chosen": -0.518650233745575, "log_odds_ratio": -1.0133109092712402, "logits/chosen": 456.8958435058594, "logits/rejected": 289.4540100097656, "logps/chosen": -1.5072423219680786, "logps/rejected": -1.13739013671875, "loss": 1.7026, "nll_loss": 1.7881231307983398, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07536212354898453, "rewards/margins": -0.01849261112511158, "rewards/rejected": -0.0568695068359375, "step": 295 }, { "epoch": 0.23828435266084194, "grad_norm": 56.93437576293945, "learning_rate": 2.8867513459481293e-06, "log_odds_chosen": -0.09250687062740326, "log_odds_ratio": -0.8143345713615417, "logits/chosen": 297.9905700683594, "logits/rejected": 320.3016052246094, "logps/chosen": -1.3163940906524658, "logps/rejected": -1.3317310810089111, "loss": 1.7171, "nll_loss": 1.6320091485977173, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06581971049308777, "rewards/margins": 0.0007668494945392013, "rewards/rejected": -0.06658656150102615, "step": 300 }, { "epoch": 0.24225575853852263, "grad_norm": 53.10919952392578, "learning_rate": 2.862991671569341e-06, "log_odds_chosen": -0.7954866886138916, "log_odds_ratio": -1.2641322612762451, "logits/chosen": 384.24554443359375, "logits/rejected": 274.28692626953125, "logps/chosen": -1.4349799156188965, "logps/rejected": -0.8974045515060425, "loss": 1.8459, "nll_loss": 1.7445728778839111, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.0717489942908287, "rewards/margins": -0.02687876857817173, "rewards/rejected": -0.04487023502588272, "step": 305 }, { "epoch": 0.24622716441620335, "grad_norm": 31.78973388671875, "learning_rate": 2.839809171235324e-06, "log_odds_chosen": 0.8655799031257629, "log_odds_ratio": -0.42556723952293396, "logits/chosen": 275.6189880371094, "logits/rejected": 267.2253723144531, "logps/chosen": -1.1174626350402832, "logps/rejected": -1.6986801624298096, "loss": 1.8585, "nll_loss": 2.0873680114746094, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05587313324213028, "rewards/margins": 0.02906087599694729, "rewards/rejected": -0.08493401110172272, "step": 310 }, { "epoch": 0.25019857029388404, "grad_norm": 54.311798095703125, "learning_rate": 2.817180849095055e-06, "log_odds_chosen": 0.48402324318885803, "log_odds_ratio": -0.5392564535140991, "logits/chosen": 273.8492431640625, "logits/rejected": 303.4148864746094, "logps/chosen": -0.9568207859992981, "logps/rejected": -1.3038668632507324, "loss": 1.5511, "nll_loss": 1.534110188484192, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04784103482961655, "rewards/margins": 0.017352305352687836, "rewards/rejected": -0.06519334018230438, "step": 315 }, { "epoch": 0.2541699761715647, "grad_norm": 46.84675979614258, "learning_rate": 2.7950849718747376e-06, "log_odds_chosen": 1.0449055433273315, "log_odds_ratio": -0.3449096083641052, "logits/chosen": 313.7346496582031, "logits/rejected": 355.859130859375, "logps/chosen": -0.9354375004768372, "logps/rejected": -1.6399080753326416, "loss": 1.445, "nll_loss": 1.3939152956008911, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04677187651395798, "rewards/margins": 0.03522353246808052, "rewards/rejected": -0.0819954052567482, "step": 320 }, { "epoch": 0.2581413820492454, "grad_norm": 57.15336990356445, "learning_rate": 2.773500981126146e-06, "log_odds_chosen": 0.07060068845748901, "log_odds_ratio": -0.7055736184120178, "logits/chosen": 301.77703857421875, "logits/rejected": 314.42828369140625, "logps/chosen": -0.818830132484436, "logps/rejected": -0.8071014285087585, "loss": 1.5234, "nll_loss": 1.3474265336990356, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0409415028989315, "rewards/margins": -0.0005864340928383172, "rewards/rejected": -0.04035507142543793, "step": 325 }, { "epoch": 0.2621127879269261, "grad_norm": 42.167640686035156, "learning_rate": 2.752409412815902e-06, "log_odds_chosen": 0.6327840685844421, "log_odds_ratio": -0.47118449211120605, "logits/chosen": 338.5755615234375, "logits/rejected": 409.26776123046875, "logps/chosen": -1.0573819875717163, "logps/rejected": -1.443060278892517, "loss": 1.647, "nll_loss": 1.4856857061386108, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.052869103848934174, "rewards/margins": 0.01928391121327877, "rewards/rejected": -0.0721530169248581, "step": 330 }, { "epoch": 0.26608419380460685, "grad_norm": 40.28363800048828, "learning_rate": 2.7317918235407652e-06, "log_odds_chosen": -0.2637309432029724, "log_odds_ratio": -0.864203929901123, "logits/chosen": 308.519287109375, "logits/rejected": 366.68194580078125, "logps/chosen": -1.1223952770233154, "logps/rejected": -0.9605833888053894, "loss": 1.5782, "nll_loss": 1.3887097835540771, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05611976981163025, "rewards/margins": -0.008090597577393055, "rewards/rejected": -0.04802917316555977, "step": 335 }, { "epoch": 0.27005559968228754, "grad_norm": 34.92796325683594, "learning_rate": 2.711630722733202e-06, "log_odds_chosen": 0.3361690640449524, "log_odds_ratio": -0.6272503137588501, "logits/chosen": 303.1048583984375, "logits/rejected": 358.2105712890625, "logps/chosen": -0.9646172523498535, "logps/rejected": -1.2090504169464111, "loss": 1.6717, "nll_loss": 1.3398942947387695, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0482308603823185, "rewards/margins": 0.012221668846905231, "rewards/rejected": -0.060452528297901154, "step": 340 }, { "epoch": 0.27402700555996823, "grad_norm": 45.980873107910156, "learning_rate": 2.691909510290828e-06, "log_odds_chosen": 0.7104015350341797, "log_odds_ratio": -0.49001604318618774, "logits/chosen": 339.669677734375, "logits/rejected": 297.57012939453125, "logps/chosen": -1.9824626445770264, "logps/rejected": -2.6118223667144775, "loss": 1.7175, "nll_loss": 2.170316219329834, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09912314265966415, "rewards/margins": 0.0314679816365242, "rewards/rejected": -0.13059112429618835, "step": 345 }, { "epoch": 0.2779984114376489, "grad_norm": 36.35749816894531, "learning_rate": 2.6726124191242444e-06, "log_odds_chosen": 0.14296868443489075, "log_odds_ratio": -0.6650969386100769, "logits/chosen": 390.28662109375, "logits/rejected": 319.8929138183594, "logps/chosen": -1.1751973628997803, "logps/rejected": -1.343955636024475, "loss": 1.6748, "nll_loss": 1.6485261917114258, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.058759868144989014, "rewards/margins": 0.008437911979854107, "rewards/rejected": -0.067197784781456, "step": 350 }, { "epoch": 0.2819698173153296, "grad_norm": 46.82810592651367, "learning_rate": 2.6537244621713765e-06, "log_odds_chosen": -0.5206547975540161, "log_odds_ratio": -1.062336802482605, "logits/chosen": 371.12255859375, "logits/rejected": 252.90744018554688, "logps/chosen": -1.5387684106826782, "logps/rejected": -1.1726481914520264, "loss": 1.4011, "nll_loss": 1.7245814800262451, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07693842798471451, "rewards/margins": -0.01830601692199707, "rewards/rejected": -0.05863240361213684, "step": 355 }, { "epoch": 0.28594122319301035, "grad_norm": 84.11432647705078, "learning_rate": 2.6352313834736496e-06, "log_odds_chosen": -0.12257371097803116, "log_odds_ratio": -0.7665070295333862, "logits/chosen": 321.9914245605469, "logits/rejected": 313.55279541015625, "logps/chosen": -1.13595449924469, "logps/rejected": -1.0432698726654053, "loss": 1.5142, "nll_loss": 1.7525981664657593, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05679772421717644, "rewards/margins": -0.0046342299319803715, "rewards/rejected": -0.052163492888212204, "step": 360 }, { "epoch": 0.28991262907069104, "grad_norm": 91.57356262207031, "learning_rate": 2.6171196129510684e-06, "log_odds_chosen": -0.3752515912055969, "log_odds_ratio": -0.9325364232063293, "logits/chosen": 328.098876953125, "logits/rejected": 284.2572326660156, "logps/chosen": -1.2117679119110107, "logps/rejected": -0.9372884631156921, "loss": 1.5146, "nll_loss": 1.3987443447113037, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.060588397085666656, "rewards/margins": -0.01372397132217884, "rewards/rejected": -0.046864427626132965, "step": 365 }, { "epoch": 0.29388403494837173, "grad_norm": 56.33279037475586, "learning_rate": 2.599376224550182e-06, "log_odds_chosen": -0.8803679347038269, "log_odds_ratio": -1.2740561962127686, "logits/chosen": 370.04913330078125, "logits/rejected": 296.248291015625, "logps/chosen": -1.2923654317855835, "logps/rejected": -0.7359521985054016, "loss": 1.6098, "nll_loss": 1.7468293905258179, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.06461827456951141, "rewards/margins": -0.027820657938718796, "rewards/rejected": -0.03679760918021202, "step": 370 }, { "epoch": 0.2978554408260524, "grad_norm": 99.3619155883789, "learning_rate": 2.5819888974716113e-06, "log_odds_chosen": 0.06336264312267303, "log_odds_ratio": -0.7155358791351318, "logits/chosen": 357.4932861328125, "logits/rejected": 261.9980163574219, "logps/chosen": -1.088346242904663, "logps/rejected": -1.1663461923599243, "loss": 1.6697, "nll_loss": 1.7833229303359985, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.054417312145233154, "rewards/margins": 0.003899999661371112, "rewards/rejected": -0.058317311108112335, "step": 375 }, { "epoch": 0.3018268467037331, "grad_norm": 39.31928253173828, "learning_rate": 2.564945880212886e-06, "log_odds_chosen": 0.30186375975608826, "log_odds_ratio": -0.5939323306083679, "logits/chosen": 311.0823974609375, "logits/rejected": 405.12408447265625, "logps/chosen": -1.402393102645874, "logps/rejected": -1.6425796747207642, "loss": 1.6037, "nll_loss": 1.767801284790039, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07011966407299042, "rewards/margins": 0.012009315192699432, "rewards/rejected": -0.08212897181510925, "step": 380 }, { "epoch": 0.3057982525814138, "grad_norm": 50.13980484008789, "learning_rate": 2.5482359571881276e-06, "log_odds_chosen": -0.5006630420684814, "log_odds_ratio": -0.9872447848320007, "logits/chosen": 373.640380859375, "logits/rejected": 276.2611999511719, "logps/chosen": -1.317260503768921, "logps/rejected": -0.9704214334487915, "loss": 1.6575, "nll_loss": 1.706688642501831, "rewards/accuracies": 0.0, "rewards/chosen": -0.06586302816867828, "rewards/margins": -0.017341960221529007, "rewards/rejected": -0.048521075397729874, "step": 385 }, { "epoch": 0.30976965845909454, "grad_norm": 38.53764724731445, "learning_rate": 2.5318484177091667e-06, "log_odds_chosen": -0.21573182940483093, "log_odds_ratio": -1.0288946628570557, "logits/chosen": 347.60894775390625, "logits/rejected": 319.7806396484375, "logps/chosen": -1.2338215112686157, "logps/rejected": -1.0342485904693604, "loss": 1.6194, "nll_loss": 1.6776854991912842, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.061691075563430786, "rewards/margins": -0.009978653863072395, "rewards/rejected": -0.05171242356300354, "step": 390 }, { "epoch": 0.31374106433677523, "grad_norm": 38.11606979370117, "learning_rate": 2.515773027133138e-06, "log_odds_chosen": 0.3980295658111572, "log_odds_ratio": -0.5573837161064148, "logits/chosen": 329.6410827636719, "logits/rejected": 334.8705139160156, "logps/chosen": -1.0556812286376953, "logps/rejected": -1.3309425115585327, "loss": 1.5489, "nll_loss": 1.286458134651184, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.052784062922000885, "rewards/margins": 0.0137630645185709, "rewards/rejected": -0.06654712557792664, "step": 395 }, { "epoch": 0.3177124702144559, "grad_norm": 34.63991928100586, "learning_rate": 2.5e-06, "log_odds_chosen": 0.29624634981155396, "log_odds_ratio": -0.585162878036499, "logits/chosen": 335.17999267578125, "logits/rejected": 316.7696228027344, "logps/chosen": -0.9373574256896973, "logps/rejected": -1.0805803537368774, "loss": 1.4058, "nll_loss": 1.348035216331482, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04686787351965904, "rewards/margins": 0.007161143235862255, "rewards/rejected": -0.054029010236263275, "step": 400 }, { "epoch": 0.3216838760921366, "grad_norm": 84.49266052246094, "learning_rate": 2.484519974999767e-06, "log_odds_chosen": -0.5001649856567383, "log_odds_ratio": -1.0039180517196655, "logits/chosen": 334.81451416015625, "logits/rejected": 353.23443603515625, "logps/chosen": -1.1627585887908936, "logps/rejected": -0.8853675723075867, "loss": 1.3853, "nll_loss": 1.3510539531707764, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.058137934654951096, "rewards/margins": -0.013869555667042732, "rewards/rejected": -0.044268377125263214, "step": 405 }, { "epoch": 0.3256552819698173, "grad_norm": 29.99695587158203, "learning_rate": 2.4693239916239746e-06, "log_odds_chosen": 0.4416959881782532, "log_odds_ratio": -0.5149996876716614, "logits/chosen": 309.7474060058594, "logits/rejected": 301.06805419921875, "logps/chosen": -0.9938238263130188, "logps/rejected": -1.2996299266815186, "loss": 1.4475, "nll_loss": 1.214712142944336, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04969118908047676, "rewards/margins": 0.015290307812392712, "rewards/rejected": -0.06498149782419205, "step": 410 }, { "epoch": 0.329626687847498, "grad_norm": 102.2310791015625, "learning_rate": 2.4544034683690802e-06, "log_odds_chosen": -0.5294458270072937, "log_odds_ratio": -1.0700939893722534, "logits/chosen": 465.16192626953125, "logits/rejected": 265.0263366699219, "logps/chosen": -1.591260313987732, "logps/rejected": -1.2482383251190186, "loss": 1.4863, "nll_loss": 1.8571536540985107, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07956302165985107, "rewards/margins": -0.01715109869837761, "rewards/rejected": -0.06241191551089287, "step": 415 }, { "epoch": 0.33359809372517873, "grad_norm": 46.43037414550781, "learning_rate": 2.4397501823713327e-06, "log_odds_chosen": -0.16312381625175476, "log_odds_ratio": -0.9062603712081909, "logits/chosen": 308.7474060058594, "logits/rejected": 377.7091369628906, "logps/chosen": -1.2873585224151611, "logps/rejected": -1.244364857673645, "loss": 1.9129, "nll_loss": 1.7224760055541992, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06436793506145477, "rewards/margins": -0.0021496848203241825, "rewards/rejected": -0.06221824139356613, "step": 420 }, { "epoch": 0.3375694996028594, "grad_norm": 60.4110107421875, "learning_rate": 2.4253562503633297e-06, "log_odds_chosen": 0.6081835627555847, "log_odds_ratio": -0.4843834340572357, "logits/chosen": 336.476318359375, "logits/rejected": 328.60626220703125, "logps/chosen": -0.955605685710907, "logps/rejected": -1.360215425491333, "loss": 1.4305, "nll_loss": 1.0399436950683594, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04778028652071953, "rewards/margins": 0.02023048885166645, "rewards/rejected": -0.06801077723503113, "step": 425 }, { "epoch": 0.3415409054805401, "grad_norm": 45.45296096801758, "learning_rate": 2.411214110852061e-06, "log_odds_chosen": 0.4710689187049866, "log_odds_ratio": -0.5245848894119263, "logits/chosen": 364.83978271484375, "logits/rejected": 349.26629638671875, "logps/chosen": -1.245513677597046, "logps/rejected": -1.6239559650421143, "loss": 1.5163, "nll_loss": 1.7595207691192627, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06227568909525871, "rewards/margins": 0.01892211101949215, "rewards/rejected": -0.08119779825210571, "step": 430 }, { "epoch": 0.3455123113582208, "grad_norm": 38.822410583496094, "learning_rate": 2.3973165074269213e-06, "log_odds_chosen": 0.3714559078216553, "log_odds_ratio": -0.6340258717536926, "logits/chosen": 350.9668273925781, "logits/rejected": 270.967529296875, "logps/chosen": -1.0809491872787476, "logps/rejected": -1.2758018970489502, "loss": 1.5124, "nll_loss": 1.431868553161621, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05404745787382126, "rewards/margins": 0.009742636233568192, "rewards/rejected": -0.06379009783267975, "step": 435 }, { "epoch": 0.3494837172359015, "grad_norm": 48.35163879394531, "learning_rate": 2.3836564731139807e-06, "log_odds_chosen": 0.3045877516269684, "log_odds_ratio": -0.5679005980491638, "logits/chosen": 387.47454833984375, "logits/rejected": 291.999755859375, "logps/chosen": -1.0117883682250977, "logps/rejected": -1.1841952800750732, "loss": 1.4928, "nll_loss": 1.4351252317428589, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.050589419901371, "rewards/margins": 0.00862034410238266, "rewards/rejected": -0.05920976400375366, "step": 440 }, { "epoch": 0.3534551231135822, "grad_norm": 52.99742889404297, "learning_rate": 2.3702273156998867e-06, "log_odds_chosen": 0.6814774870872498, "log_odds_ratio": -0.45101094245910645, "logits/chosen": 279.8681945800781, "logits/rejected": 321.81268310546875, "logps/chosen": -1.1514198780059814, "logps/rejected": -1.5826447010040283, "loss": 1.5398, "nll_loss": 1.701086401939392, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05757099390029907, "rewards/margins": 0.021561237052083015, "rewards/rejected": -0.07913222908973694, "step": 445 }, { "epoch": 0.3574265289912629, "grad_norm": 38.22096633911133, "learning_rate": 2.357022603955159e-06, "log_odds_chosen": 0.3011036515235901, "log_odds_ratio": -0.6046400666236877, "logits/chosen": 295.15643310546875, "logits/rejected": 363.99285888671875, "logps/chosen": -1.0281888246536255, "logps/rejected": -1.2145917415618896, "loss": 1.5697, "nll_loss": 1.3977224826812744, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.051409441977739334, "rewards/margins": 0.009320144541561604, "rewards/rejected": -0.060729581862688065, "step": 450 }, { "epoch": 0.3613979348689436, "grad_norm": 59.05949783325195, "learning_rate": 2.3440361546924774e-06, "log_odds_chosen": 0.2038741558790207, "log_odds_ratio": -0.6207619309425354, "logits/chosen": 275.1322326660156, "logits/rejected": 327.7266540527344, "logps/chosen": -0.8248960375785828, "logps/rejected": -0.9636802673339844, "loss": 1.7361, "nll_loss": 1.472722053527832, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04124480113387108, "rewards/margins": 0.006939212791621685, "rewards/rejected": -0.04818401485681534, "step": 455 }, { "epoch": 0.3653693407466243, "grad_norm": 75.4602279663086, "learning_rate": 2.3312620206007847e-06, "log_odds_chosen": 0.14844343066215515, "log_odds_ratio": -0.6544772386550903, "logits/chosen": 300.9977111816406, "logits/rejected": 315.71978759765625, "logps/chosen": -1.323999285697937, "logps/rejected": -1.4490077495574951, "loss": 1.4689, "nll_loss": 1.5351868867874146, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06619997322559357, "rewards/margins": 0.006250420119613409, "rewards/rejected": -0.07245039194822311, "step": 460 }, { "epoch": 0.369340746624305, "grad_norm": 30.48955535888672, "learning_rate": 2.3186944788008413e-06, "log_odds_chosen": 0.22145530581474304, "log_odds_ratio": -0.6970380544662476, "logits/chosen": 289.8216552734375, "logits/rejected": 381.4501953125, "logps/chosen": -1.2244035005569458, "logps/rejected": -1.314239501953125, "loss": 1.5863, "nll_loss": 1.4692577123641968, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06122017651796341, "rewards/margins": 0.0044917999766767025, "rewards/rejected": -0.06571197509765625, "step": 465 }, { "epoch": 0.3733121525019857, "grad_norm": 53.512874603271484, "learning_rate": 2.3063280200722128e-06, "log_odds_chosen": -0.14795434474945068, "log_odds_ratio": -0.841509997844696, "logits/chosen": 259.2519226074219, "logits/rejected": 400.40814208984375, "logps/chosen": -0.9314160346984863, "logps/rejected": -0.9140155911445618, "loss": 1.4001, "nll_loss": 1.1743015050888062, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.046570807695388794, "rewards/margins": -0.0008700292673893273, "rewards/rejected": -0.04570077732205391, "step": 470 }, { "epoch": 0.37728355837966643, "grad_norm": 41.76538848876953, "learning_rate": 2.2941573387056174e-06, "log_odds_chosen": 0.1297486275434494, "log_odds_ratio": -0.7071259617805481, "logits/chosen": 282.8462829589844, "logits/rejected": 355.6139831542969, "logps/chosen": -1.218668818473816, "logps/rejected": -1.4113277196884155, "loss": 1.4564, "nll_loss": 1.3523343801498413, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06093344837427139, "rewards/margins": 0.009632943198084831, "rewards/rejected": -0.07056639343500137, "step": 475 }, { "epoch": 0.3812549642573471, "grad_norm": 39.9034538269043, "learning_rate": 2.2821773229381924e-06, "log_odds_chosen": 0.01033252477645874, "log_odds_ratio": -0.7517456412315369, "logits/chosen": 377.63323974609375, "logits/rejected": 321.8352966308594, "logps/chosen": -1.1192216873168945, "logps/rejected": -1.179273009300232, "loss": 1.5432, "nll_loss": 1.4109880924224854, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05596108362078667, "rewards/margins": 0.0030025753658264875, "rewards/rejected": -0.058963656425476074, "step": 480 }, { "epoch": 0.3852263701350278, "grad_norm": 78.85708618164062, "learning_rate": 2.270383045932499e-06, "log_odds_chosen": -0.009592628106474876, "log_odds_ratio": -0.8770645260810852, "logits/chosen": 314.0664978027344, "logits/rejected": 293.30218505859375, "logps/chosen": -1.006894826889038, "logps/rejected": -0.8256500363349915, "loss": 1.479, "nll_loss": 1.5845009088516235, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.05034474655985832, "rewards/margins": -0.009062247350811958, "rewards/rejected": -0.041282497346401215, "step": 485 }, { "epoch": 0.3891977760127085, "grad_norm": 39.800479888916016, "learning_rate": 2.2587697572631284e-06, "log_odds_chosen": -0.4220407009124756, "log_odds_ratio": -0.9446707963943481, "logits/chosen": 381.3775329589844, "logits/rejected": 301.7720947265625, "logps/chosen": -1.1097631454467773, "logps/rejected": -0.8344427943229675, "loss": 1.5091, "nll_loss": 1.4189200401306152, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.05548815801739693, "rewards/margins": -0.013766017742455006, "rewards/rejected": -0.041722141206264496, "step": 490 }, { "epoch": 0.3931691818903892, "grad_norm": 43.88238525390625, "learning_rate": 2.2473328748774737e-06, "log_odds_chosen": 1.1393579244613647, "log_odds_ratio": -0.5236693620681763, "logits/chosen": 410.23822021484375, "logits/rejected": 316.77801513671875, "logps/chosen": -0.9757580757141113, "logps/rejected": -1.8936541080474854, "loss": 1.3702, "nll_loss": 1.2193056344985962, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.048787906765937805, "rewards/margins": 0.0458948090672493, "rewards/rejected": -0.0946827083826065, "step": 495 }, { "epoch": 0.3971405877680699, "grad_norm": 68.22760772705078, "learning_rate": 2.23606797749979e-06, "log_odds_chosen": 0.1606062352657318, "log_odds_ratio": -0.6387936472892761, "logits/chosen": 367.50335693359375, "logits/rejected": 333.0289001464844, "logps/chosen": -1.0744435787200928, "logps/rejected": -1.2043496370315552, "loss": 1.6871, "nll_loss": 2.0462329387664795, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05372218042612076, "rewards/margins": 0.006495301611721516, "rewards/rejected": -0.0602174811065197, "step": 500 }, { "epoch": 0.4011119936457506, "grad_norm": 40.497886657714844, "learning_rate": 2.224970797449924e-06, "log_odds_chosen": -0.4286605417728424, "log_odds_ratio": -0.9496608972549438, "logits/chosen": 351.97662353515625, "logits/rejected": 314.7813415527344, "logps/chosen": -1.0459034442901611, "logps/rejected": -0.7734488248825073, "loss": 1.3743, "nll_loss": 1.2439095973968506, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.052295178174972534, "rewards/margins": -0.01362273283302784, "rewards/rejected": -0.038672447204589844, "step": 505 }, { "epoch": 0.4050833995234313, "grad_norm": 30.911205291748047, "learning_rate": 2.2140372138502386e-06, "log_odds_chosen": -0.44240862131118774, "log_odds_ratio": -1.003506064414978, "logits/chosen": 344.8081359863281, "logits/rejected": 322.8605651855469, "logps/chosen": -0.9024990797042847, "logps/rejected": -0.632462739944458, "loss": 1.67, "nll_loss": 1.6219089031219482, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.04512495547533035, "rewards/margins": -0.013501817360520363, "rewards/rejected": -0.03162313997745514, "step": 510 }, { "epoch": 0.409054805401112, "grad_norm": 68.99786376953125, "learning_rate": 2.203263246196159e-06, "log_odds_chosen": 0.5419043898582458, "log_odds_ratio": -0.5282405614852905, "logits/chosen": 304.9704895019531, "logits/rejected": 331.59344482421875, "logps/chosen": -1.0522342920303345, "logps/rejected": -1.4638428688049316, "loss": 1.5493, "nll_loss": 1.3706550598144531, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05261171981692314, "rewards/margins": 0.02058042585849762, "rewards/rejected": -0.07319213449954987, "step": 515 }, { "epoch": 0.4130262112787927, "grad_norm": 88.52311706542969, "learning_rate": 2.1926450482675734e-06, "log_odds_chosen": 0.0930122509598732, "log_odds_ratio": -0.8259990811347961, "logits/chosen": 312.1341247558594, "logits/rejected": 277.54803466796875, "logps/chosen": -1.3634002208709717, "logps/rejected": -1.5216593742370605, "loss": 1.6321, "nll_loss": 1.6297897100448608, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06817001104354858, "rewards/margins": 0.007912958040833473, "rewards/rejected": -0.07608296722173691, "step": 520 }, { "epoch": 0.4169976171564734, "grad_norm": 45.25846862792969, "learning_rate": 2.182178902359924e-06, "log_odds_chosen": 0.5764316320419312, "log_odds_ratio": -0.494175523519516, "logits/chosen": 357.0990295410156, "logits/rejected": 322.9207458496094, "logps/chosen": -1.0251896381378174, "logps/rejected": -1.414226770401001, "loss": 1.5803, "nll_loss": 1.5963222980499268, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05125947669148445, "rewards/margins": 0.019451860338449478, "rewards/rejected": -0.07071133702993393, "step": 525 }, { "epoch": 0.42096902303415407, "grad_norm": 36.488765716552734, "learning_rate": 2.1718612138153473e-06, "log_odds_chosen": 0.36014705896377563, "log_odds_ratio": -0.6301948428153992, "logits/chosen": 304.56103515625, "logits/rejected": 273.9854736328125, "logps/chosen": -0.930140495300293, "logps/rejected": -1.235038161277771, "loss": 1.3532, "nll_loss": 1.213836669921875, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04650702700018883, "rewards/margins": 0.01524488627910614, "rewards/rejected": -0.06175190955400467, "step": 530 }, { "epoch": 0.4249404289118348, "grad_norm": 84.07301330566406, "learning_rate": 2.161688505835585e-06, "log_odds_chosen": 0.10728853940963745, "log_odds_ratio": -0.7639841437339783, "logits/chosen": 312.0140686035156, "logits/rejected": 303.831787109375, "logps/chosen": -0.9278820157051086, "logps/rejected": -0.8287888765335083, "loss": 1.5005, "nll_loss": 1.5337541103363037, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04639409855008125, "rewards/margins": -0.0049546584486961365, "rewards/rejected": -0.041439443826675415, "step": 535 }, { "epoch": 0.4289118347895155, "grad_norm": 39.88277816772461, "learning_rate": 2.151657414559676e-06, "log_odds_chosen": -0.4115291237831116, "log_odds_ratio": -0.9856241345405579, "logits/chosen": 274.3957214355469, "logits/rejected": 427.97650146484375, "logps/chosen": -1.1595760583877563, "logps/rejected": -0.8591532707214355, "loss": 1.4338, "nll_loss": 1.3843562602996826, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0579788014292717, "rewards/margins": -0.015021136030554771, "rewards/rejected": -0.04295766353607178, "step": 540 }, { "epoch": 0.4328832406671962, "grad_norm": 71.12447357177734, "learning_rate": 2.1417646843905967e-06, "log_odds_chosen": 0.19147726893424988, "log_odds_ratio": -0.6580365896224976, "logits/chosen": 331.36236572265625, "logits/rejected": 365.7850036621094, "logps/chosen": -1.0934817790985107, "logps/rejected": -1.20845627784729, "loss": 1.497, "nll_loss": 1.2108908891677856, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05467408895492554, "rewards/margins": 0.005748722702264786, "rewards/rejected": -0.06042281538248062, "step": 545 }, { "epoch": 0.4368546465448769, "grad_norm": 66.69554901123047, "learning_rate": 2.132007163556104e-06, "log_odds_chosen": 0.5324321389198303, "log_odds_ratio": -0.5010102391242981, "logits/chosen": 332.88983154296875, "logits/rejected": 302.9046630859375, "logps/chosen": -0.9199882745742798, "logps/rejected": -1.2687950134277344, "loss": 1.5434, "nll_loss": 1.1497266292572021, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04599941521883011, "rewards/margins": 0.01744033768773079, "rewards/rejected": -0.0634397491812706, "step": 550 }, { "epoch": 0.44082605242255757, "grad_norm": 84.53522491455078, "learning_rate": 2.122381799890045e-06, "log_odds_chosen": 0.588485836982727, "log_odds_ratio": -0.5170946717262268, "logits/chosen": 350.81085205078125, "logits/rejected": 277.13232421875, "logps/chosen": -1.1174993515014648, "logps/rejected": -1.5437860488891602, "loss": 1.7752, "nll_loss": 1.7890150547027588, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05587497353553772, "rewards/margins": 0.021314334124326706, "rewards/rejected": -0.07718930393457413, "step": 555 }, { "epoch": 0.44479745830023826, "grad_norm": 51.40147018432617, "learning_rate": 2.1128856368212917e-06, "log_odds_chosen": -1.033674955368042, "log_odds_ratio": -1.4361447095870972, "logits/chosen": 247.4395751953125, "logits/rejected": 423.43707275390625, "logps/chosen": -1.5958759784698486, "logps/rejected": -0.9105483293533325, "loss": 1.7178, "nll_loss": 1.9259655475616455, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07979379594326019, "rewards/margins": -0.034266382455825806, "rewards/rejected": -0.045527417212724686, "step": 560 }, { "epoch": 0.448768864177919, "grad_norm": 72.64619445800781, "learning_rate": 2.1035158095583564e-06, "log_odds_chosen": 0.18388502299785614, "log_odds_ratio": -0.6336166262626648, "logits/chosen": 335.2535705566406, "logits/rejected": 319.001953125, "logps/chosen": -0.9879266619682312, "logps/rejected": -1.0735465288162231, "loss": 1.4765, "nll_loss": 1.7013626098632812, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0493963286280632, "rewards/margins": 0.00428099324926734, "rewards/rejected": -0.05367732793092728, "step": 565 }, { "epoch": 0.4527402700555997, "grad_norm": 74.86976623535156, "learning_rate": 2.0942695414584777e-06, "log_odds_chosen": 0.4404204487800598, "log_odds_ratio": -0.6092650890350342, "logits/chosen": 307.1649169921875, "logits/rejected": 326.94189453125, "logps/chosen": -0.9814633131027222, "logps/rejected": -1.2262656688690186, "loss": 1.5794, "nll_loss": 1.2251192331314087, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.049073170870542526, "rewards/margins": 0.012240114621818066, "rewards/rejected": -0.06131328269839287, "step": 570 }, { "epoch": 0.4567116759332804, "grad_norm": 53.87495422363281, "learning_rate": 2.085144140570748e-06, "log_odds_chosen": 0.6467480063438416, "log_odds_ratio": -0.46934765577316284, "logits/chosen": 328.2501525878906, "logits/rejected": 424.5448303222656, "logps/chosen": -0.916412353515625, "logps/rejected": -1.2970696687698364, "loss": 1.3723, "nll_loss": 1.3209168910980225, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04582061618566513, "rewards/margins": 0.019032862037420273, "rewards/rejected": -0.0648534744977951, "step": 575 }, { "epoch": 0.46068308181096107, "grad_norm": 74.30099487304688, "learning_rate": 2.0761369963434992e-06, "log_odds_chosen": -0.007485628128051758, "log_odds_ratio": -0.7629297971725464, "logits/chosen": 338.58172607421875, "logits/rejected": 277.81951904296875, "logps/chosen": -1.2728912830352783, "logps/rejected": -1.2861583232879639, "loss": 1.5917, "nll_loss": 1.7535676956176758, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06364456564188004, "rewards/margins": 0.000663341605104506, "rewards/rejected": -0.06430791318416595, "step": 580 }, { "epoch": 0.46465448768864176, "grad_norm": 40.75542449951172, "learning_rate": 2.067245576486808e-06, "log_odds_chosen": -0.4641965925693512, "log_odds_ratio": -0.9669200778007507, "logits/chosen": 329.0755310058594, "logits/rejected": 246.0184326171875, "logps/chosen": -1.3119654655456543, "logps/rejected": -0.9784905314445496, "loss": 1.663, "nll_loss": 2.1507182121276855, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.0655982717871666, "rewards/margins": -0.016673749312758446, "rewards/rejected": -0.0489245280623436, "step": 585 }, { "epoch": 0.4686258935663225, "grad_norm": 42.86177444458008, "learning_rate": 2.058467423981546e-06, "log_odds_chosen": -0.478046715259552, "log_odds_ratio": -0.9956863522529602, "logits/chosen": 311.27435302734375, "logits/rejected": 312.9971008300781, "logps/chosen": -1.4736350774765015, "logps/rejected": -1.113892912864685, "loss": 1.6741, "nll_loss": 1.6202127933502197, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07368175685405731, "rewards/margins": -0.01798710599541664, "rewards/rejected": -0.05569465085864067, "step": 590 }, { "epoch": 0.4725972994440032, "grad_norm": 42.01351547241211, "learning_rate": 2.0498001542269694e-06, "log_odds_chosen": 0.5545082688331604, "log_odds_ratio": -0.4980427324771881, "logits/chosen": 314.21270751953125, "logits/rejected": 294.3743591308594, "logps/chosen": -1.0309841632843018, "logps/rejected": -1.4416837692260742, "loss": 1.5197, "nll_loss": 1.3417736291885376, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05154920741915703, "rewards/margins": 0.020534982904791832, "rewards/rejected": -0.07208418846130371, "step": 595 }, { "epoch": 0.4765687053216839, "grad_norm": 43.628326416015625, "learning_rate": 2.0412414523193154e-06, "log_odds_chosen": -0.10555162280797958, "log_odds_ratio": -0.8111637234687805, "logits/chosen": 362.4828186035156, "logits/rejected": 278.252685546875, "logps/chosen": -1.1187463998794556, "logps/rejected": -0.9830799102783203, "loss": 1.723, "nll_loss": 1.9726502895355225, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.05593731999397278, "rewards/margins": -0.006783320102840662, "rewards/rejected": -0.04915400221943855, "step": 600 }, { "epoch": 0.4805401111993646, "grad_norm": 63.654212951660156, "learning_rate": 2.0327890704543546e-06, "log_odds_chosen": 0.5946463942527771, "log_odds_ratio": -0.450082391500473, "logits/chosen": 309.9012756347656, "logits/rejected": 330.7868957519531, "logps/chosen": -1.0596506595611572, "logps/rejected": -1.5116208791732788, "loss": 1.4556, "nll_loss": 1.3402467966079712, "rewards/accuracies": 1.0, "rewards/chosen": -0.052982527762651443, "rewards/margins": 0.022598514333367348, "rewards/rejected": -0.07558103650808334, "step": 605 }, { "epoch": 0.48451151707704526, "grad_norm": 43.90888214111328, "learning_rate": 2.0244408254472904e-06, "log_odds_chosen": 0.7587811946868896, "log_odds_ratio": -0.43494945764541626, "logits/chosen": 397.4610290527344, "logits/rejected": 294.28460693359375, "logps/chosen": -0.9713308215141296, "logps/rejected": -1.5056118965148926, "loss": 1.6174, "nll_loss": 1.8379215002059937, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0485665425658226, "rewards/margins": 0.026714056730270386, "rewards/rejected": -0.07528059184551239, "step": 610 }, { "epoch": 0.48848292295472595, "grad_norm": 65.09957122802734, "learning_rate": 2.0161945963637796e-06, "log_odds_chosen": 0.7116761803627014, "log_odds_ratio": -0.41528528928756714, "logits/chosen": 337.7419738769531, "logits/rejected": 375.7625427246094, "logps/chosen": -0.8582298159599304, "logps/rejected": -1.3130686283111572, "loss": 1.5795, "nll_loss": 1.6238418817520142, "rewards/accuracies": 1.0, "rewards/chosen": -0.04291149228811264, "rewards/margins": 0.022741934284567833, "rewards/rejected": -0.06565342843532562, "step": 615 }, { "epoch": 0.4924543288324067, "grad_norm": 57.64512252807617, "learning_rate": 2.0080483222562476e-06, "log_odds_chosen": 0.7363397479057312, "log_odds_ratio": -0.4678316116333008, "logits/chosen": 260.24725341796875, "logits/rejected": 379.7354431152344, "logps/chosen": -1.373089075088501, "logps/rejected": -1.958742380142212, "loss": 1.4653, "nll_loss": 1.5931271314620972, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06865445524454117, "rewards/margins": 0.029282670468091965, "rewards/rejected": -0.09793712943792343, "step": 620 }, { "epoch": 0.4964257347100874, "grad_norm": 43.39156723022461, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.21482162177562714, "log_odds_ratio": -0.7532765865325928, "logits/chosen": 456.2254333496094, "logits/rejected": 259.1532897949219, "logps/chosen": -0.9853864908218384, "logps/rejected": -1.0251073837280273, "loss": 1.5771, "nll_loss": 1.4871981143951416, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04926932230591774, "rewards/margins": 0.0019860477186739445, "rewards/rejected": -0.05125536769628525, "step": 625 }, { "epoch": 0.5003971405877681, "grad_norm": 27.689453125, "learning_rate": 1.9920476822239895e-06, "log_odds_chosen": 0.5083667039871216, "log_odds_ratio": -0.5167660713195801, "logits/chosen": 340.188232421875, "logits/rejected": 316.4561462402344, "logps/chosen": -1.0205366611480713, "logps/rejected": -1.3779761791229248, "loss": 1.3601, "nll_loss": 1.174508810043335, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.051026832312345505, "rewards/margins": 0.017871975898742676, "rewards/rejected": -0.06889880448579788, "step": 630 }, { "epoch": 0.5043685464654488, "grad_norm": 64.66783905029297, "learning_rate": 1.9841894753313627e-06, "log_odds_chosen": 0.32054659724235535, "log_odds_ratio": -0.5909343361854553, "logits/chosen": 350.8283386230469, "logits/rejected": 264.38421630859375, "logps/chosen": -1.0457651615142822, "logps/rejected": -1.2276270389556885, "loss": 1.5451, "nll_loss": 1.4288588762283325, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.052288252860307693, "rewards/margins": 0.009093107655644417, "rewards/rejected": -0.06138136237859726, "step": 635 }, { "epoch": 0.5083399523431295, "grad_norm": 55.53464889526367, "learning_rate": 1.976423537605237e-06, "log_odds_chosen": 0.05188782140612602, "log_odds_ratio": -0.7150664925575256, "logits/chosen": 299.6695861816406, "logits/rejected": 320.55572509765625, "logps/chosen": -0.9817999005317688, "logps/rejected": -1.0104490518569946, "loss": 1.5411, "nll_loss": 1.3620450496673584, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04908999055624008, "rewards/margins": 0.0014324591029435396, "rewards/rejected": -0.05052245408296585, "step": 640 }, { "epoch": 0.5123113582208102, "grad_norm": 33.685333251953125, "learning_rate": 1.9687480773953947e-06, "log_odds_chosen": 0.3233865797519684, "log_odds_ratio": -0.548931360244751, "logits/chosen": 274.61444091796875, "logits/rejected": 411.7635803222656, "logps/chosen": -0.7365684509277344, "logps/rejected": -0.9102706909179688, "loss": 1.6014, "nll_loss": 1.4943163394927979, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0368284247815609, "rewards/margins": 0.008685111068189144, "rewards/rejected": -0.04551353678107262, "step": 645 }, { "epoch": 0.5162827640984908, "grad_norm": 44.73686599731445, "learning_rate": 1.961161351381841e-06, "log_odds_chosen": 0.4027363359928131, "log_odds_ratio": -0.6090582013130188, "logits/chosen": 262.2003173828125, "logits/rejected": 391.3258972167969, "logps/chosen": -0.9413054585456848, "logps/rejected": -1.1956028938293457, "loss": 1.3514, "nll_loss": 1.3425610065460205, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04706526920199394, "rewards/margins": 0.012714875862002373, "rewards/rejected": -0.059780143201351166, "step": 650 }, { "epoch": 0.5202541699761716, "grad_norm": 64.39408874511719, "learning_rate": 1.953661662911409e-06, "log_odds_chosen": 0.3183760643005371, "log_odds_ratio": -0.700135350227356, "logits/chosen": 379.3218078613281, "logits/rejected": 293.6615905761719, "logps/chosen": -1.1582627296447754, "logps/rejected": -1.3677259683609009, "loss": 1.4144, "nll_loss": 1.6593866348266602, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05791313573718071, "rewards/margins": 0.010473157279193401, "rewards/rejected": -0.06838629394769669, "step": 655 }, { "epoch": 0.5242255758538522, "grad_norm": 66.78807830810547, "learning_rate": 1.9462473604038077e-06, "log_odds_chosen": 0.5076483488082886, "log_odds_ratio": -0.6378599405288696, "logits/chosen": 338.880859375, "logits/rejected": 322.9387512207031, "logps/chosen": -1.0433294773101807, "logps/rejected": -1.3180233240127563, "loss": 1.5652, "nll_loss": 1.3921977281570435, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.052166469395160675, "rewards/margins": 0.013734695501625538, "rewards/rejected": -0.06590116769075394, "step": 660 }, { "epoch": 0.528196981731533, "grad_norm": 36.77963638305664, "learning_rate": 1.938916835823703e-06, "log_odds_chosen": 0.31942346692085266, "log_odds_ratio": -0.550063967704773, "logits/chosen": 291.7728576660156, "logits/rejected": 386.6041564941406, "logps/chosen": -1.384905219078064, "logps/rejected": -1.6302177906036377, "loss": 1.5694, "nll_loss": 1.6492725610733032, "rewards/accuracies": 1.0, "rewards/chosen": -0.06924526393413544, "rewards/margins": 0.012265628203749657, "rewards/rejected": -0.08151089400053024, "step": 665 }, { "epoch": 0.5321683876092137, "grad_norm": 39.7949333190918, "learning_rate": 1.9316685232156397e-06, "log_odds_chosen": 0.03977243974804878, "log_odds_ratio": -0.7474034428596497, "logits/chosen": 319.749267578125, "logits/rejected": 268.3997497558594, "logps/chosen": -1.4536529779434204, "logps/rejected": -1.5424761772155762, "loss": 1.7192, "nll_loss": 1.524833083152771, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07268264889717102, "rewards/margins": 0.004441158380359411, "rewards/rejected": -0.07712380588054657, "step": 670 }, { "epoch": 0.5361397934868943, "grad_norm": 47.65932846069336, "learning_rate": 1.924500897298753e-06, "log_odds_chosen": 0.24133019149303436, "log_odds_ratio": -0.5948769450187683, "logits/chosen": 342.0313415527344, "logits/rejected": 344.55010986328125, "logps/chosen": -1.2440179586410522, "logps/rejected": -1.4411834478378296, "loss": 1.6389, "nll_loss": 1.5813580751419067, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06220090389251709, "rewards/margins": 0.009858268313109875, "rewards/rejected": -0.07205917686223984, "step": 675 }, { "epoch": 0.5401111993645751, "grad_norm": 58.68126678466797, "learning_rate": 1.917412472118426e-06, "log_odds_chosen": 0.30001306533813477, "log_odds_ratio": -0.6535278558731079, "logits/chosen": 293.0940856933594, "logits/rejected": 417.0559997558594, "logps/chosen": -0.8398979902267456, "logps/rejected": -1.1216331720352173, "loss": 1.4459, "nll_loss": 1.1451408863067627, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04199490323662758, "rewards/margins": 0.014086750335991383, "rewards/rejected": -0.05608165264129639, "step": 680 }, { "epoch": 0.5440826052422557, "grad_norm": 63.648494720458984, "learning_rate": 1.9104017997521752e-06, "log_odds_chosen": 0.20501708984375, "log_odds_ratio": -0.6081960201263428, "logits/chosen": 390.64794921875, "logits/rejected": 267.46661376953125, "logps/chosen": -0.8991649746894836, "logps/rejected": -1.0455691814422607, "loss": 1.4251, "nll_loss": 1.3118441104888916, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.044958241283893585, "rewards/margins": 0.0073202140629291534, "rewards/rejected": -0.05227845907211304, "step": 685 }, { "epoch": 0.5480540111199365, "grad_norm": 51.855377197265625, "learning_rate": 1.9034674690672024e-06, "log_odds_chosen": -0.4093741476535797, "log_odds_ratio": -0.9202106595039368, "logits/chosen": 294.8217468261719, "logits/rejected": 336.0784912109375, "logps/chosen": -1.4474905729293823, "logps/rejected": -1.1725715398788452, "loss": 1.4859, "nll_loss": 1.618402123451233, "rewards/accuracies": 0.0, "rewards/chosen": -0.07237453758716583, "rewards/margins": -0.01374595146626234, "rewards/rejected": -0.05862858146429062, "step": 690 }, { "epoch": 0.5520254169976172, "grad_norm": 73.54180908203125, "learning_rate": 1.8966081045272043e-06, "log_odds_chosen": -0.12494969367980957, "log_odds_ratio": -0.8439692258834839, "logits/chosen": 293.6024475097656, "logits/rejected": 299.53753662109375, "logps/chosen": -1.091850996017456, "logps/rejected": -1.0055066347122192, "loss": 1.2878, "nll_loss": 1.2297693490982056, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0545925572514534, "rewards/margins": -0.004317224025726318, "rewards/rejected": -0.050275325775146484, "step": 695 }, { "epoch": 0.5559968228752978, "grad_norm": 39.061195373535156, "learning_rate": 1.8898223650461362e-06, "log_odds_chosen": 0.16092145442962646, "log_odds_ratio": -0.6716988682746887, "logits/chosen": 274.28326416015625, "logits/rejected": 411.262451171875, "logps/chosen": -1.2318370342254639, "logps/rejected": -1.321478009223938, "loss": 1.4147, "nll_loss": 1.2553250789642334, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.061591845005750656, "rewards/margins": 0.004482048097997904, "rewards/rejected": -0.06607390195131302, "step": 700 }, { "epoch": 0.5599682287529786, "grad_norm": 59.83469009399414, "learning_rate": 1.8831089428867739e-06, "log_odds_chosen": 0.13619789481163025, "log_odds_ratio": -0.6964500546455383, "logits/chosen": 330.43310546875, "logits/rejected": 458.30767822265625, "logps/chosen": -1.0438668727874756, "logps/rejected": -1.1042603254318237, "loss": 1.3856, "nll_loss": 1.1628286838531494, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05219334363937378, "rewards/margins": 0.003019676310941577, "rewards/rejected": -0.055213022977113724, "step": 705 }, { "epoch": 0.5639396346306592, "grad_norm": 33.04471206665039, "learning_rate": 1.876466562602004e-06, "log_odds_chosen": -0.26594752073287964, "log_odds_ratio": -0.9066311120986938, "logits/chosen": 269.53826904296875, "logits/rejected": 281.7684631347656, "logps/chosen": -1.1670176982879639, "logps/rejected": -0.9401714205741882, "loss": 1.4375, "nll_loss": 1.4048278331756592, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05835089087486267, "rewards/margins": -0.011342315934598446, "rewards/rejected": -0.04700857400894165, "step": 710 }, { "epoch": 0.56791104050834, "grad_norm": 78.54840087890625, "learning_rate": 1.8698939800169145e-06, "log_odds_chosen": 0.5749568939208984, "log_odds_ratio": -0.48962122201919556, "logits/chosen": 352.38568115234375, "logits/rejected": 383.9367980957031, "logps/chosen": -0.8274946212768555, "logps/rejected": -1.1295303106307983, "loss": 1.3946, "nll_loss": 1.2294576168060303, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04137473553419113, "rewards/margins": 0.015101781114935875, "rewards/rejected": -0.05647651478648186, "step": 715 }, { "epoch": 0.5718824463860207, "grad_norm": 32.714805603027344, "learning_rate": 1.863389981249825e-06, "log_odds_chosen": 0.10385574400424957, "log_odds_ratio": -0.6922389268875122, "logits/chosen": 301.9817810058594, "logits/rejected": 279.5206604003906, "logps/chosen": -1.1496083736419678, "logps/rejected": -1.2928965091705322, "loss": 1.3971, "nll_loss": 1.4229730367660522, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.05748041719198227, "rewards/margins": 0.00716440100222826, "rewards/rejected": -0.06464481353759766, "step": 720 }, { "epoch": 0.5758538522637013, "grad_norm": 41.090572357177734, "learning_rate": 1.8569533817705187e-06, "log_odds_chosen": 0.5001566410064697, "log_odds_ratio": -0.6019404530525208, "logits/chosen": 322.370361328125, "logits/rejected": 263.5240173339844, "logps/chosen": -1.0987929105758667, "logps/rejected": -1.4239269495010376, "loss": 1.4352, "nll_loss": 1.3209176063537598, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05493964999914169, "rewards/margins": 0.016256701201200485, "rewards/rejected": -0.07119635492563248, "step": 725 }, { "epoch": 0.5798252581413821, "grad_norm": 43.46686935424805, "learning_rate": 1.8505830254940132e-06, "log_odds_chosen": -0.10105061531066895, "log_odds_ratio": -1.1564183235168457, "logits/chosen": 275.8767395019531, "logits/rejected": 457.92132568359375, "logps/chosen": -1.3419158458709717, "logps/rejected": -1.6229051351547241, "loss": 1.3517, "nll_loss": 1.4537432193756104, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0670957863330841, "rewards/margins": 0.0140494704246521, "rewards/rejected": -0.0811452642083168, "step": 730 }, { "epoch": 0.5837966640190627, "grad_norm": 47.49483871459961, "learning_rate": 1.8442777839082938e-06, "log_odds_chosen": 0.5534607768058777, "log_odds_ratio": -0.5553954243659973, "logits/chosen": 300.9742126464844, "logits/rejected": 328.1264953613281, "logps/chosen": -1.0749738216400146, "logps/rejected": -1.3655563592910767, "loss": 1.4955, "nll_loss": 1.5789165496826172, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05374868959188461, "rewards/margins": 0.01452912949025631, "rewards/rejected": -0.06827782094478607, "step": 735 }, { "epoch": 0.5877680698967435, "grad_norm": 47.386417388916016, "learning_rate": 1.8380365552345197e-06, "log_odds_chosen": -0.17396871745586395, "log_odds_ratio": -0.9099219441413879, "logits/chosen": 353.1138610839844, "logits/rejected": 372.53668212890625, "logps/chosen": -1.097666621208191, "logps/rejected": -0.9902673959732056, "loss": 1.4205, "nll_loss": 1.2607418298721313, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05488333851099014, "rewards/margins": -0.005369964987039566, "rewards/rejected": -0.04951336979866028, "step": 740 }, { "epoch": 0.5917394757744241, "grad_norm": 61.877071380615234, "learning_rate": 1.8318582636182793e-06, "log_odds_chosen": -0.670925498008728, "log_odds_ratio": -1.3397860527038574, "logits/chosen": 281.83233642578125, "logits/rejected": 313.679443359375, "logps/chosen": -1.7743791341781616, "logps/rejected": -1.2447985410690308, "loss": 1.7376, "nll_loss": 1.7562745809555054, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0887189581990242, "rewards/margins": -0.026479026302695274, "rewards/rejected": -0.062239933758974075, "step": 745 }, { "epoch": 0.5957108816521048, "grad_norm": 34.587196350097656, "learning_rate": 1.8257418583505536e-06, "log_odds_chosen": -0.6947991251945496, "log_odds_ratio": -1.1835838556289673, "logits/chosen": 328.31439208984375, "logits/rejected": 305.95892333984375, "logps/chosen": -1.4247385263442993, "logps/rejected": -1.009553074836731, "loss": 1.3579, "nll_loss": 1.3562877178192139, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.07123693078756332, "rewards/margins": -0.020759278908371925, "rewards/rejected": -0.05047765374183655, "step": 750 }, { "epoch": 0.5996822875297856, "grad_norm": 48.92048645019531, "learning_rate": 1.8196863131170976e-06, "log_odds_chosen": -0.017104322090744972, "log_odds_ratio": -0.7284756898880005, "logits/chosen": 381.6023254394531, "logits/rejected": 315.03009033203125, "logps/chosen": -1.0912220478057861, "logps/rejected": -1.0588710308074951, "loss": 1.5279, "nll_loss": 1.2532793283462524, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05456110090017319, "rewards/margins": -0.0016175527125597, "rewards/rejected": -0.052943550050258636, "step": 755 }, { "epoch": 0.6036536934074662, "grad_norm": 60.20802688598633, "learning_rate": 1.8136906252750293e-06, "log_odds_chosen": -0.11121414601802826, "log_odds_ratio": -0.7645635604858398, "logits/chosen": 305.519287109375, "logits/rejected": 327.36456298828125, "logps/chosen": -1.2993478775024414, "logps/rejected": -1.2332274913787842, "loss": 1.5179, "nll_loss": 1.4147006273269653, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06496739387512207, "rewards/margins": -0.003306013997644186, "rewards/rejected": -0.061661381274461746, "step": 760 }, { "epoch": 0.607625099285147, "grad_norm": 56.269866943359375, "learning_rate": 1.807753815155468e-06, "log_odds_chosen": -0.48918837308883667, "log_odds_ratio": -1.195225477218628, "logits/chosen": 376.7154846191406, "logits/rejected": 306.25811767578125, "logps/chosen": -1.2335858345031738, "logps/rejected": -0.8009617924690247, "loss": 1.5646, "nll_loss": 1.3844496011734009, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06167929247021675, "rewards/margins": -0.02163120172917843, "rewards/rejected": -0.04004809260368347, "step": 765 }, { "epoch": 0.6115965051628276, "grad_norm": 44.323631286621094, "learning_rate": 1.801874925391118e-06, "log_odds_chosen": 0.21221765875816345, "log_odds_ratio": -0.6802242398262024, "logits/chosen": 334.3392639160156, "logits/rejected": 365.3385009765625, "logps/chosen": -0.9386296272277832, "logps/rejected": -1.0058612823486328, "loss": 1.7071, "nll_loss": 1.7459404468536377, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04693147912621498, "rewards/margins": 0.0033615841530263424, "rewards/rejected": -0.05029306560754776, "step": 770 }, { "epoch": 0.6155679110405083, "grad_norm": 59.921302795410156, "learning_rate": 1.7960530202677493e-06, "log_odds_chosen": 0.5171085000038147, "log_odds_ratio": -0.5760589241981506, "logits/chosen": 329.0060119628906, "logits/rejected": 318.23797607421875, "logps/chosen": -0.930219292640686, "logps/rejected": -1.2369699478149414, "loss": 1.5969, "nll_loss": 1.449138879776001, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0465109646320343, "rewards/margins": 0.015337531454861164, "rewards/rejected": -0.06184849888086319, "step": 775 }, { "epoch": 0.6195393169181891, "grad_norm": 59.61544418334961, "learning_rate": 1.7902871850985824e-06, "log_odds_chosen": 0.9015772938728333, "log_odds_ratio": -0.7340617179870605, "logits/chosen": 311.7084045410156, "logits/rejected": 305.33599853515625, "logps/chosen": -0.9682513475418091, "logps/rejected": -1.7009541988372803, "loss": 1.695, "nll_loss": 1.4785038232803345, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.048412568867206573, "rewards/margins": 0.036635152995586395, "rewards/rejected": -0.08504771441221237, "step": 780 }, { "epoch": 0.6235107227958697, "grad_norm": 44.869529724121094, "learning_rate": 1.7845765256206243e-06, "log_odds_chosen": 1.0826823711395264, "log_odds_ratio": -0.4545539319515228, "logits/chosen": 345.8677062988281, "logits/rejected": 296.6705322265625, "logps/chosen": -1.112697958946228, "logps/rejected": -1.967371940612793, "loss": 1.4201, "nll_loss": 1.7027595043182373, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05563489720225334, "rewards/margins": 0.04273369535803795, "rewards/rejected": -0.09836859256029129, "step": 785 }, { "epoch": 0.6274821286735505, "grad_norm": 54.221893310546875, "learning_rate": 1.7789201674120502e-06, "log_odds_chosen": -0.34022170305252075, "log_odds_ratio": -1.0099502801895142, "logits/chosen": 290.6964111328125, "logits/rejected": 367.1211853027344, "logps/chosen": -1.201079249382019, "logps/rejected": -0.9725133776664734, "loss": 1.5892, "nll_loss": 1.398667335510254, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06005396693944931, "rewards/margins": -0.011428297497332096, "rewards/rejected": -0.04862567037343979, "step": 790 }, { "epoch": 0.6314535345512311, "grad_norm": 33.76439666748047, "learning_rate": 1.7733172553297718e-06, "log_odds_chosen": 0.3912748694419861, "log_odds_ratio": -0.555752158164978, "logits/chosen": 332.48321533203125, "logits/rejected": 379.13909912109375, "logps/chosen": -1.0452964305877686, "logps/rejected": -1.3820005655288696, "loss": 1.3308, "nll_loss": 1.2774577140808105, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.052264828234910965, "rewards/margins": 0.016835201531648636, "rewards/rejected": -0.0691000297665596, "step": 795 }, { "epoch": 0.6354249404289118, "grad_norm": 112.01268768310547, "learning_rate": 1.7677669529663689e-06, "log_odds_chosen": 0.15832357108592987, "log_odds_ratio": -0.6339753866195679, "logits/chosen": 303.36553955078125, "logits/rejected": 390.05645751953125, "logps/chosen": -0.9267553091049194, "logps/rejected": -1.0056819915771484, "loss": 1.5535, "nll_loss": 1.344155192375183, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04633776843547821, "rewards/margins": 0.003946331329643726, "rewards/rejected": -0.05028409883379936, "step": 800 }, { "epoch": 0.6393963463065926, "grad_norm": 42.41415023803711, "learning_rate": 1.7622684421256037e-06, "log_odds_chosen": 0.885511040687561, "log_odds_ratio": -0.3806975185871124, "logits/chosen": 300.3759460449219, "logits/rejected": 346.12249755859375, "logps/chosen": -0.8861316442489624, "logps/rejected": -1.4850671291351318, "loss": 1.4821, "nll_loss": 1.4846971035003662, "rewards/accuracies": 1.0, "rewards/chosen": -0.04430658370256424, "rewards/margins": 0.029946770519018173, "rewards/rejected": -0.07425335794687271, "step": 805 }, { "epoch": 0.6433677521842732, "grad_norm": 46.314781188964844, "learning_rate": 1.7568209223157664e-06, "log_odds_chosen": 0.2553045153617859, "log_odds_ratio": -0.7045444846153259, "logits/chosen": 285.037841796875, "logits/rejected": 303.57769775390625, "logps/chosen": -1.5863940715789795, "logps/rejected": -1.8081388473510742, "loss": 1.6063, "nll_loss": 1.7009849548339844, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07931970804929733, "rewards/margins": 0.011087236925959587, "rewards/rejected": -0.09040693938732147, "step": 810 }, { "epoch": 0.647339158061954, "grad_norm": 38.062042236328125, "learning_rate": 1.751423610260147e-06, "log_odds_chosen": 0.33337265253067017, "log_odds_ratio": -0.6462761759757996, "logits/chosen": 331.96063232421875, "logits/rejected": 264.3335876464844, "logps/chosen": -1.0078685283660889, "logps/rejected": -1.1855480670928955, "loss": 1.329, "nll_loss": 1.3942029476165771, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.050393424928188324, "rewards/margins": 0.00888398103415966, "rewards/rejected": -0.059277404099702835, "step": 815 }, { "epoch": 0.6513105639396346, "grad_norm": 48.024879455566406, "learning_rate": 1.7460757394239458e-06, "log_odds_chosen": -0.12280458211898804, "log_odds_ratio": -0.7886728048324585, "logits/chosen": 360.01708984375, "logits/rejected": 266.03411865234375, "logps/chosen": -1.1657707691192627, "logps/rejected": -1.0915082693099976, "loss": 1.4432, "nll_loss": 1.3378136157989502, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.058288536965847015, "rewards/margins": -0.003713126527145505, "rewards/rejected": -0.05457541346549988, "step": 820 }, { "epoch": 0.6552819698173153, "grad_norm": 41.48712158203125, "learning_rate": 1.7407765595569787e-06, "log_odds_chosen": -0.009509158320724964, "log_odds_ratio": -0.7556756734848022, "logits/chosen": 279.5709533691406, "logits/rejected": 309.6410217285156, "logps/chosen": -1.2121888399124146, "logps/rejected": -1.1930046081542969, "loss": 1.5544, "nll_loss": 1.386554479598999, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06060944125056267, "rewards/margins": -0.0009592041606083512, "rewards/rejected": -0.0596502348780632, "step": 825 }, { "epoch": 0.659253375694996, "grad_norm": 105.25407409667969, "learning_rate": 1.7355253362515584e-06, "log_odds_chosen": 0.09693387895822525, "log_odds_ratio": -0.6678298115730286, "logits/chosen": 341.30438232421875, "logits/rejected": 364.1517639160156, "logps/chosen": -1.0246386528015137, "logps/rejected": -1.1010067462921143, "loss": 1.5619, "nll_loss": 1.313808798789978, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.051231931895017624, "rewards/margins": 0.0038184034638106823, "rewards/rejected": -0.055050335824489594, "step": 830 }, { "epoch": 0.6632247815726767, "grad_norm": 30.412721633911133, "learning_rate": 1.7303213505149572e-06, "log_odds_chosen": 0.4479742646217346, "log_odds_ratio": -0.5424279570579529, "logits/chosen": 312.5921325683594, "logits/rejected": 465.254150390625, "logps/chosen": -0.9704440832138062, "logps/rejected": -1.2700642347335815, "loss": 1.4003, "nll_loss": 1.2447351217269897, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04852220416069031, "rewards/margins": 0.01498100720345974, "rewards/rejected": -0.0635032132267952, "step": 835 }, { "epoch": 0.6671961874503575, "grad_norm": 33.53908157348633, "learning_rate": 1.7251638983558855e-06, "log_odds_chosen": -0.34184280037879944, "log_odds_ratio": -0.9526281356811523, "logits/chosen": 418.00823974609375, "logits/rejected": 276.333984375, "logps/chosen": -1.0781444311141968, "logps/rejected": -0.9165736436843872, "loss": 1.3678, "nll_loss": 1.1540253162384033, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05390722304582596, "rewards/margins": -0.008078541606664658, "rewards/rejected": -0.0458286814391613, "step": 840 }, { "epoch": 0.6711675933280381, "grad_norm": 38.89650344848633, "learning_rate": 1.7200522903844539e-06, "log_odds_chosen": -0.019759630784392357, "log_odds_ratio": -0.7890421748161316, "logits/chosen": 352.12933349609375, "logits/rejected": 295.3460693359375, "logps/chosen": -1.2261745929718018, "logps/rejected": -1.1950092315673828, "loss": 1.2377, "nll_loss": 1.3689581155776978, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06130873039364815, "rewards/margins": -0.001558272517286241, "rewards/rejected": -0.05975046008825302, "step": 845 }, { "epoch": 0.6751389992057188, "grad_norm": 44.82482147216797, "learning_rate": 1.7149858514250883e-06, "log_odds_chosen": 0.27604418992996216, "log_odds_ratio": -0.5769887566566467, "logits/chosen": 410.49627685546875, "logits/rejected": 273.0401916503906, "logps/chosen": -0.771110475063324, "logps/rejected": -0.9429551362991333, "loss": 1.5443, "nll_loss": 0.988152801990509, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03855552524328232, "rewards/margins": 0.008592232130467892, "rewards/rejected": -0.047147758305072784, "step": 850 }, { "epoch": 0.6791104050833995, "grad_norm": 41.895572662353516, "learning_rate": 1.7099639201419239e-06, "log_odds_chosen": 1.8448680639266968, "log_odds_ratio": -0.22662608325481415, "logits/chosen": 311.1007385253906, "logits/rejected": 294.10302734375, "logps/chosen": -0.6820273995399475, "logps/rejected": -2.0481584072113037, "loss": 1.4831, "nll_loss": 1.288765788078308, "rewards/accuracies": 1.0, "rewards/chosen": -0.034101370722055435, "rewards/margins": 0.06830655783414841, "rewards/rejected": -0.10240793228149414, "step": 855 }, { "epoch": 0.6830818109610802, "grad_norm": 32.27305603027344, "learning_rate": 1.704985848676184e-06, "log_odds_chosen": 0.05296659469604492, "log_odds_ratio": -0.7537012100219727, "logits/chosen": 322.4505310058594, "logits/rejected": 371.6986389160156, "logps/chosen": -1.063537836074829, "logps/rejected": -1.099055290222168, "loss": 1.4083, "nll_loss": 1.3870474100112915, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05317689850926399, "rewards/margins": 0.0017758652102202177, "rewards/rejected": -0.05495276302099228, "step": 860 }, { "epoch": 0.687053216838761, "grad_norm": 27.749332427978516, "learning_rate": 1.700051002295115e-06, "log_odds_chosen": 1.5363937616348267, "log_odds_ratio": -0.324050635099411, "logits/chosen": 302.7940979003906, "logits/rejected": 389.976318359375, "logps/chosen": -0.8558230400085449, "logps/rejected": -1.8002240657806396, "loss": 1.4722, "nll_loss": 1.92082941532135, "rewards/accuracies": 1.0, "rewards/chosen": -0.042791154235601425, "rewards/margins": 0.04722005873918533, "rewards/rejected": -0.09001120924949646, "step": 865 }, { "epoch": 0.6910246227164416, "grad_norm": 43.04403305053711, "learning_rate": 1.6951587590520263e-06, "log_odds_chosen": -0.35114437341690063, "log_odds_ratio": -0.9282342791557312, "logits/chosen": 306.46990966796875, "logits/rejected": 401.5986328125, "logps/chosen": -1.02718186378479, "logps/rejected": -0.7973839640617371, "loss": 1.6272, "nll_loss": 1.2324786186218262, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05135909467935562, "rewards/margins": -0.01148989424109459, "rewards/rejected": -0.03986920043826103, "step": 870 }, { "epoch": 0.6949960285941224, "grad_norm": 259.5257568359375, "learning_rate": 1.6903085094570331e-06, "log_odds_chosen": 0.6448081731796265, "log_odds_ratio": -0.649856448173523, "logits/chosen": 305.8426818847656, "logits/rejected": 355.3719787597656, "logps/chosen": -1.023664116859436, "logps/rejected": -1.60223388671875, "loss": 1.4816, "nll_loss": 1.2721219062805176, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.051183201372623444, "rewards/margins": 0.028928488492965698, "rewards/rejected": -0.08011169731616974, "step": 875 }, { "epoch": 0.698967434471803, "grad_norm": 36.911766052246094, "learning_rate": 1.6854996561581053e-06, "log_odds_chosen": -0.09146185219287872, "log_odds_ratio": -0.8007118105888367, "logits/chosen": 322.17376708984375, "logits/rejected": 294.0484619140625, "logps/chosen": -1.3798397779464722, "logps/rejected": -1.3085267543792725, "loss": 1.4735, "nll_loss": 1.8494446277618408, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0689919963479042, "rewards/margins": -0.0035656639374792576, "rewards/rejected": -0.06542633473873138, "step": 880 }, { "epoch": 0.7029388403494837, "grad_norm": 40.6159782409668, "learning_rate": 1.680731613632036e-06, "log_odds_chosen": 0.105155348777771, "log_odds_ratio": -0.7062179446220398, "logits/chosen": 321.5434875488281, "logits/rejected": 283.28021240234375, "logps/chosen": -1.280366063117981, "logps/rejected": -1.3586736917495728, "loss": 1.4686, "nll_loss": 1.4260222911834717, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06401830166578293, "rewards/margins": 0.0039153858087956905, "rewards/rejected": -0.06793369352817535, "step": 885 }, { "epoch": 0.7069102462271644, "grad_norm": 43.12873077392578, "learning_rate": 1.6760038078849776e-06, "log_odds_chosen": -0.557292640209198, "log_odds_ratio": -1.0417410135269165, "logits/chosen": 296.5141296386719, "logits/rejected": 291.49896240234375, "logps/chosen": -1.1542619466781616, "logps/rejected": -0.8248146176338196, "loss": 1.6357, "nll_loss": 1.2914320230484009, "rewards/accuracies": 0.0, "rewards/chosen": -0.0577131025493145, "rewards/margins": -0.01647236943244934, "rewards/rejected": -0.04124072939157486, "step": 890 }, { "epoch": 0.7108816521048451, "grad_norm": 84.90913391113281, "learning_rate": 1.6713156761621891e-06, "log_odds_chosen": 0.16997307538986206, "log_odds_ratio": -0.6351084113121033, "logits/chosen": 327.1002502441406, "logits/rejected": 273.7991638183594, "logps/chosen": -1.1396805047988892, "logps/rejected": -1.2256426811218262, "loss": 1.5056, "nll_loss": 1.5154684782028198, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.056984029710292816, "rewards/margins": 0.004298110492527485, "rewards/rejected": -0.06128213554620743, "step": 895 }, { "epoch": 0.7148530579825259, "grad_norm": 52.2535285949707, "learning_rate": 1.6666666666666667e-06, "log_odds_chosen": 0.559515655040741, "log_odds_ratio": -0.49422377347946167, "logits/chosen": 334.64312744140625, "logits/rejected": 391.33038330078125, "logps/chosen": -0.7214312553405762, "logps/rejected": -0.986790657043457, "loss": 1.4393, "nll_loss": 1.1182574033737183, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03607156500220299, "rewards/margins": 0.01326796691864729, "rewards/rejected": -0.04933953285217285, "step": 900 }, { "epoch": 0.7188244638602065, "grad_norm": 42.749168395996094, "learning_rate": 1.6620562382863342e-06, "log_odds_chosen": 1.9891941547393799, "log_odds_ratio": -0.21987108886241913, "logits/chosen": 414.6642150878906, "logits/rejected": 266.611083984375, "logps/chosen": -0.566085696220398, "logps/rejected": -1.508644938468933, "loss": 1.4136, "nll_loss": 1.4871587753295898, "rewards/accuracies": 1.0, "rewards/chosen": -0.028304290026426315, "rewards/margins": 0.047127965837717056, "rewards/rejected": -0.07543225586414337, "step": 905 }, { "epoch": 0.7227958697378872, "grad_norm": 28.15529441833496, "learning_rate": 1.6574838603294898e-06, "log_odds_chosen": 0.01718742772936821, "log_odds_ratio": -0.7042349576950073, "logits/chosen": 282.4758605957031, "logits/rejected": 295.02996826171875, "logps/chosen": -0.9299288988113403, "logps/rejected": -0.9464460611343384, "loss": 1.5259, "nll_loss": 1.4807093143463135, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0464964434504509, "rewards/margins": 0.000825858092866838, "rewards/rejected": -0.04732229933142662, "step": 910 }, { "epoch": 0.7267672756155679, "grad_norm": 37.33399200439453, "learning_rate": 1.6529490122682157e-06, "log_odds_chosen": 0.5594145059585571, "log_odds_ratio": -0.5877448916435242, "logits/chosen": 292.7896423339844, "logits/rejected": 335.90289306640625, "logps/chosen": -0.7141492366790771, "logps/rejected": -1.103790044784546, "loss": 1.5321, "nll_loss": 1.445112943649292, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03570746257901192, "rewards/margins": 0.019482046365737915, "rewards/rejected": -0.05518950894474983, "step": 915 }, { "epoch": 0.7307386814932486, "grad_norm": 38.81259536743164, "learning_rate": 1.648451183489468e-06, "log_odds_chosen": 0.08375336974859238, "log_odds_ratio": -0.7926191091537476, "logits/chosen": 324.5826110839844, "logits/rejected": 307.3143005371094, "logps/chosen": -0.9936789274215698, "logps/rejected": -0.990047812461853, "loss": 1.5009, "nll_loss": 1.3815875053405762, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.04968394711613655, "rewards/margins": -0.00018155350699089468, "rewards/rejected": -0.04950239509344101, "step": 920 }, { "epoch": 0.7347100873709294, "grad_norm": 41.69913864135742, "learning_rate": 1.643989873053573e-06, "log_odds_chosen": 0.1969633847475052, "log_odds_ratio": -0.6738036274909973, "logits/chosen": 342.1506652832031, "logits/rejected": 309.15814208984375, "logps/chosen": -1.0204120874404907, "logps/rejected": -1.1466134786605835, "loss": 1.4047, "nll_loss": 1.4902677536010742, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.051020603626966476, "rewards/margins": 0.006310069467872381, "rewards/rejected": -0.057330675423145294, "step": 925 }, { "epoch": 0.73868149324861, "grad_norm": 43.032135009765625, "learning_rate": 1.6395645894598825e-06, "log_odds_chosen": 0.13928785920143127, "log_odds_ratio": -0.8347541689872742, "logits/chosen": 297.34014892578125, "logits/rejected": 275.1300354003906, "logps/chosen": -1.1409282684326172, "logps/rejected": -1.0776065587997437, "loss": 1.8936, "nll_loss": 1.5503441095352173, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05704641342163086, "rewards/margins": -0.003166080918163061, "rewards/rejected": -0.05388033390045166, "step": 930 }, { "epoch": 0.7426528991262907, "grad_norm": 57.749149322509766, "learning_rate": 1.6351748504193218e-06, "log_odds_chosen": -0.38780477643013, "log_odds_ratio": -1.0117642879486084, "logits/chosen": 280.0088195800781, "logits/rejected": 341.8829345703125, "logps/chosen": -1.1196801662445068, "logps/rejected": -0.9303563237190247, "loss": 1.3338, "nll_loss": 1.6607334613800049, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0559840090572834, "rewards/margins": -0.009466195479035378, "rewards/rejected": -0.04651781544089317, "step": 935 }, { "epoch": 0.7466243050039714, "grad_norm": 54.09027862548828, "learning_rate": 1.6308201826336057e-06, "log_odds_chosen": -0.379691481590271, "log_odds_ratio": -0.9309779405593872, "logits/chosen": 283.4677429199219, "logits/rejected": 327.2042541503906, "logps/chosen": -1.322261095046997, "logps/rejected": -1.131831169128418, "loss": 1.4357, "nll_loss": 1.4476417303085327, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06611306220293045, "rewards/margins": -0.009521503001451492, "rewards/rejected": -0.05659156292676926, "step": 940 }, { "epoch": 0.7505957108816521, "grad_norm": 31.14377784729004, "learning_rate": 1.6265001215808888e-06, "log_odds_chosen": 0.22349996864795685, "log_odds_ratio": -0.666618824005127, "logits/chosen": 335.06573486328125, "logits/rejected": 268.8232116699219, "logps/chosen": -1.0155109167099, "logps/rejected": -1.231431484222412, "loss": 1.5133, "nll_loss": 1.3729729652404785, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05077555030584335, "rewards/margins": 0.010796028189361095, "rewards/rejected": -0.06157157942652702, "step": 945 }, { "epoch": 0.7545671167593329, "grad_norm": 50.59060287475586, "learning_rate": 1.6222142113076255e-06, "log_odds_chosen": -0.010113936848938465, "log_odds_ratio": -0.6997529864311218, "logits/chosen": 368.07037353515625, "logits/rejected": 283.2250671386719, "logps/chosen": -1.3818244934082031, "logps/rejected": -1.378185510635376, "loss": 1.5137, "nll_loss": 1.5937120914459229, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06909122318029404, "rewards/margins": -0.0001819491444621235, "rewards/rejected": -0.06890927255153656, "step": 950 }, { "epoch": 0.7585385226370135, "grad_norm": 38.753761291503906, "learning_rate": 1.617962004226434e-06, "log_odds_chosen": 1.379948377609253, "log_odds_ratio": -0.3466225862503052, "logits/chosen": 256.8636474609375, "logits/rejected": 371.79840087890625, "logps/chosen": -0.777528703212738, "logps/rejected": -1.486754059791565, "loss": 1.3994, "nll_loss": 1.315920114517212, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03887643292546272, "rewards/margins": 0.035461269319057465, "rewards/rejected": -0.07433770596981049, "step": 955 }, { "epoch": 0.7625099285146942, "grad_norm": 33.10997009277344, "learning_rate": 1.6137430609197571e-06, "log_odds_chosen": -0.14383646845817566, "log_odds_ratio": -0.8176695704460144, "logits/chosen": 290.1402587890625, "logits/rejected": 345.85064697265625, "logps/chosen": -1.3824456930160522, "logps/rejected": -1.3024415969848633, "loss": 1.352, "nll_loss": 1.4453611373901367, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06912229210138321, "rewards/margins": -0.004000205546617508, "rewards/rejected": -0.0651220828294754, "step": 960 }, { "epoch": 0.7664813343923749, "grad_norm": 48.93876266479492, "learning_rate": 1.6095569499491263e-06, "log_odds_chosen": -0.012878346256911755, "log_odds_ratio": -0.8197082281112671, "logits/chosen": 314.2780456542969, "logits/rejected": 319.3282470703125, "logps/chosen": -1.1133010387420654, "logps/rejected": -0.9631961584091187, "loss": 1.4791, "nll_loss": 1.242210030555725, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.055665045976638794, "rewards/margins": -0.00750524178147316, "rewards/rejected": -0.04815980792045593, "step": 965 }, { "epoch": 0.7704527402700556, "grad_norm": 50.061912536621094, "learning_rate": 1.605403247669839e-06, "log_odds_chosen": 0.3026159405708313, "log_odds_ratio": -0.6236444711685181, "logits/chosen": 304.08453369140625, "logits/rejected": 370.66998291015625, "logps/chosen": -1.6113126277923584, "logps/rejected": -1.8838993310928345, "loss": 1.628, "nll_loss": 1.7272355556488037, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08056564629077911, "rewards/margins": 0.013629332184791565, "rewards/rejected": -0.09419497102499008, "step": 970 }, { "epoch": 0.7744241461477362, "grad_norm": 57.93197250366211, "learning_rate": 1.6012815380508715e-06, "log_odds_chosen": 1.213180661201477, "log_odds_ratio": -0.42742282152175903, "logits/chosen": 357.7371826171875, "logits/rejected": 351.83795166015625, "logps/chosen": -0.8961232304573059, "logps/rejected": -1.4123413562774658, "loss": 1.3272, "nll_loss": 1.2730929851531982, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.044806160032749176, "rewards/margins": 0.025810906663537025, "rewards/rejected": -0.07061706483364105, "step": 975 }, { "epoch": 0.778395552025417, "grad_norm": 48.875247955322266, "learning_rate": 1.59719141249985e-06, "log_odds_chosen": 0.12866242229938507, "log_odds_ratio": -0.7350910902023315, "logits/chosen": 377.27935791015625, "logits/rejected": 266.5240783691406, "logps/chosen": -1.3747342824935913, "logps/rejected": -1.4584678411483765, "loss": 1.3958, "nll_loss": 1.8285316228866577, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0687367171049118, "rewards/margins": 0.004186672158539295, "rewards/rejected": -0.07292339205741882, "step": 980 }, { "epoch": 0.7823669579030977, "grad_norm": 58.968650817871094, "learning_rate": 1.5931324696929157e-06, "log_odds_chosen": 0.25052839517593384, "log_odds_ratio": -0.6790642738342285, "logits/chosen": 283.9019470214844, "logits/rejected": 426.68701171875, "logps/chosen": -0.8618942499160767, "logps/rejected": -0.9527530670166016, "loss": 1.3527, "nll_loss": 1.38225519657135, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04309471324086189, "rewards/margins": 0.004542945884168148, "rewards/rejected": -0.047637660056352615, "step": 985 }, { "epoch": 0.7863383637807784, "grad_norm": 55.875274658203125, "learning_rate": 1.5891043154093205e-06, "log_odds_chosen": -0.32832685112953186, "log_odds_ratio": -0.8919731974601746, "logits/chosen": 365.53662109375, "logits/rejected": 278.527099609375, "logps/chosen": -1.341890573501587, "logps/rejected": -1.1177005767822266, "loss": 1.5727, "nll_loss": 1.4955816268920898, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.06709453463554382, "rewards/margins": -0.011209504678845406, "rewards/rejected": -0.05588502809405327, "step": 990 }, { "epoch": 0.7903097696584591, "grad_norm": 82.46007537841797, "learning_rate": 1.5851065623706038e-06, "log_odds_chosen": 1.42555832862854, "log_odds_ratio": -0.3155195116996765, "logits/chosen": 380.34832763671875, "logits/rejected": 302.12542724609375, "logps/chosen": -0.7897301912307739, "logps/rejected": -1.5760501623153687, "loss": 1.4242, "nll_loss": 1.2763941287994385, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03948650881648064, "rewards/margins": 0.039315998554229736, "rewards/rejected": -0.07880251109600067, "step": 995 }, { "epoch": 0.7942811755361397, "grad_norm": 72.97930145263672, "learning_rate": 1.5811388300841898e-06, "log_odds_chosen": -0.25392764806747437, "log_odds_ratio": -0.873538613319397, "logits/chosen": 334.4622497558594, "logits/rejected": 370.5467834472656, "logps/chosen": -0.9442771077156067, "logps/rejected": -0.8281978368759155, "loss": 1.6114, "nll_loss": 1.3259779214859009, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04721385985612869, "rewards/margins": -0.005803964100778103, "rewards/rejected": -0.04140989109873772, "step": 1000 }, { "epoch": 0.7982525814138205, "grad_norm": 53.86620330810547, "learning_rate": 1.5772007446912793e-06, "log_odds_chosen": 0.22081449627876282, "log_odds_ratio": -0.6176373362541199, "logits/chosen": 391.4696350097656, "logits/rejected": 308.0226135253906, "logps/chosen": -0.8516048192977905, "logps/rejected": -1.0242515802383423, "loss": 1.7721, "nll_loss": 1.211578607559204, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04258023947477341, "rewards/margins": 0.008632339537143707, "rewards/rejected": -0.05121258646249771, "step": 1005 }, { "epoch": 0.8022239872915012, "grad_norm": 50.31648254394531, "learning_rate": 1.5732919388188816e-06, "log_odds_chosen": 0.5329298973083496, "log_odds_ratio": -0.8860853314399719, "logits/chosen": 341.54229736328125, "logits/rejected": 272.69091796875, "logps/chosen": -0.967967689037323, "logps/rejected": -1.2983975410461426, "loss": 1.6175, "nll_loss": 1.6894325017929077, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04839838296175003, "rewards/margins": 0.016521494835615158, "rewards/rejected": -0.06491987407207489, "step": 1010 }, { "epoch": 0.8061953931691819, "grad_norm": 49.962276458740234, "learning_rate": 1.5694120514358613e-06, "log_odds_chosen": 0.13751927018165588, "log_odds_ratio": -0.6340736150741577, "logits/chosen": 251.845947265625, "logits/rejected": 292.0717468261719, "logps/chosen": -0.9524608850479126, "logps/rejected": -1.0290417671203613, "loss": 1.3444, "nll_loss": 1.1181609630584717, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04762304201722145, "rewards/margins": 0.003829048480838537, "rewards/rejected": -0.051452092826366425, "step": 1015 }, { "epoch": 0.8101667990468626, "grad_norm": 28.708763122558594, "learning_rate": 1.565560727712874e-06, "log_odds_chosen": 0.4968925416469574, "log_odds_ratio": -0.6516743898391724, "logits/chosen": 311.68353271484375, "logits/rejected": 264.3401184082031, "logps/chosen": -1.1515090465545654, "logps/rejected": -1.4103407859802246, "loss": 1.4494, "nll_loss": 1.5162445306777954, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05757545307278633, "rewards/margins": 0.012941589578986168, "rewards/rejected": -0.07051704823970795, "step": 1020 }, { "epoch": 0.8141382049245433, "grad_norm": 52.76094055175781, "learning_rate": 1.561737618886061e-06, "log_odds_chosen": 0.09350456297397614, "log_odds_ratio": -0.6697710752487183, "logits/chosen": 481.36810302734375, "logits/rejected": 311.92694091796875, "logps/chosen": -1.1021904945373535, "logps/rejected": -1.1937323808670044, "loss": 1.5063, "nll_loss": 1.150399923324585, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05510953068733215, "rewards/margins": 0.004577091429382563, "rewards/rejected": -0.05968661978840828, "step": 1025 }, { "epoch": 0.818109610802224, "grad_norm": 72.83948516845703, "learning_rate": 1.5579423821243897e-06, "log_odds_chosen": 0.3802599310874939, "log_odds_ratio": -0.5428605675697327, "logits/chosen": 277.2724609375, "logits/rejected": 278.0252685546875, "logps/chosen": -0.7227882742881775, "logps/rejected": -0.9464675784111023, "loss": 1.5166, "nll_loss": 1.0390559434890747, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03613941743969917, "rewards/margins": 0.011183961294591427, "rewards/rejected": -0.047323379665613174, "step": 1030 }, { "epoch": 0.8220810166799047, "grad_norm": 47.56222152709961, "learning_rate": 1.554174680400523e-06, "log_odds_chosen": 0.40021246671676636, "log_odds_ratio": -0.7876571416854858, "logits/chosen": 311.06353759765625, "logits/rejected": 303.9035949707031, "logps/chosen": -1.366424560546875, "logps/rejected": -1.8226970434188843, "loss": 1.4538, "nll_loss": 1.4211769104003906, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06832123547792435, "rewards/margins": 0.022813621908426285, "rewards/rejected": -0.09113486111164093, "step": 1035 }, { "epoch": 0.8260524225575854, "grad_norm": 65.6354751586914, "learning_rate": 1.5504341823651056e-06, "log_odds_chosen": 0.1901915967464447, "log_odds_ratio": -0.7493601441383362, "logits/chosen": 409.8924865722656, "logits/rejected": 282.7567443847656, "logps/chosen": -0.8832821846008301, "logps/rejected": -0.9766547083854675, "loss": 1.519, "nll_loss": 1.5573896169662476, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.044164109975099564, "rewards/margins": 0.004668629728257656, "rewards/rejected": -0.048832736909389496, "step": 1040 }, { "epoch": 0.8300238284352661, "grad_norm": 45.96513366699219, "learning_rate": 1.546720562224365e-06, "log_odds_chosen": 1.7359260320663452, "log_odds_ratio": -0.31901225447654724, "logits/chosen": 329.9159240722656, "logits/rejected": 264.3152160644531, "logps/chosen": -0.5153332352638245, "logps/rejected": -1.4465187788009644, "loss": 1.4326, "nll_loss": 1.9956477880477905, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.025766659528017044, "rewards/margins": 0.046559277921915054, "rewards/rejected": -0.0723259299993515, "step": 1045 }, { "epoch": 0.8339952343129468, "grad_norm": 46.372764587402344, "learning_rate": 1.5430334996209192e-06, "log_odds_chosen": 0.3377481997013092, "log_odds_ratio": -0.6260396242141724, "logits/chosen": 338.7218322753906, "logits/rejected": 290.7141418457031, "logps/chosen": -1.0393493175506592, "logps/rejected": -1.3871691226959229, "loss": 1.3843, "nll_loss": 1.16513192653656, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05196746438741684, "rewards/margins": 0.017390986904501915, "rewards/rejected": -0.0693584531545639, "step": 1050 }, { "epoch": 0.8379666401906275, "grad_norm": 45.74534225463867, "learning_rate": 1.539372679517698e-06, "log_odds_chosen": 0.5245014429092407, "log_odds_ratio": -0.4880025386810303, "logits/chosen": 277.099365234375, "logits/rejected": 466.10931396484375, "logps/chosen": -1.0697903633117676, "logps/rejected": -1.429344654083252, "loss": 1.4411, "nll_loss": 1.334235429763794, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05348951742053032, "rewards/margins": 0.01797771267592907, "rewards/rejected": -0.07146723568439484, "step": 1055 }, { "epoch": 0.8419380460683081, "grad_norm": 54.054969787597656, "learning_rate": 1.5357377920848783e-06, "log_odds_chosen": 0.16159498691558838, "log_odds_ratio": -0.6649383306503296, "logits/chosen": 370.05181884765625, "logits/rejected": 320.9632263183594, "logps/chosen": -0.9088066220283508, "logps/rejected": -1.0025485754013062, "loss": 1.4118, "nll_loss": 1.5204761028289795, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04544033482670784, "rewards/margins": 0.004687097389250994, "rewards/rejected": -0.05012742802500725, "step": 1060 }, { "epoch": 0.8459094519459889, "grad_norm": 44.504940032958984, "learning_rate": 1.532128532589739e-06, "log_odds_chosen": -0.3477151691913605, "log_odds_ratio": -0.9693183898925781, "logits/chosen": 299.94952392578125, "logits/rejected": 313.5872497558594, "logps/chosen": -1.2391369342803955, "logps/rejected": -1.0682213306427002, "loss": 1.502, "nll_loss": 1.6569955348968506, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.061956845223903656, "rewards/margins": -0.008545780554413795, "rewards/rejected": -0.05341106653213501, "step": 1065 }, { "epoch": 0.8498808578236696, "grad_norm": 33.01232147216797, "learning_rate": 1.5285446012893579e-06, "log_odds_chosen": -0.2956480085849762, "log_odds_ratio": -0.9251123666763306, "logits/chosen": 280.8124084472656, "logits/rejected": 386.6439208984375, "logps/chosen": -0.9560649991035461, "logps/rejected": -0.9032427072525024, "loss": 1.2335, "nll_loss": 1.0527818202972412, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.047803252935409546, "rewards/margins": -0.0026411116123199463, "rewards/rejected": -0.0451621375977993, "step": 1070 }, { "epoch": 0.8538522637013503, "grad_norm": 56.84487533569336, "learning_rate": 1.5249857033260468e-06, "log_odds_chosen": -0.0642160177230835, "log_odds_ratio": -0.815158486366272, "logits/chosen": 405.3268737792969, "logits/rejected": 318.09503173828125, "logps/chosen": -1.2454036474227905, "logps/rejected": -1.285205602645874, "loss": 1.6233, "nll_loss": 1.6647475957870483, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.062270186841487885, "rewards/margins": 0.001990094780921936, "rewards/rejected": -0.06426028162240982, "step": 1075 }, { "epoch": 0.857823669579031, "grad_norm": 32.555484771728516, "learning_rate": 1.5214515486254614e-06, "log_odds_chosen": -0.31260019540786743, "log_odds_ratio": -0.9709165692329407, "logits/chosen": 455.90008544921875, "logits/rejected": 257.8133850097656, "logps/chosen": -1.0744378566741943, "logps/rejected": -0.7803062200546265, "loss": 1.5035, "nll_loss": 1.744284987449646, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.053721893578767776, "rewards/margins": -0.014706583693623543, "rewards/rejected": -0.03901531547307968, "step": 1080 }, { "epoch": 0.8617950754567116, "grad_norm": 46.7504768371582, "learning_rate": 1.517941851797291e-06, "log_odds_chosen": 1.6384780406951904, "log_odds_ratio": -0.3990572392940521, "logits/chosen": 343.765380859375, "logits/rejected": 318.4671325683594, "logps/chosen": -0.8857353925704956, "logps/rejected": -2.0790462493896484, "loss": 1.4511, "nll_loss": 1.7493927478790283, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04428676888346672, "rewards/margins": 0.05966554209589958, "rewards/rejected": -0.1039523109793663, "step": 1085 }, { "epoch": 0.8657664813343924, "grad_norm": 45.060401916503906, "learning_rate": 1.5144563320384566e-06, "log_odds_chosen": -0.16205939650535583, "log_odds_ratio": -0.8034777641296387, "logits/chosen": 282.28436279296875, "logits/rejected": 323.5531005859375, "logps/chosen": -1.0885937213897705, "logps/rejected": -0.9365935325622559, "loss": 1.3514, "nll_loss": 1.2723312377929688, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.054429687559604645, "rewards/margins": -0.007600012235343456, "rewards/rejected": -0.04682967811822891, "step": 1090 }, { "epoch": 0.8697378872120731, "grad_norm": 36.2373161315918, "learning_rate": 1.5109947130387486e-06, "log_odds_chosen": 0.03178917244076729, "log_odds_ratio": -0.704880952835083, "logits/chosen": 300.1133117675781, "logits/rejected": 332.36920166015625, "logps/chosen": -0.9320970773696899, "logps/rejected": -0.8847508430480957, "loss": 1.4299, "nll_loss": 1.3066003322601318, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.046604860574007034, "rewards/margins": -0.002367311390116811, "rewards/rejected": -0.04423754662275314, "step": 1095 }, { "epoch": 0.8737092930897538, "grad_norm": 36.22955322265625, "learning_rate": 1.5075567228888182e-06, "log_odds_chosen": 0.2864794135093689, "log_odds_ratio": -0.7445758581161499, "logits/chosen": 316.0003662109375, "logits/rejected": 421.9435119628906, "logps/chosen": -0.9958402514457703, "logps/rejected": -0.9862167239189148, "loss": 1.5677, "nll_loss": 1.4682040214538574, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.049792006611824036, "rewards/margins": -0.00048117563710547984, "rewards/rejected": -0.0493108332157135, "step": 1100 }, { "epoch": 0.8776806989674345, "grad_norm": 40.75776290893555, "learning_rate": 1.5041420939904672e-06, "log_odds_chosen": -0.04482314735651016, "log_odds_ratio": -0.7973297834396362, "logits/chosen": 330.23980712890625, "logits/rejected": 326.890625, "logps/chosen": -0.9843143224716187, "logps/rejected": -0.9512438774108887, "loss": 1.4566, "nll_loss": 1.460742712020874, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04921571537852287, "rewards/margins": -0.0016535200411453843, "rewards/rejected": -0.04756220057606697, "step": 1105 }, { "epoch": 0.8816521048451151, "grad_norm": 35.376686096191406, "learning_rate": 1.5007505629691608e-06, "log_odds_chosen": 0.24461106956005096, "log_odds_ratio": -0.6386993527412415, "logits/chosen": 314.48748779296875, "logits/rejected": 279.96356201171875, "logps/chosen": -1.2024238109588623, "logps/rejected": -1.3905580043792725, "loss": 1.3812, "nll_loss": 1.2091633081436157, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06012119725346565, "rewards/margins": 0.00940670631825924, "rewards/rejected": -0.06952790170907974, "step": 1110 }, { "epoch": 0.8856235107227959, "grad_norm": 42.16975784301758, "learning_rate": 1.4973818705886997e-06, "log_odds_chosen": 2.133307456970215, "log_odds_ratio": -0.39024442434310913, "logits/chosen": 305.95953369140625, "logits/rejected": 435.2921447753906, "logps/chosen": -0.863163948059082, "logps/rejected": -2.759533643722534, "loss": 1.5651, "nll_loss": 1.215340256690979, "rewards/accuracies": 1.0, "rewards/chosen": -0.04315819591283798, "rewards/margins": 0.09481848776340485, "rewards/rejected": -0.13797667622566223, "step": 1115 }, { "epoch": 0.8895949166004765, "grad_norm": 29.565824508666992, "learning_rate": 1.494035761667992e-06, "log_odds_chosen": 0.5857739448547363, "log_odds_ratio": -0.5919302105903625, "logits/chosen": 289.72613525390625, "logits/rejected": 357.0590515136719, "logps/chosen": -1.0517274141311646, "logps/rejected": -1.5134713649749756, "loss": 1.4878, "nll_loss": 1.3550740480422974, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05258636921644211, "rewards/margins": 0.02308720164000988, "rewards/rejected": -0.07567357271909714, "step": 1120 }, { "epoch": 0.8935663224781573, "grad_norm": 42.470863342285156, "learning_rate": 1.49071198499986e-06, "log_odds_chosen": -0.2158789336681366, "log_odds_ratio": -0.8656711578369141, "logits/chosen": 358.0691833496094, "logits/rejected": 302.2369689941406, "logps/chosen": -1.077471375465393, "logps/rejected": -0.9175441861152649, "loss": 1.448, "nll_loss": 1.5222632884979248, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.05387356877326965, "rewards/margins": -0.007996362634003162, "rewards/rejected": -0.045877207070589066, "step": 1125 }, { "epoch": 0.897537728355838, "grad_norm": 40.679229736328125, "learning_rate": 1.487410293271824e-06, "log_odds_chosen": 0.6120940446853638, "log_odds_ratio": -0.5448315739631653, "logits/chosen": 431.89404296875, "logits/rejected": 311.6705017089844, "logps/chosen": -1.0258240699768066, "logps/rejected": -1.5273563861846924, "loss": 1.392, "nll_loss": 1.7506325244903564, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05129120498895645, "rewards/margins": 0.025076616555452347, "rewards/rejected": -0.0763678178191185, "step": 1130 }, { "epoch": 0.9015091342335186, "grad_norm": 48.9831657409668, "learning_rate": 1.484130442988812e-06, "log_odds_chosen": 1.108486533164978, "log_odds_ratio": -0.3976016938686371, "logits/chosen": 286.84564208984375, "logits/rejected": 343.97174072265625, "logps/chosen": -1.2722723484039307, "logps/rejected": -2.1502983570098877, "loss": 1.4505, "nll_loss": 1.5086886882781982, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06361361593008041, "rewards/margins": 0.04390129819512367, "rewards/rejected": -0.10751490294933319, "step": 1135 }, { "epoch": 0.9054805401111994, "grad_norm": 45.998348236083984, "learning_rate": 1.480872194397731e-06, "log_odds_chosen": 0.01077426690608263, "log_odds_ratio": -0.7240376472473145, "logits/chosen": 316.49053955078125, "logits/rejected": 369.6336669921875, "logps/chosen": -0.8706620931625366, "logps/rejected": -0.8589606285095215, "loss": 1.3818, "nll_loss": 1.0445737838745117, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04353310167789459, "rewards/margins": -0.0005850695306435227, "rewards/rejected": -0.042948029935359955, "step": 1140 }, { "epoch": 0.90945194598888, "grad_norm": 43.84899139404297, "learning_rate": 1.4776353114138545e-06, "log_odds_chosen": 0.3026946485042572, "log_odds_ratio": -0.7330090403556824, "logits/chosen": 382.29071044921875, "logits/rejected": 280.1352844238281, "logps/chosen": -0.9080026745796204, "logps/rejected": -1.0521103143692017, "loss": 1.4538, "nll_loss": 1.2269086837768555, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04540013149380684, "rewards/margins": 0.0072053843177855015, "rewards/rejected": -0.0526055172085762, "step": 1145 }, { "epoch": 0.9134233518665608, "grad_norm": 44.53425979614258, "learning_rate": 1.4744195615489715e-06, "log_odds_chosen": 0.41574639081954956, "log_odds_ratio": -0.5235159993171692, "logits/chosen": 313.0999450683594, "logits/rejected": 280.4124450683594, "logps/chosen": -0.8852353096008301, "logps/rejected": -1.1474339962005615, "loss": 1.3557, "nll_loss": 1.5458626747131348, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.044261764734983444, "rewards/margins": 0.013109927996993065, "rewards/rejected": -0.05737169459462166, "step": 1150 }, { "epoch": 0.9173947577442415, "grad_norm": 73.07974243164062, "learning_rate": 1.4712247158412494e-06, "log_odds_chosen": 0.6588196158409119, "log_odds_ratio": -0.517508864402771, "logits/chosen": 305.9576416015625, "logits/rejected": 279.7393493652344, "logps/chosen": -0.661457896232605, "logps/rejected": -1.1083705425262451, "loss": 1.5078, "nll_loss": 1.212100625038147, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03307289630174637, "rewards/margins": 0.022345632314682007, "rewards/rejected": -0.055418528616428375, "step": 1155 }, { "epoch": 0.9213661636219221, "grad_norm": 89.8277587890625, "learning_rate": 1.4680505487867589e-06, "log_odds_chosen": 0.16856543719768524, "log_odds_ratio": -0.7652976512908936, "logits/chosen": 419.119140625, "logits/rejected": 306.33819580078125, "logps/chosen": -0.7615987062454224, "logps/rejected": -0.9220150709152222, "loss": 1.2839, "nll_loss": 0.8718665242195129, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.03807993233203888, "rewards/margins": 0.00802082009613514, "rewards/rejected": -0.04610075429081917, "step": 1160 }, { "epoch": 0.9253375694996029, "grad_norm": 38.93104934692383, "learning_rate": 1.4648968382726192e-06, "log_odds_chosen": -0.008628154173493385, "log_odds_ratio": -0.7770323753356934, "logits/chosen": 301.6461486816406, "logits/rejected": 302.1844177246094, "logps/chosen": -1.2084157466888428, "logps/rejected": -1.24485445022583, "loss": 1.4509, "nll_loss": 1.5561408996582031, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.060420792549848557, "rewards/margins": 0.0018219311023131013, "rewards/rejected": -0.06224272772669792, "step": 1165 }, { "epoch": 0.9293089753772835, "grad_norm": 51.1599235534668, "learning_rate": 1.4617633655117156e-06, "log_odds_chosen": -0.02944868803024292, "log_odds_ratio": -0.7933769226074219, "logits/chosen": 274.8695983886719, "logits/rejected": 369.8721618652344, "logps/chosen": -1.3042938709259033, "logps/rejected": -1.2830064296722412, "loss": 1.5347, "nll_loss": 1.4256232976913452, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06521469354629517, "rewards/margins": -0.001064369105733931, "rewards/rejected": -0.06415032595396042, "step": 1170 }, { "epoch": 0.9332803812549643, "grad_norm": 68.91346740722656, "learning_rate": 1.4586499149789457e-06, "log_odds_chosen": -0.08938068896532059, "log_odds_ratio": -0.7493371963500977, "logits/chosen": 295.7596130371094, "logits/rejected": 395.13543701171875, "logps/chosen": -1.07353937625885, "logps/rejected": -1.0517762899398804, "loss": 1.5337, "nll_loss": 1.382968783378601, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.053676970303058624, "rewards/margins": -0.0010881524067372084, "rewards/rejected": -0.0525888130068779, "step": 1175 }, { "epoch": 0.937251787132645, "grad_norm": 31.116653442382812, "learning_rate": 1.4555562743489552e-06, "log_odds_chosen": 0.08080291748046875, "log_odds_ratio": -0.6991288065910339, "logits/chosen": 348.20330810546875, "logits/rejected": 262.96844482421875, "logps/chosen": -0.9602434039115906, "logps/rejected": -0.9359101057052612, "loss": 1.4031, "nll_loss": 1.3135709762573242, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04801217466592789, "rewards/margins": -0.001216667122207582, "rewards/rejected": -0.04679550975561142, "step": 1180 }, { "epoch": 0.9412231930103256, "grad_norm": 33.331146240234375, "learning_rate": 1.4524822344353171e-06, "log_odds_chosen": -0.2812700867652893, "log_odds_ratio": -0.9116196632385254, "logits/chosen": 427.7166442871094, "logits/rejected": 252.6331329345703, "logps/chosen": -1.4378384351730347, "logps/rejected": -1.2257274389266968, "loss": 1.5102, "nll_loss": 1.7158218622207642, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.07189192622900009, "rewards/margins": -0.010605551302433014, "rewards/rejected": -0.06128637120127678, "step": 1185 }, { "epoch": 0.9451945988880064, "grad_norm": 49.096778869628906, "learning_rate": 1.4494275891311214e-06, "log_odds_chosen": 0.510400652885437, "log_odds_ratio": -0.6953208446502686, "logits/chosen": 432.3097229003906, "logits/rejected": 280.7305603027344, "logps/chosen": -0.9621549844741821, "logps/rejected": -1.430006980895996, "loss": 1.4778, "nll_loss": 1.3935554027557373, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.048107750713825226, "rewards/margins": 0.023392602801322937, "rewards/rejected": -0.07150034606456757, "step": 1190 }, { "epoch": 0.949166004765687, "grad_norm": 54.93104934692383, "learning_rate": 1.4463921353509293e-06, "log_odds_chosen": -0.25363707542419434, "log_odds_ratio": -0.8674055933952332, "logits/chosen": 415.92730712890625, "logits/rejected": 317.29217529296875, "logps/chosen": -0.9652946591377258, "logps/rejected": -0.8204643130302429, "loss": 1.4354, "nll_loss": 1.3288614749908447, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04826473444700241, "rewards/margins": -0.0072415173053741455, "rewards/rejected": -0.041023217141628265, "step": 1195 }, { "epoch": 0.9531374106433678, "grad_norm": 35.22028732299805, "learning_rate": 1.4433756729740647e-06, "log_odds_chosen": 0.19383028149604797, "log_odds_ratio": -0.7156062126159668, "logits/chosen": 338.9249572753906, "logits/rejected": 362.58660888671875, "logps/chosen": -0.9961981773376465, "logps/rejected": -0.9824131727218628, "loss": 1.541, "nll_loss": 1.3671401739120483, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04980991408228874, "rewards/margins": -0.0006892535602673888, "rewards/rejected": -0.04912065714597702, "step": 1200 }, { "epoch": 0.9571088165210484, "grad_norm": 43.74067687988281, "learning_rate": 1.4403780047891936e-06, "log_odds_chosen": 0.19641388952732086, "log_odds_ratio": -0.7219318151473999, "logits/chosen": 317.5765380859375, "logits/rejected": 387.16510009765625, "logps/chosen": -1.3108140230178833, "logps/rejected": -1.4882935285568237, "loss": 1.4724, "nll_loss": 1.3518784046173096, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06554069370031357, "rewards/margins": 0.00887397862970829, "rewards/rejected": -0.0744146779179573, "step": 1205 }, { "epoch": 0.9610802223987291, "grad_norm": 53.19133377075195, "learning_rate": 1.4373989364401727e-06, "log_odds_chosen": 0.1009591817855835, "log_odds_ratio": -0.6597286462783813, "logits/chosen": 381.9156188964844, "logits/rejected": 333.6917419433594, "logps/chosen": -1.0816733837127686, "logps/rejected": -1.1360995769500732, "loss": 1.4638, "nll_loss": 1.632345199584961, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05408366769552231, "rewards/margins": 0.0027213036082684994, "rewards/rejected": -0.056804973632097244, "step": 1210 }, { "epoch": 0.9650516282764099, "grad_norm": 38.759151458740234, "learning_rate": 1.4344382763731173e-06, "log_odds_chosen": 0.26151102781295776, "log_odds_ratio": -0.6654404401779175, "logits/chosen": 328.0746154785156, "logits/rejected": 326.65576171875, "logps/chosen": -1.2324529886245728, "logps/rejected": -1.4607326984405518, "loss": 1.4824, "nll_loss": 1.464372992515564, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06162264943122864, "rewards/margins": 0.011413980275392532, "rewards/rejected": -0.07303663343191147, "step": 1215 }, { "epoch": 0.9690230341540905, "grad_norm": 61.07746505737305, "learning_rate": 1.4314958357846706e-06, "log_odds_chosen": -0.1794472187757492, "log_odds_ratio": -0.9448292851448059, "logits/chosen": 302.45697021484375, "logits/rejected": 391.7511291503906, "logps/chosen": -0.9841415286064148, "logps/rejected": -1.0038130283355713, "loss": 1.3866, "nll_loss": 1.1302523612976074, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04920707270503044, "rewards/margins": 0.0009835765231400728, "rewards/rejected": -0.050190649926662445, "step": 1220 }, { "epoch": 0.9729944400317713, "grad_norm": 50.63767623901367, "learning_rate": 1.4285714285714286e-06, "log_odds_chosen": 0.9508854150772095, "log_odds_ratio": -0.4959738254547119, "logits/chosen": 335.69036865234375, "logits/rejected": 390.5137939453125, "logps/chosen": -0.9460613131523132, "logps/rejected": -1.4656463861465454, "loss": 1.2784, "nll_loss": 1.3087149858474731, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04730306938290596, "rewards/margins": 0.02597925066947937, "rewards/rejected": -0.07328232377767563, "step": 1225 }, { "epoch": 0.9769658459094519, "grad_norm": 45.654056549072266, "learning_rate": 1.4256648712805027e-06, "log_odds_chosen": 0.6766214966773987, "log_odds_ratio": -0.48730725049972534, "logits/chosen": 313.19122314453125, "logits/rejected": 360.2751770019531, "logps/chosen": -0.8694770932197571, "logps/rejected": -1.3065763711929321, "loss": 1.3305, "nll_loss": 1.3452584743499756, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04347385838627815, "rewards/margins": 0.021854963153600693, "rewards/rejected": -0.06532882153987885, "step": 1230 }, { "epoch": 0.9809372517871326, "grad_norm": 47.46096420288086, "learning_rate": 1.4227759830611807e-06, "log_odds_chosen": 0.700452446937561, "log_odds_ratio": -0.49210184812545776, "logits/chosen": 304.34393310546875, "logits/rejected": 310.6593322753906, "logps/chosen": -0.8900222778320312, "logps/rejected": -1.4213621616363525, "loss": 1.4565, "nll_loss": 1.5947277545928955, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04450111836194992, "rewards/margins": 0.026566997170448303, "rewards/rejected": -0.07106811553239822, "step": 1235 }, { "epoch": 0.9849086576648134, "grad_norm": 46.09358596801758, "learning_rate": 1.419904585617662e-06, "log_odds_chosen": 0.14936234056949615, "log_odds_ratio": -0.8327441215515137, "logits/chosen": 274.401611328125, "logits/rejected": 412.91912841796875, "logps/chosen": -0.9217895269393921, "logps/rejected": -1.1136987209320068, "loss": 1.5226, "nll_loss": 1.5541056394577026, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.046089477837085724, "rewards/margins": 0.00959546584635973, "rewards/rejected": -0.05568494275212288, "step": 1240 }, { "epoch": 0.988880063542494, "grad_norm": 71.01630401611328, "learning_rate": 1.4170505031628396e-06, "log_odds_chosen": 0.36680126190185547, "log_odds_ratio": -0.5545364022254944, "logits/chosen": 397.23516845703125, "logits/rejected": 272.55535888671875, "logps/chosen": -0.9994925260543823, "logps/rejected": -1.2435686588287354, "loss": 1.4343, "nll_loss": 1.5012677907943726, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.049974631518125534, "rewards/margins": 0.012203807011246681, "rewards/rejected": -0.062178440392017365, "step": 1245 }, { "epoch": 0.9928514694201748, "grad_norm": 77.71598815917969, "learning_rate": 1.4142135623730952e-06, "log_odds_chosen": 0.08250565826892853, "log_odds_ratio": -0.732119083404541, "logits/chosen": 322.84356689453125, "logits/rejected": 360.3878479003906, "logps/chosen": -1.1035608053207397, "logps/rejected": -1.1463263034820557, "loss": 1.3562, "nll_loss": 1.2327629327774048, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05517803877592087, "rewards/margins": 0.002138280076906085, "rewards/rejected": -0.05731632187962532, "step": 1250 }, { "epoch": 0.9968228752978554, "grad_norm": 57.77963638305664, "learning_rate": 1.4113935923440917e-06, "log_odds_chosen": -0.49407824873924255, "log_odds_ratio": -0.9747999906539917, "logits/chosen": 290.9209289550781, "logits/rejected": 428.91351318359375, "logps/chosen": -0.9552696347236633, "logps/rejected": -0.7045097947120667, "loss": 1.3309, "nll_loss": 1.1892468929290771, "rewards/accuracies": 0.0, "rewards/chosen": -0.047763485461473465, "rewards/margins": -0.012537995353341103, "rewards/rejected": -0.03522548824548721, "step": 1255 }, { "epoch": 1.0, "eval_log_odds_chosen": 0.21191194653511047, "eval_log_odds_ratio": -0.7034205794334412, "eval_logits/chosen": 338.2715148925781, "eval_logits/rejected": 310.9833068847656, "eval_logps/chosen": -1.0253794193267822, "eval_logps/rejected": -1.1665838956832886, "eval_loss": 1.4432373046875, "eval_nll_loss": 1.3963584899902344, "eval_rewards/accuracies": 0.5467625856399536, "eval_rewards/chosen": -0.05126897618174553, "eval_rewards/margins": 0.0070602260529994965, "eval_rewards/rejected": -0.05832919850945473, "eval_runtime": 91.6529, "eval_samples_per_second": 6.034, "eval_steps_per_second": 1.517, "step": 1259 }, { "epoch": 1.0007942811755361, "grad_norm": 29.619354248046875, "learning_rate": 1.4085904245475275e-06, "log_odds_chosen": 0.2918682098388672, "log_odds_ratio": -0.6367956399917603, "logits/chosen": 296.0645446777344, "logits/rejected": 305.7909240722656, "logps/chosen": -0.9510093927383423, "logps/rejected": -1.154069185256958, "loss": 1.35, "nll_loss": 1.288127064704895, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.047550469636917114, "rewards/margins": 0.01015299092978239, "rewards/rejected": -0.05770345777273178, "step": 1260 }, { "epoch": 1.004765687053217, "grad_norm": 52.63589096069336, "learning_rate": 1.4058038927888332e-06, "log_odds_chosen": 0.4959385395050049, "log_odds_ratio": -0.5091910362243652, "logits/chosen": 398.83843994140625, "logits/rejected": 301.77056884765625, "logps/chosen": -0.6213952898979187, "logps/rejected": -0.8220237493515015, "loss": 0.8874, "nll_loss": 0.9351065754890442, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.031069766730070114, "rewards/margins": 0.010031421668827534, "rewards/rejected": -0.041101183742284775, "step": 1265 }, { "epoch": 1.0087370929308976, "grad_norm": 39.637977600097656, "learning_rate": 1.4030338331657844e-06, "log_odds_chosen": 0.8783077001571655, "log_odds_ratio": -0.42215338349342346, "logits/chosen": 334.1138610839844, "logits/rejected": 312.70989990234375, "logps/chosen": -0.8557085990905762, "logps/rejected": -1.4451814889907837, "loss": 0.8788, "nll_loss": 0.9550254940986633, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04278543218970299, "rewards/margins": 0.02947363629937172, "rewards/rejected": -0.07225906848907471, "step": 1270 }, { "epoch": 1.0127084988085782, "grad_norm": 45.631141662597656, "learning_rate": 1.4002800840280098e-06, "log_odds_chosen": 1.9242538213729858, "log_odds_ratio": -0.280457466840744, "logits/chosen": 319.7841796875, "logits/rejected": 331.0938720703125, "logps/chosen": -0.5420134663581848, "logps/rejected": -1.7290103435516357, "loss": 0.8474, "nll_loss": 0.8860380053520203, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.02710067108273506, "rewards/margins": 0.05934985354542732, "rewards/rejected": -0.08645053207874298, "step": 1275 }, { "epoch": 1.016679904686259, "grad_norm": 41.359683990478516, "learning_rate": 1.3975424859373688e-06, "log_odds_chosen": 1.5988181829452515, "log_odds_ratio": -0.31896865367889404, "logits/chosen": 303.55609130859375, "logits/rejected": 302.9502868652344, "logps/chosen": -0.434316486120224, "logps/rejected": -0.9644277691841125, "loss": 0.817, "nll_loss": 0.7315559387207031, "rewards/accuracies": 1.0, "rewards/chosen": -0.02171582356095314, "rewards/margins": 0.026505568996071815, "rewards/rejected": -0.048221390694379807, "step": 1280 }, { "epoch": 1.0206513105639397, "grad_norm": 35.43171310424805, "learning_rate": 1.3948208816291767e-06, "log_odds_chosen": 2.031580924987793, "log_odds_ratio": -0.16610851883888245, "logits/chosen": 268.5763244628906, "logits/rejected": 361.4361267089844, "logps/chosen": -0.48307856917381287, "logps/rejected": -1.6509729623794556, "loss": 0.918, "nll_loss": 1.000573754310608, "rewards/accuracies": 1.0, "rewards/chosen": -0.024153929203748703, "rewards/margins": 0.058394718915224075, "rewards/rejected": -0.08254864811897278, "step": 1285 }, { "epoch": 1.0246227164416204, "grad_norm": 49.6970329284668, "learning_rate": 1.3921151159742616e-06, "log_odds_chosen": 1.449532151222229, "log_odds_ratio": -0.3011077642440796, "logits/chosen": 330.57379150390625, "logits/rejected": 305.499755859375, "logps/chosen": -0.5765695571899414, "logps/rejected": -1.4029136896133423, "loss": 0.7548, "nll_loss": 0.7175348997116089, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.02882847748696804, "rewards/margins": 0.041317202150821686, "rewards/rejected": -0.07014568150043488, "step": 1290 }, { "epoch": 1.0285941223193011, "grad_norm": 32.22804260253906, "learning_rate": 1.3894250359418213e-06, "log_odds_chosen": 1.8826097249984741, "log_odds_ratio": -0.1858014166355133, "logits/chosen": 321.48516845703125, "logits/rejected": 292.4135437011719, "logps/chosen": -0.42812156677246094, "logps/rejected": -1.511461615562439, "loss": 0.7512, "nll_loss": 0.7446305155754089, "rewards/accuracies": 1.0, "rewards/chosen": -0.021406078711152077, "rewards/margins": 0.0541670098900795, "rewards/rejected": -0.07557308673858643, "step": 1295 }, { "epoch": 1.0325655281969817, "grad_norm": 36.08200454711914, "learning_rate": 1.386750490563073e-06, "log_odds_chosen": 1.3494882583618164, "log_odds_ratio": -0.29181593656539917, "logits/chosen": 383.32464599609375, "logits/rejected": 247.87405395507812, "logps/chosen": -0.38351649045944214, "logps/rejected": -0.8719679713249207, "loss": 0.8151, "nll_loss": 0.8919464945793152, "rewards/accuracies": 1.0, "rewards/chosen": -0.019175823777914047, "rewards/margins": 0.024422574788331985, "rewards/rejected": -0.04359840229153633, "step": 1300 }, { "epoch": 1.0365369340746624, "grad_norm": 53.69875717163086, "learning_rate": 1.3840913308956663e-06, "log_odds_chosen": 4.168577194213867, "log_odds_ratio": -0.15235312283039093, "logits/chosen": 233.92977905273438, "logits/rejected": 265.3076171875, "logps/chosen": -0.34961751103401184, "logps/rejected": -2.9050133228302, "loss": 0.764, "nll_loss": 0.7820371985435486, "rewards/accuracies": 1.0, "rewards/chosen": -0.01748087629675865, "rewards/margins": 0.12776978313922882, "rewards/rejected": -0.14525064826011658, "step": 1305 }, { "epoch": 1.0405083399523432, "grad_norm": 22.12669563293457, "learning_rate": 1.3814474099888442e-06, "log_odds_chosen": 2.3678359985351562, "log_odds_ratio": -0.18794922530651093, "logits/chosen": 366.886474609375, "logits/rejected": 262.33270263671875, "logps/chosen": -0.4483531415462494, "logps/rejected": -1.4391227960586548, "loss": 0.8066, "nll_loss": 0.7956131100654602, "rewards/accuracies": 1.0, "rewards/chosen": -0.02241765707731247, "rewards/margins": 0.04953848570585251, "rewards/rejected": -0.07195614278316498, "step": 1310 }, { "epoch": 1.044479745830024, "grad_norm": 33.546234130859375, "learning_rate": 1.3788185828493344e-06, "log_odds_chosen": 2.3227920532226562, "log_odds_ratio": -0.12674236297607422, "logits/chosen": 265.67144775390625, "logits/rejected": 258.4465637207031, "logps/chosen": -0.553837239742279, "logps/rejected": -1.858119249343872, "loss": 0.8738, "nll_loss": 0.9288773536682129, "rewards/accuracies": 1.0, "rewards/chosen": -0.027691861614584923, "rewards/margins": 0.06521410495042801, "rewards/rejected": -0.09290595352649689, "step": 1315 }, { "epoch": 1.0484511517077044, "grad_norm": 29.37790298461914, "learning_rate": 1.376204706407951e-06, "log_odds_chosen": 1.2011265754699707, "log_odds_ratio": -0.3517860472202301, "logits/chosen": 263.21441650390625, "logits/rejected": 300.9775085449219, "logps/chosen": -0.7342194318771362, "logps/rejected": -1.4895966053009033, "loss": 0.8622, "nll_loss": 0.894271731376648, "rewards/accuracies": 1.0, "rewards/chosen": -0.03671097382903099, "rewards/margins": 0.03776886314153671, "rewards/rejected": -0.0744798332452774, "step": 1320 }, { "epoch": 1.0524225575853852, "grad_norm": 43.21280288696289, "learning_rate": 1.3736056394868905e-06, "log_odds_chosen": 1.9679548740386963, "log_odds_ratio": -0.14373886585235596, "logits/chosen": 258.61346435546875, "logits/rejected": 458.6058044433594, "logps/chosen": -0.5523756742477417, "logps/rejected": -1.7980626821517944, "loss": 0.6717, "nll_loss": 0.6116268634796143, "rewards/accuracies": 1.0, "rewards/chosen": -0.027618780732154846, "rewards/margins": 0.062284357845783234, "rewards/rejected": -0.08990313112735748, "step": 1325 }, { "epoch": 1.056393963463066, "grad_norm": 22.586336135864258, "learning_rate": 1.3710212427677044e-06, "log_odds_chosen": 2.0330963134765625, "log_odds_ratio": -0.23092810809612274, "logits/chosen": 466.9287109375, "logits/rejected": 251.6785430908203, "logps/chosen": -0.4764311909675598, "logps/rejected": -1.5419471263885498, "loss": 0.9319, "nll_loss": 0.8202090263366699, "rewards/accuracies": 1.0, "rewards/chosen": -0.02382155880331993, "rewards/margins": 0.05327579379081726, "rewards/rejected": -0.07709735631942749, "step": 1330 }, { "epoch": 1.0603653693407467, "grad_norm": 24.074445724487305, "learning_rate": 1.3684513787599335e-06, "log_odds_chosen": 1.8354690074920654, "log_odds_ratio": -0.24452456831932068, "logits/chosen": 311.4925537109375, "logits/rejected": 325.05133056640625, "logps/chosen": -0.4112465977668762, "logps/rejected": -1.3696399927139282, "loss": 0.7552, "nll_loss": 0.7564548254013062, "rewards/accuracies": 1.0, "rewards/chosen": -0.02056233026087284, "rewards/margins": 0.04791966825723648, "rewards/rejected": -0.06848199665546417, "step": 1335 }, { "epoch": 1.0643367752184274, "grad_norm": 36.35709762573242, "learning_rate": 1.3658959117703826e-06, "log_odds_chosen": 1.767221212387085, "log_odds_ratio": -0.24831262230873108, "logits/chosen": 301.0987243652344, "logits/rejected": 408.99468994140625, "logps/chosen": -0.37535279989242554, "logps/rejected": -1.3626410961151123, "loss": 0.823, "nll_loss": 0.6886339783668518, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.018767639994621277, "rewards/margins": 0.04936441406607628, "rewards/rejected": -0.06813205778598785, "step": 1340 }, { "epoch": 1.068308181096108, "grad_norm": 68.75084686279297, "learning_rate": 1.3633547078730297e-06, "log_odds_chosen": 2.1947848796844482, "log_odds_ratio": -0.12946011126041412, "logits/chosen": 387.3055114746094, "logits/rejected": 310.6745300292969, "logps/chosen": -0.3506600260734558, "logps/rejected": -1.50771164894104, "loss": 0.7823, "nll_loss": 0.8046265840530396, "rewards/accuracies": 1.0, "rewards/chosen": -0.01753300055861473, "rewards/margins": 0.05785257741808891, "rewards/rejected": -0.07538558542728424, "step": 1345 }, { "epoch": 1.0722795869737887, "grad_norm": 38.153602600097656, "learning_rate": 1.3608276348795436e-06, "log_odds_chosen": 2.0112361907958984, "log_odds_ratio": -0.2819617688655853, "logits/chosen": 268.81610107421875, "logits/rejected": 327.9646911621094, "logps/chosen": -0.4526129364967346, "logps/rejected": -1.295741319656372, "loss": 0.7329, "nll_loss": 0.6641907691955566, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.02263064496219158, "rewards/margins": 0.04215642064809799, "rewards/rejected": -0.06478706747293472, "step": 1350 }, { "epoch": 1.0762509928514694, "grad_norm": 32.85861587524414, "learning_rate": 1.3583145623104033e-06, "log_odds_chosen": 1.1966564655303955, "log_odds_ratio": -0.3293878138065338, "logits/chosen": 249.0363311767578, "logits/rejected": 405.04937744140625, "logps/chosen": -0.6460349559783936, "logps/rejected": -1.4099568128585815, "loss": 0.8066, "nll_loss": 0.7530792951583862, "rewards/accuracies": 1.0, "rewards/chosen": -0.03230174630880356, "rewards/margins": 0.03819609433412552, "rewards/rejected": -0.07049783319234848, "step": 1355 }, { "epoch": 1.0802223987291502, "grad_norm": 35.103965759277344, "learning_rate": 1.355815361366601e-06, "log_odds_chosen": 1.2699463367462158, "log_odds_ratio": -0.31867465376853943, "logits/chosen": 364.13507080078125, "logits/rejected": 267.37847900390625, "logps/chosen": -0.6191684007644653, "logps/rejected": -1.3752472400665283, "loss": 0.6163, "nll_loss": 0.6952677965164185, "rewards/accuracies": 1.0, "rewards/chosen": -0.030958417803049088, "rewards/margins": 0.03780394047498703, "rewards/rejected": -0.06876234710216522, "step": 1360 }, { "epoch": 1.084193804606831, "grad_norm": 30.19273567199707, "learning_rate": 1.353329904901917e-06, "log_odds_chosen": 1.201957106590271, "log_odds_ratio": -0.3093491494655609, "logits/chosen": 413.76995849609375, "logits/rejected": 276.65838623046875, "logps/chosen": -0.4616571068763733, "logps/rejected": -1.0381189584732056, "loss": 0.9525, "nll_loss": 0.9139319658279419, "rewards/accuracies": 1.0, "rewards/chosen": -0.023082856088876724, "rewards/margins": 0.028823088854551315, "rewards/rejected": -0.05190594866871834, "step": 1365 }, { "epoch": 1.0881652104845114, "grad_norm": 24.173643112182617, "learning_rate": 1.350858067395748e-06, "log_odds_chosen": 2.342852830886841, "log_odds_ratio": -0.15998277068138123, "logits/chosen": 344.3056335449219, "logits/rejected": 283.6306457519531, "logps/chosen": -0.49249267578125, "logps/rejected": -2.0865066051483154, "loss": 0.8197, "nll_loss": 1.0569822788238525, "rewards/accuracies": 1.0, "rewards/chosen": -0.02462463639676571, "rewards/margins": 0.07970069348812103, "rewards/rejected": -0.10432533174753189, "step": 1370 }, { "epoch": 1.0921366163621922, "grad_norm": 32.355491638183594, "learning_rate": 1.3483997249264844e-06, "log_odds_chosen": 1.5590509176254272, "log_odds_ratio": -0.2487892359495163, "logits/chosen": 316.20159912109375, "logits/rejected": 294.17279052734375, "logps/chosen": -0.5574954748153687, "logps/rejected": -1.4713170528411865, "loss": 0.8676, "nll_loss": 0.797051191329956, "rewards/accuracies": 1.0, "rewards/chosen": -0.027874771505594254, "rewards/margins": 0.04569108411669731, "rewards/rejected": -0.07356585562229156, "step": 1375 }, { "epoch": 1.096108022239873, "grad_norm": 29.341623306274414, "learning_rate": 1.345954755145414e-06, "log_odds_chosen": 2.3310647010803223, "log_odds_ratio": -0.11525207757949829, "logits/chosen": 308.07275390625, "logits/rejected": 301.0416259765625, "logps/chosen": -0.6554034352302551, "logps/rejected": -2.0731258392333984, "loss": 0.9803, "nll_loss": 1.159543514251709, "rewards/accuracies": 1.0, "rewards/chosen": -0.032770175486803055, "rewards/margins": 0.07088612020015717, "rewards/rejected": -0.10365629196166992, "step": 1380 }, { "epoch": 1.1000794281175537, "grad_norm": 41.638519287109375, "learning_rate": 1.3435230372511476e-06, "log_odds_chosen": 1.702256441116333, "log_odds_ratio": -0.24031396210193634, "logits/chosen": 422.57574462890625, "logits/rejected": 285.2515563964844, "logps/chosen": -0.39290952682495117, "logps/rejected": -1.281812071800232, "loss": 0.6728, "nll_loss": 0.5043781995773315, "rewards/accuracies": 1.0, "rewards/chosen": -0.01964547671377659, "rewards/margins": 0.04444512724876404, "rewards/rejected": -0.06409060209989548, "step": 1385 }, { "epoch": 1.1040508339952344, "grad_norm": 27.213613510131836, "learning_rate": 1.3411044519645502e-06, "log_odds_chosen": 2.5730555057525635, "log_odds_ratio": -0.0975516214966774, "logits/chosen": 320.9614562988281, "logits/rejected": 307.90142822265625, "logps/chosen": -0.4752315878868103, "logps/rejected": -2.1970314979553223, "loss": 0.8552, "nll_loss": 0.7889829874038696, "rewards/accuracies": 1.0, "rewards/chosen": -0.023761581629514694, "rewards/margins": 0.08608999848365784, "rewards/rejected": -0.10985157638788223, "step": 1390 }, { "epoch": 1.108022239872915, "grad_norm": 36.346641540527344, "learning_rate": 1.3386988815041649e-06, "log_odds_chosen": 2.5185952186584473, "log_odds_ratio": -0.08284667134284973, "logits/chosen": 272.705078125, "logits/rejected": 315.05780029296875, "logps/chosen": -0.4241599440574646, "logps/rejected": -2.0232601165771484, "loss": 0.8018, "nll_loss": 0.7871303558349609, "rewards/accuracies": 1.0, "rewards/chosen": -0.02120799943804741, "rewards/margins": 0.07995501160621643, "rewards/rejected": -0.10116300731897354, "step": 1395 }, { "epoch": 1.1119936457505957, "grad_norm": 48.55631637573242, "learning_rate": 1.3363062095621222e-06, "log_odds_chosen": 1.3166413307189941, "log_odds_ratio": -0.2837154269218445, "logits/chosen": 345.50860595703125, "logits/rejected": 293.98748779296875, "logps/chosen": -0.6463770866394043, "logps/rejected": -1.450761318206787, "loss": 0.9813, "nll_loss": 0.8488261103630066, "rewards/accuracies": 1.0, "rewards/chosen": -0.032318856567144394, "rewards/margins": 0.04021921008825302, "rewards/rejected": -0.07253806293010712, "step": 1400 }, { "epoch": 1.1159650516282764, "grad_norm": 24.538806915283203, "learning_rate": 1.3339263212805207e-06, "log_odds_chosen": 1.0076740980148315, "log_odds_ratio": -0.4034115672111511, "logits/chosen": 276.04364013671875, "logits/rejected": 311.43414306640625, "logps/chosen": -0.606917142868042, "logps/rejected": -1.1501991748809814, "loss": 0.8539, "nll_loss": 0.8009790182113647, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0303458571434021, "rewards/margins": 0.02716410532593727, "rewards/rejected": -0.05750995874404907, "step": 1405 }, { "epoch": 1.1199364575059572, "grad_norm": 38.5094108581543, "learning_rate": 1.3315591032282687e-06, "log_odds_chosen": 1.1049978733062744, "log_odds_ratio": -0.3262965977191925, "logits/chosen": 312.492919921875, "logits/rejected": 300.9084167480469, "logps/chosen": -0.4392150044441223, "logps/rejected": -0.9970752596855164, "loss": 0.8508, "nll_loss": 0.6344660520553589, "rewards/accuracies": 1.0, "rewards/chosen": -0.021960750222206116, "rewards/margins": 0.027893012389540672, "rewards/rejected": -0.04985376447439194, "step": 1410 }, { "epoch": 1.123907863383638, "grad_norm": 28.918603897094727, "learning_rate": 1.3292044433783766e-06, "log_odds_chosen": 1.1254879236221313, "log_odds_ratio": -0.3050915598869324, "logits/chosen": 357.88006591796875, "logits/rejected": 243.5061492919922, "logps/chosen": -0.5345466732978821, "logps/rejected": -1.1529901027679443, "loss": 0.8655, "nll_loss": 0.7837560772895813, "rewards/accuracies": 1.0, "rewards/chosen": -0.026727333664894104, "rewards/margins": 0.030922168865799904, "rewards/rejected": -0.05764950439333916, "step": 1415 }, { "epoch": 1.1278792692613184, "grad_norm": 27.14815330505371, "learning_rate": 1.3268622310856882e-06, "log_odds_chosen": 1.6597429513931274, "log_odds_ratio": -0.24181696772575378, "logits/chosen": 328.18450927734375, "logits/rejected": 278.3966369628906, "logps/chosen": -0.5262452363967896, "logps/rejected": -1.241698980331421, "loss": 0.8028, "nll_loss": 0.7854543924331665, "rewards/accuracies": 1.0, "rewards/chosen": -0.026312265545129776, "rewards/margins": 0.03577268496155739, "rewards/rejected": -0.06208494305610657, "step": 1420 }, { "epoch": 1.1318506751389992, "grad_norm": 38.537261962890625, "learning_rate": 1.324532357065044e-06, "log_odds_chosen": 2.3596420288085938, "log_odds_ratio": -0.10275080054998398, "logits/chosen": 410.06903076171875, "logits/rejected": 263.68353271484375, "logps/chosen": -0.2327149659395218, "logps/rejected": -1.2866109609603882, "loss": 0.6069, "nll_loss": 0.612027645111084, "rewards/accuracies": 1.0, "rewards/chosen": -0.01163574866950512, "rewards/margins": 0.05269480496644974, "rewards/rejected": -0.0643305554986, "step": 1425 }, { "epoch": 1.13582208101668, "grad_norm": 32.535560607910156, "learning_rate": 1.3222147133698626e-06, "log_odds_chosen": 0.9134140014648438, "log_odds_ratio": -0.42213669419288635, "logits/chosen": 327.38427734375, "logits/rejected": 323.18572998046875, "logps/chosen": -0.7350178360939026, "logps/rejected": -1.153857707977295, "loss": 0.8071, "nll_loss": 0.8739555478096008, "rewards/accuracies": 1.0, "rewards/chosen": -0.03675089031457901, "rewards/margins": 0.020941998809576035, "rewards/rejected": -0.057692885398864746, "step": 1430 }, { "epoch": 1.1397934868943607, "grad_norm": 25.7900390625, "learning_rate": 1.3199091933711366e-06, "log_odds_chosen": 1.2775976657867432, "log_odds_ratio": -0.38191455602645874, "logits/chosen": 259.1048889160156, "logits/rejected": 494.07733154296875, "logps/chosen": -0.8194534182548523, "logps/rejected": -1.699428915977478, "loss": 1.0446, "nll_loss": 0.9419466853141785, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.040972668677568436, "rewards/margins": 0.043998777866363525, "rewards/rejected": -0.08497145026922226, "step": 1435 }, { "epoch": 1.1437648927720412, "grad_norm": 30.255916595458984, "learning_rate": 1.3176156917368248e-06, "log_odds_chosen": 1.8949663639068604, "log_odds_ratio": -0.20502403378486633, "logits/chosen": 262.95159912109375, "logits/rejected": 348.63519287109375, "logps/chosen": -0.4378899037837982, "logps/rejected": -1.4804574251174927, "loss": 0.8135, "nll_loss": 0.5400761365890503, "rewards/accuracies": 1.0, "rewards/chosen": -0.02189449779689312, "rewards/margins": 0.05212836712598801, "rewards/rejected": -0.07402287423610687, "step": 1440 }, { "epoch": 1.147736298649722, "grad_norm": 59.08807373046875, "learning_rate": 1.315334104411641e-06, "log_odds_chosen": 1.4889800548553467, "log_odds_ratio": -0.3159145712852478, "logits/chosen": 288.60455322265625, "logits/rejected": 278.4151306152344, "logps/chosen": -0.516040563583374, "logps/rejected": -1.5193082094192505, "loss": 0.7279, "nll_loss": 0.6749471426010132, "rewards/accuracies": 1.0, "rewards/chosen": -0.02580202743411064, "rewards/margins": 0.0501633882522583, "rewards/rejected": -0.07596541941165924, "step": 1445 }, { "epoch": 1.1517077045274027, "grad_norm": 33.056968688964844, "learning_rate": 1.3130643285972255e-06, "log_odds_chosen": 1.6463083028793335, "log_odds_ratio": -0.2137158215045929, "logits/chosen": 330.17108154296875, "logits/rejected": 285.6957092285156, "logps/chosen": -0.305833101272583, "logps/rejected": -0.9774319529533386, "loss": 0.9029, "nll_loss": 1.1484712362289429, "rewards/accuracies": 1.0, "rewards/chosen": -0.01529165543615818, "rewards/margins": 0.03357994556427002, "rewards/rejected": -0.04887159913778305, "step": 1450 }, { "epoch": 1.1556791104050834, "grad_norm": 26.98676872253418, "learning_rate": 1.310806262732691e-06, "log_odds_chosen": 1.2410787343978882, "log_odds_ratio": -0.45242589712142944, "logits/chosen": 357.4566955566406, "logits/rejected": 286.59906005859375, "logps/chosen": -0.4584870934486389, "logps/rejected": -1.169476866722107, "loss": 0.7066, "nll_loss": 0.6666980981826782, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.022924354299902916, "rewards/margins": 0.03554948791861534, "rewards/rejected": -0.058473847806453705, "step": 1455 }, { "epoch": 1.1596505162827642, "grad_norm": 25.189367294311523, "learning_rate": 1.3085598064755342e-06, "log_odds_chosen": 2.0372042655944824, "log_odds_ratio": -0.2485310137271881, "logits/chosen": 343.59710693359375, "logits/rejected": 282.86785888671875, "logps/chosen": -0.40503281354904175, "logps/rejected": -1.3477602005004883, "loss": 0.7799, "nll_loss": 0.6682797074317932, "rewards/accuracies": 1.0, "rewards/chosen": -0.020251641049981117, "rewards/margins": 0.04713636636734009, "rewards/rejected": -0.06738801300525665, "step": 1460 }, { "epoch": 1.163621922160445, "grad_norm": 49.8707160949707, "learning_rate": 1.3063248606829104e-06, "log_odds_chosen": 3.3834662437438965, "log_odds_ratio": -0.06317798793315887, "logits/chosen": 316.927978515625, "logits/rejected": 278.1805419921875, "logps/chosen": -0.21554477512836456, "logps/rejected": -1.945351004600525, "loss": 0.9273, "nll_loss": 0.9023275375366211, "rewards/accuracies": 1.0, "rewards/chosen": -0.010777238756418228, "rewards/margins": 0.08649031817913055, "rewards/rejected": -0.09726755321025848, "step": 1465 }, { "epoch": 1.1675933280381254, "grad_norm": 38.0960807800293, "learning_rate": 1.3041013273932528e-06, "log_odds_chosen": 1.1775424480438232, "log_odds_ratio": -0.2878747582435608, "logits/chosen": 349.3218688964844, "logits/rejected": 271.8228759765625, "logps/chosen": -0.45183151960372925, "logps/rejected": -0.9361956715583801, "loss": 0.777, "nll_loss": 0.9663525819778442, "rewards/accuracies": 1.0, "rewards/chosen": -0.022591574117541313, "rewards/margins": 0.024218203499913216, "rewards/rejected": -0.04680977761745453, "step": 1470 }, { "epoch": 1.1715647339158062, "grad_norm": 36.659915924072266, "learning_rate": 1.301889109808239e-06, "log_odds_chosen": 2.377092123031616, "log_odds_ratio": -0.11894341558218002, "logits/chosen": 331.7478942871094, "logits/rejected": 236.0290985107422, "logps/chosen": -0.2474537193775177, "logps/rejected": -1.3175318241119385, "loss": 0.7939, "nll_loss": 1.1325935125350952, "rewards/accuracies": 1.0, "rewards/chosen": -0.012372685596346855, "rewards/margins": 0.05350390076637268, "rewards/rejected": -0.06587658822536469, "step": 1475 }, { "epoch": 1.175536139793487, "grad_norm": 34.146446228027344, "learning_rate": 1.299688112275091e-06, "log_odds_chosen": 2.390129566192627, "log_odds_ratio": -0.13596001267433167, "logits/chosen": 322.39208984375, "logits/rejected": 314.0200500488281, "logps/chosen": -0.3084662854671478, "logps/rejected": -1.475559949874878, "loss": 0.8092, "nll_loss": 0.9278243780136108, "rewards/accuracies": 1.0, "rewards/chosen": -0.015423314645886421, "rewards/margins": 0.0583546943962574, "rewards/rejected": -0.07377800345420837, "step": 1480 }, { "epoch": 1.1795075456711677, "grad_norm": 27.04242706298828, "learning_rate": 1.2974982402692051e-06, "log_odds_chosen": 1.809501051902771, "log_odds_ratio": -0.16602441668510437, "logits/chosen": 296.98419189453125, "logits/rejected": 283.3446350097656, "logps/chosen": -0.40387874841690063, "logps/rejected": -1.3692766427993774, "loss": 0.7862, "nll_loss": 0.5465682148933411, "rewards/accuracies": 1.0, "rewards/chosen": -0.02019393816590309, "rewards/margins": 0.04826989397406578, "rewards/rejected": -0.06846383959054947, "step": 1485 }, { "epoch": 1.1834789515488482, "grad_norm": 28.75420379638672, "learning_rate": 1.2953194003770995e-06, "log_odds_chosen": 0.6536625623703003, "log_odds_ratio": -0.6314162015914917, "logits/chosen": 300.7778625488281, "logits/rejected": 250.1493682861328, "logps/chosen": -0.7219634056091309, "logps/rejected": -1.1960614919662476, "loss": 0.8307, "nll_loss": 0.9051557779312134, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.036098167300224304, "rewards/margins": 0.023704906925559044, "rewards/rejected": -0.059803079813718796, "step": 1490 }, { "epoch": 1.187450357426529, "grad_norm": 34.966590881347656, "learning_rate": 1.2931515002796793e-06, "log_odds_chosen": 2.052030086517334, "log_odds_ratio": -0.19160158932209015, "logits/chosen": 320.68011474609375, "logits/rejected": 255.47189331054688, "logps/chosen": -0.40166154503822327, "logps/rejected": -1.3832619190216064, "loss": 0.7472, "nll_loss": 0.7854171991348267, "rewards/accuracies": 1.0, "rewards/chosen": -0.020083077251911163, "rewards/margins": 0.0490800216794014, "rewards/rejected": -0.06916309893131256, "step": 1495 }, { "epoch": 1.1914217633042097, "grad_norm": 23.868934631347656, "learning_rate": 1.2909944487358056e-06, "log_odds_chosen": 1.3916871547698975, "log_odds_ratio": -0.513548731803894, "logits/chosen": 264.5072326660156, "logits/rejected": 357.7003173828125, "logps/chosen": -0.6542052030563354, "logps/rejected": -1.6498836278915405, "loss": 0.8925, "nll_loss": 0.763934314250946, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03271026164293289, "rewards/margins": 0.04978392273187637, "rewards/rejected": -0.08249418437480927, "step": 1500 }, { "epoch": 1.1953931691818904, "grad_norm": 28.471527099609375, "learning_rate": 1.2888481555661678e-06, "log_odds_chosen": 1.3641421794891357, "log_odds_ratio": -0.3730069696903229, "logits/chosen": 452.96124267578125, "logits/rejected": 295.983154296875, "logps/chosen": -0.5330772995948792, "logps/rejected": -1.2659406661987305, "loss": 0.7427, "nll_loss": 0.723406195640564, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.026653865352272987, "rewards/margins": 0.036643169820308685, "rewards/rejected": -0.06329703330993652, "step": 1505 }, { "epoch": 1.1993645750595712, "grad_norm": 35.46685791015625, "learning_rate": 1.286712531637447e-06, "log_odds_chosen": 1.6957210302352905, "log_odds_ratio": -0.2755866050720215, "logits/chosen": 238.7939453125, "logits/rejected": 365.7698059082031, "logps/chosen": -0.5120083689689636, "logps/rejected": -1.307417631149292, "loss": 0.7483, "nll_loss": 0.7828346490859985, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.02560041844844818, "rewards/margins": 0.0397704653441906, "rewards/rejected": -0.06537088751792908, "step": 1510 }, { "epoch": 1.2033359809372517, "grad_norm": 59.663177490234375, "learning_rate": 1.2845874888467698e-06, "log_odds_chosen": 3.4014511108398438, "log_odds_ratio": -0.07314275205135345, "logits/chosen": 462.5450134277344, "logits/rejected": 248.7952117919922, "logps/chosen": -0.19772595167160034, "logps/rejected": -2.0527455806732178, "loss": 0.8414, "nll_loss": 0.6311348676681519, "rewards/accuracies": 1.0, "rewards/chosen": -0.009886298328638077, "rewards/margins": 0.09275098145008087, "rewards/rejected": -0.10263729095458984, "step": 1515 }, { "epoch": 1.2073073868149324, "grad_norm": 52.45457077026367, "learning_rate": 1.282472940106443e-06, "log_odds_chosen": 1.8471969366073608, "log_odds_ratio": -0.34033486247062683, "logits/chosen": 359.6947937011719, "logits/rejected": 281.2830810546875, "logps/chosen": -0.503508448600769, "logps/rejected": -1.4968483448028564, "loss": 0.9057, "nll_loss": 0.9135835766792297, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.025175422430038452, "rewards/margins": 0.04966699704527855, "rewards/rejected": -0.0748424157500267, "step": 1520 }, { "epoch": 1.2112787926926132, "grad_norm": 47.01854705810547, "learning_rate": 1.28036879932896e-06, "log_odds_chosen": 1.5671476125717163, "log_odds_ratio": -0.2781728208065033, "logits/chosen": 259.2211608886719, "logits/rejected": 273.3630065917969, "logps/chosen": -0.612583339214325, "logps/rejected": -1.3503682613372803, "loss": 0.8654, "nll_loss": 1.0714460611343384, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.030629169195890427, "rewards/margins": 0.03688924387097359, "rewards/rejected": -0.06751841306686401, "step": 1525 }, { "epoch": 1.215250198570294, "grad_norm": 27.87078094482422, "learning_rate": 1.278274981412284e-06, "log_odds_chosen": 1.4453420639038086, "log_odds_ratio": -0.2786465585231781, "logits/chosen": 302.4069519042969, "logits/rejected": 312.5154113769531, "logps/chosen": -0.3448793292045593, "logps/rejected": -1.0912384986877441, "loss": 0.7583, "nll_loss": 0.6790813207626343, "rewards/accuracies": 1.0, "rewards/chosen": -0.017243966460227966, "rewards/margins": 0.03731795400381088, "rewards/rejected": -0.05456192418932915, "step": 1530 }, { "epoch": 1.2192216044479747, "grad_norm": 69.64622497558594, "learning_rate": 1.2761914022253899e-06, "log_odds_chosen": 1.012123703956604, "log_odds_ratio": -0.41925907135009766, "logits/chosen": 343.95050048828125, "logits/rejected": 260.3720703125, "logps/chosen": -0.5246344208717346, "logps/rejected": -1.1103763580322266, "loss": 0.723, "nll_loss": 0.7898105382919312, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.02623172104358673, "rewards/margins": 0.029287094250321388, "rewards/rejected": -0.05551881715655327, "step": 1535 }, { "epoch": 1.2231930103256552, "grad_norm": 23.33427619934082, "learning_rate": 1.2741179785940638e-06, "log_odds_chosen": 1.9270483255386353, "log_odds_ratio": -0.16388998925685883, "logits/chosen": 343.7916259765625, "logits/rejected": 238.6454315185547, "logps/chosen": -0.3710968494415283, "logps/rejected": -1.407012701034546, "loss": 0.6983, "nll_loss": 0.45011359453201294, "rewards/accuracies": 1.0, "rewards/chosen": -0.018554842099547386, "rewards/margins": 0.051795799285173416, "rewards/rejected": -0.07035063207149506, "step": 1540 }, { "epoch": 1.227164416203336, "grad_norm": 26.319948196411133, "learning_rate": 1.2720546282869612e-06, "log_odds_chosen": 2.203964948654175, "log_odds_ratio": -0.11018653959035873, "logits/chosen": 414.2149353027344, "logits/rejected": 317.75274658203125, "logps/chosen": -0.2872838079929352, "logps/rejected": -1.3737704753875732, "loss": 0.7333, "nll_loss": 0.5472729802131653, "rewards/accuracies": 1.0, "rewards/chosen": -0.014364190399646759, "rewards/margins": 0.05432434007525444, "rewards/rejected": -0.0686885267496109, "step": 1545 }, { "epoch": 1.2311358220810167, "grad_norm": 37.496543884277344, "learning_rate": 1.270001270001905e-06, "log_odds_chosen": 1.640875220298767, "log_odds_ratio": -0.21115879714488983, "logits/chosen": 268.22589111328125, "logits/rejected": 362.2005310058594, "logps/chosen": -0.3301395773887634, "logps/rejected": -1.0716874599456787, "loss": 0.826, "nll_loss": 0.8113988637924194, "rewards/accuracies": 1.0, "rewards/chosen": -0.0165069792419672, "rewards/margins": 0.037077389657497406, "rewards/rejected": -0.053584374487400055, "step": 1550 }, { "epoch": 1.2351072279586974, "grad_norm": 35.05588912963867, "learning_rate": 1.2679578233524345e-06, "log_odds_chosen": 1.1360805034637451, "log_odds_ratio": -0.3080361485481262, "logits/chosen": 295.03118896484375, "logits/rejected": 284.44036865234375, "logps/chosen": -0.5859525203704834, "logps/rejected": -1.241891860961914, "loss": 0.872, "nll_loss": 0.9364341497421265, "rewards/accuracies": 1.0, "rewards/chosen": -0.02929762564599514, "rewards/margins": 0.03279696777462959, "rewards/rejected": -0.06209459900856018, "step": 1555 }, { "epoch": 1.2390786338363782, "grad_norm": 31.318214416503906, "learning_rate": 1.2659242088545834e-06, "log_odds_chosen": 0.8039556741714478, "log_odds_ratio": -0.46843037009239197, "logits/chosen": 237.40576171875, "logits/rejected": 364.1980895996094, "logps/chosen": -0.7670692801475525, "logps/rejected": -1.2011711597442627, "loss": 0.8833, "nll_loss": 1.014811635017395, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.038353465497493744, "rewards/margins": 0.021705087274312973, "rewards/rejected": -0.060058556497097015, "step": 1560 }, { "epoch": 1.2430500397140587, "grad_norm": 29.747224807739258, "learning_rate": 1.2639003479138966e-06, "log_odds_chosen": 1.1694443225860596, "log_odds_ratio": -0.32304924726486206, "logits/chosen": 270.96624755859375, "logits/rejected": 265.6252746582031, "logps/chosen": -0.5855724811553955, "logps/rejected": -1.1885181665420532, "loss": 0.8378, "nll_loss": 0.8840498924255371, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.029278624802827835, "rewards/margins": 0.030147280544042587, "rewards/rejected": -0.05942590907216072, "step": 1565 }, { "epoch": 1.2470214455917394, "grad_norm": 34.243370056152344, "learning_rate": 1.261886162812672e-06, "log_odds_chosen": 0.6181503534317017, "log_odds_ratio": -0.5344885587692261, "logits/chosen": 244.2056884765625, "logits/rejected": 316.17681884765625, "logps/chosen": -1.0504838228225708, "logps/rejected": -1.4136704206466675, "loss": 0.826, "nll_loss": 0.9619119763374329, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05252418667078018, "rewards/margins": 0.018159329891204834, "rewards/rejected": -0.07068352401256561, "step": 1570 }, { "epoch": 1.2509928514694202, "grad_norm": 35.988731384277344, "learning_rate": 1.259881576697424e-06, "log_odds_chosen": 2.3048176765441895, "log_odds_ratio": -0.19754526019096375, "logits/chosen": 287.938232421875, "logits/rejected": 292.93804931640625, "logps/chosen": -0.5019484758377075, "logps/rejected": -2.027036428451538, "loss": 0.7158, "nll_loss": 0.6203959584236145, "rewards/accuracies": 1.0, "rewards/chosen": -0.025097424164414406, "rewards/margins": 0.07625441253185272, "rewards/rejected": -0.10135183483362198, "step": 1575 }, { "epoch": 1.254964257347101, "grad_norm": 23.905803680419922, "learning_rate": 1.257886513566569e-06, "log_odds_chosen": 1.3767532110214233, "log_odds_ratio": -0.24111056327819824, "logits/chosen": 289.76873779296875, "logits/rejected": 291.0559997558594, "logps/chosen": -0.44200173020362854, "logps/rejected": -1.0347238779067993, "loss": 0.6936, "nll_loss": 0.5990425944328308, "rewards/accuracies": 1.0, "rewards/chosen": -0.022100087255239487, "rewards/margins": 0.029636109247803688, "rewards/rejected": -0.05173618718981743, "step": 1580 }, { "epoch": 1.2589356632247815, "grad_norm": 40.032470703125, "learning_rate": 1.255900898258321e-06, "log_odds_chosen": 2.5157618522644043, "log_odds_ratio": -0.21077406406402588, "logits/chosen": 385.93560791015625, "logits/rejected": 246.05679321289062, "logps/chosen": -0.43296509981155396, "logps/rejected": -1.9274288415908813, "loss": 0.7974, "nll_loss": 0.832492470741272, "rewards/accuracies": 1.0, "rewards/chosen": -0.021648254245519638, "rewards/margins": 0.07472319900989532, "rewards/rejected": -0.09637145698070526, "step": 1585 }, { "epoch": 1.2629070691024622, "grad_norm": 27.383275985717773, "learning_rate": 1.253924656438798e-06, "log_odds_chosen": 0.7217229008674622, "log_odds_ratio": -0.443845272064209, "logits/chosen": 387.6333923339844, "logits/rejected": 236.80252075195312, "logps/chosen": -0.6088986396789551, "logps/rejected": -0.9363006353378296, "loss": 0.7629, "nll_loss": 0.7160018682479858, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.030444931238889694, "rewards/margins": 0.016370099037885666, "rewards/rejected": -0.04681503027677536, "step": 1590 }, { "epoch": 1.266878474980143, "grad_norm": 27.825769424438477, "learning_rate": 1.2519577145903362e-06, "log_odds_chosen": 2.075462818145752, "log_odds_ratio": -0.13059628009796143, "logits/chosen": 313.5865173339844, "logits/rejected": 298.07293701171875, "logps/chosen": -0.3759838938713074, "logps/rejected": -1.504678726196289, "loss": 0.6985, "nll_loss": 0.556339681148529, "rewards/accuracies": 1.0, "rewards/chosen": -0.0187991950660944, "rewards/margins": 0.056434739381074905, "rewards/rejected": -0.07523393630981445, "step": 1595 }, { "epoch": 1.2708498808578237, "grad_norm": 36.40824508666992, "learning_rate": 1.25e-06, "log_odds_chosen": 1.3817975521087646, "log_odds_ratio": -0.32284778356552124, "logits/chosen": 378.4946594238281, "logits/rejected": 315.1073913574219, "logps/chosen": -0.4628058969974518, "logps/rejected": -1.0874122381210327, "loss": 0.7269, "nll_loss": 0.6962202191352844, "rewards/accuracies": 1.0, "rewards/chosen": -0.02314029261469841, "rewards/margins": 0.031230319291353226, "rewards/rejected": -0.054370611906051636, "step": 1600 }, { "epoch": 1.2748212867355044, "grad_norm": 51.04415512084961, "learning_rate": 1.2480514407482947e-06, "log_odds_chosen": 2.1562647819519043, "log_odds_ratio": -0.11893711239099503, "logits/chosen": 224.6322479248047, "logits/rejected": 447.8099670410156, "logps/chosen": -0.3827270567417145, "logps/rejected": -1.6206636428833008, "loss": 0.7221, "nll_loss": 0.48888301849365234, "rewards/accuracies": 1.0, "rewards/chosen": -0.019136350601911545, "rewards/margins": 0.06189683824777603, "rewards/rejected": -0.08103318512439728, "step": 1605 }, { "epoch": 1.2787926926131852, "grad_norm": 56.877532958984375, "learning_rate": 1.246111965698067e-06, "log_odds_chosen": 1.901529312133789, "log_odds_ratio": -0.22505538165569305, "logits/chosen": 287.8641052246094, "logits/rejected": 321.10369873046875, "logps/chosen": -0.4749757647514343, "logps/rejected": -1.3261306285858154, "loss": 0.9818, "nll_loss": 1.1012128591537476, "rewards/accuracies": 1.0, "rewards/chosen": -0.023748790845274925, "rewards/margins": 0.042557746171951294, "rewards/rejected": -0.06630653887987137, "step": 1610 }, { "epoch": 1.2827640984908657, "grad_norm": 41.132686614990234, "learning_rate": 1.244181504483599e-06, "log_odds_chosen": 1.9316246509552002, "log_odds_ratio": -0.1521489918231964, "logits/chosen": 272.6374816894531, "logits/rejected": 258.6047668457031, "logps/chosen": -0.5842836499214172, "logps/rejected": -1.6665958166122437, "loss": 0.7013, "nll_loss": 0.7633215188980103, "rewards/accuracies": 1.0, "rewards/chosen": -0.02921418286859989, "rewards/margins": 0.05411560460925102, "rewards/rejected": -0.08332978934049606, "step": 1615 }, { "epoch": 1.2867355043685464, "grad_norm": 35.068145751953125, "learning_rate": 1.2422599874998834e-06, "log_odds_chosen": 2.0793488025665283, "log_odds_ratio": -0.27748990058898926, "logits/chosen": 249.419189453125, "logits/rejected": 439.7445373535156, "logps/chosen": -0.4987415373325348, "logps/rejected": -1.9686177968978882, "loss": 0.7522, "nll_loss": 0.5755370259284973, "rewards/accuracies": 1.0, "rewards/chosen": -0.02493707649409771, "rewards/margins": 0.07349381595849991, "rewards/rejected": -0.09843089431524277, "step": 1620 }, { "epoch": 1.2907069102462272, "grad_norm": 58.41203308105469, "learning_rate": 1.2403473458920848e-06, "log_odds_chosen": 2.0760154724121094, "log_odds_ratio": -0.13080164790153503, "logits/chosen": 346.79364013671875, "logits/rejected": 283.41265869140625, "logps/chosen": -0.5347332954406738, "logps/rejected": -1.8155488967895508, "loss": 0.8326, "nll_loss": 0.8670746088027954, "rewards/accuracies": 1.0, "rewards/chosen": -0.02673666551709175, "rewards/margins": 0.06404078006744385, "rewards/rejected": -0.0907774418592453, "step": 1625 }, { "epoch": 1.294678316123908, "grad_norm": 26.609634399414062, "learning_rate": 1.238443511545175e-06, "log_odds_chosen": 0.9718191027641296, "log_odds_ratio": -0.4202337861061096, "logits/chosen": 334.9565124511719, "logits/rejected": 276.83258056640625, "logps/chosen": -0.6160775423049927, "logps/rejected": -1.0907124280929565, "loss": 0.842, "nll_loss": 0.9310005903244019, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.030803877860307693, "rewards/margins": 0.023731743916869164, "rewards/rejected": -0.05453561991453171, "step": 1630 }, { "epoch": 1.2986497220015885, "grad_norm": 27.735910415649414, "learning_rate": 1.236548417073745e-06, "log_odds_chosen": 1.4771227836608887, "log_odds_ratio": -0.31675320863723755, "logits/chosen": 366.62310791015625, "logits/rejected": 267.44036865234375, "logps/chosen": -0.6183649301528931, "logps/rejected": -1.462820291519165, "loss": 0.8923, "nll_loss": 0.8503854870796204, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.030918246135115623, "rewards/margins": 0.042222760617733, "rewards/rejected": -0.07314101606607437, "step": 1635 }, { "epoch": 1.3026211278792692, "grad_norm": 42.74282455444336, "learning_rate": 1.2346619958119873e-06, "log_odds_chosen": 2.29691743850708, "log_odds_ratio": -0.1986047625541687, "logits/chosen": 323.72161865234375, "logits/rejected": 377.61419677734375, "logps/chosen": -0.38441476225852966, "logps/rejected": -1.7476539611816406, "loss": 0.9019, "nll_loss": 0.8927809000015259, "rewards/accuracies": 1.0, "rewards/chosen": -0.019220737740397453, "rewards/margins": 0.0681619718670845, "rewards/rejected": -0.0873827114701271, "step": 1640 }, { "epoch": 1.30659253375695, "grad_norm": 26.081533432006836, "learning_rate": 1.2327841818038448e-06, "log_odds_chosen": 1.103318691253662, "log_odds_ratio": -0.394954115152359, "logits/chosen": 357.2593994140625, "logits/rejected": 316.22930908203125, "logps/chosen": -0.7667959928512573, "logps/rejected": -1.315467119216919, "loss": 0.7768, "nll_loss": 1.0027587413787842, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.038339801132678986, "rewards/margins": 0.02743355929851532, "rewards/rejected": -0.0657733678817749, "step": 1645 }, { "epoch": 1.3105639396346307, "grad_norm": 29.404010772705078, "learning_rate": 1.2309149097933274e-06, "log_odds_chosen": 2.014024257659912, "log_odds_ratio": -0.2092103213071823, "logits/chosen": 333.6972961425781, "logits/rejected": 280.7713928222656, "logps/chosen": -0.4117712378501892, "logps/rejected": -1.3921597003936768, "loss": 0.712, "nll_loss": 0.6148799061775208, "rewards/accuracies": 1.0, "rewards/chosen": -0.02058856189250946, "rewards/margins": 0.04901942238211632, "rewards/rejected": -0.06960798799991608, "step": 1650 }, { "epoch": 1.3145353455123114, "grad_norm": 30.204944610595703, "learning_rate": 1.2290541152149845e-06, "log_odds_chosen": 1.8049323558807373, "log_odds_ratio": -0.21223409473896027, "logits/chosen": 283.7622985839844, "logits/rejected": 278.0716552734375, "logps/chosen": -0.4594515860080719, "logps/rejected": -1.5247669219970703, "loss": 0.947, "nll_loss": 0.7514600157737732, "rewards/accuracies": 1.0, "rewards/chosen": -0.022972578182816505, "rewards/margins": 0.0532657615840435, "rewards/rejected": -0.07623834908008575, "step": 1655 }, { "epoch": 1.3185067513899922, "grad_norm": 114.78897094726562, "learning_rate": 1.2272017341845401e-06, "log_odds_chosen": 2.2300524711608887, "log_odds_ratio": -0.18504497408866882, "logits/chosen": 371.3212890625, "logits/rejected": 351.45355224609375, "logps/chosen": -0.3518930673599243, "logps/rejected": -1.3690972328186035, "loss": 0.7413, "nll_loss": 0.7462723851203918, "rewards/accuracies": 1.0, "rewards/chosen": -0.017594654113054276, "rewards/margins": 0.0508602038025856, "rewards/rejected": -0.06845486164093018, "step": 1660 }, { "epoch": 1.3224781572676727, "grad_norm": 30.150894165039062, "learning_rate": 1.2253577034896796e-06, "log_odds_chosen": 1.8445310592651367, "log_odds_ratio": -0.18787363171577454, "logits/chosen": 372.4892578125, "logits/rejected": 307.2557373046875, "logps/chosen": -0.4438856542110443, "logps/rejected": -1.3671103715896606, "loss": 0.7718, "nll_loss": 0.7103620171546936, "rewards/accuracies": 1.0, "rewards/chosen": -0.022194284945726395, "rewards/margins": 0.0461612343788147, "rewards/rejected": -0.06835552304983139, "step": 1665 }, { "epoch": 1.3264495631453534, "grad_norm": 56.493675231933594, "learning_rate": 1.223521960580991e-06, "log_odds_chosen": 0.36352911591529846, "log_odds_ratio": -0.5619034767150879, "logits/chosen": 250.56436157226562, "logits/rejected": 326.8438415527344, "logps/chosen": -0.9630180597305298, "logps/rejected": -1.1797587871551514, "loss": 0.8786, "nll_loss": 0.9603252410888672, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04815090075135231, "rewards/margins": 0.010837038978934288, "rewards/rejected": -0.05898793786764145, "step": 1670 }, { "epoch": 1.3304209690230342, "grad_norm": 46.662139892578125, "learning_rate": 1.2216944435630524e-06, "log_odds_chosen": 1.6360156536102295, "log_odds_ratio": -0.20836324989795685, "logits/chosen": 360.5296936035156, "logits/rejected": 287.63836669921875, "logps/chosen": -0.5066941976547241, "logps/rejected": -1.322191596031189, "loss": 0.7144, "nll_loss": 0.834607720375061, "rewards/accuracies": 1.0, "rewards/chosen": -0.025334710255265236, "rewards/margins": 0.0407748706638813, "rewards/rejected": -0.06610958278179169, "step": 1675 }, { "epoch": 1.3343923749007147, "grad_norm": 38.70802307128906, "learning_rate": 1.2198750911856664e-06, "log_odds_chosen": 1.0125881433486938, "log_odds_ratio": -0.40528297424316406, "logits/chosen": 255.6591339111328, "logits/rejected": 316.5745849609375, "logps/chosen": -0.6354233026504517, "logps/rejected": -1.1144318580627441, "loss": 0.8726, "nll_loss": 0.612061083316803, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03177116811275482, "rewards/margins": 0.023950431495904922, "rewards/rejected": -0.055721599608659744, "step": 1680 }, { "epoch": 1.3383637807783955, "grad_norm": 33.74903106689453, "learning_rate": 1.2180638428352399e-06, "log_odds_chosen": 1.447291374206543, "log_odds_ratio": -0.2997308075428009, "logits/chosen": 335.4109191894531, "logits/rejected": 307.80523681640625, "logps/chosen": -0.37111929059028625, "logps/rejected": -0.817639172077179, "loss": 0.7906, "nll_loss": 0.7807439565658569, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.018555965274572372, "rewards/margins": 0.022325992584228516, "rewards/rejected": -0.04088195413351059, "step": 1685 }, { "epoch": 1.3423351866560762, "grad_norm": 38.63343811035156, "learning_rate": 1.2162606385262997e-06, "log_odds_chosen": 2.291609525680542, "log_odds_ratio": -0.19204507768154144, "logits/chosen": 363.9112854003906, "logits/rejected": 289.1708068847656, "logps/chosen": -0.3431180715560913, "logps/rejected": -1.5781381130218506, "loss": 0.6895, "nll_loss": 0.5135194659233093, "rewards/accuracies": 1.0, "rewards/chosen": -0.017155904322862625, "rewards/margins": 0.06175100803375244, "rewards/rejected": -0.07890690863132477, "step": 1690 }, { "epoch": 1.346306592533757, "grad_norm": 36.493934631347656, "learning_rate": 1.2144654188931508e-06, "log_odds_chosen": 1.6693967580795288, "log_odds_ratio": -0.22054991126060486, "logits/chosen": 329.10711669921875, "logits/rejected": 326.18890380859375, "logps/chosen": -0.4193757176399231, "logps/rejected": -1.1420009136199951, "loss": 0.8402, "nll_loss": 0.5613064765930176, "rewards/accuracies": 1.0, "rewards/chosen": -0.020968783646821976, "rewards/margins": 0.03613125532865524, "rewards/rejected": -0.057100046426057816, "step": 1695 }, { "epoch": 1.3502779984114377, "grad_norm": 41.491329193115234, "learning_rate": 1.2126781251816649e-06, "log_odds_chosen": 2.1258764266967773, "log_odds_ratio": -0.19279615581035614, "logits/chosen": 275.3325500488281, "logits/rejected": 364.13507080078125, "logps/chosen": -0.4255383610725403, "logps/rejected": -1.7605087757110596, "loss": 0.7322, "nll_loss": 0.6230169534683228, "rewards/accuracies": 1.0, "rewards/chosen": -0.021276917308568954, "rewards/margins": 0.06674851477146149, "rewards/rejected": -0.08802543580532074, "step": 1700 }, { "epoch": 1.3542494042891184, "grad_norm": 63.20461654663086, "learning_rate": 1.210898699241207e-06, "log_odds_chosen": 0.5076674818992615, "log_odds_ratio": -0.5698251128196716, "logits/chosen": 375.07818603515625, "logits/rejected": 300.9750061035156, "logps/chosen": -0.7717366218566895, "logps/rejected": -1.0271308422088623, "loss": 0.8147, "nll_loss": 0.9089614748954773, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03858683258295059, "rewards/margins": 0.012769708409905434, "rewards/rejected": -0.051356542855501175, "step": 1705 }, { "epoch": 1.358220810166799, "grad_norm": 36.315547943115234, "learning_rate": 1.2091270835166862e-06, "log_odds_chosen": 2.0527215003967285, "log_odds_ratio": -0.14111559092998505, "logits/chosen": 256.4891357421875, "logits/rejected": 378.0007019042969, "logps/chosen": -0.32579556107521057, "logps/rejected": -1.1657521724700928, "loss": 0.7747, "nll_loss": 0.6338789463043213, "rewards/accuracies": 1.0, "rewards/chosen": -0.01628977805376053, "rewards/margins": 0.04199782758951187, "rewards/rejected": -0.0582876093685627, "step": 1710 }, { "epoch": 1.3621922160444797, "grad_norm": 23.591333389282227, "learning_rate": 1.207363221040738e-06, "log_odds_chosen": 1.1408276557922363, "log_odds_ratio": -0.2928302586078644, "logits/chosen": 303.75872802734375, "logits/rejected": 315.8148193359375, "logps/chosen": -0.4450489580631256, "logps/rejected": -1.0119152069091797, "loss": 0.7225, "nll_loss": 0.642244279384613, "rewards/accuracies": 1.0, "rewards/chosen": -0.02225244790315628, "rewards/margins": 0.028343314304947853, "rewards/rejected": -0.050595760345458984, "step": 1715 }, { "epoch": 1.3661636219221605, "grad_norm": 36.07604217529297, "learning_rate": 1.2056070554260305e-06, "log_odds_chosen": 1.7148020267486572, "log_odds_ratio": -0.27437013387680054, "logits/chosen": 297.388671875, "logits/rejected": 462.2918395996094, "logps/chosen": -0.4170989394187927, "logps/rejected": -1.1417804956436157, "loss": 0.6754, "nll_loss": 0.716262936592102, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.020854949951171875, "rewards/margins": 0.03623408079147339, "rewards/rejected": -0.057089030742645264, "step": 1720 }, { "epoch": 1.3701350277998412, "grad_norm": 27.890535354614258, "learning_rate": 1.2038585308576922e-06, "log_odds_chosen": 2.661741018295288, "log_odds_ratio": -0.2920236587524414, "logits/chosen": 314.42279052734375, "logits/rejected": 376.5033264160156, "logps/chosen": -0.43217021226882935, "logps/rejected": -1.5007961988449097, "loss": 0.8006, "nll_loss": 0.7613029479980469, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.021608510985970497, "rewards/margins": 0.053431302309036255, "rewards/rejected": -0.0750398188829422, "step": 1725 }, { "epoch": 1.3741064336775217, "grad_norm": 27.169368743896484, "learning_rate": 1.2021175920858626e-06, "log_odds_chosen": 0.9390004873275757, "log_odds_ratio": -0.35079219937324524, "logits/chosen": 427.57708740234375, "logits/rejected": 271.9527587890625, "logps/chosen": -0.4680325388908386, "logps/rejected": -0.8964841961860657, "loss": 0.8189, "nll_loss": 0.581561803817749, "rewards/accuracies": 1.0, "rewards/chosen": -0.02340162731707096, "rewards/margins": 0.021422583609819412, "rewards/rejected": -0.044824205338954926, "step": 1730 }, { "epoch": 1.3780778395552025, "grad_norm": 39.7526969909668, "learning_rate": 1.200384184418359e-06, "log_odds_chosen": 2.284052848815918, "log_odds_ratio": -0.19260060787200928, "logits/chosen": 235.9784393310547, "logits/rejected": 304.092041015625, "logps/chosen": -0.45018672943115234, "logps/rejected": -2.012763738632202, "loss": 0.8584, "nll_loss": 0.9266872406005859, "rewards/accuracies": 1.0, "rewards/chosen": -0.022509338334202766, "rewards/margins": 0.07812884449958801, "rewards/rejected": -0.10063817352056503, "step": 1735 }, { "epoch": 1.3820492454328832, "grad_norm": 32.230445861816406, "learning_rate": 1.1986582537134606e-06, "log_odds_chosen": 3.3276565074920654, "log_odds_ratio": -0.05102468281984329, "logits/chosen": 455.22802734375, "logits/rejected": 278.1533203125, "logps/chosen": -0.15806782245635986, "logps/rejected": -1.480958342552185, "loss": 0.6712, "nll_loss": 0.5678219199180603, "rewards/accuracies": 1.0, "rewards/chosen": -0.007903391495347023, "rewards/margins": 0.06614452600479126, "rewards/rejected": -0.07404791563749313, "step": 1740 }, { "epoch": 1.386020651310564, "grad_norm": 41.62521743774414, "learning_rate": 1.1969397463728037e-06, "log_odds_chosen": 1.6859906911849976, "log_odds_ratio": -0.20657625794410706, "logits/chosen": 239.4410400390625, "logits/rejected": 359.784423828125, "logps/chosen": -0.5606490969657898, "logps/rejected": -1.5242068767547607, "loss": 0.8003, "nll_loss": 1.018355369567871, "rewards/accuracies": 1.0, "rewards/chosen": -0.02803245559334755, "rewards/margins": 0.04817789047956467, "rewards/rejected": -0.07621034234762192, "step": 1745 }, { "epoch": 1.3899920571882447, "grad_norm": 30.053016662597656, "learning_rate": 1.1952286093343937e-06, "log_odds_chosen": 1.3429896831512451, "log_odds_ratio": -0.2858211100101471, "logits/chosen": 309.0750732421875, "logits/rejected": 321.745849609375, "logps/chosen": -0.4044710099697113, "logps/rejected": -1.078237771987915, "loss": 0.7899, "nll_loss": 0.8156072497367859, "rewards/accuracies": 1.0, "rewards/chosen": -0.020223554223775864, "rewards/margins": 0.033688344061374664, "rewards/rejected": -0.05391188710927963, "step": 1750 }, { "epoch": 1.3939634630659254, "grad_norm": 24.49996566772461, "learning_rate": 1.1935247900657217e-06, "log_odds_chosen": 1.5360620021820068, "log_odds_ratio": -0.2454959601163864, "logits/chosen": 301.2558288574219, "logits/rejected": 246.26052856445312, "logps/chosen": -0.3781280815601349, "logps/rejected": -1.1283109188079834, "loss": 0.7204, "nll_loss": 0.7882005572319031, "rewards/accuracies": 1.0, "rewards/chosen": -0.018906403332948685, "rewards/margins": 0.03750915080308914, "rewards/rejected": -0.05641555041074753, "step": 1755 }, { "epoch": 1.397934868943606, "grad_norm": 45.932064056396484, "learning_rate": 1.1918282365569903e-06, "log_odds_chosen": 2.2376155853271484, "log_odds_ratio": -0.11105986684560776, "logits/chosen": 288.0708923339844, "logits/rejected": 317.3603515625, "logps/chosen": -0.40139836072921753, "logps/rejected": -1.6960868835449219, "loss": 0.8195, "nll_loss": 0.7521315813064575, "rewards/accuracies": 1.0, "rewards/chosen": -0.020069921389222145, "rewards/margins": 0.06473441421985626, "rewards/rejected": -0.08480434119701385, "step": 1760 }, { "epoch": 1.4019062748212867, "grad_norm": 35.327667236328125, "learning_rate": 1.1901388973144479e-06, "log_odds_chosen": 2.4044885635375977, "log_odds_ratio": -0.11451097577810287, "logits/chosen": 357.29425048828125, "logits/rejected": 296.12188720703125, "logps/chosen": -0.3405108153820038, "logps/rejected": -1.5622296333312988, "loss": 0.7732, "nll_loss": 0.6014407873153687, "rewards/accuracies": 1.0, "rewards/chosen": -0.01702554151415825, "rewards/margins": 0.06108593940734863, "rewards/rejected": -0.07811148464679718, "step": 1765 }, { "epoch": 1.4058776806989675, "grad_norm": 28.74220848083496, "learning_rate": 1.1884567213538209e-06, "log_odds_chosen": 0.05671717971563339, "log_odds_ratio": -0.7521728277206421, "logits/chosen": 275.2093505859375, "logits/rejected": 291.18218994140625, "logps/chosen": -0.7687788605690002, "logps/rejected": -0.8631227612495422, "loss": 0.9296, "nll_loss": 1.1221811771392822, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03843894228339195, "rewards/margins": 0.004717194940894842, "rewards/rejected": -0.04315613582730293, "step": 1770 }, { "epoch": 1.4098490865766482, "grad_norm": 25.72754669189453, "learning_rate": 1.1867816581938534e-06, "log_odds_chosen": 2.094773530960083, "log_odds_ratio": -0.1832566112279892, "logits/chosen": 295.9609680175781, "logits/rejected": 386.21722412109375, "logps/chosen": -0.5518186688423157, "logps/rejected": -2.0147547721862793, "loss": 0.6952, "nll_loss": 0.7042763233184814, "rewards/accuracies": 1.0, "rewards/chosen": -0.027590930461883545, "rewards/margins": 0.07314680516719818, "rewards/rejected": -0.10073773562908173, "step": 1775 }, { "epoch": 1.4138204924543287, "grad_norm": 44.56334686279297, "learning_rate": 1.1851136578499433e-06, "log_odds_chosen": 2.180377960205078, "log_odds_ratio": -0.1614275872707367, "logits/chosen": 287.19976806640625, "logits/rejected": 339.51861572265625, "logps/chosen": -0.36367788910865784, "logps/rejected": -1.620428442955017, "loss": 0.7019, "nll_loss": 0.5081968307495117, "rewards/accuracies": 1.0, "rewards/chosen": -0.018183894455432892, "rewards/margins": 0.06283754110336304, "rewards/rejected": -0.08102142810821533, "step": 1780 }, { "epoch": 1.4177918983320095, "grad_norm": 34.925498962402344, "learning_rate": 1.1834526708278771e-06, "log_odds_chosen": 1.0344644784927368, "log_odds_ratio": -0.390333890914917, "logits/chosen": 370.9827575683594, "logits/rejected": 322.5367736816406, "logps/chosen": -0.5955963134765625, "logps/rejected": -1.1939870119094849, "loss": 0.869, "nll_loss": 0.8622109293937683, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.029779816046357155, "rewards/margins": 0.02991952933371067, "rewards/rejected": -0.059699345380067825, "step": 1785 }, { "epoch": 1.4217633042096902, "grad_norm": 51.94044876098633, "learning_rate": 1.181798648117664e-06, "log_odds_chosen": 0.9601621627807617, "log_odds_ratio": -0.3441396653652191, "logits/chosen": 400.4881896972656, "logits/rejected": 305.57904052734375, "logps/chosen": -0.8277777433395386, "logps/rejected": -1.3787654638290405, "loss": 0.7763, "nll_loss": 0.9883519411087036, "rewards/accuracies": 1.0, "rewards/chosen": -0.041388895362615585, "rewards/margins": 0.0275493822991848, "rewards/rejected": -0.06893827766180038, "step": 1790 }, { "epoch": 1.425734710087371, "grad_norm": 29.721099853515625, "learning_rate": 1.1801515411874575e-06, "log_odds_chosen": 1.9157155752182007, "log_odds_ratio": -0.1623825579881668, "logits/chosen": 279.03704833984375, "logits/rejected": 351.99029541015625, "logps/chosen": -0.49805086851119995, "logps/rejected": -1.6073286533355713, "loss": 0.8375, "nll_loss": 0.6787853837013245, "rewards/accuracies": 1.0, "rewards/chosen": -0.024902544915676117, "rewards/margins": 0.05546388775110245, "rewards/rejected": -0.08036642521619797, "step": 1795 }, { "epoch": 1.4297061159650517, "grad_norm": 23.46649169921875, "learning_rate": 1.1785113019775794e-06, "log_odds_chosen": 2.264665126800537, "log_odds_ratio": -0.17821480333805084, "logits/chosen": 340.29827880859375, "logits/rejected": 320.89910888671875, "logps/chosen": -0.4604433476924896, "logps/rejected": -1.8439369201660156, "loss": 0.8162, "nll_loss": 0.6925734281539917, "rewards/accuracies": 1.0, "rewards/chosen": -0.02302216924726963, "rewards/margins": 0.06917468458414078, "rewards/rejected": -0.09219685196876526, "step": 1800 }, { "epoch": 1.4336775218427324, "grad_norm": 44.617557525634766, "learning_rate": 1.1768778828946262e-06, "log_odds_chosen": 1.7701141834259033, "log_odds_ratio": -0.17851954698562622, "logits/chosen": 305.48785400390625, "logits/rejected": 249.27084350585938, "logps/chosen": -0.43837469816207886, "logps/rejected": -1.4193745851516724, "loss": 0.804, "nll_loss": 1.0134713649749756, "rewards/accuracies": 1.0, "rewards/chosen": -0.021918734535574913, "rewards/margins": 0.049049995839595795, "rewards/rejected": -0.07096873223781586, "step": 1805 }, { "epoch": 1.437648927720413, "grad_norm": 22.284208297729492, "learning_rate": 1.1752512368056712e-06, "log_odds_chosen": 1.949928641319275, "log_odds_ratio": -0.15419664978981018, "logits/chosen": 312.5030212402344, "logits/rejected": 369.13250732421875, "logps/chosen": -0.5765711069107056, "logps/rejected": -1.8438835144042969, "loss": 0.8873, "nll_loss": 1.0679264068603516, "rewards/accuracies": 1.0, "rewards/chosen": -0.028828555718064308, "rewards/margins": 0.0633656233549118, "rewards/rejected": -0.09219418466091156, "step": 1810 }, { "epoch": 1.4416203335980937, "grad_norm": 22.29758644104004, "learning_rate": 1.1736313170325507e-06, "log_odds_chosen": 1.8476063013076782, "log_odds_ratio": -0.2637333869934082, "logits/chosen": 279.7242431640625, "logits/rejected": 426.8993225097656, "logps/chosen": -0.5844321250915527, "logps/rejected": -1.6296072006225586, "loss": 0.7365, "nll_loss": 0.7915916442871094, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.029221605509519577, "rewards/margins": 0.052258752286434174, "rewards/rejected": -0.08148036152124405, "step": 1815 }, { "epoch": 1.4455917394757745, "grad_norm": 29.178213119506836, "learning_rate": 1.1720180773462387e-06, "log_odds_chosen": 1.9528881311416626, "log_odds_ratio": -0.20737957954406738, "logits/chosen": 260.05792236328125, "logits/rejected": 275.297119140625, "logps/chosen": -0.4214601516723633, "logps/rejected": -1.6192858219146729, "loss": 0.6882, "nll_loss": 0.5797325372695923, "rewards/accuracies": 1.0, "rewards/chosen": -0.021073007956147194, "rewards/margins": 0.059891290962696075, "rewards/rejected": -0.08096429705619812, "step": 1820 }, { "epoch": 1.449563145353455, "grad_norm": 23.370649337768555, "learning_rate": 1.1704114719613058e-06, "log_odds_chosen": 1.8969475030899048, "log_odds_ratio": -0.1610887199640274, "logits/chosen": 316.1849670410156, "logits/rejected": 326.1905212402344, "logps/chosen": -0.42412129044532776, "logps/rejected": -1.474806785583496, "loss": 0.8086, "nll_loss": 0.6038433313369751, "rewards/accuracies": 1.0, "rewards/chosen": -0.021206064149737358, "rewards/margins": 0.05253427475690842, "rewards/rejected": -0.07374034821987152, "step": 1825 }, { "epoch": 1.4535345512311357, "grad_norm": 25.256650924682617, "learning_rate": 1.168811455530461e-06, "log_odds_chosen": 2.2011282444000244, "log_odds_ratio": -0.1458742320537567, "logits/chosen": 270.08099365234375, "logits/rejected": 414.66912841796875, "logps/chosen": -0.1999184787273407, "logps/rejected": -1.135138750076294, "loss": 0.7276, "nll_loss": 0.4324968755245209, "rewards/accuracies": 1.0, "rewards/chosen": -0.009995924308896065, "rewards/margins": 0.04676101356744766, "rewards/rejected": -0.05675693601369858, "step": 1830 }, { "epoch": 1.4575059571088165, "grad_norm": 40.1107063293457, "learning_rate": 1.1672179831391772e-06, "log_odds_chosen": 1.8065335750579834, "log_odds_ratio": -0.23505112528800964, "logits/chosen": 389.3009338378906, "logits/rejected": 252.0340118408203, "logps/chosen": -0.5170108079910278, "logps/rejected": -1.5569610595703125, "loss": 0.7184, "nll_loss": 0.6483758687973022, "rewards/accuracies": 1.0, "rewards/chosen": -0.02585054375231266, "rewards/margins": 0.05199751257896423, "rewards/rejected": -0.07784806191921234, "step": 1835 }, { "epoch": 1.4614773629864972, "grad_norm": 26.60308074951172, "learning_rate": 1.1656310103003923e-06, "log_odds_chosen": 1.8615598678588867, "log_odds_ratio": -0.19836989045143127, "logits/chosen": 320.78143310546875, "logits/rejected": 354.0133972167969, "logps/chosen": -0.3542177081108093, "logps/rejected": -1.2545313835144043, "loss": 0.8251, "nll_loss": 0.7097489237785339, "rewards/accuracies": 1.0, "rewards/chosen": -0.017710886895656586, "rewards/margins": 0.04501568153500557, "rewards/rejected": -0.06272657215595245, "step": 1840 }, { "epoch": 1.465448768864178, "grad_norm": 27.621578216552734, "learning_rate": 1.164050492949297e-06, "log_odds_chosen": 2.032970905303955, "log_odds_ratio": -0.21160092949867249, "logits/chosen": 288.5796813964844, "logits/rejected": 268.0729064941406, "logps/chosen": -0.45712965726852417, "logps/rejected": -1.5615556240081787, "loss": 0.8043, "nll_loss": 0.8877753019332886, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.02285648323595524, "rewards/margins": 0.05522129684686661, "rewards/rejected": -0.0780777782201767, "step": 1845 }, { "epoch": 1.4694201747418587, "grad_norm": 30.004453659057617, "learning_rate": 1.162476387438193e-06, "log_odds_chosen": 1.4030696153640747, "log_odds_ratio": -0.24303074181079865, "logits/chosen": 263.7775573730469, "logits/rejected": 297.43450927734375, "logps/chosen": -0.6241577863693237, "logps/rejected": -1.4998613595962524, "loss": 0.8994, "nll_loss": 0.9912241101264954, "rewards/accuracies": 1.0, "rewards/chosen": -0.031207893043756485, "rewards/margins": 0.04378517344594002, "rewards/rejected": -0.0749930664896965, "step": 1850 }, { "epoch": 1.4733915806195395, "grad_norm": 32.73456573486328, "learning_rate": 1.1609086505314302e-06, "log_odds_chosen": 1.436710000038147, "log_odds_ratio": -0.24272426962852478, "logits/chosen": 319.9948425292969, "logits/rejected": 276.5702209472656, "logps/chosen": -0.44764357805252075, "logps/rejected": -1.2125766277313232, "loss": 0.7988, "nll_loss": 0.7930669188499451, "rewards/accuracies": 1.0, "rewards/chosen": -0.022382179275155067, "rewards/margins": 0.038246653974056244, "rewards/rejected": -0.06062883138656616, "step": 1855 }, { "epoch": 1.47736298649722, "grad_norm": 40.75159454345703, "learning_rate": 1.1593472394004206e-06, "log_odds_chosen": 1.0405197143554688, "log_odds_ratio": -0.3699452579021454, "logits/chosen": 292.70220947265625, "logits/rejected": 364.3802795410156, "logps/chosen": -0.41787219047546387, "logps/rejected": -0.9609645009040833, "loss": 0.7833, "nll_loss": 0.5806514024734497, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.020893611013889313, "rewards/margins": 0.02715461328625679, "rewards/rejected": -0.0480482280254364, "step": 1860 }, { "epoch": 1.4813343923749007, "grad_norm": 27.765846252441406, "learning_rate": 1.1577921116187233e-06, "log_odds_chosen": 2.0443687438964844, "log_odds_ratio": -0.1834229975938797, "logits/chosen": 239.04916381835938, "logits/rejected": 375.8265686035156, "logps/chosen": -0.6495457887649536, "logps/rejected": -2.141505002975464, "loss": 0.7322, "nll_loss": 0.7409297227859497, "rewards/accuracies": 1.0, "rewards/chosen": -0.03247729316353798, "rewards/margins": 0.07459796965122223, "rewards/rejected": -0.10707525908946991, "step": 1865 }, { "epoch": 1.4853057982525815, "grad_norm": 33.22872543334961, "learning_rate": 1.1562432251572007e-06, "log_odds_chosen": 2.502471923828125, "log_odds_ratio": -0.10238447040319443, "logits/chosen": 380.58367919921875, "logits/rejected": 238.6292266845703, "logps/chosen": -0.3574802279472351, "logps/rejected": -1.6461595296859741, "loss": 0.9208, "nll_loss": 0.9279494285583496, "rewards/accuracies": 1.0, "rewards/chosen": -0.017874013632535934, "rewards/margins": 0.06443396210670471, "rewards/rejected": -0.08230797946453094, "step": 1870 }, { "epoch": 1.489277204130262, "grad_norm": 33.23795700073242, "learning_rate": 1.1547005383792514e-06, "log_odds_chosen": 2.7648186683654785, "log_odds_ratio": -0.06605343520641327, "logits/chosen": 391.21295166015625, "logits/rejected": 338.6253356933594, "logps/chosen": -0.3001159727573395, "logps/rejected": -1.8439795970916748, "loss": 0.9542, "nll_loss": 0.6177743673324585, "rewards/accuracies": 1.0, "rewards/chosen": -0.015005799010396004, "rewards/margins": 0.07719318568706512, "rewards/rejected": -0.09219899028539658, "step": 1875 }, { "epoch": 1.4932486100079427, "grad_norm": 31.490821838378906, "learning_rate": 1.1531640100361064e-06, "log_odds_chosen": 1.2999223470687866, "log_odds_ratio": -0.2614326775074005, "logits/chosen": 287.3116760253906, "logits/rejected": 266.83551025390625, "logps/chosen": -0.5728956460952759, "logps/rejected": -1.3332241773605347, "loss": 0.7484, "nll_loss": 0.6585596799850464, "rewards/accuracies": 1.0, "rewards/chosen": -0.028644781559705734, "rewards/margins": 0.038016427308321, "rewards/rejected": -0.06666121631860733, "step": 1880 }, { "epoch": 1.4972200158856235, "grad_norm": 24.411352157592773, "learning_rate": 1.1516335992621969e-06, "log_odds_chosen": 2.1026387214660645, "log_odds_ratio": -0.134886234998703, "logits/chosen": 305.50970458984375, "logits/rejected": 322.8634338378906, "logps/chosen": -0.36966148018836975, "logps/rejected": -1.4685351848602295, "loss": 0.9336, "nll_loss": 0.6422818303108215, "rewards/accuracies": 1.0, "rewards/chosen": -0.018483074381947517, "rewards/margins": 0.054943691939115524, "rewards/rejected": -0.0734267607331276, "step": 1885 }, { "epoch": 1.5011914217633042, "grad_norm": 151.1687469482422, "learning_rate": 1.1501092655705905e-06, "log_odds_chosen": 2.1099042892456055, "log_odds_ratio": -0.15668827295303345, "logits/chosen": 295.8759765625, "logits/rejected": 361.9652404785156, "logps/chosen": -0.5755786299705505, "logps/rejected": -1.6685059070587158, "loss": 0.9132, "nll_loss": 1.1686800718307495, "rewards/accuracies": 1.0, "rewards/chosen": -0.028778931125998497, "rewards/margins": 0.054646365344524384, "rewards/rejected": -0.08342529833316803, "step": 1890 }, { "epoch": 1.505162827640985, "grad_norm": 38.116607666015625, "learning_rate": 1.1485909688484915e-06, "log_odds_chosen": 2.76908802986145, "log_odds_ratio": -0.0817871242761612, "logits/chosen": 366.576171875, "logits/rejected": 233.8785400390625, "logps/chosen": -0.19693537056446075, "logps/rejected": -1.285447359085083, "loss": 0.8993, "nll_loss": 1.0742276906967163, "rewards/accuracies": 1.0, "rewards/chosen": -0.009846767410635948, "rewards/margins": 0.05442560464143753, "rewards/rejected": -0.06427237391471863, "step": 1895 }, { "epoch": 1.5091342335186657, "grad_norm": 39.85503387451172, "learning_rate": 1.1470786693528087e-06, "log_odds_chosen": 1.611579179763794, "log_odds_ratio": -0.2680577337741852, "logits/chosen": 293.2515563964844, "logits/rejected": 279.0267028808594, "logps/chosen": -0.6857994198799133, "logps/rejected": -1.6845874786376953, "loss": 0.8295, "nll_loss": 1.0036165714263916, "rewards/accuracies": 1.0, "rewards/chosen": -0.03428996726870537, "rewards/margins": 0.04993940517306328, "rewards/rejected": -0.08422937989234924, "step": 1900 }, { "epoch": 1.5131056393963465, "grad_norm": 25.456764221191406, "learning_rate": 1.1455723277057847e-06, "log_odds_chosen": 1.930851936340332, "log_odds_ratio": -0.19720369577407837, "logits/chosen": 363.69366455078125, "logits/rejected": 306.1184997558594, "logps/chosen": -0.35775288939476013, "logps/rejected": -1.2952030897140503, "loss": 0.8436, "nll_loss": 0.7519537210464478, "rewards/accuracies": 1.0, "rewards/chosen": -0.017887646332383156, "rewards/margins": 0.04687251150608063, "rewards/rejected": -0.06476015597581863, "step": 1905 }, { "epoch": 1.517077045274027, "grad_norm": 31.93710708618164, "learning_rate": 1.144071904890689e-06, "log_odds_chosen": 1.6818300485610962, "log_odds_ratio": -0.3100757300853729, "logits/chosen": 316.9599914550781, "logits/rejected": 285.8528137207031, "logps/chosen": -0.7037925124168396, "logps/rejected": -1.733506441116333, "loss": 0.7745, "nll_loss": 1.0718820095062256, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03518962487578392, "rewards/margins": 0.05148569867014885, "rewards/rejected": -0.08667532354593277, "step": 1910 }, { "epoch": 1.5210484511517077, "grad_norm": 48.80948257446289, "learning_rate": 1.1425773622475754e-06, "log_odds_chosen": 2.292222261428833, "log_odds_ratio": -0.1542581468820572, "logits/chosen": 348.7215270996094, "logits/rejected": 291.024169921875, "logps/chosen": -0.47094663977622986, "logps/rejected": -1.8219391107559204, "loss": 0.9162, "nll_loss": 0.82440584897995, "rewards/accuracies": 1.0, "rewards/chosen": -0.023547332733869553, "rewards/margins": 0.06754962354898453, "rewards/rejected": -0.09109695255756378, "step": 1915 }, { "epoch": 1.5250198570293882, "grad_norm": 25.52734375, "learning_rate": 1.1410886614690962e-06, "log_odds_chosen": 2.0423831939697266, "log_odds_ratio": -0.1279725432395935, "logits/chosen": 249.74081420898438, "logits/rejected": 294.3277282714844, "logps/chosen": -0.5086468458175659, "logps/rejected": -1.7694532871246338, "loss": 0.8849, "nll_loss": 1.0162447690963745, "rewards/accuracies": 1.0, "rewards/chosen": -0.025432344526052475, "rewards/margins": 0.06304031610488892, "rewards/rejected": -0.08847267180681229, "step": 1920 }, { "epoch": 1.528991262907069, "grad_norm": 30.2684326171875, "learning_rate": 1.1396057645963796e-06, "log_odds_chosen": 0.9635022878646851, "log_odds_ratio": -0.499523788690567, "logits/chosen": 255.3334197998047, "logits/rejected": 351.5635681152344, "logps/chosen": -0.5562784671783447, "logps/rejected": -1.0457470417022705, "loss": 0.8113, "nll_loss": 0.9001520276069641, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.027813920751214027, "rewards/margins": 0.024473432451486588, "rewards/rejected": -0.052287351340055466, "step": 1925 }, { "epoch": 1.5329626687847497, "grad_norm": 22.8693790435791, "learning_rate": 1.1381286340149635e-06, "log_odds_chosen": 2.0736007690429688, "log_odds_ratio": -0.16570061445236206, "logits/chosen": 315.60400390625, "logits/rejected": 312.27056884765625, "logps/chosen": -0.2544867992401123, "logps/rejected": -0.957433819770813, "loss": 0.7446, "nll_loss": 0.6752304434776306, "rewards/accuracies": 1.0, "rewards/chosen": -0.012724341824650764, "rewards/margins": 0.035147350281476974, "rewards/rejected": -0.04787169024348259, "step": 1930 }, { "epoch": 1.5369340746624305, "grad_norm": 35.36579895019531, "learning_rate": 1.1366572324507892e-06, "log_odds_chosen": 0.5588671565055847, "log_odds_ratio": -0.5842665433883667, "logits/chosen": 239.9763946533203, "logits/rejected": 326.97601318359375, "logps/chosen": -0.6879284977912903, "logps/rejected": -0.8991800546646118, "loss": 0.8204, "nll_loss": 0.7677897810935974, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.034396421164274216, "rewards/margins": 0.010562578216195107, "rewards/rejected": -0.04495900124311447, "step": 1935 }, { "epoch": 1.5409054805401112, "grad_norm": 73.0653076171875, "learning_rate": 1.1351915229662496e-06, "log_odds_chosen": 1.0707106590270996, "log_odds_ratio": -0.40579065680503845, "logits/chosen": 310.290283203125, "logits/rejected": 329.74615478515625, "logps/chosen": -0.8593183755874634, "logps/rejected": -1.5329375267028809, "loss": 0.7189, "nll_loss": 0.8812786340713501, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04296591505408287, "rewards/margins": 0.033680956810712814, "rewards/rejected": -0.07664687186479568, "step": 1940 }, { "epoch": 1.544876886417792, "grad_norm": 29.127994537353516, "learning_rate": 1.1337314689562956e-06, "log_odds_chosen": 1.457087755203247, "log_odds_ratio": -0.2620270848274231, "logits/chosen": 386.1640625, "logits/rejected": 308.2832336425781, "logps/chosen": -0.4160517156124115, "logps/rejected": -1.0144550800323486, "loss": 0.8671, "nll_loss": 0.9128124117851257, "rewards/accuracies": 1.0, "rewards/chosen": -0.020802585408091545, "rewards/margins": 0.029920164495706558, "rewards/rejected": -0.05072275549173355, "step": 1945 }, { "epoch": 1.5488482922954727, "grad_norm": 24.65477752685547, "learning_rate": 1.1322770341445958e-06, "log_odds_chosen": 1.257016897201538, "log_odds_ratio": -0.2820950150489807, "logits/chosen": 284.0875549316406, "logits/rejected": 310.27606201171875, "logps/chosen": -0.5743626952171326, "logps/rejected": -1.278131365776062, "loss": 0.6535, "nll_loss": 0.6732789874076843, "rewards/accuracies": 1.0, "rewards/chosen": -0.0287181343883276, "rewards/margins": 0.03518843278288841, "rewards/rejected": -0.06390657275915146, "step": 1950 }, { "epoch": 1.5528196981731535, "grad_norm": 21.72737693786621, "learning_rate": 1.1308281825797517e-06, "log_odds_chosen": 0.7033153772354126, "log_odds_ratio": -0.4314216077327728, "logits/chosen": 290.4872131347656, "logits/rejected": 324.4375, "logps/chosen": -0.7639486193656921, "logps/rejected": -1.205558180809021, "loss": 0.8264, "nll_loss": 0.9597232937812805, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03819743171334267, "rewards/margins": 0.02208048105239868, "rewards/rejected": -0.06027790904045105, "step": 1955 }, { "epoch": 1.556791104050834, "grad_norm": 31.13661003112793, "learning_rate": 1.1293848786315642e-06, "log_odds_chosen": 0.610641360282898, "log_odds_ratio": -0.6338435411453247, "logits/chosen": 262.70538330078125, "logits/rejected": 292.87548828125, "logps/chosen": -0.5347281098365784, "logps/rejected": -0.9647022485733032, "loss": 0.7839, "nll_loss": 0.6543559432029724, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.02673640474677086, "rewards/margins": 0.021498704329133034, "rewards/rejected": -0.04823511093854904, "step": 1960 }, { "epoch": 1.5607625099285147, "grad_norm": 32.95920944213867, "learning_rate": 1.1279470869873539e-06, "log_odds_chosen": 1.6339528560638428, "log_odds_ratio": -0.23149630427360535, "logits/chosen": 292.2558288574219, "logits/rejected": 289.98492431640625, "logps/chosen": -0.5067226886749268, "logps/rejected": -1.425010085105896, "loss": 0.7711, "nll_loss": 0.8301030397415161, "rewards/accuracies": 1.0, "rewards/chosen": -0.025336135178804398, "rewards/margins": 0.04591437056660652, "rewards/rejected": -0.07125050574541092, "step": 1965 }, { "epoch": 1.5647339158061953, "grad_norm": 35.133758544921875, "learning_rate": 1.1265147726483323e-06, "log_odds_chosen": 1.5698158740997314, "log_odds_ratio": -0.1986834853887558, "logits/chosen": 312.9940490722656, "logits/rejected": 339.71966552734375, "logps/chosen": -0.6160762310028076, "logps/rejected": -1.6119115352630615, "loss": 0.7255, "nll_loss": 0.8140316009521484, "rewards/accuracies": 1.0, "rewards/chosen": -0.03080381080508232, "rewards/margins": 0.04979177564382553, "rewards/rejected": -0.08059557527303696, "step": 1970 }, { "epoch": 1.568705321683876, "grad_norm": 35.48809051513672, "learning_rate": 1.125087900926024e-06, "log_odds_chosen": 1.6797376871109009, "log_odds_ratio": -0.18438556790351868, "logits/chosen": 400.1343994140625, "logits/rejected": 254.5809783935547, "logps/chosen": -0.6721310615539551, "logps/rejected": -1.707035779953003, "loss": 0.9347, "nll_loss": 1.0324268341064453, "rewards/accuracies": 1.0, "rewards/chosen": -0.033606551587581635, "rewards/margins": 0.05174523591995239, "rewards/rejected": -0.08535178750753403, "step": 1975 }, { "epoch": 1.5726767275615567, "grad_norm": 37.31858825683594, "learning_rate": 1.1236664374387369e-06, "log_odds_chosen": 1.521589994430542, "log_odds_ratio": -0.2915099561214447, "logits/chosen": 466.3154296875, "logits/rejected": 254.2445831298828, "logps/chosen": -0.32636314630508423, "logps/rejected": -0.9937745928764343, "loss": 0.7887, "nll_loss": 0.5553312301635742, "rewards/accuracies": 1.0, "rewards/chosen": -0.01631815917789936, "rewards/margins": 0.033370573073625565, "rewards/rejected": -0.049688734114170074, "step": 1980 }, { "epoch": 1.5766481334392375, "grad_norm": 37.352561950683594, "learning_rate": 1.1222503481080839e-06, "log_odds_chosen": 2.001415491104126, "log_odds_ratio": -0.21713057160377502, "logits/chosen": 290.2130432128906, "logits/rejected": 313.20318603515625, "logps/chosen": -0.37033191323280334, "logps/rejected": -1.4434969425201416, "loss": 0.765, "nll_loss": 0.5978688597679138, "rewards/accuracies": 1.0, "rewards/chosen": -0.018516594544053078, "rewards/margins": 0.05365825444459915, "rewards/rejected": -0.07217484712600708, "step": 1985 }, { "epoch": 1.5806195393169182, "grad_norm": 28.262657165527344, "learning_rate": 1.120839599155551e-06, "log_odds_chosen": 0.7699643969535828, "log_odds_ratio": -0.4922252297401428, "logits/chosen": 305.6629943847656, "logits/rejected": 278.87664794921875, "logps/chosen": -0.561299204826355, "logps/rejected": -0.9469798803329468, "loss": 0.6831, "nll_loss": 0.8940151333808899, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.02806496061384678, "rewards/margins": 0.01928403414785862, "rewards/rejected": -0.0473489984869957, "step": 1990 }, { "epoch": 1.584590945194599, "grad_norm": 41.953609466552734, "learning_rate": 1.1194341570991125e-06, "log_odds_chosen": 1.8478342294692993, "log_odds_ratio": -0.2467450201511383, "logits/chosen": 364.38714599609375, "logits/rejected": 317.972900390625, "logps/chosen": -0.42733925580978394, "logps/rejected": -1.156592845916748, "loss": 0.687, "nll_loss": 0.6334320902824402, "rewards/accuracies": 1.0, "rewards/chosen": -0.021366963163018227, "rewards/margins": 0.036462679505348206, "rewards/rejected": -0.05782964080572128, "step": 1995 }, { "epoch": 1.5885623510722797, "grad_norm": 54.09469223022461, "learning_rate": 1.118033988749895e-06, "log_odds_chosen": 1.108894944190979, "log_odds_ratio": -0.42433857917785645, "logits/chosen": 246.85830688476562, "logits/rejected": 331.88848876953125, "logps/chosen": -0.37637174129486084, "logps/rejected": -0.8830350041389465, "loss": 0.812, "nll_loss": 0.5468398928642273, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.018818587064743042, "rewards/margins": 0.025333160534501076, "rewards/rejected": -0.04415174573659897, "step": 2000 }, { "epoch": 1.5925337569499602, "grad_norm": 35.29500961303711, "learning_rate": 1.1166390612088834e-06, "log_odds_chosen": 2.0460915565490723, "log_odds_ratio": -0.20217151939868927, "logits/chosen": 332.9038391113281, "logits/rejected": 241.957275390625, "logps/chosen": -0.7111643552780151, "logps/rejected": -2.1858718395233154, "loss": 0.8399, "nll_loss": 0.986811637878418, "rewards/accuracies": 1.0, "rewards/chosen": -0.03555821627378464, "rewards/margins": 0.07373537123203278, "rewards/rejected": -0.10929358005523682, "step": 2005 }, { "epoch": 1.596505162827641, "grad_norm": 44.16957473754883, "learning_rate": 1.1152493418636764e-06, "log_odds_chosen": 1.7737945318222046, "log_odds_ratio": -0.18286636471748352, "logits/chosen": 299.9866638183594, "logits/rejected": 319.33050537109375, "logps/chosen": -0.4143436849117279, "logps/rejected": -1.294891119003296, "loss": 0.8105, "nll_loss": 0.5705588459968567, "rewards/accuracies": 1.0, "rewards/chosen": -0.020717184990644455, "rewards/margins": 0.04402737691998482, "rewards/rejected": -0.06474455446004868, "step": 2010 }, { "epoch": 1.6004765687053217, "grad_norm": 28.743602752685547, "learning_rate": 1.1138647983852827e-06, "log_odds_chosen": 1.285465955734253, "log_odds_ratio": -0.301657110452652, "logits/chosen": 299.4744567871094, "logits/rejected": 253.02835083007812, "logps/chosen": -0.5782285928726196, "logps/rejected": -1.2731786966323853, "loss": 0.692, "nll_loss": 0.6972297430038452, "rewards/accuracies": 1.0, "rewards/chosen": -0.02891143225133419, "rewards/margins": 0.03474750369787216, "rewards/rejected": -0.0636589378118515, "step": 2015 }, { "epoch": 1.6044479745830023, "grad_norm": 32.377628326416016, "learning_rate": 1.112485398724962e-06, "log_odds_chosen": 1.4153087139129639, "log_odds_ratio": -0.2784711718559265, "logits/chosen": 408.69146728515625, "logits/rejected": 323.42388916015625, "logps/chosen": -0.5624197721481323, "logps/rejected": -1.2454121112823486, "loss": 0.727, "nll_loss": 0.8249729871749878, "rewards/accuracies": 1.0, "rewards/chosen": -0.028120988979935646, "rewards/margins": 0.03414962440729141, "rewards/rejected": -0.06227061152458191, "step": 2020 }, { "epoch": 1.608419380460683, "grad_norm": 29.039993286132812, "learning_rate": 1.111111111111111e-06, "log_odds_chosen": 3.085414409637451, "log_odds_ratio": -0.11739520728588104, "logits/chosen": 299.89031982421875, "logits/rejected": 295.5804443359375, "logps/chosen": -0.22455701231956482, "logps/rejected": -1.8523008823394775, "loss": 0.6705, "nll_loss": 0.49535632133483887, "rewards/accuracies": 1.0, "rewards/chosen": -0.01122785173356533, "rewards/margins": 0.08138719201087952, "rewards/rejected": -0.0926150381565094, "step": 2025 }, { "epoch": 1.6123907863383637, "grad_norm": 30.045818328857422, "learning_rate": 1.1097419040461884e-06, "log_odds_chosen": 1.698897361755371, "log_odds_ratio": -0.21327969431877136, "logits/chosen": 270.95648193359375, "logits/rejected": 383.06256103515625, "logps/chosen": -0.4442528784275055, "logps/rejected": -1.416225790977478, "loss": 0.7029, "nll_loss": 0.6176842451095581, "rewards/accuracies": 1.0, "rewards/chosen": -0.022212643176317215, "rewards/margins": 0.048598650842905045, "rewards/rejected": -0.07081129401922226, "step": 2030 }, { "epoch": 1.6163621922160445, "grad_norm": 54.25676727294922, "learning_rate": 1.1083777463036816e-06, "log_odds_chosen": 1.8387584686279297, "log_odds_ratio": -0.4786914885044098, "logits/chosen": 347.6900939941406, "logits/rejected": 285.0926818847656, "logps/chosen": -0.6492979526519775, "logps/rejected": -1.5062249898910522, "loss": 0.881, "nll_loss": 0.99772709608078, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.032464899122714996, "rewards/margins": 0.04284634813666344, "rewards/rejected": -0.07531125098466873, "step": 2035 }, { "epoch": 1.6203335980937252, "grad_norm": 30.767316818237305, "learning_rate": 1.1070186069251193e-06, "log_odds_chosen": 0.8958157300949097, "log_odds_ratio": -0.3673623204231262, "logits/chosen": 400.4134826660156, "logits/rejected": 247.96182250976562, "logps/chosen": -0.633257269859314, "logps/rejected": -1.1521893739700317, "loss": 0.7333, "nll_loss": 0.7941089868545532, "rewards/accuracies": 1.0, "rewards/chosen": -0.03166285902261734, "rewards/margins": 0.025946607813239098, "rewards/rejected": -0.05760946869850159, "step": 2040 }, { "epoch": 1.624305003971406, "grad_norm": 26.96861457824707, "learning_rate": 1.1056644552171163e-06, "log_odds_chosen": 2.199897050857544, "log_odds_ratio": -0.17961575090885162, "logits/chosen": 235.00869750976562, "logits/rejected": 412.4244079589844, "logps/chosen": -0.5231403112411499, "logps/rejected": -1.801314353942871, "loss": 0.8437, "nll_loss": 0.8201786279678345, "rewards/accuracies": 1.0, "rewards/chosen": -0.026157017797231674, "rewards/margins": 0.06390871107578278, "rewards/rejected": -0.09006571769714355, "step": 2045 }, { "epoch": 1.6282764098490867, "grad_norm": 50.35895538330078, "learning_rate": 1.1043152607484655e-06, "log_odds_chosen": 1.1554151773452759, "log_odds_ratio": -0.28396543860435486, "logits/chosen": 277.6943054199219, "logits/rejected": 342.0705871582031, "logps/chosen": -0.4897230565547943, "logps/rejected": -1.1020798683166504, "loss": 0.8528, "nll_loss": 0.6063051223754883, "rewards/accuracies": 1.0, "rewards/chosen": -0.024486154317855835, "rewards/margins": 0.030617838725447655, "rewards/rejected": -0.05510399490594864, "step": 2050 }, { "epoch": 1.6322478157267672, "grad_norm": 54.209327697753906, "learning_rate": 1.1029709933472638e-06, "log_odds_chosen": 1.4155397415161133, "log_odds_ratio": -0.2870427668094635, "logits/chosen": 248.59048461914062, "logits/rejected": 335.0494079589844, "logps/chosen": -0.49244603514671326, "logps/rejected": -1.2132689952850342, "loss": 0.9627, "nll_loss": 0.8640304803848267, "rewards/accuracies": 1.0, "rewards/chosen": -0.024622302502393723, "rewards/margins": 0.036041148006916046, "rewards/rejected": -0.06066344305872917, "step": 2055 }, { "epoch": 1.636219221604448, "grad_norm": 24.583423614501953, "learning_rate": 1.1016316230980794e-06, "log_odds_chosen": 2.2713701725006104, "log_odds_ratio": -0.13930584490299225, "logits/chosen": 343.72979736328125, "logits/rejected": 319.630859375, "logps/chosen": -0.5581644773483276, "logps/rejected": -1.981873869895935, "loss": 0.8047, "nll_loss": 0.8394874334335327, "rewards/accuracies": 1.0, "rewards/chosen": -0.02790822647511959, "rewards/margins": 0.07118546962738037, "rewards/rejected": -0.09909369796514511, "step": 2060 }, { "epoch": 1.6401906274821285, "grad_norm": 31.54615020751953, "learning_rate": 1.100297120339154e-06, "log_odds_chosen": 2.6403470039367676, "log_odds_ratio": -0.10118647664785385, "logits/chosen": 383.3280029296875, "logits/rejected": 294.6977233886719, "logps/chosen": -0.36460989713668823, "logps/rejected": -1.8322269916534424, "loss": 0.6359, "nll_loss": 0.6053825616836548, "rewards/accuracies": 1.0, "rewards/chosen": -0.018230494111776352, "rewards/margins": 0.07338085025548935, "rewards/rejected": -0.0916113406419754, "step": 2065 }, { "epoch": 1.6441620333598093, "grad_norm": 30.021217346191406, "learning_rate": 1.098967455659645e-06, "log_odds_chosen": 1.0652350187301636, "log_odds_ratio": -0.3493257164955139, "logits/chosen": 336.79705810546875, "logits/rejected": 291.21636962890625, "logps/chosen": -0.6652384996414185, "logps/rejected": -1.2979891300201416, "loss": 0.6926, "nll_loss": 0.9665688276290894, "rewards/accuracies": 1.0, "rewards/chosen": -0.03326192498207092, "rewards/margins": 0.0316375307738781, "rewards/rejected": -0.06489945948123932, "step": 2070 }, { "epoch": 1.64813343923749, "grad_norm": 30.264102935791016, "learning_rate": 1.0976425998969036e-06, "log_odds_chosen": 1.5590989589691162, "log_odds_ratio": -0.24328143894672394, "logits/chosen": 286.5331115722656, "logits/rejected": 367.84771728515625, "logps/chosen": -0.7489336133003235, "logps/rejected": -1.8240808248519897, "loss": 0.8402, "nll_loss": 0.8905247449874878, "rewards/accuracies": 1.0, "rewards/chosen": -0.037446681410074234, "rewards/margins": 0.053757358342409134, "rewards/rejected": -0.09120403230190277, "step": 2075 }, { "epoch": 1.6521048451151708, "grad_norm": 29.85713768005371, "learning_rate": 1.0963225241337867e-06, "log_odds_chosen": 1.7376596927642822, "log_odds_ratio": -0.20286710560321808, "logits/chosen": 252.2242889404297, "logits/rejected": 387.5245361328125, "logps/chosen": -0.38523685932159424, "logps/rejected": -1.3179008960723877, "loss": 0.7234, "nll_loss": 0.5829646587371826, "rewards/accuracies": 1.0, "rewards/chosen": -0.019261842593550682, "rewards/margins": 0.04663320630788803, "rewards/rejected": -0.06589505076408386, "step": 2080 }, { "epoch": 1.6560762509928515, "grad_norm": 38.490631103515625, "learning_rate": 1.0950071996960073e-06, "log_odds_chosen": 1.6538069248199463, "log_odds_ratio": -0.22928845882415771, "logits/chosen": 324.57879638671875, "logits/rejected": 291.5035705566406, "logps/chosen": -0.6583439111709595, "logps/rejected": -1.5701768398284912, "loss": 0.9184, "nll_loss": 0.846837043762207, "rewards/accuracies": 1.0, "rewards/chosen": -0.03291719779372215, "rewards/margins": 0.045591648668050766, "rewards/rejected": -0.07850884646177292, "step": 2085 }, { "epoch": 1.6600476568705322, "grad_norm": 47.06442642211914, "learning_rate": 1.093696598149518e-06, "log_odds_chosen": 1.538604497909546, "log_odds_ratio": -0.26061558723449707, "logits/chosen": 319.3267822265625, "logits/rejected": 376.86767578125, "logps/chosen": -0.3853234052658081, "logps/rejected": -1.1279296875, "loss": 0.7315, "nll_loss": 0.5786786079406738, "rewards/accuracies": 1.0, "rewards/chosen": -0.019266171380877495, "rewards/margins": 0.037130314856767654, "rewards/rejected": -0.056396484375, "step": 2090 }, { "epoch": 1.664019062748213, "grad_norm": 32.46980285644531, "learning_rate": 1.0923906912979294e-06, "log_odds_chosen": 2.563507556915283, "log_odds_ratio": -0.08386361598968506, "logits/chosen": 373.00958251953125, "logits/rejected": 278.15411376953125, "logps/chosen": -0.4534314274787903, "logps/rejected": -1.8156788349151611, "loss": 0.7171, "nll_loss": 0.7025438547134399, "rewards/accuracies": 1.0, "rewards/chosen": -0.022671569138765335, "rewards/margins": 0.06811238825321198, "rewards/rejected": -0.09078395366668701, "step": 2095 }, { "epoch": 1.6679904686258937, "grad_norm": 33.24217987060547, "learning_rate": 1.091089451179962e-06, "log_odds_chosen": 0.8273458480834961, "log_odds_ratio": -0.4423191547393799, "logits/chosen": 379.5636291503906, "logits/rejected": 243.4392852783203, "logps/chosen": -0.6664489507675171, "logps/rejected": -1.1298898458480835, "loss": 0.9125, "nll_loss": 1.0257153511047363, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.033322446048259735, "rewards/margins": 0.0231720469892025, "rewards/rejected": -0.056494493037462234, "step": 2100 }, { "epoch": 1.6719618745035743, "grad_norm": 36.56023406982422, "learning_rate": 1.0897928500669322e-06, "log_odds_chosen": 0.48522695899009705, "log_odds_ratio": -0.5532909035682678, "logits/chosen": 337.21429443359375, "logits/rejected": 289.87176513671875, "logps/chosen": -0.8676580190658569, "logps/rejected": -1.076594591140747, "loss": 0.9392, "nll_loss": 0.9415254592895508, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.043382905423641205, "rewards/margins": 0.010446821339428425, "rewards/rejected": -0.05382972210645676, "step": 2105 }, { "epoch": 1.675933280381255, "grad_norm": 32.79849624633789, "learning_rate": 1.0885008604602703e-06, "log_odds_chosen": 1.568729043006897, "log_odds_ratio": -0.25548508763313293, "logits/chosen": 308.7678527832031, "logits/rejected": 345.7282409667969, "logps/chosen": -0.44344788789749146, "logps/rejected": -1.2481848001480103, "loss": 0.6848, "nll_loss": 0.6747992634773254, "rewards/accuracies": 1.0, "rewards/chosen": -0.022172395139932632, "rewards/margins": 0.04023684561252594, "rewards/rejected": -0.06240924075245857, "step": 2110 }, { "epoch": 1.6799046862589355, "grad_norm": 30.764209747314453, "learning_rate": 1.0872134550890703e-06, "log_odds_chosen": 1.1366727352142334, "log_odds_ratio": -0.4399314820766449, "logits/chosen": 249.7522735595703, "logits/rejected": 347.0238342285156, "logps/chosen": -0.6293274164199829, "logps/rejected": -1.1428825855255127, "loss": 0.794, "nll_loss": 0.8456639051437378, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.031466372311115265, "rewards/margins": 0.025677751749753952, "rewards/rejected": -0.05714412406086922, "step": 2115 }, { "epoch": 1.6838760921366163, "grad_norm": 34.06650161743164, "learning_rate": 1.0859306069076736e-06, "log_odds_chosen": 0.9071727991104126, "log_odds_ratio": -0.5517928004264832, "logits/chosen": 246.1561737060547, "logits/rejected": 483.20465087890625, "logps/chosen": -0.48550352454185486, "logps/rejected": -1.149099588394165, "loss": 0.8235, "nll_loss": 0.6903790235519409, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.024275178089737892, "rewards/margins": 0.03317980095744133, "rewards/rejected": -0.05745498090982437, "step": 2120 }, { "epoch": 1.687847498014297, "grad_norm": 29.03752899169922, "learning_rate": 1.084652289093281e-06, "log_odds_chosen": 1.5771881341934204, "log_odds_ratio": -0.3552386164665222, "logits/chosen": 329.28778076171875, "logits/rejected": 293.9125671386719, "logps/chosen": -0.4627589285373688, "logps/rejected": -0.8882190585136414, "loss": 0.7296, "nll_loss": 0.7793964147567749, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.023137949407100677, "rewards/margins": 0.02127300761640072, "rewards/rejected": -0.04441095516085625, "step": 2125 }, { "epoch": 1.6918189038919778, "grad_norm": 27.400096893310547, "learning_rate": 1.083378475043599e-06, "log_odds_chosen": 2.1568071842193604, "log_odds_ratio": -0.18791693449020386, "logits/chosen": 376.5939636230469, "logits/rejected": 325.4264221191406, "logps/chosen": -0.34812647104263306, "logps/rejected": -1.3398598432540894, "loss": 0.7049, "nll_loss": 0.5113323926925659, "rewards/accuracies": 1.0, "rewards/chosen": -0.017406323924660683, "rewards/margins": 0.049586668610572815, "rewards/rejected": -0.06699299067258835, "step": 2130 }, { "epoch": 1.6957903097696585, "grad_norm": 35.258235931396484, "learning_rate": 1.0821091383745125e-06, "log_odds_chosen": 1.3307862281799316, "log_odds_ratio": -0.27201247215270996, "logits/chosen": 326.7689514160156, "logits/rejected": 252.6797332763672, "logps/chosen": -0.5550287365913391, "logps/rejected": -1.2143471240997314, "loss": 0.7516, "nll_loss": 0.9621167182922363, "rewards/accuracies": 1.0, "rewards/chosen": -0.027751434594392776, "rewards/margins": 0.032965924590826035, "rewards/rejected": -0.06071736291050911, "step": 2135 }, { "epoch": 1.6997617156473392, "grad_norm": 35.40523147583008, "learning_rate": 1.0808442529177925e-06, "log_odds_chosen": 1.7957347631454468, "log_odds_ratio": -0.27554595470428467, "logits/chosen": 326.6446838378906, "logits/rejected": 290.5052185058594, "logps/chosen": -0.6328108906745911, "logps/rejected": -1.6637407541275024, "loss": 0.9048, "nll_loss": 1.0120429992675781, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03164054453372955, "rewards/margins": 0.05154649168252945, "rewards/rejected": -0.083187036216259, "step": 2140 }, { "epoch": 1.70373312152502, "grad_norm": 33.236244201660156, "learning_rate": 1.0795837927188263e-06, "log_odds_chosen": 2.5924830436706543, "log_odds_ratio": -0.09222547709941864, "logits/chosen": 234.89968872070312, "logits/rejected": 404.0227966308594, "logps/chosen": -0.4588467478752136, "logps/rejected": -2.025322198867798, "loss": 0.7654, "nll_loss": 0.5697577595710754, "rewards/accuracies": 1.0, "rewards/chosen": -0.02294233813881874, "rewards/margins": 0.07832376658916473, "rewards/rejected": -0.10126610100269318, "step": 2145 }, { "epoch": 1.7077045274027005, "grad_norm": 25.529382705688477, "learning_rate": 1.0783277320343842e-06, "log_odds_chosen": 1.2242435216903687, "log_odds_ratio": -0.3057636618614197, "logits/chosen": 357.5350646972656, "logits/rejected": 285.787841796875, "logps/chosen": -0.6072322726249695, "logps/rejected": -1.252858281135559, "loss": 0.6573, "nll_loss": 0.9143392443656921, "rewards/accuracies": 1.0, "rewards/chosen": -0.030361616984009743, "rewards/margins": 0.0322813019156456, "rewards/rejected": -0.06264291703701019, "step": 2150 }, { "epoch": 1.7116759332803813, "grad_norm": 49.64712142944336, "learning_rate": 1.0770760453304094e-06, "log_odds_chosen": 2.2228212356567383, "log_odds_ratio": -0.12164826691150665, "logits/chosen": 322.6756591796875, "logits/rejected": 239.7041015625, "logps/chosen": -0.2784230709075928, "logps/rejected": -1.3234379291534424, "loss": 0.8904, "nll_loss": 0.7466278672218323, "rewards/accuracies": 1.0, "rewards/chosen": -0.013921153731644154, "rewards/margins": 0.052250735461711884, "rewards/rejected": -0.06617189198732376, "step": 2155 }, { "epoch": 1.715647339158062, "grad_norm": 25.497785568237305, "learning_rate": 1.075828707279838e-06, "log_odds_chosen": 1.9126389026641846, "log_odds_ratio": -0.1578390896320343, "logits/chosen": 375.0826110839844, "logits/rejected": 281.03997802734375, "logps/chosen": -0.4245838522911072, "logps/rejected": -1.5614581108093262, "loss": 0.6819, "nll_loss": 0.7630025744438171, "rewards/accuracies": 1.0, "rewards/chosen": -0.02122919075191021, "rewards/margins": 0.05684372037649155, "rewards/rejected": -0.0780729129910469, "step": 2160 }, { "epoch": 1.7196187450357425, "grad_norm": 30.034915924072266, "learning_rate": 1.0745856927604474e-06, "log_odds_chosen": 1.5273187160491943, "log_odds_ratio": -0.26404619216918945, "logits/chosen": 334.1214599609375, "logits/rejected": 238.54653930664062, "logps/chosen": -0.6224659085273743, "logps/rejected": -1.5819625854492188, "loss": 0.8031, "nll_loss": 0.8806807398796082, "rewards/accuracies": 1.0, "rewards/chosen": -0.031123295426368713, "rewards/margins": 0.047974843531847, "rewards/rejected": -0.07909813523292542, "step": 2165 }, { "epoch": 1.7235901509134233, "grad_norm": 34.6898307800293, "learning_rate": 1.0733469768527298e-06, "log_odds_chosen": 1.7383909225463867, "log_odds_ratio": -0.20549456775188446, "logits/chosen": 341.08563232421875, "logits/rejected": 324.42132568359375, "logps/chosen": -0.5074697732925415, "logps/rejected": -1.5371900796890259, "loss": 0.7717, "nll_loss": 0.7111256718635559, "rewards/accuracies": 1.0, "rewards/chosen": -0.025373492389917374, "rewards/margins": 0.05148600786924362, "rewards/rejected": -0.0768595039844513, "step": 2170 }, { "epoch": 1.727561556791104, "grad_norm": 32.80601119995117, "learning_rate": 1.0721125348377948e-06, "log_odds_chosen": 1.6041771173477173, "log_odds_ratio": -0.3034898042678833, "logits/chosen": 309.17962646484375, "logits/rejected": 346.447509765625, "logps/chosen": -0.5312290191650391, "logps/rejected": -1.4099701642990112, "loss": 0.6985, "nll_loss": 0.8906863331794739, "rewards/accuracies": 1.0, "rewards/chosen": -0.026561450213193893, "rewards/margins": 0.04393706098198891, "rewards/rejected": -0.0704985111951828, "step": 2175 }, { "epoch": 1.7315329626687848, "grad_norm": 37.60955810546875, "learning_rate": 1.0708823421952984e-06, "log_odds_chosen": 1.7141081094741821, "log_odds_ratio": -0.24008333683013916, "logits/chosen": 338.4755554199219, "logits/rejected": 307.659423828125, "logps/chosen": -0.4687555432319641, "logps/rejected": -1.3905603885650635, "loss": 0.7047, "nll_loss": 0.6992734670639038, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.023437779396772385, "rewards/margins": 0.04609023407101631, "rewards/rejected": -0.0695280134677887, "step": 2180 }, { "epoch": 1.7355043685464655, "grad_norm": 24.991849899291992, "learning_rate": 1.0696563746013951e-06, "log_odds_chosen": 1.723670244216919, "log_odds_ratio": -0.2573872208595276, "logits/chosen": 305.9327087402344, "logits/rejected": 333.73638916015625, "logps/chosen": -0.5920778512954712, "logps/rejected": -1.579659342765808, "loss": 0.9738, "nll_loss": 0.8010879755020142, "rewards/accuracies": 1.0, "rewards/chosen": -0.02960388921201229, "rewards/margins": 0.04937908053398132, "rewards/rejected": -0.07898297160863876, "step": 2185 }, { "epoch": 1.7394757744241462, "grad_norm": 27.51617431640625, "learning_rate": 1.0684346079267208e-06, "log_odds_chosen": 2.167844772338867, "log_odds_ratio": -0.18182644248008728, "logits/chosen": 300.15960693359375, "logits/rejected": 263.7042541503906, "logps/chosen": -0.34622564911842346, "logps/rejected": -1.5231422185897827, "loss": 0.704, "nll_loss": 0.5762925744056702, "rewards/accuracies": 1.0, "rewards/chosen": -0.017311284318566322, "rewards/margins": 0.05884582921862602, "rewards/rejected": -0.0761571079492569, "step": 2190 }, { "epoch": 1.743447180301827, "grad_norm": 36.284423828125, "learning_rate": 1.0672170182343944e-06, "log_odds_chosen": 1.5064342021942139, "log_odds_ratio": -0.30479374527931213, "logits/chosen": 265.1541442871094, "logits/rejected": 310.3070068359375, "logps/chosen": -0.7095457315444946, "logps/rejected": -1.6736814975738525, "loss": 0.9196, "nll_loss": 0.837944507598877, "rewards/accuracies": 1.0, "rewards/chosen": -0.03547728806734085, "rewards/margins": 0.04820678383111954, "rewards/rejected": -0.08368407189846039, "step": 2195 }, { "epoch": 1.7474185861795075, "grad_norm": 57.919429779052734, "learning_rate": 1.066003581778052e-06, "log_odds_chosen": 2.6329212188720703, "log_odds_ratio": -0.11882112920284271, "logits/chosen": 256.0660400390625, "logits/rejected": 321.71478271484375, "logps/chosen": -0.4541356563568115, "logps/rejected": -2.08860182762146, "loss": 0.7238, "nll_loss": 0.7454873323440552, "rewards/accuracies": 1.0, "rewards/chosen": -0.022706782445311546, "rewards/margins": 0.08172331750392914, "rewards/rejected": -0.10443009436130524, "step": 2200 }, { "epoch": 1.7513899920571883, "grad_norm": 50.52003860473633, "learning_rate": 1.0647942749998999e-06, "log_odds_chosen": 1.3120359182357788, "log_odds_ratio": -0.31396904587745667, "logits/chosen": 328.6595153808594, "logits/rejected": 402.1265563964844, "logps/chosen": -0.6453569531440735, "logps/rejected": -1.3103744983673096, "loss": 1.0108, "nll_loss": 0.8728786706924438, "rewards/accuracies": 1.0, "rewards/chosen": -0.032267846167087555, "rewards/margins": 0.03325087949633598, "rewards/rejected": -0.06551872193813324, "step": 2205 }, { "epoch": 1.7553613979348688, "grad_norm": 41.56594467163086, "learning_rate": 1.0635890745287928e-06, "log_odds_chosen": 2.6617274284362793, "log_odds_ratio": -0.19012318551540375, "logits/chosen": 258.3802795410156, "logits/rejected": 250.87179565429688, "logps/chosen": -0.4259757101535797, "logps/rejected": -1.9833433628082275, "loss": 0.846, "nll_loss": 0.7863501310348511, "rewards/accuracies": 1.0, "rewards/chosen": -0.021298784762620926, "rewards/margins": 0.07786837965250015, "rewards/rejected": -0.09916716814041138, "step": 2210 }, { "epoch": 1.7593328038125495, "grad_norm": 34.22325897216797, "learning_rate": 1.0623879571783382e-06, "log_odds_chosen": 0.8847603797912598, "log_odds_ratio": -0.4665696620941162, "logits/chosen": 305.217041015625, "logits/rejected": 351.25518798828125, "logps/chosen": -0.605812668800354, "logps/rejected": -1.0832501649856567, "loss": 0.8867, "nll_loss": 0.7453805804252625, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0302906334400177, "rewards/margins": 0.023871876299381256, "rewards/rejected": -0.054162509739398956, "step": 2215 }, { "epoch": 1.7633042096902303, "grad_norm": 25.505517959594727, "learning_rate": 1.0611908999450224e-06, "log_odds_chosen": 1.605830430984497, "log_odds_ratio": -0.20149704813957214, "logits/chosen": 354.46044921875, "logits/rejected": 311.38763427734375, "logps/chosen": -0.37914207577705383, "logps/rejected": -1.1268947124481201, "loss": 0.7463, "nll_loss": 0.6839945912361145, "rewards/accuracies": 1.0, "rewards/chosen": -0.01895710453391075, "rewards/margins": 0.037387631833553314, "rewards/rejected": -0.056344740092754364, "step": 2220 }, { "epoch": 1.767275615567911, "grad_norm": 27.840789794921875, "learning_rate": 1.0599978800063601e-06, "log_odds_chosen": 1.546400785446167, "log_odds_ratio": -0.2523784935474396, "logits/chosen": 320.6352844238281, "logits/rejected": 245.90072631835938, "logps/chosen": -0.5363035798072815, "logps/rejected": -1.4044965505599976, "loss": 0.7344, "nll_loss": 0.672437846660614, "rewards/accuracies": 1.0, "rewards/chosen": -0.026815179735422134, "rewards/margins": 0.04340965673327446, "rewards/rejected": -0.0702248364686966, "step": 2225 }, { "epoch": 1.7712470214455918, "grad_norm": 32.31303405761719, "learning_rate": 1.058808874719067e-06, "log_odds_chosen": 1.2581157684326172, "log_odds_ratio": -0.3380572497844696, "logits/chosen": 282.74761962890625, "logits/rejected": 327.1788024902344, "logps/chosen": -0.7751240134239197, "logps/rejected": -1.5154592990875244, "loss": 0.7602, "nll_loss": 0.8768072128295898, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03875620290637016, "rewards/margins": 0.03701676428318024, "rewards/rejected": -0.0757729634642601, "step": 2230 }, { "epoch": 1.7752184273232725, "grad_norm": 27.653583526611328, "learning_rate": 1.057623861617254e-06, "log_odds_chosen": 2.2240524291992188, "log_odds_ratio": -0.14906403422355652, "logits/chosen": 259.7555236816406, "logits/rejected": 386.7548522949219, "logps/chosen": -0.37181687355041504, "logps/rejected": -1.6917979717254639, "loss": 0.7392, "nll_loss": 0.6186314821243286, "rewards/accuracies": 1.0, "rewards/chosen": -0.018590843304991722, "rewards/margins": 0.06599906086921692, "rewards/rejected": -0.08458990603685379, "step": 2235 }, { "epoch": 1.7791898332009533, "grad_norm": 34.99640655517578, "learning_rate": 1.0564428184106459e-06, "log_odds_chosen": 1.4233901500701904, "log_odds_ratio": -0.27014046907424927, "logits/chosen": 337.10662841796875, "logits/rejected": 255.8411102294922, "logps/chosen": -0.4958310127258301, "logps/rejected": -1.2386577129364014, "loss": 0.8677, "nll_loss": 0.9047778844833374, "rewards/accuracies": 1.0, "rewards/chosen": -0.024791549891233444, "rewards/margins": 0.037141330540180206, "rewards/rejected": -0.06193288043141365, "step": 2240 }, { "epoch": 1.783161239078634, "grad_norm": 32.94675064086914, "learning_rate": 1.0552657229828183e-06, "log_odds_chosen": 0.731157124042511, "log_odds_ratio": -0.4831514358520508, "logits/chosen": 321.543701171875, "logits/rejected": 240.626220703125, "logps/chosen": -0.6978667974472046, "logps/rejected": -1.0622944831848145, "loss": 0.8724, "nll_loss": 0.9993354082107544, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03489334136247635, "rewards/margins": 0.018221387639641762, "rewards/rejected": -0.05311473086476326, "step": 2245 }, { "epoch": 1.7871326449563145, "grad_norm": 30.07236671447754, "learning_rate": 1.0540925533894598e-06, "log_odds_chosen": 0.8772686719894409, "log_odds_ratio": -0.4837498664855957, "logits/chosen": 438.844482421875, "logits/rejected": 251.0099639892578, "logps/chosen": -0.6994816064834595, "logps/rejected": -1.0784616470336914, "loss": 0.7779, "nll_loss": 0.8594354391098022, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.034974079579114914, "rewards/margins": 0.018949000164866447, "rewards/rejected": -0.05392308160662651, "step": 2250 }, { "epoch": 1.7911040508339953, "grad_norm": 29.69354820251465, "learning_rate": 1.0529232878566533e-06, "log_odds_chosen": 1.2839289903640747, "log_odds_ratio": -0.2796970009803772, "logits/chosen": 289.58209228515625, "logits/rejected": 359.0069885253906, "logps/chosen": -0.46519821882247925, "logps/rejected": -1.1590473651885986, "loss": 0.6845, "nll_loss": 0.6256710886955261, "rewards/accuracies": 1.0, "rewards/chosen": -0.023259911686182022, "rewards/margins": 0.03469245508313179, "rewards/rejected": -0.05795236676931381, "step": 2255 }, { "epoch": 1.7950754567116758, "grad_norm": 27.493417739868164, "learning_rate": 1.0517579047791782e-06, "log_odds_chosen": 0.9947487115859985, "log_odds_ratio": -0.4154279828071594, "logits/chosen": 321.8368225097656, "logits/rejected": 355.9534606933594, "logps/chosen": -0.47752445936203003, "logps/rejected": -1.0383057594299316, "loss": 0.6544, "nll_loss": 0.5580215454101562, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.02387622371315956, "rewards/margins": 0.02803906239569187, "rewards/rejected": -0.05191528797149658, "step": 2260 }, { "epoch": 1.7990468625893565, "grad_norm": 36.50867462158203, "learning_rate": 1.050596382718834e-06, "log_odds_chosen": 1.929277777671814, "log_odds_ratio": -0.24522367119789124, "logits/chosen": 248.70803833007812, "logits/rejected": 370.62701416015625, "logps/chosen": -0.6788391470909119, "logps/rejected": -1.9356067180633545, "loss": 0.9336, "nll_loss": 0.8503932952880859, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03394196182489395, "rewards/margins": 0.06283838301897049, "rewards/rejected": -0.09678034484386444, "step": 2265 }, { "epoch": 1.8030182684670373, "grad_norm": 30.090612411499023, "learning_rate": 1.049438700402784e-06, "log_odds_chosen": 2.4246153831481934, "log_odds_ratio": -0.1122988611459732, "logits/chosen": 317.8182678222656, "logits/rejected": 389.30523681640625, "logps/chosen": -0.40875324606895447, "logps/rejected": -1.923872709274292, "loss": 0.6992, "nll_loss": 0.4913650155067444, "rewards/accuracies": 1.0, "rewards/chosen": -0.020437661558389664, "rewards/margins": 0.07575596868991852, "rewards/rejected": -0.09619362652301788, "step": 2270 }, { "epoch": 1.806989674344718, "grad_norm": 25.525978088378906, "learning_rate": 1.0482848367219184e-06, "log_odds_chosen": 1.7310831546783447, "log_odds_ratio": -0.34687691926956177, "logits/chosen": 293.1515197753906, "logits/rejected": 304.5860900878906, "logps/chosen": -0.6816526055335999, "logps/rejected": -1.7223570346832275, "loss": 0.9117, "nll_loss": 0.8708241581916809, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03408262878656387, "rewards/margins": 0.05203522369265556, "rewards/rejected": -0.08611784875392914, "step": 2275 }, { "epoch": 1.8109610802223988, "grad_norm": 29.124156951904297, "learning_rate": 1.0471347707292389e-06, "log_odds_chosen": 2.642489194869995, "log_odds_ratio": -0.14402975142002106, "logits/chosen": 286.64959716796875, "logits/rejected": 330.8363037109375, "logps/chosen": -0.36216282844543457, "logps/rejected": -1.9222888946533203, "loss": 0.7766, "nll_loss": 0.5919591188430786, "rewards/accuracies": 1.0, "rewards/chosen": -0.01810814067721367, "rewards/margins": 0.078006312251091, "rewards/rejected": -0.09611444920301437, "step": 2280 }, { "epoch": 1.8149324861000795, "grad_norm": 24.49781036376953, "learning_rate": 1.04598848163826e-06, "log_odds_chosen": 1.9413111209869385, "log_odds_ratio": -0.19651436805725098, "logits/chosen": 294.1470031738281, "logits/rejected": 285.11810302734375, "logps/chosen": -0.6075969338417053, "logps/rejected": -1.9450137615203857, "loss": 0.8606, "nll_loss": 0.7974241971969604, "rewards/accuracies": 1.0, "rewards/chosen": -0.030379846692085266, "rewards/margins": 0.06687085330486298, "rewards/rejected": -0.09725069999694824, "step": 2285 }, { "epoch": 1.8189038919777603, "grad_norm": 24.984628677368164, "learning_rate": 1.0448459488214322e-06, "log_odds_chosen": 1.4830385446548462, "log_odds_ratio": -0.24547508358955383, "logits/chosen": 284.15216064453125, "logits/rejected": 289.8674621582031, "logps/chosen": -0.576518177986145, "logps/rejected": -1.3695199489593506, "loss": 0.712, "nll_loss": 0.845025360584259, "rewards/accuracies": 1.0, "rewards/chosen": -0.02882590889930725, "rewards/margins": 0.0396500900387764, "rewards/rejected": -0.06847599893808365, "step": 2290 }, { "epoch": 1.8228752978554408, "grad_norm": 35.125213623046875, "learning_rate": 1.0437071518085826e-06, "log_odds_chosen": 1.9451690912246704, "log_odds_ratio": -0.2187591791152954, "logits/chosen": 344.46112060546875, "logits/rejected": 290.50848388671875, "logps/chosen": -0.4077928066253662, "logps/rejected": -1.4108073711395264, "loss": 0.76, "nll_loss": 0.8611392974853516, "rewards/accuracies": 1.0, "rewards/chosen": -0.02038964070379734, "rewards/margins": 0.05015072971582413, "rewards/rejected": -0.07054036855697632, "step": 2295 }, { "epoch": 1.8268467037331215, "grad_norm": 29.224489212036133, "learning_rate": 1.042572070285374e-06, "log_odds_chosen": 1.6341838836669922, "log_odds_ratio": -0.47226276993751526, "logits/chosen": 240.94723510742188, "logits/rejected": 439.5355529785156, "logps/chosen": -0.2641201317310333, "logps/rejected": -1.169684886932373, "loss": 0.7786, "nll_loss": 0.39919307827949524, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.013206006959080696, "rewards/margins": 0.04527823254466057, "rewards/rejected": -0.05848424509167671, "step": 2300 }, { "epoch": 1.8308181096108023, "grad_norm": 64.61443328857422, "learning_rate": 1.0414406840917835e-06, "log_odds_chosen": 1.7078742980957031, "log_odds_ratio": -0.29076051712036133, "logits/chosen": 301.88037109375, "logits/rejected": 251.6233673095703, "logps/chosen": -0.525429368019104, "logps/rejected": -1.6450624465942383, "loss": 0.8899, "nll_loss": 0.8937755823135376, "rewards/accuracies": 1.0, "rewards/chosen": -0.02627146616578102, "rewards/margins": 0.05598165839910507, "rewards/rejected": -0.0822531208395958, "step": 2305 }, { "epoch": 1.8347895154884828, "grad_norm": 36.214271545410156, "learning_rate": 1.0403129732205989e-06, "log_odds_chosen": 1.3265584707260132, "log_odds_ratio": -0.40558844804763794, "logits/chosen": 346.4527893066406, "logits/rejected": 320.7982177734375, "logps/chosen": -0.6041200757026672, "logps/rejected": -1.3090689182281494, "loss": 0.7463, "nll_loss": 0.9375091791152954, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.030206000432372093, "rewards/margins": 0.03524744138121605, "rewards/rejected": -0.06545344740152359, "step": 2310 }, { "epoch": 1.8387609213661635, "grad_norm": 23.28519058227539, "learning_rate": 1.0391889178159317e-06, "log_odds_chosen": 1.4057496786117554, "log_odds_ratio": -0.24964909255504608, "logits/chosen": 309.9521484375, "logits/rejected": 243.9438934326172, "logps/chosen": -0.8198047876358032, "logps/rejected": -1.7336012125015259, "loss": 0.9028, "nll_loss": 1.1149805784225464, "rewards/accuracies": 1.0, "rewards/chosen": -0.04099023714661598, "rewards/margins": 0.04568982869386673, "rewards/rejected": -0.08668006211519241, "step": 2315 }, { "epoch": 1.8427323272438443, "grad_norm": 33.16374969482422, "learning_rate": 1.0380684981717496e-06, "log_odds_chosen": 0.6586580276489258, "log_odds_ratio": -0.4859169125556946, "logits/chosen": 334.7452087402344, "logits/rejected": 262.04071044921875, "logps/chosen": -0.43002748489379883, "logps/rejected": -0.7598254084587097, "loss": 0.7969, "nll_loss": 0.7446638941764832, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.02150137536227703, "rewards/margins": 0.016489896923303604, "rewards/rejected": -0.037991270422935486, "step": 2320 }, { "epoch": 1.846703733121525, "grad_norm": 29.125152587890625, "learning_rate": 1.0369516947304254e-06, "log_odds_chosen": 0.0750177875161171, "log_odds_ratio": -0.9107866287231445, "logits/chosen": 350.4623718261719, "logits/rejected": 319.9029541015625, "logps/chosen": -0.5689317584037781, "logps/rejected": -0.5792874097824097, "loss": 0.7853, "nll_loss": 0.6771488785743713, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.028446590527892113, "rewards/margins": 0.0005177788552828133, "rewards/rejected": -0.028964370489120483, "step": 2325 }, { "epoch": 1.8506751389992058, "grad_norm": 34.48380661010742, "learning_rate": 1.0358384880813022e-06, "log_odds_chosen": 1.9680553674697876, "log_odds_ratio": -0.1879040002822876, "logits/chosen": 311.9992980957031, "logits/rejected": 378.4543762207031, "logps/chosen": -0.49781322479248047, "logps/rejected": -1.6778230667114258, "loss": 0.7279, "nll_loss": 0.7636333703994751, "rewards/accuracies": 1.0, "rewards/chosen": -0.024890663102269173, "rewards/margins": 0.059000492095947266, "rewards/rejected": -0.08389115333557129, "step": 2330 }, { "epoch": 1.8546465448768865, "grad_norm": 51.420326232910156, "learning_rate": 1.0347288589592778e-06, "log_odds_chosen": 2.020479679107666, "log_odds_ratio": -0.23942995071411133, "logits/chosen": 301.0638732910156, "logits/rejected": 328.4521179199219, "logps/chosen": -0.6289145946502686, "logps/rejected": -1.9039732217788696, "loss": 0.7954, "nll_loss": 0.821241021156311, "rewards/accuracies": 1.0, "rewards/chosen": -0.03144572675228119, "rewards/margins": 0.0637529268860817, "rewards/rejected": -0.09519866108894348, "step": 2335 }, { "epoch": 1.8586179507545673, "grad_norm": 39.684078216552734, "learning_rate": 1.033622788243404e-06, "log_odds_chosen": 1.3421988487243652, "log_odds_ratio": -0.37751635909080505, "logits/chosen": 305.427978515625, "logits/rejected": 291.96466064453125, "logps/chosen": -0.5937973856925964, "logps/rejected": -1.6161730289459229, "loss": 0.8183, "nll_loss": 0.7988775968551636, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.02968987263739109, "rewards/margins": 0.051118772476911545, "rewards/rejected": -0.08080865442752838, "step": 2340 }, { "epoch": 1.8625893566322478, "grad_norm": 30.320964813232422, "learning_rate": 1.0325202569555013e-06, "log_odds_chosen": 1.2330553531646729, "log_odds_ratio": -0.3010442554950714, "logits/chosen": 298.0875549316406, "logits/rejected": 370.0763854980469, "logps/chosen": -0.558303713798523, "logps/rejected": -1.2143747806549072, "loss": 0.7559, "nll_loss": 0.7310600280761719, "rewards/accuracies": 1.0, "rewards/chosen": -0.027915185317397118, "rewards/margins": 0.032803554087877274, "rewards/rejected": -0.06071874499320984, "step": 2345 }, { "epoch": 1.8665607625099285, "grad_norm": 25.897533416748047, "learning_rate": 1.0314212462587935e-06, "log_odds_chosen": 2.4509646892547607, "log_odds_ratio": -0.09875769913196564, "logits/chosen": 253.66830444335938, "logits/rejected": 334.7943115234375, "logps/chosen": -0.34774085879325867, "logps/rejected": -1.5809571743011475, "loss": 0.8168, "nll_loss": 0.6432914137840271, "rewards/accuracies": 1.0, "rewards/chosen": -0.017387043684720993, "rewards/margins": 0.06166081503033638, "rewards/rejected": -0.07904786616563797, "step": 2350 }, { "epoch": 1.870532168387609, "grad_norm": 26.26967430114746, "learning_rate": 1.0303257374565546e-06, "log_odds_chosen": 1.6334152221679688, "log_odds_ratio": -0.266364187002182, "logits/chosen": 385.4601135253906, "logits/rejected": 300.8085632324219, "logps/chosen": -0.6583752036094666, "logps/rejected": -1.4509037733078003, "loss": 0.7401, "nll_loss": 0.7208520174026489, "rewards/accuracies": 1.0, "rewards/chosen": -0.03291876241564751, "rewards/margins": 0.039626430720090866, "rewards/rejected": -0.07254519313573837, "step": 2355 }, { "epoch": 1.8745035742652898, "grad_norm": 26.586294174194336, "learning_rate": 1.029233711990773e-06, "log_odds_chosen": 2.712470531463623, "log_odds_ratio": -0.20829498767852783, "logits/chosen": 307.21173095703125, "logits/rejected": 252.4169464111328, "logps/chosen": -0.383186399936676, "logps/rejected": -1.2868399620056152, "loss": 0.6887, "nll_loss": 0.6436999440193176, "rewards/accuracies": 1.0, "rewards/chosen": -0.01915931887924671, "rewards/margins": 0.04518268257379532, "rewards/rejected": -0.06434200704097748, "step": 2360 }, { "epoch": 1.8784749801429705, "grad_norm": 23.74375343322754, "learning_rate": 1.0281451514408315e-06, "log_odds_chosen": 1.4261778593063354, "log_odds_ratio": -0.2637864947319031, "logits/chosen": 264.46990966796875, "logits/rejected": 421.1216735839844, "logps/chosen": -0.6919044852256775, "logps/rejected": -1.547218918800354, "loss": 0.7403, "nll_loss": 0.9019848108291626, "rewards/accuracies": 1.0, "rewards/chosen": -0.03459522873163223, "rewards/margins": 0.04276571795344353, "rewards/rejected": -0.07736094295978546, "step": 2365 }, { "epoch": 1.8824463860206513, "grad_norm": 30.572526931762695, "learning_rate": 1.0270600375222014e-06, "log_odds_chosen": 2.870634078979492, "log_odds_ratio": -0.06479945033788681, "logits/chosen": 312.4655456542969, "logits/rejected": 231.99411010742188, "logps/chosen": -0.21137888729572296, "logps/rejected": -1.5268778800964355, "loss": 0.5935, "nll_loss": 0.5586276054382324, "rewards/accuracies": 1.0, "rewards/chosen": -0.010568944737315178, "rewards/margins": 0.06577495485544205, "rewards/rejected": -0.07634389400482178, "step": 2370 }, { "epoch": 1.886417791898332, "grad_norm": 31.64859962463379, "learning_rate": 1.0259783520851542e-06, "log_odds_chosen": 1.6580451726913452, "log_odds_ratio": -0.22571304440498352, "logits/chosen": 271.18524169921875, "logits/rejected": 313.13592529296875, "logps/chosen": -0.61414635181427, "logps/rejected": -1.621797800064087, "loss": 0.8558, "nll_loss": 1.0191186666488647, "rewards/accuracies": 1.0, "rewards/chosen": -0.03070731833577156, "rewards/margins": 0.050382573157548904, "rewards/rejected": -0.08108989149332047, "step": 2375 }, { "epoch": 1.8903891977760128, "grad_norm": 29.006942749023438, "learning_rate": 1.0249000771134847e-06, "log_odds_chosen": 2.5084846019744873, "log_odds_ratio": -0.1580258011817932, "logits/chosen": 237.0716552734375, "logits/rejected": 358.3143310546875, "logps/chosen": -0.5167864561080933, "logps/rejected": -2.2698826789855957, "loss": 0.7576, "nll_loss": 0.6090616583824158, "rewards/accuracies": 1.0, "rewards/chosen": -0.025839323177933693, "rewards/margins": 0.08765481412410736, "rewards/rejected": -0.1134941428899765, "step": 2380 }, { "epoch": 1.8943606036536935, "grad_norm": 25.368074417114258, "learning_rate": 1.023825194723252e-06, "log_odds_chosen": 1.3378890752792358, "log_odds_ratio": -0.2533974349498749, "logits/chosen": 340.38909912109375, "logits/rejected": 305.83673095703125, "logps/chosen": -0.5703948736190796, "logps/rejected": -1.343727469444275, "loss": 0.8309, "nll_loss": 0.6171257495880127, "rewards/accuracies": 1.0, "rewards/chosen": -0.02851974405348301, "rewards/margins": 0.038666628301143646, "rewards/rejected": -0.0671863779425621, "step": 2385 }, { "epoch": 1.8983320095313743, "grad_norm": 42.32550811767578, "learning_rate": 1.022753687161533e-06, "log_odds_chosen": 1.654007911682129, "log_odds_ratio": -0.21362581849098206, "logits/chosen": 269.56829833984375, "logits/rejected": 278.0132141113281, "logps/chosen": -0.4740678369998932, "logps/rejected": -1.3538405895233154, "loss": 0.6666, "nll_loss": 0.7328553199768066, "rewards/accuracies": 1.0, "rewards/chosen": -0.02370339073240757, "rewards/margins": 0.043988633900880814, "rewards/rejected": -0.06769202649593353, "step": 2390 }, { "epoch": 1.9023034154090548, "grad_norm": 35.25934982299805, "learning_rate": 1.0216855368051905e-06, "log_odds_chosen": 1.5869925022125244, "log_odds_ratio": -0.27753788232803345, "logits/chosen": 266.008056640625, "logits/rejected": 325.9742126464844, "logps/chosen": -0.31553053855895996, "logps/rejected": -1.1027727127075195, "loss": 0.7717, "nll_loss": 0.6756271719932556, "rewards/accuracies": 1.0, "rewards/chosen": -0.01577652618288994, "rewards/margins": 0.0393621101975441, "rewards/rejected": -0.055138640105724335, "step": 2395 }, { "epoch": 1.9062748212867355, "grad_norm": 42.96984100341797, "learning_rate": 1.0206207261596577e-06, "log_odds_chosen": 1.7989251613616943, "log_odds_ratio": -0.18788772821426392, "logits/chosen": 284.7127685546875, "logits/rejected": 315.79156494140625, "logps/chosen": -0.4359824061393738, "logps/rejected": -1.3588236570358276, "loss": 0.7178, "nll_loss": 0.5897383689880371, "rewards/accuracies": 1.0, "rewards/chosen": -0.02179912105202675, "rewards/margins": 0.046142060309648514, "rewards/rejected": -0.06794118881225586, "step": 2400 }, { "epoch": 1.910246227164416, "grad_norm": 32.65408706665039, "learning_rate": 1.019559237857732e-06, "log_odds_chosen": 1.6669549942016602, "log_odds_ratio": -0.26262766122817993, "logits/chosen": 267.59027099609375, "logits/rejected": 248.32080078125, "logps/chosen": -0.5081090331077576, "logps/rejected": -1.4452846050262451, "loss": 0.6928, "nll_loss": 0.6476167440414429, "rewards/accuracies": 1.0, "rewards/chosen": -0.02540545165538788, "rewards/margins": 0.0468587800860405, "rewards/rejected": -0.07226422429084778, "step": 2405 }, { "epoch": 1.9142176330420968, "grad_norm": 29.009151458740234, "learning_rate": 1.0185010546583882e-06, "log_odds_chosen": 1.5558197498321533, "log_odds_ratio": -0.21908335387706757, "logits/chosen": 277.3587341308594, "logits/rejected": 372.0375061035156, "logps/chosen": -0.4578898549079895, "logps/rejected": -1.304565668106079, "loss": 0.7609, "nll_loss": 0.7717305421829224, "rewards/accuracies": 1.0, "rewards/chosen": -0.022894492372870445, "rewards/margins": 0.042333800345659256, "rewards/rejected": -0.06522828340530396, "step": 2410 }, { "epoch": 1.9181890389197775, "grad_norm": 29.15569305419922, "learning_rate": 1.0174461594455997e-06, "log_odds_chosen": 1.5694271326065063, "log_odds_ratio": -0.27755317091941833, "logits/chosen": 310.48773193359375, "logits/rejected": 290.09130859375, "logps/chosen": -0.5000747442245483, "logps/rejected": -1.2587058544158936, "loss": 0.6486, "nll_loss": 0.6504746675491333, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.025003740563988686, "rewards/margins": 0.03793155401945114, "rewards/rejected": -0.06293529272079468, "step": 2415 }, { "epoch": 1.9221604447974583, "grad_norm": 39.57065963745117, "learning_rate": 1.0163945352271773e-06, "log_odds_chosen": 1.5484364032745361, "log_odds_ratio": -0.3084541857242584, "logits/chosen": 394.11370849609375, "logits/rejected": 378.93695068359375, "logps/chosen": -0.4161972403526306, "logps/rejected": -1.2610418796539307, "loss": 0.7183, "nll_loss": 0.610600471496582, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.02080986276268959, "rewards/margins": 0.04224223643541336, "rewards/rejected": -0.06305209547281265, "step": 2420 }, { "epoch": 1.926131850675139, "grad_norm": 37.694602966308594, "learning_rate": 1.0153461651336193e-06, "log_odds_chosen": 2.225497007369995, "log_odds_ratio": -0.26799148321151733, "logits/chosen": 340.69232177734375, "logits/rejected": 271.46759033203125, "logps/chosen": -0.45521894097328186, "logps/rejected": -1.3658949136734009, "loss": 0.7839, "nll_loss": 0.8904461860656738, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.022760944440960884, "rewards/margins": 0.04553379863500595, "rewards/rejected": -0.06829474866390228, "step": 2425 }, { "epoch": 1.9301032565528198, "grad_norm": 23.69498062133789, "learning_rate": 1.0143010324169743e-06, "log_odds_chosen": 1.5935895442962646, "log_odds_ratio": -0.22372519969940186, "logits/chosen": 304.7364501953125, "logits/rejected": 331.30670166015625, "logps/chosen": -0.41409072279930115, "logps/rejected": -1.1915451288223267, "loss": 0.7498, "nll_loss": 0.6026512980461121, "rewards/accuracies": 1.0, "rewards/chosen": -0.020704539492726326, "rewards/margins": 0.038872718811035156, "rewards/rejected": -0.05957725644111633, "step": 2430 }, { "epoch": 1.9340746624305005, "grad_norm": 29.575180053710938, "learning_rate": 1.013259120449719e-06, "log_odds_chosen": 1.621791124343872, "log_odds_ratio": -0.295773983001709, "logits/chosen": 348.9219970703125, "logits/rejected": 284.30517578125, "logps/chosen": -0.5032614469528198, "logps/rejected": -1.1986490488052368, "loss": 0.7465, "nll_loss": 0.626564621925354, "rewards/accuracies": 1.0, "rewards/chosen": -0.0251630749553442, "rewards/margins": 0.03476938232779503, "rewards/rejected": -0.05993245169520378, "step": 2435 }, { "epoch": 1.938046068308181, "grad_norm": 90.26566314697266, "learning_rate": 1.0122204127236452e-06, "log_odds_chosen": 2.639167070388794, "log_odds_ratio": -0.12427319586277008, "logits/chosen": 504.92901611328125, "logits/rejected": 271.24432373046875, "logps/chosen": -0.2446070909500122, "logps/rejected": -1.3790600299835205, "loss": 0.7872, "nll_loss": 0.5850281715393066, "rewards/accuracies": 1.0, "rewards/chosen": -0.012230354361236095, "rewards/margins": 0.05672264099121094, "rewards/rejected": -0.06895299255847931, "step": 2440 }, { "epoch": 1.9420174741858618, "grad_norm": 31.37210464477539, "learning_rate": 1.0111848928487622e-06, "log_odds_chosen": 2.033698081970215, "log_odds_ratio": -0.29641473293304443, "logits/chosen": 243.27108764648438, "logits/rejected": 469.9246520996094, "logps/chosen": -0.7070415019989014, "logps/rejected": -2.0553581714630127, "loss": 0.8964, "nll_loss": 0.928133487701416, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03535207733511925, "rewards/margins": 0.06741583347320557, "rewards/rejected": -0.10276790708303452, "step": 2445 }, { "epoch": 1.9459888800635425, "grad_norm": 43.04076385498047, "learning_rate": 1.0101525445522107e-06, "log_odds_chosen": 1.378095030784607, "log_odds_ratio": -0.2568764090538025, "logits/chosen": 273.47930908203125, "logits/rejected": 273.142578125, "logps/chosen": -0.40632033348083496, "logps/rejected": -1.0940074920654297, "loss": 0.7759, "nll_loss": 0.6475064754486084, "rewards/accuracies": 1.0, "rewards/chosen": -0.020316017791628838, "rewards/margins": 0.034384358674287796, "rewards/rejected": -0.054700374603271484, "step": 2450 }, { "epoch": 1.949960285941223, "grad_norm": 29.865938186645508, "learning_rate": 1.0091233516771892e-06, "log_odds_chosen": 2.293731689453125, "log_odds_ratio": -0.18753847479820251, "logits/chosen": 384.34149169921875, "logits/rejected": 279.8568420410156, "logps/chosen": -0.38630157709121704, "logps/rejected": -1.4292711019515991, "loss": 0.8241, "nll_loss": 0.8875762224197388, "rewards/accuracies": 1.0, "rewards/chosen": -0.019315078854560852, "rewards/margins": 0.0521484799683094, "rewards/rejected": -0.07146355509757996, "step": 2455 }, { "epoch": 1.9539316918189038, "grad_norm": 25.074451446533203, "learning_rate": 1.0080972981818898e-06, "log_odds_chosen": 1.5017271041870117, "log_odds_ratio": -0.24289080500602722, "logits/chosen": 290.76361083984375, "logits/rejected": 316.858154296875, "logps/chosen": -0.5781405568122864, "logps/rejected": -1.4656955003738403, "loss": 0.749, "nll_loss": 1.0332854986190796, "rewards/accuracies": 1.0, "rewards/chosen": -0.02890702709555626, "rewards/margins": 0.044377751648426056, "rewards/rejected": -0.07328477501869202, "step": 2460 }, { "epoch": 1.9579030976965845, "grad_norm": 37.553321838378906, "learning_rate": 1.0070743681384512e-06, "log_odds_chosen": 1.714714765548706, "log_odds_ratio": -0.20342151820659637, "logits/chosen": 304.4083557128906, "logits/rejected": 403.9393310546875, "logps/chosen": -0.49747514724731445, "logps/rejected": -1.5647690296173096, "loss": 0.7228, "nll_loss": 0.6288737058639526, "rewards/accuracies": 1.0, "rewards/chosen": -0.024873757734894753, "rewards/margins": 0.05336468666791916, "rewards/rejected": -0.07823844999074936, "step": 2465 }, { "epoch": 1.9618745035742653, "grad_norm": 30.394023895263672, "learning_rate": 1.0060545457319173e-06, "log_odds_chosen": 1.9750301837921143, "log_odds_ratio": -0.15881523489952087, "logits/chosen": 252.62771606445312, "logits/rejected": 321.7646484375, "logps/chosen": -0.32253313064575195, "logps/rejected": -1.2601935863494873, "loss": 0.71, "nll_loss": 0.6166780591011047, "rewards/accuracies": 1.0, "rewards/chosen": -0.01612665504217148, "rewards/margins": 0.04688302427530289, "rewards/rejected": -0.06300967931747437, "step": 2470 }, { "epoch": 1.965845909451946, "grad_norm": 39.465248107910156, "learning_rate": 1.0050378152592122e-06, "log_odds_chosen": 1.7062022686004639, "log_odds_ratio": -0.2684418559074402, "logits/chosen": 297.23333740234375, "logits/rejected": 356.68939208984375, "logps/chosen": -0.5603381395339966, "logps/rejected": -1.553205966949463, "loss": 0.7624, "nll_loss": 0.9264168739318848, "rewards/accuracies": 1.0, "rewards/chosen": -0.02801690623164177, "rewards/margins": 0.049643389880657196, "rewards/rejected": -0.07766029983758926, "step": 2475 }, { "epoch": 1.9698173153296268, "grad_norm": 31.2788143157959, "learning_rate": 1.0040241611281238e-06, "log_odds_chosen": 1.464468240737915, "log_odds_ratio": -0.3190952241420746, "logits/chosen": 290.74603271484375, "logits/rejected": 258.4278869628906, "logps/chosen": -0.601416826248169, "logps/rejected": -1.4750945568084717, "loss": 0.772, "nll_loss": 0.6782388091087341, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.030070845037698746, "rewards/margins": 0.043683890253305435, "rewards/rejected": -0.07375473529100418, "step": 2480 }, { "epoch": 1.9737887212073075, "grad_norm": 33.23145294189453, "learning_rate": 1.0030135678562994e-06, "log_odds_chosen": 1.415673851966858, "log_odds_ratio": -0.26751992106437683, "logits/chosen": 326.2284240722656, "logits/rejected": 294.62982177734375, "logps/chosen": -0.4133778214454651, "logps/rejected": -1.1859859228134155, "loss": 0.9344, "nll_loss": 0.588900625705719, "rewards/accuracies": 1.0, "rewards/chosen": -0.020668892189860344, "rewards/margins": 0.0386304035782814, "rewards/rejected": -0.059299297630786896, "step": 2485 }, { "epoch": 1.977760127084988, "grad_norm": 31.663124084472656, "learning_rate": 1.002006020070253e-06, "log_odds_chosen": 1.7711327075958252, "log_odds_ratio": -0.23336951434612274, "logits/chosen": 392.6171875, "logits/rejected": 279.3924865722656, "logps/chosen": -0.5186364054679871, "logps/rejected": -1.4825407266616821, "loss": 0.7194, "nll_loss": 0.8356044888496399, "rewards/accuracies": 1.0, "rewards/chosen": -0.025931820273399353, "rewards/margins": 0.04819521680474281, "rewards/rejected": -0.07412703335285187, "step": 2490 }, { "epoch": 1.9817315329626688, "grad_norm": 30.687904357910156, "learning_rate": 1.0010015025043829e-06, "log_odds_chosen": 2.0353431701660156, "log_odds_ratio": -0.14225442707538605, "logits/chosen": 312.7574157714844, "logits/rejected": 353.1292419433594, "logps/chosen": -0.4555455148220062, "logps/rejected": -1.703881859779358, "loss": 0.7101, "nll_loss": 0.6733388900756836, "rewards/accuracies": 1.0, "rewards/chosen": -0.02277727797627449, "rewards/margins": 0.062416814267635345, "rewards/rejected": -0.08519409596920013, "step": 2495 }, { "epoch": 1.9857029388403493, "grad_norm": 20.97602653503418, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": 1.2658917903900146, "log_odds_ratio": -0.2939799427986145, "logits/chosen": 352.18548583984375, "logits/rejected": 275.24761962890625, "logps/chosen": -0.5702251195907593, "logps/rejected": -1.279112458229065, "loss": 0.7243, "nll_loss": 0.8630763292312622, "rewards/accuracies": 1.0, "rewards/chosen": -0.028511255979537964, "rewards/margins": 0.035444360226392746, "rewards/rejected": -0.06395561993122101, "step": 2500 }, { "epoch": 1.98967434471803, "grad_norm": 23.28777503967285, "learning_rate": 9.990014975043674e-07, "log_odds_chosen": 1.8104581832885742, "log_odds_ratio": -0.19139598309993744, "logits/chosen": 271.8651428222656, "logits/rejected": 290.1709289550781, "logps/chosen": -0.652216911315918, "logps/rejected": -1.774309754371643, "loss": 0.8809, "nll_loss": 0.8805280923843384, "rewards/accuracies": 1.0, "rewards/chosen": -0.03261084109544754, "rewards/margins": 0.056104641407728195, "rewards/rejected": -0.08871549367904663, "step": 2505 }, { "epoch": 1.9936457505957108, "grad_norm": 32.018531799316406, "learning_rate": 9.98005980069749e-07, "log_odds_chosen": 0.9189640879631042, "log_odds_ratio": -0.353455513715744, "logits/chosen": 254.6789093017578, "logits/rejected": 327.7507629394531, "logps/chosen": -0.996709942817688, "logps/rejected": -1.6359264850616455, "loss": 0.8168, "nll_loss": 0.9266790151596069, "rewards/accuracies": 1.0, "rewards/chosen": -0.04983549565076828, "rewards/margins": 0.03196082264184952, "rewards/rejected": -0.0817963257431984, "step": 2510 }, { "epoch": 1.9976171564733916, "grad_norm": 38.965843200683594, "learning_rate": 9.97013432852472e-07, "log_odds_chosen": 3.410547971725464, "log_odds_ratio": -0.0805453434586525, "logits/chosen": 308.8994445800781, "logits/rejected": 340.4012145996094, "logps/chosen": -0.28854474425315857, "logps/rejected": -1.8908188343048096, "loss": 0.647, "nll_loss": 0.5763905048370361, "rewards/accuracies": 1.0, "rewards/chosen": -0.014427239075303078, "rewards/margins": 0.08011370152235031, "rewards/rejected": -0.09454093873500824, "step": 2515 }, { "epoch": 2.0, "eval_log_odds_chosen": 0.3416334092617035, "eval_log_odds_ratio": -0.6777786016464233, "eval_logits/chosen": 324.3109130859375, "eval_logits/rejected": 296.0398254394531, "eval_logps/chosen": -1.0582857131958008, "eval_logps/rejected": -1.2741806507110596, "eval_loss": 1.481645107269287, "eval_nll_loss": 1.4303914308547974, "eval_rewards/accuracies": 0.5899280309677124, "eval_rewards/chosen": -0.05291429162025452, "eval_rewards/margins": 0.010794746689498425, "eval_rewards/rejected": -0.06370903551578522, "eval_runtime": 91.3978, "eval_samples_per_second": 6.05, "eval_steps_per_second": 1.521, "step": 2518 }, { "epoch": 2.0015885623510723, "grad_norm": 23.278343200683594, "learning_rate": 9.960238411119948e-07, "log_odds_chosen": 2.39607572555542, "log_odds_ratio": -0.14948555827140808, "logits/chosen": 280.23614501953125, "logits/rejected": 288.0187683105469, "logps/chosen": -0.36037492752075195, "logps/rejected": -1.700818419456482, "loss": 0.6759, "nll_loss": 0.7635445594787598, "rewards/accuracies": 1.0, "rewards/chosen": -0.018018746748566628, "rewards/margins": 0.0670221820473671, "rewards/rejected": -0.08504092693328857, "step": 2520 }, { "epoch": 2.005559968228753, "grad_norm": 37.55564880371094, "learning_rate": 9.950371902099892e-07, "log_odds_chosen": 2.9879164695739746, "log_odds_ratio": -0.06899777054786682, "logits/chosen": 274.68768310546875, "logits/rejected": 277.73822021484375, "logps/chosen": -0.3065778315067291, "logps/rejected": -2.0628604888916016, "loss": 0.4182, "nll_loss": 0.36765578389167786, "rewards/accuracies": 1.0, "rewards/chosen": -0.015328889712691307, "rewards/margins": 0.08781413733959198, "rewards/rejected": -0.10314302146434784, "step": 2525 }, { "epoch": 2.009531374106434, "grad_norm": 31.862247467041016, "learning_rate": 9.9405346560943e-07, "log_odds_chosen": 3.7556872367858887, "log_odds_ratio": -0.05185776203870773, "logits/chosen": 290.3088684082031, "logits/rejected": 268.1896057128906, "logps/chosen": -0.27296245098114014, "logps/rejected": -2.3146934509277344, "loss": 0.4324, "nll_loss": 0.4719117283821106, "rewards/accuracies": 1.0, "rewards/chosen": -0.013648120686411858, "rewards/margins": 0.10208655893802643, "rewards/rejected": -0.11573468148708344, "step": 2530 }, { "epoch": 2.0135027799841145, "grad_norm": 37.43449020385742, "learning_rate": 9.930726528736969e-07, "log_odds_chosen": 3.0588698387145996, "log_odds_ratio": -0.06747926771640778, "logits/chosen": 290.01312255859375, "logits/rejected": 256.39080810546875, "logps/chosen": -0.24895134568214417, "logps/rejected": -1.8526256084442139, "loss": 0.4001, "nll_loss": 0.31891027092933655, "rewards/accuracies": 1.0, "rewards/chosen": -0.012447567656636238, "rewards/margins": 0.0801837220788002, "rewards/rejected": -0.0926312804222107, "step": 2535 }, { "epoch": 2.0174741858617953, "grad_norm": 23.769168853759766, "learning_rate": 9.920947376656814e-07, "log_odds_chosen": 3.4705471992492676, "log_odds_ratio": -0.08175542205572128, "logits/chosen": 295.07159423828125, "logits/rejected": 319.11773681640625, "logps/chosen": -0.13094039261341095, "logps/rejected": -1.3336389064788818, "loss": 0.3125, "nll_loss": 0.2496316134929657, "rewards/accuracies": 1.0, "rewards/chosen": -0.006547019816935062, "rewards/margins": 0.060134924948215485, "rewards/rejected": -0.06668195128440857, "step": 2540 }, { "epoch": 2.0214455917394756, "grad_norm": 23.81411361694336, "learning_rate": 9.911197057469108e-07, "log_odds_chosen": 3.4386093616485596, "log_odds_ratio": -0.09821267426013947, "logits/chosen": 251.1048126220703, "logits/rejected": 341.10272216796875, "logps/chosen": -0.2074267864227295, "logps/rejected": -2.2674612998962402, "loss": 0.3714, "nll_loss": 0.34040871262550354, "rewards/accuracies": 1.0, "rewards/chosen": -0.01037133764475584, "rewards/margins": 0.10300172865390778, "rewards/rejected": -0.11337306350469589, "step": 2545 }, { "epoch": 2.0254169976171563, "grad_norm": 29.296571731567383, "learning_rate": 9.901475429766744e-07, "log_odds_chosen": 2.699160575866699, "log_odds_ratio": -0.10057506710290909, "logits/chosen": 294.4793701171875, "logits/rejected": 312.450439453125, "logps/chosen": -0.2852121889591217, "logps/rejected": -1.5744909048080444, "loss": 0.4522, "nll_loss": 0.4200217127799988, "rewards/accuracies": 1.0, "rewards/chosen": -0.0142606096342206, "rewards/margins": 0.06446393579244614, "rewards/rejected": -0.07872454822063446, "step": 2550 }, { "epoch": 2.029388403494837, "grad_norm": 18.865428924560547, "learning_rate": 9.891782353111634e-07, "log_odds_chosen": 3.4034037590026855, "log_odds_ratio": -0.0371125191450119, "logits/chosen": 248.248779296875, "logits/rejected": 342.5372619628906, "logps/chosen": -0.13001587986946106, "logps/rejected": -1.6181061267852783, "loss": 0.2983, "nll_loss": 0.2760511040687561, "rewards/accuracies": 1.0, "rewards/chosen": -0.006500795483589172, "rewards/margins": 0.0744045078754425, "rewards/rejected": -0.08090530335903168, "step": 2555 }, { "epoch": 2.033359809372518, "grad_norm": 20.16379165649414, "learning_rate": 9.882117688026186e-07, "log_odds_chosen": 4.662590503692627, "log_odds_ratio": -0.04151756316423416, "logits/chosen": 209.595458984375, "logits/rejected": 306.2817077636719, "logps/chosen": -0.3000139594078064, "logps/rejected": -3.268468141555786, "loss": 0.2853, "nll_loss": 0.38440361618995667, "rewards/accuracies": 1.0, "rewards/chosen": -0.015000698156654835, "rewards/margins": 0.14842268824577332, "rewards/rejected": -0.16342338919639587, "step": 2560 }, { "epoch": 2.0373312152501986, "grad_norm": 21.97180938720703, "learning_rate": 9.872481295984873e-07, "log_odds_chosen": 4.333212375640869, "log_odds_ratio": -0.032386261969804764, "logits/chosen": 399.1155090332031, "logits/rejected": 308.8793640136719, "logps/chosen": -0.20656821131706238, "logps/rejected": -2.2423524856567383, "loss": 0.3985, "nll_loss": 0.34744900465011597, "rewards/accuracies": 1.0, "rewards/chosen": -0.010328411124646664, "rewards/margins": 0.1017892137169838, "rewards/rejected": -0.11211763322353363, "step": 2565 }, { "epoch": 2.0413026211278793, "grad_norm": 25.24811553955078, "learning_rate": 9.862873039405896e-07, "log_odds_chosen": 3.375474452972412, "log_odds_ratio": -0.038382574915885925, "logits/chosen": 264.7865905761719, "logits/rejected": 341.5740051269531, "logps/chosen": -0.17896270751953125, "logps/rejected": -1.8247349262237549, "loss": 0.3276, "nll_loss": 0.2309613674879074, "rewards/accuracies": 1.0, "rewards/chosen": -0.008948136121034622, "rewards/margins": 0.08228860795497894, "rewards/rejected": -0.09123674780130386, "step": 2570 }, { "epoch": 2.04527402700556, "grad_norm": 26.88926124572754, "learning_rate": 9.853292781642933e-07, "log_odds_chosen": 3.7541892528533936, "log_odds_ratio": -0.03717505931854248, "logits/chosen": 236.57650756835938, "logits/rejected": 321.8630676269531, "logps/chosen": -0.14312101900577545, "logps/rejected": -2.043936252593994, "loss": 0.3686, "nll_loss": 0.21125833690166473, "rewards/accuracies": 1.0, "rewards/chosen": -0.007156051695346832, "rewards/margins": 0.09504075348377228, "rewards/rejected": -0.10219679772853851, "step": 2575 }, { "epoch": 2.049245432883241, "grad_norm": 31.291349411010742, "learning_rate": 9.843740386976973e-07, "log_odds_chosen": 2.4147632122039795, "log_odds_ratio": -0.11424986273050308, "logits/chosen": 258.66961669921875, "logits/rejected": 286.5997009277344, "logps/chosen": -0.28673481941223145, "logps/rejected": -1.4659677743911743, "loss": 0.3331, "nll_loss": 0.41449370980262756, "rewards/accuracies": 1.0, "rewards/chosen": -0.014336742460727692, "rewards/margins": 0.0589616484940052, "rewards/rejected": -0.0732983872294426, "step": 2580 }, { "epoch": 2.0532168387609215, "grad_norm": 18.964326858520508, "learning_rate": 9.834215720608247e-07, "log_odds_chosen": 3.3965156078338623, "log_odds_ratio": -0.08347752690315247, "logits/chosen": 286.4773864746094, "logits/rejected": 292.11553955078125, "logps/chosen": -0.3802470564842224, "logps/rejected": -1.8391368389129639, "loss": 0.4585, "nll_loss": 0.4634222388267517, "rewards/accuracies": 1.0, "rewards/chosen": -0.01901235245168209, "rewards/margins": 0.07294448465108871, "rewards/rejected": -0.09195684641599655, "step": 2585 }, { "epoch": 2.0571882446386023, "grad_norm": 26.18207550048828, "learning_rate": 9.824718648648244e-07, "log_odds_chosen": 2.8892555236816406, "log_odds_ratio": -0.05735042691230774, "logits/chosen": 346.8835754394531, "logits/rejected": 247.84585571289062, "logps/chosen": -0.19978216290473938, "logps/rejected": -1.532153844833374, "loss": 0.3647, "nll_loss": 0.2897275388240814, "rewards/accuracies": 1.0, "rewards/chosen": -0.009989107958972454, "rewards/margins": 0.06661858409643173, "rewards/rejected": -0.07660768926143646, "step": 2590 }, { "epoch": 2.0611596505162826, "grad_norm": 35.08377456665039, "learning_rate": 9.81524903811178e-07, "log_odds_chosen": 4.44228982925415, "log_odds_ratio": -0.019657274708151817, "logits/chosen": 343.4027099609375, "logits/rejected": 304.6173095703125, "logps/chosen": -0.06594429910182953, "logps/rejected": -1.7075248956680298, "loss": 0.4044, "nll_loss": 0.29884591698646545, "rewards/accuracies": 1.0, "rewards/chosen": -0.003297214861959219, "rewards/margins": 0.08207902312278748, "rewards/rejected": -0.08537624031305313, "step": 2595 }, { "epoch": 2.0651310563939633, "grad_norm": 23.890464782714844, "learning_rate": 9.805806756909204e-07, "log_odds_chosen": 3.4282004833221436, "log_odds_ratio": -0.04306300729513168, "logits/chosen": 321.9460144042969, "logits/rejected": 254.0849609375, "logps/chosen": -0.10400563478469849, "logps/rejected": -1.4112660884857178, "loss": 0.3748, "nll_loss": 0.3669022023677826, "rewards/accuracies": 1.0, "rewards/chosen": -0.005200281739234924, "rewards/margins": 0.06536302715539932, "rewards/rejected": -0.07056330889463425, "step": 2600 }, { "epoch": 2.069102462271644, "grad_norm": 32.56575012207031, "learning_rate": 9.796391673838654e-07, "log_odds_chosen": 3.7493515014648438, "log_odds_ratio": -0.03417588397860527, "logits/chosen": 400.9460754394531, "logits/rejected": 280.41595458984375, "logps/chosen": -0.1472155898809433, "logps/rejected": -1.8289676904678345, "loss": 0.4138, "nll_loss": 0.4005819261074066, "rewards/accuracies": 1.0, "rewards/chosen": -0.00736077968031168, "rewards/margins": 0.08408761024475098, "rewards/rejected": -0.09144838154315948, "step": 2605 }, { "epoch": 2.073073868149325, "grad_norm": 20.866010665893555, "learning_rate": 9.787003658578392e-07, "log_odds_chosen": 4.661986351013184, "log_odds_ratio": -0.02654130384325981, "logits/chosen": 242.8224334716797, "logits/rejected": 205.6250457763672, "logps/chosen": -0.12456536293029785, "logps/rejected": -2.5623185634613037, "loss": 0.4167, "nll_loss": 0.4310877323150635, "rewards/accuracies": 1.0, "rewards/chosen": -0.006228268146514893, "rewards/margins": 0.12188766151666641, "rewards/rejected": -0.1281159222126007, "step": 2610 }, { "epoch": 2.0770452740270056, "grad_norm": 35.16082000732422, "learning_rate": 9.777642581679234e-07, "log_odds_chosen": 3.326671600341797, "log_odds_ratio": -0.05631138011813164, "logits/chosen": 319.496337890625, "logits/rejected": 213.53369140625, "logps/chosen": -0.1473226100206375, "logps/rejected": -1.5727765560150146, "loss": 0.4094, "nll_loss": 0.3072356581687927, "rewards/accuracies": 1.0, "rewards/chosen": -0.007366130594164133, "rewards/margins": 0.07127268612384796, "rewards/rejected": -0.07863881438970566, "step": 2615 }, { "epoch": 2.0810166799046863, "grad_norm": 31.852258682250977, "learning_rate": 9.768308314557044e-07, "log_odds_chosen": 2.9119925498962402, "log_odds_ratio": -0.08470732718706131, "logits/chosen": 298.0347595214844, "logits/rejected": 212.7636260986328, "logps/chosen": -0.21133773028850555, "logps/rejected": -1.562931776046753, "loss": 0.3144, "nll_loss": 0.3638852834701538, "rewards/accuracies": 1.0, "rewards/chosen": -0.010566887445747852, "rewards/margins": 0.0675797089934349, "rewards/rejected": -0.07814659178256989, "step": 2620 }, { "epoch": 2.084988085782367, "grad_norm": 26.966537475585938, "learning_rate": 9.759000729485334e-07, "log_odds_chosen": 2.8670029640197754, "log_odds_ratio": -0.08872531354427338, "logits/chosen": 207.214599609375, "logits/rejected": 298.1064758300781, "logps/chosen": -0.3392575979232788, "logps/rejected": -1.9147123098373413, "loss": 0.3919, "nll_loss": 0.4130278527736664, "rewards/accuracies": 1.0, "rewards/chosen": -0.01696288026869297, "rewards/margins": 0.07877273857593536, "rewards/rejected": -0.09573562443256378, "step": 2625 }, { "epoch": 2.088959491660048, "grad_norm": 28.27079963684082, "learning_rate": 9.749719699587899e-07, "log_odds_chosen": 2.507535457611084, "log_odds_ratio": -0.10697324573993683, "logits/chosen": 241.3874053955078, "logits/rejected": 232.6899871826172, "logps/chosen": -0.33466896414756775, "logps/rejected": -1.7164186239242554, "loss": 0.4482, "nll_loss": 0.4309083819389343, "rewards/accuracies": 1.0, "rewards/chosen": -0.016733448952436447, "rewards/margins": 0.06908748298883438, "rewards/rejected": -0.08582093566656113, "step": 2630 }, { "epoch": 2.0929308975377285, "grad_norm": 20.495689392089844, "learning_rate": 9.740465098831574e-07, "log_odds_chosen": 3.1356701850891113, "log_odds_ratio": -0.08666279166936874, "logits/chosen": 286.4986877441406, "logits/rejected": 300.3104553222656, "logps/chosen": -0.31942036747932434, "logps/rejected": -2.1801095008850098, "loss": 0.4267, "nll_loss": 0.4930347502231598, "rewards/accuracies": 1.0, "rewards/chosen": -0.015971018001437187, "rewards/margins": 0.09303445369005203, "rewards/rejected": -0.10900546610355377, "step": 2635 }, { "epoch": 2.096902303415409, "grad_norm": 28.138750076293945, "learning_rate": 9.731236802019038e-07, "log_odds_chosen": 5.115718841552734, "log_odds_ratio": -0.017810791730880737, "logits/chosen": 201.8949432373047, "logits/rejected": 369.33294677734375, "logps/chosen": -0.19664621353149414, "logps/rejected": -2.509212017059326, "loss": 0.296, "nll_loss": 0.30467721819877625, "rewards/accuracies": 1.0, "rewards/chosen": -0.009832310490310192, "rewards/margins": 0.11562828719615936, "rewards/rejected": -0.12546059489250183, "step": 2640 }, { "epoch": 2.1008737092930896, "grad_norm": 15.730376243591309, "learning_rate": 9.722034684781694e-07, "log_odds_chosen": 5.129834175109863, "log_odds_ratio": -0.038504939526319504, "logits/chosen": 264.3175354003906, "logits/rejected": 380.3741760253906, "logps/chosen": -0.20069575309753418, "logps/rejected": -2.866584062576294, "loss": 0.3021, "nll_loss": 0.3250558078289032, "rewards/accuracies": 1.0, "rewards/chosen": -0.010034788399934769, "rewards/margins": 0.13329441845417023, "rewards/rejected": -0.1433292031288147, "step": 2645 }, { "epoch": 2.1048451151707703, "grad_norm": 24.789533615112305, "learning_rate": 9.712858623572642e-07, "log_odds_chosen": 5.037966728210449, "log_odds_ratio": -0.017735213041305542, "logits/chosen": 287.1584167480469, "logits/rejected": 210.5322723388672, "logps/chosen": -0.06346876919269562, "logps/rejected": -1.834437608718872, "loss": 0.2681, "nll_loss": 0.2563795745372772, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031734383665025234, "rewards/margins": 0.0885484367609024, "rewards/rejected": -0.09172187745571136, "step": 2650 }, { "epoch": 2.108816521048451, "grad_norm": 33.17031478881836, "learning_rate": 9.7037084956597e-07, "log_odds_chosen": 3.0833840370178223, "log_odds_ratio": -0.07446109503507614, "logits/chosen": 244.01657104492188, "logits/rejected": 245.7843780517578, "logps/chosen": -0.27821382880210876, "logps/rejected": -1.9189503192901611, "loss": 0.441, "nll_loss": 0.4175170063972473, "rewards/accuracies": 1.0, "rewards/chosen": -0.013910690322518349, "rewards/margins": 0.0820368155837059, "rewards/rejected": -0.0959475189447403, "step": 2655 }, { "epoch": 2.112787926926132, "grad_norm": 28.597440719604492, "learning_rate": 9.694584179118515e-07, "log_odds_chosen": 3.850795030593872, "log_odds_ratio": -0.06737051904201508, "logits/chosen": 281.43511962890625, "logits/rejected": 314.6874084472656, "logps/chosen": -0.2290450781583786, "logps/rejected": -1.983991026878357, "loss": 0.47, "nll_loss": 0.38972610235214233, "rewards/accuracies": 1.0, "rewards/chosen": -0.01145225390791893, "rewards/margins": 0.08774729818105698, "rewards/rejected": -0.0991995632648468, "step": 2660 }, { "epoch": 2.1167593328038126, "grad_norm": 18.566396713256836, "learning_rate": 9.685485552825746e-07, "log_odds_chosen": 3.5323333740234375, "log_odds_ratio": -0.05120311304926872, "logits/chosen": 305.2156677246094, "logits/rejected": 283.5207824707031, "logps/chosen": -0.16874612867832184, "logps/rejected": -1.4323012828826904, "loss": 0.4172, "nll_loss": 0.3532731235027313, "rewards/accuracies": 1.0, "rewards/chosen": -0.008437307551503181, "rewards/margins": 0.06317774951457977, "rewards/rejected": -0.0716150552034378, "step": 2665 }, { "epoch": 2.1207307386814933, "grad_norm": 30.003000259399414, "learning_rate": 9.676412496452296e-07, "log_odds_chosen": 3.4242184162139893, "log_odds_ratio": -0.0653470903635025, "logits/chosen": 244.9000244140625, "logits/rejected": 314.68817138671875, "logps/chosen": -0.21268995106220245, "logps/rejected": -1.9504411220550537, "loss": 0.3682, "nll_loss": 0.31997257471084595, "rewards/accuracies": 1.0, "rewards/chosen": -0.010634497739374638, "rewards/margins": 0.08688755333423615, "rewards/rejected": -0.0975220575928688, "step": 2670 }, { "epoch": 2.124702144559174, "grad_norm": 25.37851905822754, "learning_rate": 9.667364890456637e-07, "log_odds_chosen": 3.1758782863616943, "log_odds_ratio": -0.04303749278187752, "logits/chosen": 300.64410400390625, "logits/rejected": 272.6418762207031, "logps/chosen": -0.23436030745506287, "logps/rejected": -1.9541059732437134, "loss": 0.3247, "nll_loss": 0.400359570980072, "rewards/accuracies": 1.0, "rewards/chosen": -0.011718015186488628, "rewards/margins": 0.08598728477954865, "rewards/rejected": -0.09770529717206955, "step": 2675 }, { "epoch": 2.128673550436855, "grad_norm": 17.71100616455078, "learning_rate": 9.658342616078198e-07, "log_odds_chosen": 3.650857448577881, "log_odds_ratio": -0.03125698119401932, "logits/chosen": 299.0740661621094, "logits/rejected": 227.38809204101562, "logps/chosen": -0.14033564925193787, "logps/rejected": -1.8093980550765991, "loss": 0.3149, "nll_loss": 0.3184364140033722, "rewards/accuracies": 1.0, "rewards/chosen": -0.0070167831145226955, "rewards/margins": 0.08345311135053635, "rewards/rejected": -0.09046989679336548, "step": 2680 }, { "epoch": 2.1326449563145355, "grad_norm": 21.77927589416504, "learning_rate": 9.649345555330812e-07, "log_odds_chosen": 3.3544082641601562, "log_odds_ratio": -0.136978879570961, "logits/chosen": 322.86651611328125, "logits/rejected": 292.94366455078125, "logps/chosen": -0.3305078148841858, "logps/rejected": -1.6913312673568726, "loss": 0.341, "nll_loss": 0.39123356342315674, "rewards/accuracies": 1.0, "rewards/chosen": -0.01652539148926735, "rewards/margins": 0.06804117560386658, "rewards/rejected": -0.08456657081842422, "step": 2685 }, { "epoch": 2.136616362192216, "grad_norm": 23.627885818481445, "learning_rate": 9.640373590996239e-07, "log_odds_chosen": 3.0574584007263184, "log_odds_ratio": -0.053799599409103394, "logits/chosen": 256.87298583984375, "logits/rejected": 341.5148010253906, "logps/chosen": -0.2786779999732971, "logps/rejected": -1.8729734420776367, "loss": 0.3642, "nll_loss": 0.29080718755722046, "rewards/accuracies": 1.0, "rewards/chosen": -0.013933899812400341, "rewards/margins": 0.07971477508544922, "rewards/rejected": -0.09364867955446243, "step": 2690 }, { "epoch": 2.1405877680698966, "grad_norm": 20.16524887084961, "learning_rate": 9.631426606617747e-07, "log_odds_chosen": 3.7517459392547607, "log_odds_ratio": -0.05596904084086418, "logits/chosen": 265.704833984375, "logits/rejected": 255.4348602294922, "logps/chosen": -0.16427966952323914, "logps/rejected": -1.8877394199371338, "loss": 0.3792, "nll_loss": 0.5157122611999512, "rewards/accuracies": 1.0, "rewards/chosen": -0.008213983848690987, "rewards/margins": 0.08617298305034637, "rewards/rejected": -0.09438695758581161, "step": 2695 }, { "epoch": 2.1445591739475773, "grad_norm": 29.250465393066406, "learning_rate": 9.622504486493764e-07, "log_odds_chosen": 4.140095233917236, "log_odds_ratio": -0.09328913688659668, "logits/chosen": 272.82281494140625, "logits/rejected": 249.73330688476562, "logps/chosen": -0.16515249013900757, "logps/rejected": -2.2072982788085938, "loss": 0.3285, "nll_loss": 0.367251455783844, "rewards/accuracies": 1.0, "rewards/chosen": -0.008257624693214893, "rewards/margins": 0.10210728645324707, "rewards/rejected": -0.1103648990392685, "step": 2700 }, { "epoch": 2.148530579825258, "grad_norm": 24.482175827026367, "learning_rate": 9.613607115671605e-07, "log_odds_chosen": 2.847738742828369, "log_odds_ratio": -0.07871608436107635, "logits/chosen": 327.7214660644531, "logits/rejected": 281.2625732421875, "logps/chosen": -0.24785375595092773, "logps/rejected": -1.403300166130066, "loss": 0.337, "nll_loss": 0.5055629014968872, "rewards/accuracies": 1.0, "rewards/chosen": -0.012392686679959297, "rewards/margins": 0.057772327214479446, "rewards/rejected": -0.0701650083065033, "step": 2705 }, { "epoch": 2.152501985702939, "grad_norm": 22.381025314331055, "learning_rate": 9.604734379941232e-07, "log_odds_chosen": 3.220674991607666, "log_odds_ratio": -0.04303758218884468, "logits/chosen": 351.3204040527344, "logits/rejected": 287.9315185546875, "logps/chosen": -0.1675499528646469, "logps/rejected": -1.6308742761611938, "loss": 0.3403, "nll_loss": 0.2904892861843109, "rewards/accuracies": 1.0, "rewards/chosen": -0.008377498015761375, "rewards/margins": 0.07316621392965317, "rewards/rejected": -0.08154371380805969, "step": 2710 }, { "epoch": 2.1564733915806196, "grad_norm": 23.287328720092773, "learning_rate": 9.595886165829119e-07, "log_odds_chosen": 3.32081937789917, "log_odds_ratio": -0.06217331811785698, "logits/chosen": 237.415283203125, "logits/rejected": 217.04019165039062, "logps/chosen": -0.8307470083236694, "logps/rejected": -2.7454867362976074, "loss": 0.3688, "nll_loss": 0.6546460390090942, "rewards/accuracies": 1.0, "rewards/chosen": -0.04153735563158989, "rewards/margins": 0.09573698043823242, "rewards/rejected": -0.1372743397951126, "step": 2715 }, { "epoch": 2.1604447974583003, "grad_norm": 22.75811767578125, "learning_rate": 9.58706236059213e-07, "log_odds_chosen": 4.277588367462158, "log_odds_ratio": -0.02797316014766693, "logits/chosen": 256.08526611328125, "logits/rejected": 295.3904113769531, "logps/chosen": -0.23130519688129425, "logps/rejected": -2.8242764472961426, "loss": 0.3771, "nll_loss": 0.3463202118873596, "rewards/accuracies": 1.0, "rewards/chosen": -0.011565258726477623, "rewards/margins": 0.12964856624603271, "rewards/rejected": -0.14121383428573608, "step": 2720 }, { "epoch": 2.164416203335981, "grad_norm": 19.948747634887695, "learning_rate": 9.578262852211515e-07, "log_odds_chosen": 3.2157044410705566, "log_odds_ratio": -0.06531454622745514, "logits/chosen": 269.98675537109375, "logits/rejected": 270.8153381347656, "logps/chosen": -0.13189613819122314, "logps/rejected": -1.1261787414550781, "loss": 0.3333, "nll_loss": 0.24800708889961243, "rewards/accuracies": 1.0, "rewards/chosen": -0.006594807840883732, "rewards/margins": 0.04971412941813469, "rewards/rejected": -0.05630893632769585, "step": 2725 }, { "epoch": 2.168387609213662, "grad_norm": 28.03223991394043, "learning_rate": 9.56948752938691e-07, "log_odds_chosen": 3.26519775390625, "log_odds_ratio": -0.049314845353364944, "logits/chosen": 303.38262939453125, "logits/rejected": 347.95281982421875, "logps/chosen": -0.2853826880455017, "logps/rejected": -2.1887669563293457, "loss": 0.3569, "nll_loss": 0.35581302642822266, "rewards/accuracies": 1.0, "rewards/chosen": -0.014269135892391205, "rewards/margins": 0.09516922384500504, "rewards/rejected": -0.10943835973739624, "step": 2730 }, { "epoch": 2.1723590150913425, "grad_norm": 54.01997756958008, "learning_rate": 9.560736281530443e-07, "log_odds_chosen": 3.5612130165100098, "log_odds_ratio": -0.11573145538568497, "logits/chosen": 331.6031494140625, "logits/rejected": 283.62493896484375, "logps/chosen": -0.2640966773033142, "logps/rejected": -1.7330589294433594, "loss": 0.4621, "nll_loss": 0.3747265934944153, "rewards/accuracies": 1.0, "rewards/chosen": -0.01320483349263668, "rewards/margins": 0.07344810664653778, "rewards/rejected": -0.08665294945240021, "step": 2735 }, { "epoch": 2.176330420969023, "grad_norm": 40.430973052978516, "learning_rate": 9.552008998760876e-07, "log_odds_chosen": 4.288855075836182, "log_odds_ratio": -0.10139509290456772, "logits/chosen": 347.55853271484375, "logits/rejected": 246.91439819335938, "logps/chosen": -0.22820158302783966, "logps/rejected": -1.6849181652069092, "loss": 0.4092, "nll_loss": 0.33607205748558044, "rewards/accuracies": 1.0, "rewards/chosen": -0.011410078033804893, "rewards/margins": 0.07283583283424377, "rewards/rejected": -0.08424590528011322, "step": 2740 }, { "epoch": 2.1803018268467036, "grad_norm": 28.763097763061523, "learning_rate": 9.543305571897804e-07, "log_odds_chosen": 3.6398234367370605, "log_odds_ratio": -0.052009087055921555, "logits/chosen": 217.4949493408203, "logits/rejected": 313.9044189453125, "logps/chosen": -0.18499043583869934, "logps/rejected": -2.0533955097198486, "loss": 0.432, "nll_loss": 0.33994507789611816, "rewards/accuracies": 1.0, "rewards/chosen": -0.009249521419405937, "rewards/margins": 0.09342024475336075, "rewards/rejected": -0.10266977548599243, "step": 2745 }, { "epoch": 2.1842732327243843, "grad_norm": 18.268781661987305, "learning_rate": 9.534625892455924e-07, "log_odds_chosen": 3.4035229682922363, "log_odds_ratio": -0.08708290755748749, "logits/chosen": 272.8382263183594, "logits/rejected": 301.1288757324219, "logps/chosen": -0.27422863245010376, "logps/rejected": -1.7393039464950562, "loss": 0.3134, "nll_loss": 0.3447985053062439, "rewards/accuracies": 1.0, "rewards/chosen": -0.013711432926356792, "rewards/margins": 0.07325376570224762, "rewards/rejected": -0.08696520328521729, "step": 2750 }, { "epoch": 2.188244638602065, "grad_norm": 35.49375915527344, "learning_rate": 9.525969852639353e-07, "log_odds_chosen": 3.902122974395752, "log_odds_ratio": -0.03137136623263359, "logits/chosen": 257.04730224609375, "logits/rejected": 268.3082580566406, "logps/chosen": -0.31830140948295593, "logps/rejected": -2.567204475402832, "loss": 0.3315, "nll_loss": 0.2598883807659149, "rewards/accuracies": 1.0, "rewards/chosen": -0.015915069729089737, "rewards/margins": 0.1124451532959938, "rewards/rejected": -0.12836022675037384, "step": 2755 }, { "epoch": 2.192216044479746, "grad_norm": 21.208223342895508, "learning_rate": 9.517337345336012e-07, "log_odds_chosen": 3.7665162086486816, "log_odds_ratio": -0.027825195342302322, "logits/chosen": 232.5461883544922, "logits/rejected": 363.5819396972656, "logps/chosen": -0.20895162224769592, "logps/rejected": -2.4105193614959717, "loss": 0.2947, "nll_loss": 0.277402400970459, "rewards/accuracies": 1.0, "rewards/chosen": -0.010447581298649311, "rewards/margins": 0.11007839441299438, "rewards/rejected": -0.12052597850561142, "step": 2760 }, { "epoch": 2.1961874503574266, "grad_norm": 25.69028663635254, "learning_rate": 9.508728264112049e-07, "log_odds_chosen": 3.399308443069458, "log_odds_ratio": -0.04535920172929764, "logits/chosen": 281.50384521484375, "logits/rejected": 310.96966552734375, "logps/chosen": -0.16359075903892517, "logps/rejected": -1.8376325368881226, "loss": 0.3831, "nll_loss": 0.38407355546951294, "rewards/accuracies": 1.0, "rewards/chosen": -0.008179538883268833, "rewards/margins": 0.08370209485292435, "rewards/rejected": -0.0918816328048706, "step": 2765 }, { "epoch": 2.2001588562351073, "grad_norm": 30.133134841918945, "learning_rate": 9.50014250320633e-07, "log_odds_chosen": 4.295925140380859, "log_odds_ratio": -0.01640843227505684, "logits/chosen": 223.8397979736328, "logits/rejected": 306.9639587402344, "logps/chosen": -0.13873472809791565, "logps/rejected": -2.4861035346984863, "loss": 0.4461, "nll_loss": 0.37577375769615173, "rewards/accuracies": 1.0, "rewards/chosen": -0.006936737336218357, "rewards/margins": 0.11736844480037689, "rewards/rejected": -0.12430517375469208, "step": 2770 }, { "epoch": 2.204130262112788, "grad_norm": 22.508953094482422, "learning_rate": 9.49157995752499e-07, "log_odds_chosen": 3.5316948890686035, "log_odds_ratio": -0.0377383753657341, "logits/chosen": 241.3332061767578, "logits/rejected": 260.64349365234375, "logps/chosen": -0.23985370993614197, "logps/rejected": -2.1687886714935303, "loss": 0.4267, "nll_loss": 0.6603747606277466, "rewards/accuracies": 1.0, "rewards/chosen": -0.011992687359452248, "rewards/margins": 0.09644675254821777, "rewards/rejected": -0.10843944549560547, "step": 2775 }, { "epoch": 2.208101667990469, "grad_norm": 26.58831787109375, "learning_rate": 9.483040522636021e-07, "log_odds_chosen": 3.693864345550537, "log_odds_ratio": -0.06571256369352341, "logits/chosen": 263.2666015625, "logits/rejected": 331.05438232421875, "logps/chosen": -0.26286572217941284, "logps/rejected": -1.72158944606781, "loss": 0.423, "nll_loss": 0.45609745383262634, "rewards/accuracies": 1.0, "rewards/chosen": -0.013143287971615791, "rewards/margins": 0.07293619215488434, "rewards/rejected": -0.08607947826385498, "step": 2780 }, { "epoch": 2.212073073868149, "grad_norm": 21.288402557373047, "learning_rate": 9.474524094763924e-07, "log_odds_chosen": 2.505340576171875, "log_odds_ratio": -0.0928802341222763, "logits/chosen": 236.01602172851562, "logits/rejected": 320.7162170410156, "logps/chosen": -0.3501175045967102, "logps/rejected": -1.6990416049957275, "loss": 0.3701, "nll_loss": 0.4051796495914459, "rewards/accuracies": 1.0, "rewards/chosen": -0.01750587485730648, "rewards/margins": 0.06744619458913803, "rewards/rejected": -0.08495207130908966, "step": 2785 }, { "epoch": 2.21604447974583, "grad_norm": 52.22238540649414, "learning_rate": 9.466030570784414e-07, "log_odds_chosen": 3.867032289505005, "log_odds_ratio": -0.06416045874357224, "logits/chosen": 312.247802734375, "logits/rejected": 239.3249969482422, "logps/chosen": -0.25704219937324524, "logps/rejected": -2.131070852279663, "loss": 0.3458, "nll_loss": 0.48503756523132324, "rewards/accuracies": 1.0, "rewards/chosen": -0.012852109968662262, "rewards/margins": 0.09370143711566925, "rewards/rejected": -0.10655353963375092, "step": 2790 }, { "epoch": 2.2200158856235106, "grad_norm": 19.706954956054688, "learning_rate": 9.45755984821918e-07, "log_odds_chosen": 3.0532429218292236, "log_odds_ratio": -0.07293901592493057, "logits/chosen": 255.22787475585938, "logits/rejected": 326.5018615722656, "logps/chosen": -0.3715572953224182, "logps/rejected": -2.3187978267669678, "loss": 0.3952, "nll_loss": 0.3520717918872833, "rewards/accuracies": 1.0, "rewards/chosen": -0.01857786439359188, "rewards/margins": 0.09736202657222748, "rewards/rejected": -0.11593989282846451, "step": 2795 }, { "epoch": 2.2239872915011913, "grad_norm": 30.489246368408203, "learning_rate": 9.449111825230681e-07, "log_odds_chosen": 2.9744670391082764, "log_odds_ratio": -0.06568741798400879, "logits/chosen": 253.3533935546875, "logits/rejected": 280.07855224609375, "logps/chosen": -0.14280185103416443, "logps/rejected": -1.3558251857757568, "loss": 0.3892, "nll_loss": 0.528894305229187, "rewards/accuracies": 1.0, "rewards/chosen": -0.0071400925517082214, "rewards/margins": 0.06065117195248604, "rewards/rejected": -0.06779126077890396, "step": 2800 }, { "epoch": 2.227958697378872, "grad_norm": 23.173385620117188, "learning_rate": 9.440686400617012e-07, "log_odds_chosen": 3.465458393096924, "log_odds_ratio": -0.07254897803068161, "logits/chosen": 221.92202758789062, "logits/rejected": 349.90740966796875, "logps/chosen": -0.15940417349338531, "logps/rejected": -1.8571916818618774, "loss": 0.4338, "nll_loss": 0.4682881236076355, "rewards/accuracies": 1.0, "rewards/chosen": -0.007970208302140236, "rewards/margins": 0.08488937467336655, "rewards/rejected": -0.09285958111286163, "step": 2805 }, { "epoch": 2.231930103256553, "grad_norm": 20.893659591674805, "learning_rate": 9.432283473806812e-07, "log_odds_chosen": 3.7795605659484863, "log_odds_ratio": -0.025308597832918167, "logits/chosen": 205.07113647460938, "logits/rejected": 391.563232421875, "logps/chosen": -0.28899306058883667, "logps/rejected": -2.687159538269043, "loss": 0.3758, "nll_loss": 0.44255542755126953, "rewards/accuracies": 1.0, "rewards/chosen": -0.0144496513530612, "rewards/margins": 0.11990831792354584, "rewards/rejected": -0.1343579739332199, "step": 2810 }, { "epoch": 2.2359015091342336, "grad_norm": 68.56454467773438, "learning_rate": 9.423902944854219e-07, "log_odds_chosen": 3.024015426635742, "log_odds_ratio": -0.06248214840888977, "logits/chosen": 309.1986389160156, "logits/rejected": 306.1770935058594, "logps/chosen": -0.16128352284431458, "logps/rejected": -1.5402535200119019, "loss": 0.3595, "nll_loss": 0.3539174199104309, "rewards/accuracies": 1.0, "rewards/chosen": -0.008064175955951214, "rewards/margins": 0.06894850730895996, "rewards/rejected": -0.07701267302036285, "step": 2815 }, { "epoch": 2.2398729150119143, "grad_norm": 46.14353942871094, "learning_rate": 9.415544714433869e-07, "log_odds_chosen": 3.5264244079589844, "log_odds_ratio": -0.056026797741651535, "logits/chosen": 401.4872131347656, "logits/rejected": 307.99200439453125, "logps/chosen": -0.16968777775764465, "logps/rejected": -1.8524665832519531, "loss": 0.357, "nll_loss": 0.4214702248573303, "rewards/accuracies": 1.0, "rewards/chosen": -0.008484388701617718, "rewards/margins": 0.08413894474506378, "rewards/rejected": -0.09262333810329437, "step": 2820 }, { "epoch": 2.243844320889595, "grad_norm": 22.524415969848633, "learning_rate": 9.407208683835973e-07, "log_odds_chosen": 3.638291120529175, "log_odds_ratio": -0.035198599100112915, "logits/chosen": 315.48431396484375, "logits/rejected": 204.6620330810547, "logps/chosen": -0.2649177312850952, "logps/rejected": -2.029332399368286, "loss": 0.4461, "nll_loss": 0.29909247159957886, "rewards/accuracies": 1.0, "rewards/chosen": -0.01324588805437088, "rewards/margins": 0.0882207378745079, "rewards/rejected": -0.10146661847829819, "step": 2825 }, { "epoch": 2.247815726767276, "grad_norm": 18.5728702545166, "learning_rate": 9.398894754961406e-07, "log_odds_chosen": 4.158797264099121, "log_odds_ratio": -0.06240306422114372, "logits/chosen": 255.5278778076172, "logits/rejected": 277.1158142089844, "logps/chosen": -0.11548423767089844, "logps/rejected": -2.0450329780578613, "loss": 0.3872, "nll_loss": 0.26154083013534546, "rewards/accuracies": 1.0, "rewards/chosen": -0.005774212069809437, "rewards/margins": 0.0964774340391159, "rewards/rejected": -0.10225164890289307, "step": 2830 }, { "epoch": 2.2517871326449566, "grad_norm": 27.462656021118164, "learning_rate": 9.390602830316851e-07, "log_odds_chosen": 2.5769989490509033, "log_odds_ratio": -0.10126359760761261, "logits/chosen": 256.6750793457031, "logits/rejected": 298.19500732421875, "logps/chosen": -0.2556411921977997, "logps/rejected": -1.4168459177017212, "loss": 0.3646, "nll_loss": 0.4656898081302643, "rewards/accuracies": 1.0, "rewards/chosen": -0.012782061472535133, "rewards/margins": 0.058060236275196075, "rewards/rejected": -0.07084228843450546, "step": 2835 }, { "epoch": 2.255758538522637, "grad_norm": 24.728313446044922, "learning_rate": 9.38233281301002e-07, "log_odds_chosen": 4.362663269042969, "log_odds_ratio": -0.0349862240254879, "logits/chosen": 265.12548828125, "logits/rejected": 215.07431030273438, "logps/chosen": -0.20180007815361023, "logps/rejected": -2.3115715980529785, "loss": 0.3206, "nll_loss": 0.2718152701854706, "rewards/accuracies": 1.0, "rewards/chosen": -0.010090004652738571, "rewards/margins": 0.10548856109380722, "rewards/rejected": -0.11557857692241669, "step": 2840 }, { "epoch": 2.2597299444003176, "grad_norm": 30.671133041381836, "learning_rate": 9.374084606744878e-07, "log_odds_chosen": 3.672755002975464, "log_odds_ratio": -0.038879863917827606, "logits/chosen": 335.2356872558594, "logits/rejected": 203.84803771972656, "logps/chosen": -0.17998583614826202, "logps/rejected": -1.6547071933746338, "loss": 0.4663, "nll_loss": 0.6368466019630432, "rewards/accuracies": 1.0, "rewards/chosen": -0.008999291807413101, "rewards/margins": 0.07373605668544769, "rewards/rejected": -0.08273535966873169, "step": 2845 }, { "epoch": 2.2637013502779983, "grad_norm": 28.535974502563477, "learning_rate": 9.365858115816941e-07, "log_odds_chosen": 1.8314237594604492, "log_odds_ratio": -0.19887328147888184, "logits/chosen": 243.3570556640625, "logits/rejected": 255.3211669921875, "logps/chosen": -0.4105517268180847, "logps/rejected": -1.2685024738311768, "loss": 0.5051, "nll_loss": 0.5791507363319397, "rewards/accuracies": 1.0, "rewards/chosen": -0.020527586340904236, "rewards/margins": 0.0428975448012352, "rewards/rejected": -0.06342513859272003, "step": 2850 }, { "epoch": 2.267672756155679, "grad_norm": 35.174720764160156, "learning_rate": 9.357653245108616e-07, "log_odds_chosen": 2.9875216484069824, "log_odds_ratio": -0.0903446301817894, "logits/chosen": 230.1251220703125, "logits/rejected": 300.3447265625, "logps/chosen": -0.24468517303466797, "logps/rejected": -1.8320715427398682, "loss": 0.4694, "nll_loss": 0.3818613588809967, "rewards/accuracies": 1.0, "rewards/chosen": -0.012234258465468884, "rewards/margins": 0.07936931401491165, "rewards/rejected": -0.09160356968641281, "step": 2855 }, { "epoch": 2.27164416203336, "grad_norm": 19.731929779052734, "learning_rate": 9.349469900084572e-07, "log_odds_chosen": 3.242330551147461, "log_odds_ratio": -0.0612449049949646, "logits/chosen": 251.2331085205078, "logits/rejected": 286.5898132324219, "logps/chosen": -0.2064785659313202, "logps/rejected": -1.7228063344955444, "loss": 0.348, "nll_loss": 0.2930075228214264, "rewards/accuracies": 1.0, "rewards/chosen": -0.01032392866909504, "rewards/margins": 0.07581637799739838, "rewards/rejected": -0.08614031970500946, "step": 2860 }, { "epoch": 2.2756155679110406, "grad_norm": 60.175567626953125, "learning_rate": 9.341307986787181e-07, "log_odds_chosen": 3.061659336090088, "log_odds_ratio": -0.07501424849033356, "logits/chosen": 239.71029663085938, "logits/rejected": 295.3159484863281, "logps/chosen": -0.23593612015247345, "logps/rejected": -1.6371171474456787, "loss": 0.3748, "nll_loss": 0.4964061677455902, "rewards/accuracies": 1.0, "rewards/chosen": -0.011796806938946247, "rewards/margins": 0.07005905359983444, "rewards/rejected": -0.08185587078332901, "step": 2865 }, { "epoch": 2.2795869737887213, "grad_norm": 25.93592643737793, "learning_rate": 9.333167411831968e-07, "log_odds_chosen": 3.542975902557373, "log_odds_ratio": -0.06436657905578613, "logits/chosen": 238.16976928710938, "logits/rejected": 287.948486328125, "logps/chosen": -0.2085086554288864, "logps/rejected": -1.9400198459625244, "loss": 0.3322, "nll_loss": 0.2988547682762146, "rewards/accuracies": 1.0, "rewards/chosen": -0.010425432585179806, "rewards/margins": 0.08657556772232056, "rewards/rejected": -0.09700099378824234, "step": 2870 }, { "epoch": 2.283558379666402, "grad_norm": 20.469728469848633, "learning_rate": 9.325048082403139e-07, "log_odds_chosen": 3.4106850624084473, "log_odds_ratio": -0.11686725914478302, "logits/chosen": 304.0782165527344, "logits/rejected": 226.82803344726562, "logps/chosen": -0.13091017305850983, "logps/rejected": -1.417621374130249, "loss": 0.3601, "nll_loss": 0.33367031812667847, "rewards/accuracies": 1.0, "rewards/chosen": -0.006545508746057749, "rewards/margins": 0.06433556228876114, "rewards/rejected": -0.07088107615709305, "step": 2875 }, { "epoch": 2.2875297855440824, "grad_norm": 339.516357421875, "learning_rate": 9.316949906249125e-07, "log_odds_chosen": 2.911581039428711, "log_odds_ratio": -0.05822296813130379, "logits/chosen": 272.34173583984375, "logits/rejected": 280.1712646484375, "logps/chosen": -0.19476798176765442, "logps/rejected": -1.585203766822815, "loss": 0.3893, "nll_loss": 0.2995101511478424, "rewards/accuracies": 1.0, "rewards/chosen": -0.009738398715853691, "rewards/margins": 0.06952179223299026, "rewards/rejected": -0.07926018536090851, "step": 2880 }, { "epoch": 2.291501191421763, "grad_norm": 22.142641067504883, "learning_rate": 9.308872791678188e-07, "log_odds_chosen": 3.053778886795044, "log_odds_ratio": -0.12560723721981049, "logits/chosen": 242.20468139648438, "logits/rejected": 357.9628601074219, "logps/chosen": -0.22581541538238525, "logps/rejected": -1.8815644979476929, "loss": 0.5007, "nll_loss": 0.4521845877170563, "rewards/accuracies": 1.0, "rewards/chosen": -0.011290770955383778, "rewards/margins": 0.08278746157884598, "rewards/rejected": -0.09407822787761688, "step": 2885 }, { "epoch": 2.295472597299444, "grad_norm": 24.857589721679688, "learning_rate": 9.300816647554058e-07, "log_odds_chosen": 2.7269444465637207, "log_odds_ratio": -0.10888878256082535, "logits/chosen": 352.1503601074219, "logits/rejected": 283.04217529296875, "logps/chosen": -0.27864494919776917, "logps/rejected": -1.7774139642715454, "loss": 0.4257, "nll_loss": 0.30876272916793823, "rewards/accuracies": 1.0, "rewards/chosen": -0.013932247646152973, "rewards/margins": 0.07493844628334045, "rewards/rejected": -0.08887068927288055, "step": 2890 }, { "epoch": 2.2994440031771246, "grad_norm": 33.68102264404297, "learning_rate": 9.292781383291611e-07, "log_odds_chosen": 4.8815598487854, "log_odds_ratio": -0.011089108884334564, "logits/chosen": 285.96978759765625, "logits/rejected": 205.0143585205078, "logps/chosen": -0.08274070173501968, "logps/rejected": -2.379570960998535, "loss": 0.3248, "nll_loss": 0.22218620777130127, "rewards/accuracies": 1.0, "rewards/chosen": -0.004137034527957439, "rewards/margins": 0.1148415058851242, "rewards/rejected": -0.11897854506969452, "step": 2895 }, { "epoch": 2.3034154090548054, "grad_norm": 27.541589736938477, "learning_rate": 9.284766908852594e-07, "log_odds_chosen": 3.0898163318634033, "log_odds_ratio": -0.06252577155828476, "logits/chosen": 233.49624633789062, "logits/rejected": 270.4180908203125, "logps/chosen": -0.22724232077598572, "logps/rejected": -1.7946640253067017, "loss": 0.3331, "nll_loss": 0.29226577281951904, "rewards/accuracies": 1.0, "rewards/chosen": -0.011362116783857346, "rewards/margins": 0.0783710852265358, "rewards/rejected": -0.08973319828510284, "step": 2900 }, { "epoch": 2.307386814932486, "grad_norm": 22.57000160217285, "learning_rate": 9.276773134741389e-07, "log_odds_chosen": 4.6132612228393555, "log_odds_ratio": -0.034605059772729874, "logits/chosen": 214.30801391601562, "logits/rejected": 292.3231506347656, "logps/chosen": -0.1400974690914154, "logps/rejected": -2.607959032058716, "loss": 0.3647, "nll_loss": 0.3344481289386749, "rewards/accuracies": 1.0, "rewards/chosen": -0.007004873361438513, "rewards/margins": 0.12339308112859726, "rewards/rejected": -0.1303979456424713, "step": 2905 }, { "epoch": 2.311358220810167, "grad_norm": 21.834867477416992, "learning_rate": 9.26879997200081e-07, "log_odds_chosen": 2.0610976219177246, "log_odds_ratio": -0.1374814808368683, "logits/chosen": 238.64797973632812, "logits/rejected": 196.60305786132812, "logps/chosen": -0.36072736978530884, "logps/rejected": -1.38467276096344, "loss": 0.3675, "nll_loss": 0.45493263006210327, "rewards/accuracies": 1.0, "rewards/chosen": -0.0180363692343235, "rewards/margins": 0.051197268068790436, "rewards/rejected": -0.06923364102840424, "step": 2910 }, { "epoch": 2.3153296266878476, "grad_norm": 16.910837173461914, "learning_rate": 9.260847332207952e-07, "log_odds_chosen": 3.3941338062286377, "log_odds_ratio": -0.07332415133714676, "logits/chosen": 348.9075012207031, "logits/rejected": 315.0403747558594, "logps/chosen": -0.12917575240135193, "logps/rejected": -1.595969557762146, "loss": 0.3124, "nll_loss": 0.26820236444473267, "rewards/accuracies": 1.0, "rewards/chosen": -0.006458788178861141, "rewards/margins": 0.07333968579769135, "rewards/rejected": -0.07979847490787506, "step": 2915 }, { "epoch": 2.3193010325655283, "grad_norm": 28.343481063842773, "learning_rate": 9.252915127470066e-07, "log_odds_chosen": 2.11761474609375, "log_odds_ratio": -0.12370810657739639, "logits/chosen": 321.7706298828125, "logits/rejected": 285.8647155761719, "logps/chosen": -0.24254365265369415, "logps/rejected": -1.139145851135254, "loss": 0.3549, "nll_loss": 0.40812140703201294, "rewards/accuracies": 1.0, "rewards/chosen": -0.012127181515097618, "rewards/margins": 0.044830113649368286, "rewards/rejected": -0.056957293301820755, "step": 2920 }, { "epoch": 2.323272438443209, "grad_norm": 29.542682647705078, "learning_rate": 9.245003270420485e-07, "log_odds_chosen": 3.9975972175598145, "log_odds_ratio": -0.058384932577610016, "logits/chosen": 238.3087615966797, "logits/rejected": 257.13079833984375, "logps/chosen": -0.15752217173576355, "logps/rejected": -1.8759005069732666, "loss": 0.3918, "nll_loss": 0.3589743673801422, "rewards/accuracies": 1.0, "rewards/chosen": -0.007876109331846237, "rewards/margins": 0.08591891080141068, "rewards/rejected": -0.09379501640796661, "step": 2925 }, { "epoch": 2.32724384432089, "grad_norm": 21.245723724365234, "learning_rate": 9.23711167421458e-07, "log_odds_chosen": 2.95237398147583, "log_odds_ratio": -0.10379862785339355, "logits/chosen": 197.9760284423828, "logits/rejected": 312.78790283203125, "logps/chosen": -0.3067656457424164, "logps/rejected": -1.6372512578964233, "loss": 0.3624, "nll_loss": 0.3775458037853241, "rewards/accuracies": 1.0, "rewards/chosen": -0.015338283963501453, "rewards/margins": 0.06652427464723587, "rewards/rejected": -0.08186255395412445, "step": 2930 }, { "epoch": 2.33121525019857, "grad_norm": 19.79974937438965, "learning_rate": 9.229240252525751e-07, "log_odds_chosen": 2.867802381515503, "log_odds_ratio": -0.07918517291545868, "logits/chosen": 221.4095458984375, "logits/rejected": 272.38873291015625, "logps/chosen": -0.3694096505641937, "logps/rejected": -1.5930800437927246, "loss": 0.3878, "nll_loss": 0.5072149038314819, "rewards/accuracies": 1.0, "rewards/chosen": -0.018470484763383865, "rewards/margins": 0.061183519661426544, "rewards/rejected": -0.07965400815010071, "step": 2935 }, { "epoch": 2.335186656076251, "grad_norm": 19.503841400146484, "learning_rate": 9.221388919541469e-07, "log_odds_chosen": 4.028315544128418, "log_odds_ratio": -0.03385575860738754, "logits/chosen": 184.2939453125, "logits/rejected": 463.99481201171875, "logps/chosen": -0.1569368541240692, "logps/rejected": -2.1916651725769043, "loss": 0.3327, "nll_loss": 0.19962573051452637, "rewards/accuracies": 1.0, "rewards/chosen": -0.007846842519938946, "rewards/margins": 0.1017364114522934, "rewards/rejected": -0.10958325862884521, "step": 2940 }, { "epoch": 2.3391580619539316, "grad_norm": 22.579517364501953, "learning_rate": 9.213557589959346e-07, "log_odds_chosen": 4.402327537536621, "log_odds_ratio": -0.06422088295221329, "logits/chosen": 311.88970947265625, "logits/rejected": 222.9725799560547, "logps/chosen": -0.12918733060359955, "logps/rejected": -1.5751354694366455, "loss": 0.3988, "nll_loss": 0.27917248010635376, "rewards/accuracies": 1.0, "rewards/chosen": -0.006459367461502552, "rewards/margins": 0.07229740917682648, "rewards/rejected": -0.07875677198171616, "step": 2945 }, { "epoch": 2.3431294678316124, "grad_norm": 23.760480880737305, "learning_rate": 9.205746178983235e-07, "log_odds_chosen": 4.011848449707031, "log_odds_ratio": -0.04609540104866028, "logits/chosen": 280.3443298339844, "logits/rejected": 283.18096923828125, "logps/chosen": -0.16002188622951508, "logps/rejected": -2.123976469039917, "loss": 0.3309, "nll_loss": 0.30090197920799255, "rewards/accuracies": 1.0, "rewards/chosen": -0.008001094684004784, "rewards/margins": 0.09819772839546204, "rewards/rejected": -0.10619882494211197, "step": 2950 }, { "epoch": 2.347100873709293, "grad_norm": 25.123260498046875, "learning_rate": 9.19795460231938e-07, "log_odds_chosen": 3.2714781761169434, "log_odds_ratio": -0.08971662819385529, "logits/chosen": 353.42388916015625, "logits/rejected": 275.33245849609375, "logps/chosen": -0.2007518708705902, "logps/rejected": -1.8207184076309204, "loss": 0.3645, "nll_loss": 0.3350695073604584, "rewards/accuracies": 1.0, "rewards/chosen": -0.010037594474852085, "rewards/margins": 0.08099832385778427, "rewards/rejected": -0.09103592485189438, "step": 2955 }, { "epoch": 2.351072279586974, "grad_norm": 33.57479476928711, "learning_rate": 9.190182776172598e-07, "log_odds_chosen": 3.7782235145568848, "log_odds_ratio": -0.051849596202373505, "logits/chosen": 261.42962646484375, "logits/rejected": 268.8240966796875, "logps/chosen": -0.14148275554180145, "logps/rejected": -1.8189716339111328, "loss": 0.3353, "nll_loss": 0.26106011867523193, "rewards/accuracies": 1.0, "rewards/chosen": -0.007074137218296528, "rewards/margins": 0.08387444913387299, "rewards/rejected": -0.09094859659671783, "step": 2960 }, { "epoch": 2.3550436854646546, "grad_norm": 25.17067527770996, "learning_rate": 9.182430617242484e-07, "log_odds_chosen": 3.2659249305725098, "log_odds_ratio": -0.045460015535354614, "logits/chosen": 291.0422668457031, "logits/rejected": 208.2694091796875, "logps/chosen": -0.07350897789001465, "logps/rejected": -1.1179029941558838, "loss": 0.3735, "nll_loss": 0.40316909551620483, "rewards/accuracies": 1.0, "rewards/chosen": -0.003675449173897505, "rewards/margins": 0.0522197000682354, "rewards/rejected": -0.05589514970779419, "step": 2965 }, { "epoch": 2.3590150913423353, "grad_norm": 25.110368728637695, "learning_rate": 9.174698042719672e-07, "log_odds_chosen": 4.775304794311523, "log_odds_ratio": -0.03073939122259617, "logits/chosen": 232.52755737304688, "logits/rejected": 406.4723205566406, "logps/chosen": -0.21328440308570862, "logps/rejected": -2.7473394870758057, "loss": 0.3524, "nll_loss": 0.31327253580093384, "rewards/accuracies": 1.0, "rewards/chosen": -0.010664219036698341, "rewards/margins": 0.12670275568962097, "rewards/rejected": -0.13736696541309357, "step": 2970 }, { "epoch": 2.3629864972200156, "grad_norm": 32.33005142211914, "learning_rate": 9.166984970282114e-07, "log_odds_chosen": 2.634587049484253, "log_odds_ratio": -0.39152711629867554, "logits/chosen": 295.1396789550781, "logits/rejected": 308.6955261230469, "logps/chosen": -0.3132212162017822, "logps/rejected": -1.5187908411026, "loss": 0.3392, "nll_loss": 0.4394780695438385, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.015661058947443962, "rewards/margins": 0.06027848646044731, "rewards/rejected": -0.07593954354524612, "step": 2975 }, { "epoch": 2.3669579030976964, "grad_norm": 26.105850219726562, "learning_rate": 9.159291318091397e-07, "log_odds_chosen": 4.424689292907715, "log_odds_ratio": -0.028805622830986977, "logits/chosen": 185.22755432128906, "logits/rejected": 448.46466064453125, "logps/chosen": -0.1817312389612198, "logps/rejected": -2.5383591651916504, "loss": 0.3573, "nll_loss": 0.36008864641189575, "rewards/accuracies": 1.0, "rewards/chosen": -0.00908656232059002, "rewards/margins": 0.11783139407634735, "rewards/rejected": -0.12691795825958252, "step": 2980 }, { "epoch": 2.370929308975377, "grad_norm": 25.48073959350586, "learning_rate": 9.151617004789102e-07, "log_odds_chosen": 2.9670186042785645, "log_odds_ratio": -0.06621041893959045, "logits/chosen": 273.04693603515625, "logits/rejected": 238.86740112304688, "logps/chosen": -0.19776080548763275, "logps/rejected": -1.5920594930648804, "loss": 0.3417, "nll_loss": 0.25471195578575134, "rewards/accuracies": 1.0, "rewards/chosen": -0.009888040833175182, "rewards/margins": 0.06971494108438492, "rewards/rejected": -0.07960297912359238, "step": 2985 }, { "epoch": 2.374900714853058, "grad_norm": 23.61982536315918, "learning_rate": 9.143961949493189e-07, "log_odds_chosen": 2.8940136432647705, "log_odds_ratio": -0.10211262851953506, "logits/chosen": 323.6980895996094, "logits/rejected": 273.4058837890625, "logps/chosen": -0.3123711347579956, "logps/rejected": -1.6104358434677124, "loss": 0.3747, "nll_loss": 0.4702371060848236, "rewards/accuracies": 1.0, "rewards/chosen": -0.01561855711042881, "rewards/margins": 0.06490323692560196, "rewards/rejected": -0.08052179217338562, "step": 2990 }, { "epoch": 2.3788721207307386, "grad_norm": 28.628559112548828, "learning_rate": 9.136326071794409e-07, "log_odds_chosen": 3.454132080078125, "log_odds_ratio": -0.07264744490385056, "logits/chosen": 257.689208984375, "logits/rejected": 262.5711364746094, "logps/chosen": -0.1612526923418045, "logps/rejected": -1.4927973747253418, "loss": 0.4392, "nll_loss": 0.36521124839782715, "rewards/accuracies": 1.0, "rewards/chosen": -0.008062634617090225, "rewards/margins": 0.06657722592353821, "rewards/rejected": -0.07463986426591873, "step": 2995 }, { "epoch": 2.3828435266084194, "grad_norm": 27.72311019897461, "learning_rate": 9.128709291752768e-07, "log_odds_chosen": 3.825345277786255, "log_odds_ratio": -0.06467778980731964, "logits/chosen": 215.4276885986328, "logits/rejected": 267.2101135253906, "logps/chosen": -0.3039247989654541, "logps/rejected": -2.7370784282684326, "loss": 0.3702, "nll_loss": 0.3736027181148529, "rewards/accuracies": 1.0, "rewards/chosen": -0.015196239575743675, "rewards/margins": 0.12165769189596176, "rewards/rejected": -0.13685393333435059, "step": 3000 }, { "epoch": 2.3868149324861, "grad_norm": 33.66513442993164, "learning_rate": 9.121111529894007e-07, "log_odds_chosen": 3.429539442062378, "log_odds_ratio": -0.07355336099863052, "logits/chosen": 280.227294921875, "logits/rejected": 266.79351806640625, "logps/chosen": -0.3146992623806, "logps/rejected": -2.431187152862549, "loss": 0.42, "nll_loss": 0.5019311308860779, "rewards/accuracies": 1.0, "rewards/chosen": -0.015734964981675148, "rewards/margins": 0.10582438856363297, "rewards/rejected": -0.12155935913324356, "step": 3005 }, { "epoch": 2.390786338363781, "grad_norm": 28.422985076904297, "learning_rate": 9.113532707206116e-07, "log_odds_chosen": 3.8442492485046387, "log_odds_ratio": -0.035998668521642685, "logits/chosen": 199.76071166992188, "logits/rejected": 245.70974731445312, "logps/chosen": -0.21047630906105042, "logps/rejected": -2.027498722076416, "loss": 0.3807, "nll_loss": 0.5179567933082581, "rewards/accuracies": 1.0, "rewards/chosen": -0.010523815639317036, "rewards/margins": 0.09085111320018768, "rewards/rejected": -0.10137493908405304, "step": 3010 }, { "epoch": 2.3947577442414616, "grad_norm": 30.8699951171875, "learning_rate": 9.105972745135884e-07, "log_odds_chosen": 3.4679412841796875, "log_odds_ratio": -0.05988996475934982, "logits/chosen": 350.3097229003906, "logits/rejected": 279.5090637207031, "logps/chosen": -0.11882610619068146, "logps/rejected": -1.5109213590621948, "loss": 0.3682, "nll_loss": 0.34842449426651, "rewards/accuracies": 1.0, "rewards/chosen": -0.005941305309534073, "rewards/margins": 0.0696047693490982, "rewards/rejected": -0.07554607093334198, "step": 3015 }, { "epoch": 2.3987291501191423, "grad_norm": 20.073862075805664, "learning_rate": 9.098431565585488e-07, "log_odds_chosen": 3.60724139213562, "log_odds_ratio": -0.04012635350227356, "logits/chosen": 189.92782592773438, "logits/rejected": 394.52960205078125, "logps/chosen": -0.1309356540441513, "logps/rejected": -1.7340739965438843, "loss": 0.2913, "nll_loss": 0.2573624551296234, "rewards/accuracies": 1.0, "rewards/chosen": -0.006546782795339823, "rewards/margins": 0.08015692979097366, "rewards/rejected": -0.08670370280742645, "step": 3020 }, { "epoch": 2.402700555996823, "grad_norm": 22.885211944580078, "learning_rate": 9.090909090909091e-07, "log_odds_chosen": 3.2791740894317627, "log_odds_ratio": -0.04902596399188042, "logits/chosen": 329.19525146484375, "logits/rejected": 196.19236755371094, "logps/chosen": -0.23585081100463867, "logps/rejected": -2.0871119499206543, "loss": 0.4016, "nll_loss": 0.3254674971103668, "rewards/accuracies": 1.0, "rewards/chosen": -0.011792539618909359, "rewards/margins": 0.09256306290626526, "rewards/rejected": -0.1043555960059166, "step": 3025 }, { "epoch": 2.4066719618745034, "grad_norm": 20.781896591186523, "learning_rate": 9.083405243909494e-07, "log_odds_chosen": 2.4194552898406982, "log_odds_ratio": -0.0930199846625328, "logits/chosen": 204.93038940429688, "logits/rejected": 288.8913269042969, "logps/chosen": -0.19672732055187225, "logps/rejected": -1.1895649433135986, "loss": 0.4447, "nll_loss": 0.37261468172073364, "rewards/accuracies": 1.0, "rewards/chosen": -0.009836366400122643, "rewards/margins": 0.04964187741279602, "rewards/rejected": -0.059478241950273514, "step": 3030 }, { "epoch": 2.410643367752184, "grad_norm": 25.38187599182129, "learning_rate": 9.075919947834808e-07, "log_odds_chosen": 4.394177436828613, "log_odds_ratio": -0.013578305020928383, "logits/chosen": 304.8377685546875, "logits/rejected": 219.35733032226562, "logps/chosen": -0.15937462449073792, "logps/rejected": -2.3069756031036377, "loss": 0.3331, "nll_loss": 0.3496881425380707, "rewards/accuracies": 1.0, "rewards/chosen": -0.007968731224536896, "rewards/margins": 0.10738004744052887, "rewards/rejected": -0.11534877866506577, "step": 3035 }, { "epoch": 2.414614773629865, "grad_norm": 20.055810928344727, "learning_rate": 9.068453126375147e-07, "log_odds_chosen": 3.3665032386779785, "log_odds_ratio": -0.0692509263753891, "logits/chosen": 279.8843688964844, "logits/rejected": 198.7550048828125, "logps/chosen": -0.10582447052001953, "logps/rejected": -1.3955246210098267, "loss": 0.3492, "nll_loss": 0.30622151494026184, "rewards/accuracies": 1.0, "rewards/chosen": -0.005291222594678402, "rewards/margins": 0.06448500603437424, "rewards/rejected": -0.06977623701095581, "step": 3040 }, { "epoch": 2.4185861795075456, "grad_norm": 16.259273529052734, "learning_rate": 9.061004703659373e-07, "log_odds_chosen": 4.736048698425293, "log_odds_ratio": -0.036501117050647736, "logits/chosen": 305.44647216796875, "logits/rejected": 281.8908996582031, "logps/chosen": -0.1840660125017166, "logps/rejected": -2.381317377090454, "loss": 0.4145, "nll_loss": 0.3028008043766022, "rewards/accuracies": 1.0, "rewards/chosen": -0.00920330174267292, "rewards/margins": 0.1098625659942627, "rewards/rejected": -0.11906585842370987, "step": 3045 }, { "epoch": 2.4225575853852264, "grad_norm": 22.572229385375977, "learning_rate": 9.053574604251853e-07, "log_odds_chosen": 3.1664023399353027, "log_odds_ratio": -0.055031102150678635, "logits/chosen": 248.3058624267578, "logits/rejected": 298.56103515625, "logps/chosen": -0.1691017895936966, "logps/rejected": -1.5698134899139404, "loss": 0.322, "nll_loss": 0.31757646799087524, "rewards/accuracies": 1.0, "rewards/chosen": -0.008455089293420315, "rewards/margins": 0.07003559172153473, "rewards/rejected": -0.07849067449569702, "step": 3050 }, { "epoch": 2.426528991262907, "grad_norm": 24.180944442749023, "learning_rate": 9.04616275314925e-07, "log_odds_chosen": 4.054632186889648, "log_odds_ratio": -0.03361033648252487, "logits/chosen": 233.0185089111328, "logits/rejected": 368.1893005371094, "logps/chosen": -0.1435851901769638, "logps/rejected": -2.2944867610931396, "loss": 0.2769, "nll_loss": 0.24402709305286407, "rewards/accuracies": 1.0, "rewards/chosen": -0.00717926025390625, "rewards/margins": 0.10754507780075073, "rewards/rejected": -0.11472433805465698, "step": 3055 }, { "epoch": 2.430500397140588, "grad_norm": 26.717470169067383, "learning_rate": 9.03876907577734e-07, "log_odds_chosen": 3.7755863666534424, "log_odds_ratio": -0.044100239872932434, "logits/chosen": 169.8478546142578, "logits/rejected": 342.99444580078125, "logps/chosen": -0.18389829993247986, "logps/rejected": -2.2423253059387207, "loss": 0.2942, "nll_loss": 0.23073211312294006, "rewards/accuracies": 1.0, "rewards/chosen": -0.009194915182888508, "rewards/margins": 0.10292134433984756, "rewards/rejected": -0.1121162623167038, "step": 3060 }, { "epoch": 2.4344718030182686, "grad_norm": 30.117958068847656, "learning_rate": 9.03139349798787e-07, "log_odds_chosen": 4.355618476867676, "log_odds_ratio": -0.023019861429929733, "logits/chosen": 248.4194793701172, "logits/rejected": 325.993896484375, "logps/chosen": -0.14225144684314728, "logps/rejected": -2.0682573318481445, "loss": 0.4001, "nll_loss": 0.33710920810699463, "rewards/accuracies": 1.0, "rewards/chosen": -0.007112572900950909, "rewards/margins": 0.09630030393600464, "rewards/rejected": -0.10341286659240723, "step": 3065 }, { "epoch": 2.4384432088959493, "grad_norm": 40.229515075683594, "learning_rate": 9.024035946055421e-07, "log_odds_chosen": 2.305717706680298, "log_odds_ratio": -0.10136429965496063, "logits/chosen": 216.3096923828125, "logits/rejected": 268.7192687988281, "logps/chosen": -0.32101818919181824, "logps/rejected": -1.5222715139389038, "loss": 0.3947, "nll_loss": 0.415952205657959, "rewards/accuracies": 1.0, "rewards/chosen": -0.016050908714532852, "rewards/margins": 0.06006266549229622, "rewards/rejected": -0.07611358165740967, "step": 3070 }, { "epoch": 2.4424146147736296, "grad_norm": 20.493045806884766, "learning_rate": 9.016696346674324e-07, "log_odds_chosen": 3.1343421936035156, "log_odds_ratio": -0.050869233906269073, "logits/chosen": 301.9378356933594, "logits/rejected": 206.95703125, "logps/chosen": -0.25668659806251526, "logps/rejected": -1.7620422840118408, "loss": 0.3361, "nll_loss": 0.3529127836227417, "rewards/accuracies": 1.0, "rewards/chosen": -0.012834331020712852, "rewards/margins": 0.07526777684688568, "rewards/rejected": -0.08810211718082428, "step": 3075 }, { "epoch": 2.4463860206513104, "grad_norm": 15.868453025817871, "learning_rate": 9.00937462695559e-07, "log_odds_chosen": 2.079472541809082, "log_odds_ratio": -0.15630726516246796, "logits/chosen": 259.4786071777344, "logits/rejected": 208.0851593017578, "logps/chosen": -0.30298787355422974, "logps/rejected": -1.1315406560897827, "loss": 0.3567, "nll_loss": 0.4347425401210785, "rewards/accuracies": 1.0, "rewards/chosen": -0.015149394050240517, "rewards/margins": 0.04142763838171959, "rewards/rejected": -0.056577038019895554, "step": 3080 }, { "epoch": 2.450357426528991, "grad_norm": 30.881765365600586, "learning_rate": 9.002070714423869e-07, "log_odds_chosen": 3.3806204795837402, "log_odds_ratio": -0.054750990122556686, "logits/chosen": 247.8966827392578, "logits/rejected": 301.9482727050781, "logps/chosen": -0.17770621180534363, "logps/rejected": -1.7712430953979492, "loss": 0.3547, "nll_loss": 0.3148205280303955, "rewards/accuracies": 1.0, "rewards/chosen": -0.008885310962796211, "rewards/margins": 0.07967683672904968, "rewards/rejected": -0.08856214582920074, "step": 3085 }, { "epoch": 2.454328832406672, "grad_norm": 20.376623153686523, "learning_rate": 8.994784537014432e-07, "log_odds_chosen": 3.2993292808532715, "log_odds_ratio": -0.0405481681227684, "logits/chosen": 323.1062927246094, "logits/rejected": 218.21615600585938, "logps/chosen": -0.19290466606616974, "logps/rejected": -1.7616689205169678, "loss": 0.3416, "nll_loss": 0.302539587020874, "rewards/accuracies": 1.0, "rewards/chosen": -0.009645233862102032, "rewards/margins": 0.07843820750713348, "rewards/rejected": -0.08808344602584839, "step": 3090 }, { "epoch": 2.4583002382843526, "grad_norm": 27.777835845947266, "learning_rate": 8.987516023070194e-07, "log_odds_chosen": 3.353145122528076, "log_odds_ratio": -0.109768345952034, "logits/chosen": 288.3103332519531, "logits/rejected": 237.58154296875, "logps/chosen": -0.34388288855552673, "logps/rejected": -2.0795645713806152, "loss": 0.4017, "nll_loss": 0.45742273330688477, "rewards/accuracies": 1.0, "rewards/chosen": -0.017194144427776337, "rewards/margins": 0.08678407222032547, "rewards/rejected": -0.1039782166481018, "step": 3095 }, { "epoch": 2.4622716441620334, "grad_norm": 20.824138641357422, "learning_rate": 8.980265101338747e-07, "log_odds_chosen": 3.3091349601745605, "log_odds_ratio": -0.05483214184641838, "logits/chosen": 233.0238800048828, "logits/rejected": 291.1861572265625, "logps/chosen": -0.19537723064422607, "logps/rejected": -1.9486373662948608, "loss": 0.481, "nll_loss": 0.41400328278541565, "rewards/accuracies": 1.0, "rewards/chosen": -0.009768862277269363, "rewards/margins": 0.08766300976276398, "rewards/rejected": -0.09743185341358185, "step": 3100 }, { "epoch": 2.466243050039714, "grad_norm": 21.861316680908203, "learning_rate": 8.973031700969425e-07, "log_odds_chosen": 3.9362945556640625, "log_odds_ratio": -0.042985569685697556, "logits/chosen": 247.0224151611328, "logits/rejected": 247.30386352539062, "logps/chosen": -0.17336855828762054, "logps/rejected": -1.86944580078125, "loss": 0.3268, "nll_loss": 0.24799279868602753, "rewards/accuracies": 1.0, "rewards/chosen": -0.008668428286910057, "rewards/margins": 0.08480386435985565, "rewards/rejected": -0.09347229450941086, "step": 3105 }, { "epoch": 2.470214455917395, "grad_norm": 29.43597984313965, "learning_rate": 8.965815751510408e-07, "log_odds_chosen": 4.157784461975098, "log_odds_ratio": -0.05995911359786987, "logits/chosen": 209.51339721679688, "logits/rejected": 263.91632080078125, "logps/chosen": -0.18789565563201904, "logps/rejected": -2.0847220420837402, "loss": 0.3517, "nll_loss": 0.39410367608070374, "rewards/accuracies": 1.0, "rewards/chosen": -0.009394782595336437, "rewards/margins": 0.09484131634235382, "rewards/rejected": -0.10423608869314194, "step": 3110 }, { "epoch": 2.4741858617950756, "grad_norm": 23.069969177246094, "learning_rate": 8.958617182905828e-07, "log_odds_chosen": 4.918656349182129, "log_odds_ratio": -0.030725345015525818, "logits/chosen": 282.84674072265625, "logits/rejected": 367.82489013671875, "logps/chosen": -0.18277548253536224, "logps/rejected": -3.082590103149414, "loss": 0.3415, "nll_loss": 0.25883248448371887, "rewards/accuracies": 1.0, "rewards/chosen": -0.009138774126768112, "rewards/margins": 0.1449907273054123, "rewards/rejected": -0.1541295051574707, "step": 3115 }, { "epoch": 2.4781572676727563, "grad_norm": 24.059284210205078, "learning_rate": 8.951435925492912e-07, "log_odds_chosen": 3.07710337638855, "log_odds_ratio": -0.0691499263048172, "logits/chosen": 308.7353515625, "logits/rejected": 252.30648803710938, "logps/chosen": -0.18907026946544647, "logps/rejected": -1.4084926843643188, "loss": 0.4312, "nll_loss": 0.5141544342041016, "rewards/accuracies": 1.0, "rewards/chosen": -0.009453514590859413, "rewards/margins": 0.06097111850976944, "rewards/rejected": -0.0704246312379837, "step": 3120 }, { "epoch": 2.482128673550437, "grad_norm": 22.84012222290039, "learning_rate": 8.94427190999916e-07, "log_odds_chosen": 4.7749199867248535, "log_odds_ratio": -0.04176812991499901, "logits/chosen": 250.2894744873047, "logits/rejected": 233.84292602539062, "logps/chosen": -0.19017019867897034, "logps/rejected": -1.9456065893173218, "loss": 0.2761, "nll_loss": 0.3522077202796936, "rewards/accuracies": 1.0, "rewards/chosen": -0.009508511051535606, "rewards/margins": 0.08777181804180145, "rewards/rejected": -0.09728033095598221, "step": 3125 }, { "epoch": 2.4861000794281174, "grad_norm": 42.411163330078125, "learning_rate": 8.93712506753953e-07, "log_odds_chosen": 2.530211925506592, "log_odds_ratio": -0.12716186046600342, "logits/chosen": 230.0726776123047, "logits/rejected": 294.5306701660156, "logps/chosen": -0.32971224188804626, "logps/rejected": -1.8698943853378296, "loss": 0.4152, "nll_loss": 0.4431188106536865, "rewards/accuracies": 1.0, "rewards/chosen": -0.016485612839460373, "rewards/margins": 0.07700910419225693, "rewards/rejected": -0.0934947207570076, "step": 3130 }, { "epoch": 2.490071485305798, "grad_norm": 20.534034729003906, "learning_rate": 8.929995329613664e-07, "log_odds_chosen": 3.810880661010742, "log_odds_ratio": -0.03098614513874054, "logits/chosen": 258.92645263671875, "logits/rejected": 379.466796875, "logps/chosen": -0.19361644983291626, "logps/rejected": -2.1948695182800293, "loss": 0.3683, "nll_loss": 0.2922203540802002, "rewards/accuracies": 1.0, "rewards/chosen": -0.009680822491645813, "rewards/margins": 0.10006266832351685, "rewards/rejected": -0.10974349081516266, "step": 3135 }, { "epoch": 2.494042891183479, "grad_norm": 26.821924209594727, "learning_rate": 8.922882628103122e-07, "log_odds_chosen": 3.4475345611572266, "log_odds_ratio": -0.03615453466773033, "logits/chosen": 177.71914672851562, "logits/rejected": 325.3964538574219, "logps/chosen": -0.14965102076530457, "logps/rejected": -1.773688554763794, "loss": 0.3296, "nll_loss": 0.3020648658275604, "rewards/accuracies": 1.0, "rewards/chosen": -0.007482551038265228, "rewards/margins": 0.08120187371969223, "rewards/rejected": -0.08868442475795746, "step": 3140 }, { "epoch": 2.4980142970611596, "grad_norm": 17.4930419921875, "learning_rate": 8.91578689526865e-07, "log_odds_chosen": 3.373814344406128, "log_odds_ratio": -0.07843703031539917, "logits/chosen": 228.93408203125, "logits/rejected": 266.8359680175781, "logps/chosen": -0.30311352014541626, "logps/rejected": -2.266686201095581, "loss": 0.3862, "nll_loss": 0.35967734456062317, "rewards/accuracies": 1.0, "rewards/chosen": -0.015155675821006298, "rewards/margins": 0.09817864745855331, "rewards/rejected": -0.11333432048559189, "step": 3145 }, { "epoch": 2.5019857029388404, "grad_norm": 49.936431884765625, "learning_rate": 8.90870806374748e-07, "log_odds_chosen": 2.624094009399414, "log_odds_ratio": -0.0867760106921196, "logits/chosen": 289.07318115234375, "logits/rejected": 254.26565551757812, "logps/chosen": -0.26391011476516724, "logps/rejected": -1.578924298286438, "loss": 0.4147, "nll_loss": 0.36335909366607666, "rewards/accuracies": 1.0, "rewards/chosen": -0.013195505365729332, "rewards/margins": 0.06575071066617966, "rewards/rejected": -0.07894621789455414, "step": 3150 }, { "epoch": 2.505957108816521, "grad_norm": 32.26227569580078, "learning_rate": 8.90164606655063e-07, "log_odds_chosen": 3.1176323890686035, "log_odds_ratio": -0.05419561266899109, "logits/chosen": 244.9784393310547, "logits/rejected": 271.72808837890625, "logps/chosen": -0.14082542061805725, "logps/rejected": -1.3495501279830933, "loss": 0.2844, "nll_loss": 0.25858157873153687, "rewards/accuracies": 1.0, "rewards/chosen": -0.007041270844638348, "rewards/margins": 0.06043623015284538, "rewards/rejected": -0.0674775093793869, "step": 3155 }, { "epoch": 2.509928514694202, "grad_norm": 28.52840232849121, "learning_rate": 8.894600837060251e-07, "log_odds_chosen": 2.9466381072998047, "log_odds_ratio": -0.08857440948486328, "logits/chosen": 293.58978271484375, "logits/rejected": 310.09246826171875, "logps/chosen": -0.3098464012145996, "logps/rejected": -1.6593332290649414, "loss": 0.481, "nll_loss": 0.5123347043991089, "rewards/accuracies": 1.0, "rewards/chosen": -0.01549232192337513, "rewards/margins": 0.06747434288263321, "rewards/rejected": -0.08296666294336319, "step": 3160 }, { "epoch": 2.5138999205718826, "grad_norm": 23.358430862426758, "learning_rate": 8.887572309026986e-07, "log_odds_chosen": 3.9055874347686768, "log_odds_ratio": -0.04153949022293091, "logits/chosen": 237.70809936523438, "logits/rejected": 308.60382080078125, "logps/chosen": -0.09806036949157715, "logps/rejected": -1.6581532955169678, "loss": 0.3043, "nll_loss": 0.2352304756641388, "rewards/accuracies": 1.0, "rewards/chosen": -0.004903018940240145, "rewards/margins": 0.07800465822219849, "rewards/rejected": -0.08290766924619675, "step": 3165 }, { "epoch": 2.517871326449563, "grad_norm": 33.0350456237793, "learning_rate": 8.880560416567349e-07, "log_odds_chosen": 3.675652265548706, "log_odds_ratio": -0.1050681620836258, "logits/chosen": 439.34344482421875, "logits/rejected": 253.87222290039062, "logps/chosen": -0.292144775390625, "logps/rejected": -1.6460235118865967, "loss": 0.3491, "nll_loss": 0.4088570475578308, "rewards/accuracies": 1.0, "rewards/chosen": -0.014607238583266735, "rewards/margins": 0.06769393384456635, "rewards/rejected": -0.08230117708444595, "step": 3170 }, { "epoch": 2.5218427323272437, "grad_norm": 22.11117172241211, "learning_rate": 8.873565094161139e-07, "log_odds_chosen": 2.985295295715332, "log_odds_ratio": -0.07777807861566544, "logits/chosen": 322.97698974609375, "logits/rejected": 256.5289306640625, "logps/chosen": -0.25270113348960876, "logps/rejected": -1.9781535863876343, "loss": 0.3116, "nll_loss": 0.4061063230037689, "rewards/accuracies": 1.0, "rewards/chosen": -0.012635056860744953, "rewards/margins": 0.08627263456583023, "rewards/rejected": -0.09890768676996231, "step": 3175 }, { "epoch": 2.5258141382049244, "grad_norm": 23.091205596923828, "learning_rate": 8.866586276648859e-07, "log_odds_chosen": 4.1930012702941895, "log_odds_ratio": -0.01747201755642891, "logits/chosen": 304.1520080566406, "logits/rejected": 369.5452575683594, "logps/chosen": -0.11393336951732635, "logps/rejected": -2.1033239364624023, "loss": 0.3055, "nll_loss": 0.20445315539836884, "rewards/accuracies": 1.0, "rewards/chosen": -0.005696668289601803, "rewards/margins": 0.09946952760219574, "rewards/rejected": -0.10516619682312012, "step": 3180 }, { "epoch": 2.529785544082605, "grad_norm": 27.869813919067383, "learning_rate": 8.859623899229175e-07, "log_odds_chosen": 2.7538349628448486, "log_odds_ratio": -0.08827327191829681, "logits/chosen": 318.21588134765625, "logits/rejected": 307.63275146484375, "logps/chosen": -0.24295452237129211, "logps/rejected": -1.4790904521942139, "loss": 0.3332, "nll_loss": 0.3576405644416809, "rewards/accuracies": 1.0, "rewards/chosen": -0.012147725559771061, "rewards/margins": 0.06180679798126221, "rewards/rejected": -0.0739545226097107, "step": 3185 }, { "epoch": 2.533756949960286, "grad_norm": 21.698623657226562, "learning_rate": 8.852677897456389e-07, "log_odds_chosen": 4.32507848739624, "log_odds_ratio": -0.04182177782058716, "logits/chosen": 307.76556396484375, "logits/rejected": 265.21307373046875, "logps/chosen": -0.11581907421350479, "logps/rejected": -1.9977920055389404, "loss": 0.3533, "nll_loss": 0.3494908809661865, "rewards/accuracies": 1.0, "rewards/chosen": -0.005790953524410725, "rewards/margins": 0.09409864246845245, "rewards/rejected": -0.0998895987868309, "step": 3190 }, { "epoch": 2.5377283558379666, "grad_norm": 23.7899169921875, "learning_rate": 8.845748207237923e-07, "log_odds_chosen": 4.4067888259887695, "log_odds_ratio": -0.05649635195732117, "logits/chosen": 199.7589874267578, "logits/rejected": 377.6457214355469, "logps/chosen": -0.24582286179065704, "logps/rejected": -3.0470097064971924, "loss": 0.3842, "nll_loss": 0.31226563453674316, "rewards/accuracies": 1.0, "rewards/chosen": -0.012291142717003822, "rewards/margins": 0.14005932211875916, "rewards/rejected": -0.15235047042369843, "step": 3195 }, { "epoch": 2.5416997617156474, "grad_norm": 26.713525772094727, "learning_rate": 8.838834764831844e-07, "log_odds_chosen": 3.987933397293091, "log_odds_ratio": -0.022607123479247093, "logits/chosen": 289.32916259765625, "logits/rejected": 310.29010009765625, "logps/chosen": -0.11314906924962997, "logps/rejected": -1.6021335124969482, "loss": 0.2942, "nll_loss": 0.301480770111084, "rewards/accuracies": 1.0, "rewards/chosen": -0.0056574540212750435, "rewards/margins": 0.07444922626018524, "rewards/rejected": -0.08010667562484741, "step": 3200 }, { "epoch": 2.545671167593328, "grad_norm": 24.519126892089844, "learning_rate": 8.831937506844408e-07, "log_odds_chosen": 2.726931095123291, "log_odds_ratio": -0.12800243496894836, "logits/chosen": 300.64373779296875, "logits/rejected": 290.03192138671875, "logps/chosen": -0.3544635474681854, "logps/rejected": -1.5170186758041382, "loss": 0.4417, "nll_loss": 0.41465067863464355, "rewards/accuracies": 1.0, "rewards/chosen": -0.01772317662835121, "rewards/margins": 0.05812777206301689, "rewards/rejected": -0.0758509486913681, "step": 3205 }, { "epoch": 2.549642573471009, "grad_norm": 39.54831314086914, "learning_rate": 8.825056370227597e-07, "log_odds_chosen": 5.636147975921631, "log_odds_ratio": -0.013704921118915081, "logits/chosen": 337.90618896484375, "logits/rejected": 267.09014892578125, "logps/chosen": -0.1419641226530075, "logps/rejected": -3.0420889854431152, "loss": 0.3481, "nll_loss": 0.33497047424316406, "rewards/accuracies": 1.0, "rewards/chosen": -0.00709820631891489, "rewards/margins": 0.1450062245130539, "rewards/rejected": -0.1521044373512268, "step": 3210 }, { "epoch": 2.5536139793486896, "grad_norm": 33.25807189941406, "learning_rate": 8.818191292276726e-07, "log_odds_chosen": 2.867290735244751, "log_odds_ratio": -0.11395450681447983, "logits/chosen": 356.44366455078125, "logits/rejected": 316.00958251953125, "logps/chosen": -0.21636183559894562, "logps/rejected": -1.3843950033187866, "loss": 0.3734, "nll_loss": 0.29570505023002625, "rewards/accuracies": 1.0, "rewards/chosen": -0.01081809215247631, "rewards/margins": 0.05840166285634041, "rewards/rejected": -0.06921975314617157, "step": 3215 }, { "epoch": 2.5575853852263704, "grad_norm": 24.43802833557129, "learning_rate": 8.811342210628018e-07, "log_odds_chosen": 4.013164520263672, "log_odds_ratio": -0.0282739344984293, "logits/chosen": 370.65472412109375, "logits/rejected": 214.8960418701172, "logps/chosen": -0.09479434043169022, "logps/rejected": -1.5942752361297607, "loss": 0.412, "nll_loss": 0.5181189179420471, "rewards/accuracies": 1.0, "rewards/chosen": -0.004739716649055481, "rewards/margins": 0.07497404515743256, "rewards/rejected": -0.07971376925706863, "step": 3220 }, { "epoch": 2.561556791104051, "grad_norm": 23.81389808654785, "learning_rate": 8.804509063256239e-07, "log_odds_chosen": 3.736074447631836, "log_odds_ratio": -0.05703800916671753, "logits/chosen": 437.04461669921875, "logits/rejected": 222.80154418945312, "logps/chosen": -0.08796362578868866, "logps/rejected": -1.3682935237884521, "loss": 0.3924, "nll_loss": 0.3038131594657898, "rewards/accuracies": 1.0, "rewards/chosen": -0.004398181103169918, "rewards/margins": 0.06401649117469788, "rewards/rejected": -0.06841467320919037, "step": 3225 }, { "epoch": 2.5655281969817314, "grad_norm": 20.135488510131836, "learning_rate": 8.797691788472336e-07, "log_odds_chosen": 3.9858810901641846, "log_odds_ratio": -0.05607563257217407, "logits/chosen": 329.1815185546875, "logits/rejected": 290.13653564453125, "logps/chosen": -0.108522430062294, "logps/rejected": -1.2941747903823853, "loss": 0.3484, "nll_loss": 0.22560131549835205, "rewards/accuracies": 1.0, "rewards/chosen": -0.0054261209443211555, "rewards/margins": 0.05928261950612068, "rewards/rejected": -0.06470874696969986, "step": 3230 }, { "epoch": 2.569499602859412, "grad_norm": 34.43703079223633, "learning_rate": 8.790890324921097e-07, "log_odds_chosen": 2.3997080326080322, "log_odds_ratio": -0.11185695976018906, "logits/chosen": 310.90777587890625, "logits/rejected": 232.01480102539062, "logps/chosen": -0.3746749460697174, "logps/rejected": -1.6247644424438477, "loss": 0.4029, "nll_loss": 0.5081365704536438, "rewards/accuracies": 1.0, "rewards/chosen": -0.01873374730348587, "rewards/margins": 0.06250447034835815, "rewards/rejected": -0.08123821765184402, "step": 3235 }, { "epoch": 2.573471008737093, "grad_norm": 17.835147857666016, "learning_rate": 8.784104611578832e-07, "log_odds_chosen": 2.310650587081909, "log_odds_ratio": -0.12545891106128693, "logits/chosen": 246.5829315185547, "logits/rejected": 253.65975952148438, "logps/chosen": -0.35286539793014526, "logps/rejected": -1.4031962156295776, "loss": 0.3793, "nll_loss": 0.43818941712379456, "rewards/accuracies": 1.0, "rewards/chosen": -0.017643271014094353, "rewards/margins": 0.05251653864979744, "rewards/rejected": -0.07015980780124664, "step": 3240 }, { "epoch": 2.5774424146147736, "grad_norm": 26.749759674072266, "learning_rate": 8.777334587751073e-07, "log_odds_chosen": 3.4511096477508545, "log_odds_ratio": -0.07684098184108734, "logits/chosen": 253.508056640625, "logits/rejected": 276.6531677246094, "logps/chosen": -0.2683263421058655, "logps/rejected": -2.0055651664733887, "loss": 0.3453, "nll_loss": 0.4227082133293152, "rewards/accuracies": 1.0, "rewards/chosen": -0.013416317291557789, "rewards/margins": 0.08686195313930511, "rewards/rejected": -0.10027827322483063, "step": 3245 }, { "epoch": 2.5814138204924544, "grad_norm": 25.256689071655273, "learning_rate": 8.770580193070293e-07, "log_odds_chosen": 4.399047374725342, "log_odds_ratio": -0.032868240028619766, "logits/chosen": 393.154052734375, "logits/rejected": 251.6790008544922, "logps/chosen": -0.15343406796455383, "logps/rejected": -1.8153202533721924, "loss": 0.3641, "nll_loss": 0.5575748085975647, "rewards/accuracies": 1.0, "rewards/chosen": -0.007671704050153494, "rewards/margins": 0.08309431374073029, "rewards/rejected": -0.09076601266860962, "step": 3250 }, { "epoch": 2.585385226370135, "grad_norm": 31.991716384887695, "learning_rate": 8.763841367493649e-07, "log_odds_chosen": 3.1166841983795166, "log_odds_ratio": -0.08671603351831436, "logits/chosen": 337.2877502441406, "logits/rejected": 236.4402313232422, "logps/chosen": -0.24322061240673065, "logps/rejected": -1.8942596912384033, "loss": 0.4194, "nll_loss": 0.5026718378067017, "rewards/accuracies": 1.0, "rewards/chosen": -0.012161030434072018, "rewards/margins": 0.08255194127559662, "rewards/rejected": -0.09471298009157181, "step": 3255 }, { "epoch": 2.589356632247816, "grad_norm": 25.604633331298828, "learning_rate": 8.757118051300735e-07, "log_odds_chosen": 3.5466561317443848, "log_odds_ratio": -0.03917517513036728, "logits/chosen": 279.3944396972656, "logits/rejected": 271.83636474609375, "logps/chosen": -0.17348912358283997, "logps/rejected": -1.8674802780151367, "loss": 0.2927, "nll_loss": 0.2331702709197998, "rewards/accuracies": 1.0, "rewards/chosen": -0.008674455806612968, "rewards/margins": 0.08469955623149872, "rewards/rejected": -0.09337402135133743, "step": 3260 }, { "epoch": 2.593328038125496, "grad_norm": 27.33872413635254, "learning_rate": 8.750410185091365e-07, "log_odds_chosen": 4.348583698272705, "log_odds_ratio": -0.0230005644261837, "logits/chosen": 328.2115478515625, "logits/rejected": 254.36544799804688, "logps/chosen": -0.14674702286720276, "logps/rejected": -2.459757089614868, "loss": 0.306, "nll_loss": 0.28896063566207886, "rewards/accuracies": 1.0, "rewards/chosen": -0.007337350398302078, "rewards/margins": 0.11565049737691879, "rewards/rejected": -0.12298785150051117, "step": 3265 }, { "epoch": 2.597299444003177, "grad_norm": 25.737802505493164, "learning_rate": 8.743717709783363e-07, "log_odds_chosen": 3.1614301204681396, "log_odds_ratio": -0.06867258995771408, "logits/chosen": 348.8385314941406, "logits/rejected": 204.63389587402344, "logps/chosen": -0.23629799485206604, "logps/rejected": -1.561525821685791, "loss": 0.3409, "nll_loss": 0.4211881756782532, "rewards/accuracies": 1.0, "rewards/chosen": -0.011814900673925877, "rewards/margins": 0.0662613958120346, "rewards/rejected": -0.07807628810405731, "step": 3270 }, { "epoch": 2.6012708498808577, "grad_norm": 22.75188636779785, "learning_rate": 8.737040566610381e-07, "log_odds_chosen": 3.9433999061584473, "log_odds_ratio": -0.04137764498591423, "logits/chosen": 384.3919677734375, "logits/rejected": 265.02557373046875, "logps/chosen": -0.15901608765125275, "logps/rejected": -1.3845044374465942, "loss": 0.3862, "nll_loss": 0.3304668962955475, "rewards/accuracies": 1.0, "rewards/chosen": -0.007950803264975548, "rewards/margins": 0.06127442046999931, "rewards/rejected": -0.06922522932291031, "step": 3275 }, { "epoch": 2.6052422557585384, "grad_norm": 27.857601165771484, "learning_rate": 8.730378697119729e-07, "log_odds_chosen": 3.643134593963623, "log_odds_ratio": -0.03681778535246849, "logits/chosen": 321.9202575683594, "logits/rejected": 334.7354736328125, "logps/chosen": -0.10982207208871841, "logps/rejected": -1.6907075643539429, "loss": 0.3246, "nll_loss": 0.22584645450115204, "rewards/accuracies": 1.0, "rewards/chosen": -0.005491103511303663, "rewards/margins": 0.07904426753520966, "rewards/rejected": -0.0845353752374649, "step": 3280 }, { "epoch": 2.609213661636219, "grad_norm": 25.07754898071289, "learning_rate": 8.723732043170228e-07, "log_odds_chosen": 4.07627010345459, "log_odds_ratio": -0.04696853086352348, "logits/chosen": 333.41082763671875, "logits/rejected": 297.3253479003906, "logps/chosen": -0.17340394854545593, "logps/rejected": -1.9108400344848633, "loss": 0.3297, "nll_loss": 0.3067760169506073, "rewards/accuracies": 1.0, "rewards/chosen": -0.008670197799801826, "rewards/margins": 0.08687180280685425, "rewards/rejected": -0.09554199874401093, "step": 3285 }, { "epoch": 2.6131850675139, "grad_norm": 24.27914810180664, "learning_rate": 8.717100546930084e-07, "log_odds_chosen": 3.4627201557159424, "log_odds_ratio": -0.04240020364522934, "logits/chosen": 297.2704772949219, "logits/rejected": 297.8516845703125, "logps/chosen": -0.18625622987747192, "logps/rejected": -1.753732442855835, "loss": 0.3577, "nll_loss": 0.35155242681503296, "rewards/accuracies": 1.0, "rewards/chosen": -0.009312811307609081, "rewards/margins": 0.07837380468845367, "rewards/rejected": -0.08768662065267563, "step": 3290 }, { "epoch": 2.6171564733915806, "grad_norm": 29.44333839416504, "learning_rate": 8.710484150874759e-07, "log_odds_chosen": 2.7033531665802, "log_odds_ratio": -0.09133219718933105, "logits/chosen": 218.84378051757812, "logits/rejected": 311.80950927734375, "logps/chosen": -0.1878434419631958, "logps/rejected": -1.3301265239715576, "loss": 0.3785, "nll_loss": 0.2603824734687805, "rewards/accuracies": 1.0, "rewards/chosen": -0.00939217209815979, "rewards/margins": 0.057114146649837494, "rewards/rejected": -0.06650632619857788, "step": 3295 }, { "epoch": 2.6211278792692614, "grad_norm": 25.69460105895996, "learning_rate": 8.703882797784894e-07, "log_odds_chosen": 3.080230712890625, "log_odds_ratio": -0.05720607191324234, "logits/chosen": 200.24032592773438, "logits/rejected": 362.2676086425781, "logps/chosen": -0.2980789542198181, "logps/rejected": -2.0313193798065186, "loss": 0.3677, "nll_loss": 0.3649226725101471, "rewards/accuracies": 1.0, "rewards/chosen": -0.014903949573636055, "rewards/margins": 0.08666200935840607, "rewards/rejected": -0.10156597197055817, "step": 3300 }, { "epoch": 2.625099285146942, "grad_norm": 22.543445587158203, "learning_rate": 8.697296430744212e-07, "log_odds_chosen": 3.6661219596862793, "log_odds_ratio": -0.185410737991333, "logits/chosen": 267.294677734375, "logits/rejected": 246.5009002685547, "logps/chosen": -0.14372417330741882, "logps/rejected": -1.8430382013320923, "loss": 0.3137, "nll_loss": 0.26537400484085083, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0071862079203128815, "rewards/margins": 0.08496570587158203, "rewards/rejected": -0.09215191006660461, "step": 3305 }, { "epoch": 2.629070691024623, "grad_norm": 21.284128189086914, "learning_rate": 8.690724993137478e-07, "log_odds_chosen": 3.3694980144500732, "log_odds_ratio": -0.04535522311925888, "logits/chosen": 262.36431884765625, "logits/rejected": 278.66455078125, "logps/chosen": -0.22373194992542267, "logps/rejected": -2.0730624198913574, "loss": 0.3005, "nll_loss": 0.32457104325294495, "rewards/accuracies": 1.0, "rewards/chosen": -0.011186597868800163, "rewards/margins": 0.09246651828289032, "rewards/rejected": -0.10365311056375504, "step": 3310 }, { "epoch": 2.6330420969023036, "grad_norm": 26.384048461914062, "learning_rate": 8.684168428648437e-07, "log_odds_chosen": 3.301476001739502, "log_odds_ratio": -0.06395457684993744, "logits/chosen": 268.52532958984375, "logits/rejected": 219.0271453857422, "logps/chosen": -0.1741420328617096, "logps/rejected": -1.5680488348007202, "loss": 0.3139, "nll_loss": 0.325143039226532, "rewards/accuracies": 1.0, "rewards/chosen": -0.00870710052549839, "rewards/margins": 0.06969533860683441, "rewards/rejected": -0.07840244472026825, "step": 3315 }, { "epoch": 2.6370135027799844, "grad_norm": 29.413419723510742, "learning_rate": 8.677626681257792e-07, "log_odds_chosen": 2.828350305557251, "log_odds_ratio": -0.10513371229171753, "logits/chosen": 251.63174438476562, "logits/rejected": 276.219970703125, "logps/chosen": -0.262276828289032, "logps/rejected": -1.6337566375732422, "loss": 0.4856, "nll_loss": 0.46243181824684143, "rewards/accuracies": 1.0, "rewards/chosen": -0.013113843277096748, "rewards/margins": 0.06857398897409439, "rewards/rejected": -0.08168783038854599, "step": 3320 }, { "epoch": 2.6409849086576647, "grad_norm": 29.527265548706055, "learning_rate": 8.6710996952412e-07, "log_odds_chosen": 2.508577823638916, "log_odds_ratio": -0.2033156156539917, "logits/chosen": 293.64349365234375, "logits/rejected": 280.0192565917969, "logps/chosen": -0.3613029718399048, "logps/rejected": -1.2332953214645386, "loss": 0.4505, "nll_loss": 0.4004967212677002, "rewards/accuracies": 1.0, "rewards/chosen": -0.01806515082716942, "rewards/margins": 0.04359962046146393, "rewards/rejected": -0.06166477128863335, "step": 3325 }, { "epoch": 2.6449563145353454, "grad_norm": 26.56429672241211, "learning_rate": 8.664587415167274e-07, "log_odds_chosen": 3.569314956665039, "log_odds_ratio": -0.0447075180709362, "logits/chosen": 221.4001922607422, "logits/rejected": 289.3610534667969, "logps/chosen": -0.20264658331871033, "logps/rejected": -2.0745139122009277, "loss": 0.4131, "nll_loss": 0.4826287627220154, "rewards/accuracies": 1.0, "rewards/chosen": -0.010132329538464546, "rewards/margins": 0.09359337389469147, "rewards/rejected": -0.10372570902109146, "step": 3330 }, { "epoch": 2.648927720413026, "grad_norm": 25.318214416503906, "learning_rate": 8.658089785895599e-07, "log_odds_chosen": 4.950355052947998, "log_odds_ratio": -0.013309493660926819, "logits/chosen": 208.6847686767578, "logits/rejected": 290.908935546875, "logps/chosen": -0.07694874703884125, "logps/rejected": -2.389655351638794, "loss": 0.3298, "nll_loss": 0.2016306221485138, "rewards/accuracies": 1.0, "rewards/chosen": -0.00384743744507432, "rewards/margins": 0.11563533544540405, "rewards/rejected": -0.11948277801275253, "step": 3335 }, { "epoch": 2.652899126290707, "grad_norm": 21.625627517700195, "learning_rate": 8.651606752574786e-07, "log_odds_chosen": 3.532393217086792, "log_odds_ratio": -0.07799427211284637, "logits/chosen": 251.471435546875, "logits/rejected": 363.47119140625, "logps/chosen": -0.3781413733959198, "logps/rejected": -2.1852574348449707, "loss": 0.3282, "nll_loss": 0.43493717908859253, "rewards/accuracies": 1.0, "rewards/chosen": -0.01890706829726696, "rewards/margins": 0.09035581350326538, "rewards/rejected": -0.10926288366317749, "step": 3340 }, { "epoch": 2.6568705321683876, "grad_norm": 22.178985595703125, "learning_rate": 8.645138260640511e-07, "log_odds_chosen": 3.403825283050537, "log_odds_ratio": -0.04612133651971817, "logits/chosen": 264.21868896484375, "logits/rejected": 315.3473205566406, "logps/chosen": -0.130589097738266, "logps/rejected": -1.5952059030532837, "loss": 0.3498, "nll_loss": 0.20455436408519745, "rewards/accuracies": 1.0, "rewards/chosen": -0.0065294550731778145, "rewards/margins": 0.07323084771633148, "rewards/rejected": -0.07976029813289642, "step": 3345 }, { "epoch": 2.6608419380460684, "grad_norm": 27.09998321533203, "learning_rate": 8.638684255813602e-07, "log_odds_chosen": 2.8917152881622314, "log_odds_ratio": -0.07270168513059616, "logits/chosen": 202.5989227294922, "logits/rejected": 326.9261474609375, "logps/chosen": -0.2160286009311676, "logps/rejected": -1.6441643238067627, "loss": 0.3844, "nll_loss": 0.40900883078575134, "rewards/accuracies": 1.0, "rewards/chosen": -0.01080142892897129, "rewards/margins": 0.0714067816734314, "rewards/rejected": -0.08220821619033813, "step": 3350 }, { "epoch": 2.664813343923749, "grad_norm": 28.016910552978516, "learning_rate": 8.63224468409811e-07, "log_odds_chosen": 3.194028377532959, "log_odds_ratio": -0.069419726729393, "logits/chosen": 226.66091918945312, "logits/rejected": 326.2010192871094, "logps/chosen": -0.4025501310825348, "logps/rejected": -2.2129743099212646, "loss": 0.4066, "nll_loss": 0.4211258292198181, "rewards/accuracies": 1.0, "rewards/chosen": -0.02012750506401062, "rewards/margins": 0.0905212014913559, "rewards/rejected": -0.11064871400594711, "step": 3355 }, { "epoch": 2.6687847498014294, "grad_norm": 39.505130767822266, "learning_rate": 8.625819491779427e-07, "log_odds_chosen": 3.5177810192108154, "log_odds_ratio": -0.1360502988100052, "logits/chosen": 232.5872039794922, "logits/rejected": 382.99786376953125, "logps/chosen": -0.1633174568414688, "logps/rejected": -1.9696972370147705, "loss": 0.3207, "nll_loss": 0.2783007025718689, "rewards/accuracies": 1.0, "rewards/chosen": -0.008165872655808926, "rewards/margins": 0.09031899273395538, "rewards/rejected": -0.09848486632108688, "step": 3360 }, { "epoch": 2.67275615567911, "grad_norm": 24.680538177490234, "learning_rate": 8.619408625422394e-07, "log_odds_chosen": 2.9842259883880615, "log_odds_ratio": -0.06695107370615005, "logits/chosen": 266.1699523925781, "logits/rejected": 277.45489501953125, "logps/chosen": -0.28020209074020386, "logps/rejected": -1.8149499893188477, "loss": 0.385, "nll_loss": 0.4652983248233795, "rewards/accuracies": 1.0, "rewards/chosen": -0.014010104350745678, "rewards/margins": 0.07673738896846771, "rewards/rejected": -0.09074750542640686, "step": 3365 }, { "epoch": 2.676727561556791, "grad_norm": 21.564693450927734, "learning_rate": 8.613012031869432e-07, "log_odds_chosen": 2.69940185546875, "log_odds_ratio": -0.09041761606931686, "logits/chosen": 407.0078125, "logits/rejected": 278.8470153808594, "logps/chosen": -0.24051721394062042, "logps/rejected": -1.4418036937713623, "loss": 0.3393, "nll_loss": 0.3935914933681488, "rewards/accuracies": 1.0, "rewards/chosen": -0.012025861069560051, "rewards/margins": 0.06006432697176933, "rewards/rejected": -0.07209019362926483, "step": 3370 }, { "epoch": 2.6806989674344717, "grad_norm": 16.88412857055664, "learning_rate": 8.606629658238705e-07, "log_odds_chosen": 4.196094512939453, "log_odds_ratio": -0.02909168228507042, "logits/chosen": 231.52145385742188, "logits/rejected": 231.4295196533203, "logps/chosen": -0.10496889054775238, "logps/rejected": -1.6880567073822021, "loss": 0.4033, "nll_loss": 0.3222340941429138, "rewards/accuracies": 1.0, "rewards/chosen": -0.005248443689197302, "rewards/margins": 0.07915439456701279, "rewards/rejected": -0.08440284430980682, "step": 3375 }, { "epoch": 2.6846703733121524, "grad_norm": 26.121601104736328, "learning_rate": 8.600261451922269e-07, "log_odds_chosen": 3.75126576423645, "log_odds_ratio": -0.03185001388192177, "logits/chosen": 326.3580017089844, "logits/rejected": 236.6237030029297, "logps/chosen": -0.1497809886932373, "logps/rejected": -1.8008592128753662, "loss": 0.3518, "nll_loss": 0.32695260643959045, "rewards/accuracies": 1.0, "rewards/chosen": -0.007489049341529608, "rewards/margins": 0.0825539156794548, "rewards/rejected": -0.09004296362400055, "step": 3380 }, { "epoch": 2.688641779189833, "grad_norm": 22.405277252197266, "learning_rate": 8.593907360584258e-07, "log_odds_chosen": 3.142768621444702, "log_odds_ratio": -0.06925017386674881, "logits/chosen": 221.24462890625, "logits/rejected": 374.22503662109375, "logps/chosen": -0.23636960983276367, "logps/rejected": -1.8491294384002686, "loss": 0.3725, "nll_loss": 0.35332077741622925, "rewards/accuracies": 1.0, "rewards/chosen": -0.011818479746580124, "rewards/margins": 0.08063800632953644, "rewards/rejected": -0.09245647490024567, "step": 3385 }, { "epoch": 2.692613185067514, "grad_norm": 22.6203556060791, "learning_rate": 8.587567332159079e-07, "log_odds_chosen": 2.694579839706421, "log_odds_ratio": -0.10035224258899689, "logits/chosen": 256.6727600097656, "logits/rejected": 332.54217529296875, "logps/chosen": -0.22367092967033386, "logps/rejected": -1.6074225902557373, "loss": 0.4156, "nll_loss": 0.32448482513427734, "rewards/accuracies": 1.0, "rewards/chosen": -0.011183546856045723, "rewards/margins": 0.06918759644031525, "rewards/rejected": -0.08037114143371582, "step": 3390 }, { "epoch": 2.6965845909451946, "grad_norm": 21.095897674560547, "learning_rate": 8.581241314849612e-07, "log_odds_chosen": 3.3230767250061035, "log_odds_ratio": -0.06266503781080246, "logits/chosen": 259.4347229003906, "logits/rejected": 244.94088745117188, "logps/chosen": -0.2486274689435959, "logps/rejected": -1.8099991083145142, "loss": 0.3862, "nll_loss": 0.3283666968345642, "rewards/accuracies": 1.0, "rewards/chosen": -0.012431373819708824, "rewards/margins": 0.0780685767531395, "rewards/rejected": -0.09049995988607407, "step": 3395 }, { "epoch": 2.7005559968228754, "grad_norm": 25.299463272094727, "learning_rate": 8.574929257125441e-07, "log_odds_chosen": 3.30029296875, "log_odds_ratio": -0.047333456575870514, "logits/chosen": 203.181396484375, "logits/rejected": 309.99542236328125, "logps/chosen": -0.24806909263134003, "logps/rejected": -2.0718834400177, "loss": 0.4356, "nll_loss": 0.47107014060020447, "rewards/accuracies": 1.0, "rewards/chosen": -0.012403455562889576, "rewards/margins": 0.09119071066379547, "rewards/rejected": -0.10359416902065277, "step": 3400 }, { "epoch": 2.704527402700556, "grad_norm": 40.7325553894043, "learning_rate": 8.568631107721093e-07, "log_odds_chosen": 3.272376298904419, "log_odds_ratio": -0.04923254996538162, "logits/chosen": 251.3883819580078, "logits/rejected": 296.41925048828125, "logps/chosen": -0.192131906747818, "logps/rejected": -1.8338134288787842, "loss": 0.3396, "nll_loss": 0.3637116551399231, "rewards/accuracies": 1.0, "rewards/chosen": -0.009606595151126385, "rewards/margins": 0.08208408206701279, "rewards/rejected": -0.09169068187475204, "step": 3405 }, { "epoch": 2.708498808578237, "grad_norm": 19.95270347595215, "learning_rate": 8.562346815634272e-07, "log_odds_chosen": 4.730654716491699, "log_odds_ratio": -0.016973715275526047, "logits/chosen": 375.0517578125, "logits/rejected": 268.80157470703125, "logps/chosen": -0.09887482225894928, "logps/rejected": -1.5165462493896484, "loss": 0.3366, "nll_loss": 0.20561964809894562, "rewards/accuracies": 1.0, "rewards/chosen": -0.004943741485476494, "rewards/margins": 0.07088357955217361, "rewards/rejected": -0.07582731544971466, "step": 3410 }, { "epoch": 2.7124702144559176, "grad_norm": 32.70396041870117, "learning_rate": 8.556076330124148e-07, "log_odds_chosen": 2.935753583908081, "log_odds_ratio": -0.1059001088142395, "logits/chosen": 334.5813293457031, "logits/rejected": 297.3235778808594, "logps/chosen": -0.29814326763153076, "logps/rejected": -1.8218357563018799, "loss": 0.3963, "nll_loss": 0.3989710807800293, "rewards/accuracies": 1.0, "rewards/chosen": -0.014907163567841053, "rewards/margins": 0.07618463039398193, "rewards/rejected": -0.09109178930521011, "step": 3415 }, { "epoch": 2.716441620333598, "grad_norm": 27.2939453125, "learning_rate": 8.549819600709619e-07, "log_odds_chosen": 5.619948863983154, "log_odds_ratio": -0.03720756620168686, "logits/chosen": 344.32708740234375, "logits/rejected": 293.09710693359375, "logps/chosen": -0.15366186201572418, "logps/rejected": -2.807699680328369, "loss": 0.3832, "nll_loss": 0.34514352679252625, "rewards/accuracies": 1.0, "rewards/chosen": -0.007683093193918467, "rewards/margins": 0.13270190358161926, "rewards/rejected": -0.1403849869966507, "step": 3420 }, { "epoch": 2.7204130262112787, "grad_norm": 24.816383361816406, "learning_rate": 8.54357657716761e-07, "log_odds_chosen": 4.2862443923950195, "log_odds_ratio": -0.022790148854255676, "logits/chosen": 264.8843078613281, "logits/rejected": 247.75222778320312, "logps/chosen": -0.11239345371723175, "logps/rejected": -1.8959920406341553, "loss": 0.3598, "nll_loss": 0.25543782114982605, "rewards/accuracies": 1.0, "rewards/chosen": -0.00561967259272933, "rewards/margins": 0.08917994052171707, "rewards/rejected": -0.09479960799217224, "step": 3425 }, { "epoch": 2.7243844320889594, "grad_norm": 28.45637321472168, "learning_rate": 8.537347209531384e-07, "log_odds_chosen": 3.2400569915771484, "log_odds_ratio": -0.19109003245830536, "logits/chosen": 314.67193603515625, "logits/rejected": 245.2732391357422, "logps/chosen": -0.24329812824726105, "logps/rejected": -1.4679007530212402, "loss": 0.3757, "nll_loss": 0.39857763051986694, "rewards/accuracies": 1.0, "rewards/chosen": -0.012164906598627567, "rewards/margins": 0.0612301342189312, "rewards/rejected": -0.07339503616094589, "step": 3430 }, { "epoch": 2.72835583796664, "grad_norm": 22.829252243041992, "learning_rate": 8.531131448088853e-07, "log_odds_chosen": 3.3454837799072266, "log_odds_ratio": -0.04038381576538086, "logits/chosen": 286.8428955078125, "logits/rejected": 271.490966796875, "logps/chosen": -0.15350893139839172, "logps/rejected": -1.5690906047821045, "loss": 0.3353, "nll_loss": 0.3089643120765686, "rewards/accuracies": 1.0, "rewards/chosen": -0.007675447501242161, "rewards/margins": 0.07077908515930176, "rewards/rejected": -0.07845453172922134, "step": 3435 }, { "epoch": 2.732327243844321, "grad_norm": 23.981935501098633, "learning_rate": 8.52492924338092e-07, "log_odds_chosen": 1.9324777126312256, "log_odds_ratio": -0.1563093215227127, "logits/chosen": 264.3291015625, "logits/rejected": 246.1039581298828, "logps/chosen": -0.3107035756111145, "logps/rejected": -1.1190191507339478, "loss": 0.3325, "nll_loss": 0.4039735198020935, "rewards/accuracies": 1.0, "rewards/chosen": -0.01553518045693636, "rewards/margins": 0.04041577875614166, "rewards/rejected": -0.05595095828175545, "step": 3440 }, { "epoch": 2.7362986497220017, "grad_norm": 27.215803146362305, "learning_rate": 8.51874054619982e-07, "log_odds_chosen": 3.2275519371032715, "log_odds_ratio": -0.06337851285934448, "logits/chosen": 307.99066162109375, "logits/rejected": 368.2579040527344, "logps/chosen": -0.22807280719280243, "logps/rejected": -1.8899672031402588, "loss": 0.3956, "nll_loss": 0.2683226466178894, "rewards/accuracies": 1.0, "rewards/chosen": -0.011403640732169151, "rewards/margins": 0.08309471607208252, "rewards/rejected": -0.09449835866689682, "step": 3445 }, { "epoch": 2.7402700555996824, "grad_norm": 25.120004653930664, "learning_rate": 8.512565307587487e-07, "log_odds_chosen": 3.7829699516296387, "log_odds_ratio": -0.03385692834854126, "logits/chosen": 252.9814453125, "logits/rejected": 311.3857727050781, "logps/chosen": -0.14769130945205688, "logps/rejected": -1.8254680633544922, "loss": 0.4128, "nll_loss": 0.2506926953792572, "rewards/accuracies": 1.0, "rewards/chosen": -0.0073845661245286465, "rewards/margins": 0.08388884365558624, "rewards/rejected": -0.09127341210842133, "step": 3450 }, { "epoch": 2.744241461477363, "grad_norm": 25.093229293823242, "learning_rate": 8.50640347883392e-07, "log_odds_chosen": 3.7419135570526123, "log_odds_ratio": -0.029192060232162476, "logits/chosen": 334.3880310058594, "logits/rejected": 249.8907470703125, "logps/chosen": -0.12614428997039795, "logps/rejected": -1.8909130096435547, "loss": 0.4699, "nll_loss": 0.21717166900634766, "rewards/accuracies": 1.0, "rewards/chosen": -0.0063072144985198975, "rewards/margins": 0.0882384404540062, "rewards/rejected": -0.0945456475019455, "step": 3455 }, { "epoch": 2.7482128673550434, "grad_norm": 35.0034065246582, "learning_rate": 8.500255011475575e-07, "log_odds_chosen": 3.9182846546173096, "log_odds_ratio": -0.06826486438512802, "logits/chosen": 224.9783935546875, "logits/rejected": 205.3113250732422, "logps/chosen": -0.19937647879123688, "logps/rejected": -1.5910238027572632, "loss": 0.4177, "nll_loss": 0.36896106600761414, "rewards/accuracies": 1.0, "rewards/chosen": -0.009968823753297329, "rewards/margins": 0.06958237290382385, "rewards/rejected": -0.07955120503902435, "step": 3460 }, { "epoch": 2.752184273232724, "grad_norm": 33.20505905151367, "learning_rate": 8.49411985729376e-07, "log_odds_chosen": 3.2729244232177734, "log_odds_ratio": -0.05948426574468613, "logits/chosen": 323.2681579589844, "logits/rejected": 243.99734497070312, "logps/chosen": -0.1548212319612503, "logps/rejected": -1.6072862148284912, "loss": 0.381, "nll_loss": 0.3288036584854126, "rewards/accuracies": 1.0, "rewards/chosen": -0.007741062436252832, "rewards/margins": 0.07262326031923294, "rewards/rejected": -0.08036432415246964, "step": 3465 }, { "epoch": 2.756155679110405, "grad_norm": 26.826465606689453, "learning_rate": 8.48799796831305e-07, "log_odds_chosen": 3.9374356269836426, "log_odds_ratio": -0.025482967495918274, "logits/chosen": 241.34744262695312, "logits/rejected": 329.5173034667969, "logps/chosen": -0.19136756658554077, "logps/rejected": -2.1954541206359863, "loss": 0.3625, "nll_loss": 0.3516727685928345, "rewards/accuracies": 1.0, "rewards/chosen": -0.009568377397954464, "rewards/margins": 0.10020432621240616, "rewards/rejected": -0.1097727045416832, "step": 3470 }, { "epoch": 2.7601270849880857, "grad_norm": 27.76506233215332, "learning_rate": 8.48188929679971e-07, "log_odds_chosen": 3.391524076461792, "log_odds_ratio": -0.0386800579726696, "logits/chosen": 231.2411651611328, "logits/rejected": 282.93878173828125, "logps/chosen": -0.1687900722026825, "logps/rejected": -1.7434227466583252, "loss": 0.2898, "nll_loss": 0.2864622175693512, "rewards/accuracies": 1.0, "rewards/chosen": -0.008439503610134125, "rewards/margins": 0.07873163372278214, "rewards/rejected": -0.08717113733291626, "step": 3475 }, { "epoch": 2.7640984908657664, "grad_norm": 36.33848190307617, "learning_rate": 8.475793795260132e-07, "log_odds_chosen": 4.21061897277832, "log_odds_ratio": -0.03166338428854942, "logits/chosen": 248.8314971923828, "logits/rejected": 235.77224731445312, "logps/chosen": -0.10397151857614517, "logps/rejected": -1.8724483251571655, "loss": 0.3554, "nll_loss": 0.31431207060813904, "rewards/accuracies": 1.0, "rewards/chosen": -0.005198576487600803, "rewards/margins": 0.08842384815216064, "rewards/rejected": -0.09362240880727768, "step": 3480 }, { "epoch": 2.768069896743447, "grad_norm": 17.6458683013916, "learning_rate": 8.469711416439277e-07, "log_odds_chosen": 3.060960292816162, "log_odds_ratio": -0.07192051410675049, "logits/chosen": 220.4897003173828, "logits/rejected": 304.0692138671875, "logps/chosen": -0.3180966079235077, "logps/rejected": -2.093109607696533, "loss": 0.3345, "nll_loss": 0.4048178791999817, "rewards/accuracies": 1.0, "rewards/chosen": -0.015904832631349564, "rewards/margins": 0.08875066787004471, "rewards/rejected": -0.10465548932552338, "step": 3485 }, { "epoch": 2.772041302621128, "grad_norm": 23.570837020874023, "learning_rate": 8.463642113319158e-07, "log_odds_chosen": 2.799910068511963, "log_odds_ratio": -0.1606951504945755, "logits/chosen": 236.31088256835938, "logits/rejected": 249.38705444335938, "logps/chosen": -0.29771658778190613, "logps/rejected": -1.322884202003479, "loss": 0.2954, "nll_loss": 0.43411684036254883, "rewards/accuracies": 1.0, "rewards/chosen": -0.01488583069294691, "rewards/margins": 0.051258385181427, "rewards/rejected": -0.06614421308040619, "step": 3490 }, { "epoch": 2.7760127084988087, "grad_norm": 44.051631927490234, "learning_rate": 8.457585839117283e-07, "log_odds_chosen": 3.4752700328826904, "log_odds_ratio": -0.044789545238018036, "logits/chosen": 331.0361328125, "logits/rejected": 264.93536376953125, "logps/chosen": -0.1473982334136963, "logps/rejected": -1.7014989852905273, "loss": 0.3962, "nll_loss": 0.4065122604370117, "rewards/accuracies": 1.0, "rewards/chosen": -0.007369911763817072, "rewards/margins": 0.07770504802465439, "rewards/rejected": -0.08507496118545532, "step": 3495 }, { "epoch": 2.7799841143764894, "grad_norm": 29.056276321411133, "learning_rate": 8.451542547285166e-07, "log_odds_chosen": 3.405332088470459, "log_odds_ratio": -0.03398241847753525, "logits/chosen": 229.6246795654297, "logits/rejected": 339.95166015625, "logps/chosen": -0.1531856805086136, "logps/rejected": -1.7593185901641846, "loss": 0.3732, "nll_loss": 0.3807409703731537, "rewards/accuracies": 1.0, "rewards/chosen": -0.007659283932298422, "rewards/margins": 0.08030664920806885, "rewards/rejected": -0.0879659354686737, "step": 3500 }, { "epoch": 2.78395552025417, "grad_norm": 22.696245193481445, "learning_rate": 8.44551219150681e-07, "log_odds_chosen": 4.299327373504639, "log_odds_ratio": -0.06721607595682144, "logits/chosen": 272.5831298828125, "logits/rejected": 238.8101806640625, "logps/chosen": -0.23352356255054474, "logps/rejected": -1.9828745126724243, "loss": 0.3538, "nll_loss": 0.5397769212722778, "rewards/accuracies": 1.0, "rewards/chosen": -0.011676179245114326, "rewards/margins": 0.08746754378080368, "rewards/rejected": -0.09914372861385345, "step": 3505 }, { "epoch": 2.787926926131851, "grad_norm": 26.001413345336914, "learning_rate": 8.439494725697223e-07, "log_odds_chosen": 2.3186655044555664, "log_odds_ratio": -0.09961884468793869, "logits/chosen": 275.5745544433594, "logits/rejected": 246.73629760742188, "logps/chosen": -0.29723626375198364, "logps/rejected": -1.390271544456482, "loss": 0.3147, "nll_loss": 0.42867952585220337, "rewards/accuracies": 1.0, "rewards/chosen": -0.014861812815070152, "rewards/margins": 0.05465176701545715, "rewards/rejected": -0.06951358169317245, "step": 3510 }, { "epoch": 2.7918983320095316, "grad_norm": 25.731021881103516, "learning_rate": 8.433490104000933e-07, "log_odds_chosen": 4.046853065490723, "log_odds_ratio": -0.03547334298491478, "logits/chosen": 367.0143127441406, "logits/rejected": 219.3634796142578, "logps/chosen": -0.12885086238384247, "logps/rejected": -1.706560492515564, "loss": 0.3309, "nll_loss": 0.27449122071266174, "rewards/accuracies": 1.0, "rewards/chosen": -0.006442543119192123, "rewards/margins": 0.07888549566268921, "rewards/rejected": -0.08532802760601044, "step": 3515 }, { "epoch": 2.795869737887212, "grad_norm": 28.774776458740234, "learning_rate": 8.427498280790526e-07, "log_odds_chosen": 4.661393642425537, "log_odds_ratio": -0.07130368053913116, "logits/chosen": 292.60260009765625, "logits/rejected": 327.83294677734375, "logps/chosen": -0.21304917335510254, "logps/rejected": -2.304790496826172, "loss": 0.4884, "nll_loss": 0.4281933307647705, "rewards/accuracies": 1.0, "rewards/chosen": -0.010652460157871246, "rewards/margins": 0.10458706319332123, "rewards/rejected": -0.11523952335119247, "step": 3520 }, { "epoch": 2.7998411437648927, "grad_norm": 20.014240264892578, "learning_rate": 8.421519210665191e-07, "log_odds_chosen": 3.561429262161255, "log_odds_ratio": -0.03733684495091438, "logits/chosen": 284.66497802734375, "logits/rejected": 329.2096252441406, "logps/chosen": -0.11633183807134628, "logps/rejected": -1.6266489028930664, "loss": 0.3561, "nll_loss": 0.2186901569366455, "rewards/accuracies": 1.0, "rewards/chosen": -0.005816592834889889, "rewards/margins": 0.07551585882902145, "rewards/rejected": -0.08133245259523392, "step": 3525 }, { "epoch": 2.8038125496425734, "grad_norm": 22.12482261657715, "learning_rate": 8.415552848449264e-07, "log_odds_chosen": 4.567162990570068, "log_odds_ratio": -0.07148279994726181, "logits/chosen": 311.3138732910156, "logits/rejected": 362.58966064453125, "logps/chosen": -0.219939187169075, "logps/rejected": -2.1984989643096924, "loss": 0.4238, "nll_loss": 0.2968602776527405, "rewards/accuracies": 1.0, "rewards/chosen": -0.010996958240866661, "rewards/margins": 0.09892800450325012, "rewards/rejected": -0.10992495715618134, "step": 3530 }, { "epoch": 2.807783955520254, "grad_norm": 16.48638916015625, "learning_rate": 8.409599149190806e-07, "log_odds_chosen": 3.0395667552948, "log_odds_ratio": -0.07454101741313934, "logits/chosen": 276.5237731933594, "logits/rejected": 244.59121704101562, "logps/chosen": -0.13108542561531067, "logps/rejected": -1.354529619216919, "loss": 0.2954, "nll_loss": 0.3918497562408447, "rewards/accuracies": 1.0, "rewards/chosen": -0.006554270628839731, "rewards/margins": 0.061172209680080414, "rewards/rejected": -0.06772647798061371, "step": 3535 }, { "epoch": 2.811755361397935, "grad_norm": 27.75493812561035, "learning_rate": 8.40365806816018e-07, "log_odds_chosen": 4.262506008148193, "log_odds_ratio": -0.06077839806675911, "logits/chosen": 212.52090454101562, "logits/rejected": 330.6539611816406, "logps/chosen": -0.19797523319721222, "logps/rejected": -2.2590105533599854, "loss": 0.3203, "nll_loss": 0.3225499987602234, "rewards/accuracies": 1.0, "rewards/chosen": -0.009898761287331581, "rewards/margins": 0.10305176675319672, "rewards/rejected": -0.11295051872730255, "step": 3540 }, { "epoch": 2.8157267672756157, "grad_norm": 25.588348388671875, "learning_rate": 8.397729560848629e-07, "log_odds_chosen": 3.5041167736053467, "log_odds_ratio": -0.03463669493794441, "logits/chosen": 288.7964782714844, "logits/rejected": 280.5426025390625, "logps/chosen": -0.15195515751838684, "logps/rejected": -1.7217051982879639, "loss": 0.525, "nll_loss": 0.6102060079574585, "rewards/accuracies": 1.0, "rewards/chosen": -0.0075977579690515995, "rewards/margins": 0.07848750799894333, "rewards/rejected": -0.08608527481555939, "step": 3545 }, { "epoch": 2.8196981731532964, "grad_norm": 25.35242462158203, "learning_rate": 8.391813582966891e-07, "log_odds_chosen": 3.6780853271484375, "log_odds_ratio": -0.06320033222436905, "logits/chosen": 300.8489990234375, "logits/rejected": 237.04409790039062, "logps/chosen": -0.2152899205684662, "logps/rejected": -1.7724052667617798, "loss": 0.43, "nll_loss": 0.3502303659915924, "rewards/accuracies": 1.0, "rewards/chosen": -0.01076449640095234, "rewards/margins": 0.07785578072071075, "rewards/rejected": -0.08862027525901794, "step": 3550 }, { "epoch": 2.8236695790309767, "grad_norm": 25.48731803894043, "learning_rate": 8.385910090443796e-07, "log_odds_chosen": 3.4835567474365234, "log_odds_ratio": -0.041546985507011414, "logits/chosen": 313.85137939453125, "logits/rejected": 290.18048095703125, "logps/chosen": -0.1454685628414154, "logps/rejected": -1.5348907709121704, "loss": 0.3626, "nll_loss": 0.3012126684188843, "rewards/accuracies": 1.0, "rewards/chosen": -0.007273429073393345, "rewards/margins": 0.06947110593318939, "rewards/rejected": -0.07674454152584076, "step": 3555 }, { "epoch": 2.8276409849086575, "grad_norm": 25.764835357666016, "learning_rate": 8.380019039424888e-07, "log_odds_chosen": 4.237963676452637, "log_odds_ratio": -0.032441817224025726, "logits/chosen": 271.6688232421875, "logits/rejected": 337.88592529296875, "logps/chosen": -0.08865581452846527, "logps/rejected": -1.7506749629974365, "loss": 0.4494, "nll_loss": 0.5733296275138855, "rewards/accuracies": 1.0, "rewards/chosen": -0.0044327909126877785, "rewards/margins": 0.08310095965862274, "rewards/rejected": -0.08753375709056854, "step": 3560 }, { "epoch": 2.831612390786338, "grad_norm": 22.246402740478516, "learning_rate": 8.374140386271069e-07, "log_odds_chosen": 3.5762290954589844, "log_odds_ratio": -0.06507720053195953, "logits/chosen": 221.15878295898438, "logits/rejected": 425.18212890625, "logps/chosen": -0.1669560670852661, "logps/rejected": -1.895721197128296, "loss": 0.2844, "nll_loss": 0.255990207195282, "rewards/accuracies": 1.0, "rewards/chosen": -0.008347803726792336, "rewards/margins": 0.08643826097249985, "rewards/rejected": -0.09478606283664703, "step": 3565 }, { "epoch": 2.835583796664019, "grad_norm": 19.88669776916504, "learning_rate": 8.368274087557231e-07, "log_odds_chosen": 3.337949275970459, "log_odds_ratio": -0.0535353422164917, "logits/chosen": 209.78427124023438, "logits/rejected": 359.92724609375, "logps/chosen": -0.26661959290504456, "logps/rejected": -2.1073858737945557, "loss": 0.3546, "nll_loss": 0.366042822599411, "rewards/accuracies": 1.0, "rewards/chosen": -0.013330979272723198, "rewards/margins": 0.09203831106424332, "rewards/rejected": -0.10536929219961166, "step": 3570 }, { "epoch": 2.8395552025416997, "grad_norm": 20.765398025512695, "learning_rate": 8.362420100070909e-07, "log_odds_chosen": 2.78408145904541, "log_odds_ratio": -0.11600840091705322, "logits/chosen": 237.7899932861328, "logits/rejected": 289.7607116699219, "logps/chosen": -0.27459466457366943, "logps/rejected": -1.748347520828247, "loss": 0.3883, "nll_loss": 0.4142521917819977, "rewards/accuracies": 1.0, "rewards/chosen": -0.013729733414947987, "rewards/margins": 0.07368763536214828, "rewards/rejected": -0.087417371571064, "step": 3575 }, { "epoch": 2.8435266084193804, "grad_norm": 28.12468910217285, "learning_rate": 8.356578380810946e-07, "log_odds_chosen": 3.1770873069763184, "log_odds_ratio": -0.051418136805295944, "logits/chosen": 261.5604553222656, "logits/rejected": 263.3280944824219, "logps/chosen": -0.1799846589565277, "logps/rejected": -1.5317844152450562, "loss": 0.4097, "nll_loss": 0.36274874210357666, "rewards/accuracies": 1.0, "rewards/chosen": -0.008999234065413475, "rewards/margins": 0.06758998334407806, "rewards/rejected": -0.07658922672271729, "step": 3580 }, { "epoch": 2.847498014297061, "grad_norm": 20.850906372070312, "learning_rate": 8.350748886986167e-07, "log_odds_chosen": 3.5928845405578613, "log_odds_ratio": -0.07899219542741776, "logits/chosen": 258.620849609375, "logits/rejected": 246.1554412841797, "logps/chosen": -0.28887057304382324, "logps/rejected": -2.014991283416748, "loss": 0.4146, "nll_loss": 0.5440294146537781, "rewards/accuracies": 1.0, "rewards/chosen": -0.014443526975810528, "rewards/margins": 0.08630602061748505, "rewards/rejected": -0.10074955224990845, "step": 3585 }, { "epoch": 2.851469420174742, "grad_norm": 32.508541107177734, "learning_rate": 8.344931576014064e-07, "log_odds_chosen": 4.088571548461914, "log_odds_ratio": -0.026189763098955154, "logits/chosen": 309.7861022949219, "logits/rejected": 281.23187255859375, "logps/chosen": -0.11156181246042252, "logps/rejected": -2.1266567707061768, "loss": 0.3974, "nll_loss": 0.3166486620903015, "rewards/accuracies": 1.0, "rewards/chosen": -0.005578090436756611, "rewards/margins": 0.1007547378540039, "rewards/rejected": -0.10633282363414764, "step": 3590 }, { "epoch": 2.8554408260524227, "grad_norm": 21.285322189331055, "learning_rate": 8.339126405519482e-07, "log_odds_chosen": 2.9751880168914795, "log_odds_ratio": -0.05453987047076225, "logits/chosen": 395.43243408203125, "logits/rejected": 205.71664428710938, "logps/chosen": -0.18474337458610535, "logps/rejected": -1.4767392873764038, "loss": 0.3599, "nll_loss": 0.25680750608444214, "rewards/accuracies": 1.0, "rewards/chosen": -0.009237168356776237, "rewards/margins": 0.06459980458021164, "rewards/rejected": -0.07383696734905243, "step": 3595 }, { "epoch": 2.8594122319301034, "grad_norm": 20.413772583007812, "learning_rate": 8.333333333333333e-07, "log_odds_chosen": 3.2024922370910645, "log_odds_ratio": -0.07393097877502441, "logits/chosen": 260.7085266113281, "logits/rejected": 306.81732177734375, "logps/chosen": -0.20199549198150635, "logps/rejected": -1.9069011211395264, "loss": 0.3761, "nll_loss": 0.39756855368614197, "rewards/accuracies": 1.0, "rewards/chosen": -0.010099775157868862, "rewards/margins": 0.085245281457901, "rewards/rejected": -0.09534506499767303, "step": 3600 }, { "epoch": 2.863383637807784, "grad_norm": 24.55630874633789, "learning_rate": 8.327552317491304e-07, "log_odds_chosen": 3.7288384437561035, "log_odds_ratio": -0.03377489000558853, "logits/chosen": 322.8309631347656, "logits/rejected": 250.8905792236328, "logps/chosen": -0.11226388067007065, "logps/rejected": -1.4427611827850342, "loss": 0.4202, "nll_loss": 0.43689388036727905, "rewards/accuracies": 1.0, "rewards/chosen": -0.0056131938472390175, "rewards/margins": 0.06652487069368362, "rewards/rejected": -0.07213805615901947, "step": 3605 }, { "epoch": 2.867355043685465, "grad_norm": 48.43893051147461, "learning_rate": 8.321783316232578e-07, "log_odds_chosen": 2.6593003273010254, "log_odds_ratio": -0.08480212092399597, "logits/chosen": 288.72784423828125, "logits/rejected": 249.3139190673828, "logps/chosen": -0.23106858134269714, "logps/rejected": -1.5768505334854126, "loss": 0.4155, "nll_loss": 0.35096269845962524, "rewards/accuracies": 1.0, "rewards/chosen": -0.011553429998457432, "rewards/margins": 0.06728909909725189, "rewards/rejected": -0.07884253561496735, "step": 3610 }, { "epoch": 2.871326449563145, "grad_norm": 15.93793773651123, "learning_rate": 8.31602628799857e-07, "log_odds_chosen": 4.548216819763184, "log_odds_ratio": -0.022985955700278282, "logits/chosen": 259.6219482421875, "logits/rejected": 291.5054931640625, "logps/chosen": -0.0862610787153244, "logps/rejected": -1.969438910484314, "loss": 0.3525, "nll_loss": 0.5677148103713989, "rewards/accuracies": 1.0, "rewards/chosen": -0.0043130540288984776, "rewards/margins": 0.09415888041257858, "rewards/rejected": -0.09847193956375122, "step": 3615 }, { "epoch": 2.875297855440826, "grad_norm": 21.008142471313477, "learning_rate": 8.310281191431671e-07, "log_odds_chosen": 3.653172254562378, "log_odds_ratio": -0.05353150516748428, "logits/chosen": 392.10675048828125, "logits/rejected": 335.0233459472656, "logps/chosen": -0.22985415160655975, "logps/rejected": -1.759158730506897, "loss": 0.3337, "nll_loss": 0.3584360182285309, "rewards/accuracies": 1.0, "rewards/chosen": -0.011492708697915077, "rewards/margins": 0.07646523416042328, "rewards/rejected": -0.08795793354511261, "step": 3620 }, { "epoch": 2.8792692613185067, "grad_norm": 22.55990219116211, "learning_rate": 8.304547985373998e-07, "log_odds_chosen": 3.8257369995117188, "log_odds_ratio": -0.03157994896173477, "logits/chosen": 312.32489013671875, "logits/rejected": 237.1319122314453, "logps/chosen": -0.1275133341550827, "logps/rejected": -1.4732251167297363, "loss": 0.37, "nll_loss": 0.28386086225509644, "rewards/accuracies": 1.0, "rewards/chosen": -0.00637566763907671, "rewards/margins": 0.06728558987379074, "rewards/rejected": -0.07366125285625458, "step": 3625 }, { "epoch": 2.8832406671961874, "grad_norm": 25.964088439941406, "learning_rate": 8.298826628866154e-07, "log_odds_chosen": 3.358013868331909, "log_odds_ratio": -0.06236337497830391, "logits/chosen": 359.78125, "logits/rejected": 213.65756225585938, "logps/chosen": -0.13523688912391663, "logps/rejected": -1.4496452808380127, "loss": 0.3508, "nll_loss": 0.3368256986141205, "rewards/accuracies": 1.0, "rewards/chosen": -0.006761844269931316, "rewards/margins": 0.06572041660547256, "rewards/rejected": -0.07248225808143616, "step": 3630 }, { "epoch": 2.887212073073868, "grad_norm": 21.916109085083008, "learning_rate": 8.293117081146003e-07, "log_odds_chosen": 2.733748435974121, "log_odds_ratio": -0.17232218384742737, "logits/chosen": 307.90667724609375, "logits/rejected": 298.56878662109375, "logps/chosen": -0.40919560194015503, "logps/rejected": -2.092665195465088, "loss": 0.3532, "nll_loss": 0.5489149689674377, "rewards/accuracies": 1.0, "rewards/chosen": -0.02045978233218193, "rewards/margins": 0.08417348563671112, "rewards/rejected": -0.10463327169418335, "step": 3635 }, { "epoch": 2.891183478951549, "grad_norm": 27.160499572753906, "learning_rate": 8.287419301647449e-07, "log_odds_chosen": 3.6234652996063232, "log_odds_ratio": -0.047272827476263046, "logits/chosen": 290.9279479980469, "logits/rejected": 207.3817901611328, "logps/chosen": -0.1130966767668724, "logps/rejected": -1.4548262357711792, "loss": 0.3461, "nll_loss": 0.24585804343223572, "rewards/accuracies": 1.0, "rewards/chosen": -0.005654833745211363, "rewards/margins": 0.06708647310733795, "rewards/rejected": -0.0727413147687912, "step": 3640 }, { "epoch": 2.8951548848292297, "grad_norm": 21.572660446166992, "learning_rate": 8.281733249999222e-07, "log_odds_chosen": 3.359938383102417, "log_odds_ratio": -0.0552956759929657, "logits/chosen": 273.9320068359375, "logits/rejected": 251.6232147216797, "logps/chosen": -0.14354383945465088, "logps/rejected": -1.6231215000152588, "loss": 0.3165, "nll_loss": 0.369306743144989, "rewards/accuracies": 1.0, "rewards/chosen": -0.007177191786468029, "rewards/margins": 0.07397888600826263, "rewards/rejected": -0.08115608245134354, "step": 3645 }, { "epoch": 2.89912629070691, "grad_norm": 23.444482803344727, "learning_rate": 8.27605888602368e-07, "log_odds_chosen": 3.4784095287323, "log_odds_ratio": -0.05267126113176346, "logits/chosen": 394.29229736328125, "logits/rejected": 270.6805725097656, "logps/chosen": -0.1418350636959076, "logps/rejected": -1.267978549003601, "loss": 0.3543, "nll_loss": 0.3297954201698303, "rewards/accuracies": 1.0, "rewards/chosen": -0.00709175318479538, "rewards/margins": 0.056307174265384674, "rewards/rejected": -0.06339892745018005, "step": 3650 }, { "epoch": 2.9030976965845907, "grad_norm": 25.005584716796875, "learning_rate": 8.270396169735619e-07, "log_odds_chosen": 2.632509708404541, "log_odds_ratio": -0.09709839522838593, "logits/chosen": 368.68621826171875, "logits/rejected": 293.9627380371094, "logps/chosen": -0.21916499733924866, "logps/rejected": -1.4162951707839966, "loss": 0.3263, "nll_loss": 0.35514330863952637, "rewards/accuracies": 1.0, "rewards/chosen": -0.010958249680697918, "rewards/margins": 0.059856511652469635, "rewards/rejected": -0.07081475853919983, "step": 3655 }, { "epoch": 2.9070691024622715, "grad_norm": 20.11078643798828, "learning_rate": 8.264745061341079e-07, "log_odds_chosen": 2.6697070598602295, "log_odds_ratio": -0.08199223130941391, "logits/chosen": 218.2410430908203, "logits/rejected": 342.093505859375, "logps/chosen": -0.21852803230285645, "logps/rejected": -1.4549704790115356, "loss": 0.3297, "nll_loss": 0.33630573749542236, "rewards/accuracies": 1.0, "rewards/chosen": -0.010926402173936367, "rewards/margins": 0.06182212382555008, "rewards/rejected": -0.07274852693080902, "step": 3660 }, { "epoch": 2.911040508339952, "grad_norm": 26.86299705505371, "learning_rate": 8.259105521236187e-07, "log_odds_chosen": 3.63004732131958, "log_odds_ratio": -0.05410841107368469, "logits/chosen": 304.13555908203125, "logits/rejected": 246.46408081054688, "logps/chosen": -0.12242048978805542, "logps/rejected": -1.770677924156189, "loss": 0.3446, "nll_loss": 0.3197631537914276, "rewards/accuracies": 1.0, "rewards/chosen": -0.006121024489402771, "rewards/margins": 0.08241288363933563, "rewards/rejected": -0.0885339081287384, "step": 3665 }, { "epoch": 2.915011914217633, "grad_norm": 26.29234504699707, "learning_rate": 8.253477510005973e-07, "log_odds_chosen": 3.3279595375061035, "log_odds_ratio": -0.08358468860387802, "logits/chosen": 281.2162170410156, "logits/rejected": 326.6824035644531, "logps/chosen": -0.15816627442836761, "logps/rejected": -1.7283798456192017, "loss": 0.4041, "nll_loss": 0.24479857087135315, "rewards/accuracies": 1.0, "rewards/chosen": -0.00790831446647644, "rewards/margins": 0.07851067185401917, "rewards/rejected": -0.0864189937710762, "step": 3670 }, { "epoch": 2.9189833200953137, "grad_norm": 25.961944580078125, "learning_rate": 8.247860988423226e-07, "log_odds_chosen": 3.988990068435669, "log_odds_ratio": -0.034325532615184784, "logits/chosen": 266.3609924316406, "logits/rejected": 294.2245788574219, "logps/chosen": -0.21574988961219788, "logps/rejected": -2.2639670372009277, "loss": 0.2977, "nll_loss": 0.30673372745513916, "rewards/accuracies": 1.0, "rewards/chosen": -0.010787495411932468, "rewards/margins": 0.10241083800792694, "rewards/rejected": -0.11319833993911743, "step": 3675 }, { "epoch": 2.9229547259729944, "grad_norm": 29.21520233154297, "learning_rate": 8.24225591744734e-07, "log_odds_chosen": 3.7201995849609375, "log_odds_ratio": -0.03867778182029724, "logits/chosen": 186.59046936035156, "logits/rejected": 431.41693115234375, "logps/chosen": -0.11616279184818268, "logps/rejected": -1.8416305780410767, "loss": 0.3822, "nll_loss": 0.302185982465744, "rewards/accuracies": 1.0, "rewards/chosen": -0.005808139685541391, "rewards/margins": 0.08627338707447052, "rewards/rejected": -0.09208153188228607, "step": 3680 }, { "epoch": 2.926926131850675, "grad_norm": 18.776615142822266, "learning_rate": 8.23666225822317e-07, "log_odds_chosen": 3.707352876663208, "log_odds_ratio": -0.0334990993142128, "logits/chosen": 292.5160827636719, "logits/rejected": 330.042236328125, "logps/chosen": -0.12229110300540924, "logps/rejected": -1.8790652751922607, "loss": 0.3169, "nll_loss": 0.3112315535545349, "rewards/accuracies": 1.0, "rewards/chosen": -0.006114555522799492, "rewards/margins": 0.08783871680498123, "rewards/rejected": -0.09395327419042587, "step": 3685 }, { "epoch": 2.930897537728356, "grad_norm": 46.925052642822266, "learning_rate": 8.231079972079914e-07, "log_odds_chosen": 3.986647367477417, "log_odds_ratio": -0.13695932924747467, "logits/chosen": 260.3636779785156, "logits/rejected": 258.30419921875, "logps/chosen": -0.30076563358306885, "logps/rejected": -1.8926414251327515, "loss": 0.432, "nll_loss": 0.4053588807582855, "rewards/accuracies": 1.0, "rewards/chosen": -0.015038281679153442, "rewards/margins": 0.07959380000829697, "rewards/rejected": -0.09463207423686981, "step": 3690 }, { "epoch": 2.9348689436060367, "grad_norm": 25.633981704711914, "learning_rate": 8.225509020529979e-07, "log_odds_chosen": 3.198657512664795, "log_odds_ratio": -0.06324279308319092, "logits/chosen": 225.9180908203125, "logits/rejected": 322.01019287109375, "logps/chosen": -0.17518872022628784, "logps/rejected": -1.7073516845703125, "loss": 0.3587, "nll_loss": 0.3306831121444702, "rewards/accuracies": 1.0, "rewards/chosen": -0.008759436197578907, "rewards/margins": 0.07660814374685287, "rewards/rejected": -0.0853675827383995, "step": 3695 }, { "epoch": 2.9388403494837174, "grad_norm": 26.146686553955078, "learning_rate": 8.219949365267865e-07, "log_odds_chosen": 2.8343605995178223, "log_odds_ratio": -0.09005337208509445, "logits/chosen": 366.45263671875, "logits/rejected": 246.6904754638672, "logps/chosen": -0.2314903736114502, "logps/rejected": -1.2461912631988525, "loss": 0.4265, "nll_loss": 0.3210602402687073, "rewards/accuracies": 1.0, "rewards/chosen": -0.011574518866837025, "rewards/margins": 0.05073503777384758, "rewards/rejected": -0.06230955570936203, "step": 3700 }, { "epoch": 2.942811755361398, "grad_norm": 24.32229232788086, "learning_rate": 8.214400968169071e-07, "log_odds_chosen": 4.051434516906738, "log_odds_ratio": -0.035749178379774094, "logits/chosen": 408.79180908203125, "logits/rejected": 208.0432586669922, "logps/chosen": -0.12245174497365952, "logps/rejected": -1.649357557296753, "loss": 0.3691, "nll_loss": 0.27775415778160095, "rewards/accuracies": 1.0, "rewards/chosen": -0.0061225867830216885, "rewards/margins": 0.076345294713974, "rewards/rejected": -0.08246787637472153, "step": 3705 }, { "epoch": 2.946783161239079, "grad_norm": 20.886632919311523, "learning_rate": 8.208863791288982e-07, "log_odds_chosen": 1.9202591180801392, "log_odds_ratio": -0.16510021686553955, "logits/chosen": 357.2604064941406, "logits/rejected": 294.9902038574219, "logps/chosen": -0.2927519381046295, "logps/rejected": -1.1146470308303833, "loss": 0.3704, "nll_loss": 0.3985922336578369, "rewards/accuracies": 1.0, "rewards/chosen": -0.014637596905231476, "rewards/margins": 0.04109475016593933, "rewards/rejected": -0.05573234707117081, "step": 3710 }, { "epoch": 2.950754567116759, "grad_norm": 23.93471336364746, "learning_rate": 8.203337796861792e-07, "log_odds_chosen": 3.167175769805908, "log_odds_ratio": -0.11499425023794174, "logits/chosen": 268.34649658203125, "logits/rejected": 203.5771484375, "logps/chosen": -0.3126305639743805, "logps/rejected": -1.8504225015640259, "loss": 0.3641, "nll_loss": 0.4621480405330658, "rewards/accuracies": 1.0, "rewards/chosen": -0.015631528571248055, "rewards/margins": 0.07688958942890167, "rewards/rejected": -0.09252111613750458, "step": 3715 }, { "epoch": 2.95472597299444, "grad_norm": 16.99188804626465, "learning_rate": 8.197822947299412e-07, "log_odds_chosen": 5.456472873687744, "log_odds_ratio": -0.005662807263433933, "logits/chosen": 311.07086181640625, "logits/rejected": 217.932373046875, "logps/chosen": -0.11500344425439835, "logps/rejected": -2.622190237045288, "loss": 0.3117, "nll_loss": 0.22724008560180664, "rewards/accuracies": 1.0, "rewards/chosen": -0.005750172771513462, "rewards/margins": 0.12535934150218964, "rewards/rejected": -0.13110950589179993, "step": 3720 }, { "epoch": 2.9586973788721207, "grad_norm": 28.691146850585938, "learning_rate": 8.192319205190406e-07, "log_odds_chosen": 4.087296485900879, "log_odds_ratio": -0.0780719742178917, "logits/chosen": 328.07684326171875, "logits/rejected": 221.5873565673828, "logps/chosen": -0.10672744363546371, "logps/rejected": -1.4921457767486572, "loss": 0.418, "nll_loss": 0.3577578663825989, "rewards/accuracies": 1.0, "rewards/chosen": -0.005336372647434473, "rewards/margins": 0.06927091628313065, "rewards/rejected": -0.07460729032754898, "step": 3725 }, { "epoch": 2.9626687847498014, "grad_norm": 16.78936004638672, "learning_rate": 8.186826533298912e-07, "log_odds_chosen": 3.149646759033203, "log_odds_ratio": -0.09916869550943375, "logits/chosen": 464.217041015625, "logits/rejected": 231.6147918701172, "logps/chosen": -0.2227720320224762, "logps/rejected": -1.3256566524505615, "loss": 0.3351, "nll_loss": 0.4213685989379883, "rewards/accuracies": 1.0, "rewards/chosen": -0.011138602159917355, "rewards/margins": 0.055144231766462326, "rewards/rejected": -0.06628282368183136, "step": 3730 }, { "epoch": 2.966640190627482, "grad_norm": 21.047441482543945, "learning_rate": 8.181344894563601e-07, "log_odds_chosen": 3.3799023628234863, "log_odds_ratio": -0.04747181385755539, "logits/chosen": 225.9113006591797, "logits/rejected": 278.0364990234375, "logps/chosen": -0.14476028084754944, "logps/rejected": -1.603838562965393, "loss": 0.3064, "nll_loss": 0.4089042544364929, "rewards/accuracies": 1.0, "rewards/chosen": -0.007238014135509729, "rewards/margins": 0.07295391708612442, "rewards/rejected": -0.08019192516803741, "step": 3735 }, { "epoch": 2.970611596505163, "grad_norm": 26.87488555908203, "learning_rate": 8.175874252096609e-07, "log_odds_chosen": 4.79461669921875, "log_odds_ratio": -0.02344256453216076, "logits/chosen": 224.568115234375, "logits/rejected": 237.93795776367188, "logps/chosen": -0.08755536377429962, "logps/rejected": -1.7086296081542969, "loss": 0.3787, "nll_loss": 0.4090026915073395, "rewards/accuracies": 1.0, "rewards/chosen": -0.004377768374979496, "rewards/margins": 0.0810537114739418, "rewards/rejected": -0.08543147891759872, "step": 3740 }, { "epoch": 2.9745830023828437, "grad_norm": 30.589311599731445, "learning_rate": 8.170414569182505e-07, "log_odds_chosen": 3.1199939250946045, "log_odds_ratio": -0.04739534854888916, "logits/chosen": 223.1153106689453, "logits/rejected": 348.6710205078125, "logps/chosen": -0.23181037604808807, "logps/rejected": -1.9143383502960205, "loss": 0.4121, "nll_loss": 0.3163473606109619, "rewards/accuracies": 1.0, "rewards/chosen": -0.011590519919991493, "rewards/margins": 0.08412639796733856, "rewards/rejected": -0.0957169160246849, "step": 3745 }, { "epoch": 2.978554408260524, "grad_norm": 29.43885612487793, "learning_rate": 8.164965809277262e-07, "log_odds_chosen": 2.644817352294922, "log_odds_ratio": -0.09716804325580597, "logits/chosen": 293.18695068359375, "logits/rejected": 317.6696472167969, "logps/chosen": -0.25146573781967163, "logps/rejected": -1.4811335802078247, "loss": 0.3221, "nll_loss": 0.3455609679222107, "rewards/accuracies": 1.0, "rewards/chosen": -0.012573286890983582, "rewards/margins": 0.06148339435458183, "rewards/rejected": -0.07405668497085571, "step": 3750 }, { "epoch": 2.9825258141382047, "grad_norm": 16.82297706604004, "learning_rate": 8.159527936007208e-07, "log_odds_chosen": 3.3892300128936768, "log_odds_ratio": -0.04550132527947426, "logits/chosen": 257.3266296386719, "logits/rejected": 390.421875, "logps/chosen": -0.21004052460193634, "logps/rejected": -1.9455446004867554, "loss": 0.4137, "nll_loss": 0.3465914726257324, "rewards/accuracies": 1.0, "rewards/chosen": -0.010502026416361332, "rewards/margins": 0.08677520602941513, "rewards/rejected": -0.09727723896503448, "step": 3755 }, { "epoch": 2.9864972200158855, "grad_norm": 22.206357955932617, "learning_rate": 8.154100913168028e-07, "log_odds_chosen": 3.4398300647735596, "log_odds_ratio": -0.03233512490987778, "logits/chosen": 242.57949829101562, "logits/rejected": 351.6434020996094, "logps/chosen": -0.1868184357881546, "logps/rejected": -1.9083659648895264, "loss": 0.4083, "nll_loss": 0.27293431758880615, "rewards/accuracies": 1.0, "rewards/chosen": -0.0093409214168787, "rewards/margins": 0.08607737720012665, "rewards/rejected": -0.0954183042049408, "step": 3760 }, { "epoch": 2.990468625893566, "grad_norm": 30.97701644897461, "learning_rate": 8.148684704723743e-07, "log_odds_chosen": 2.4790635108947754, "log_odds_ratio": -0.24006590247154236, "logits/chosen": 268.708740234375, "logits/rejected": 207.7413330078125, "logps/chosen": -0.33791905641555786, "logps/rejected": -1.7299648523330688, "loss": 0.4932, "nll_loss": 0.571212887763977, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.016895953565835953, "rewards/margins": 0.06960227340459824, "rewards/rejected": -0.08649824559688568, "step": 3765 }, { "epoch": 2.994440031771247, "grad_norm": 18.902019500732422, "learning_rate": 8.143279274805705e-07, "log_odds_chosen": 4.727214813232422, "log_odds_ratio": -0.02812029980123043, "logits/chosen": 257.0202941894531, "logits/rejected": 227.53219604492188, "logps/chosen": -0.1045892983675003, "logps/rejected": -1.8573554754257202, "loss": 0.3164, "nll_loss": 0.2800999581813812, "rewards/accuracies": 1.0, "rewards/chosen": -0.0052294647321105, "rewards/margins": 0.08763831108808517, "rewards/rejected": -0.09286777675151825, "step": 3770 }, { "epoch": 2.9984114376489277, "grad_norm": 20.72915267944336, "learning_rate": 8.137884587711594e-07, "log_odds_chosen": 3.518254041671753, "log_odds_ratio": -0.03578583896160126, "logits/chosen": 275.1228942871094, "logits/rejected": 329.595458984375, "logps/chosen": -0.20344781875610352, "logps/rejected": -2.006162166595459, "loss": 0.348, "nll_loss": 0.2927265763282776, "rewards/accuracies": 1.0, "rewards/chosen": -0.010172391310334206, "rewards/margins": 0.09013573080301285, "rewards/rejected": -0.1003081351518631, "step": 3775 }, { "epoch": 3.0, "eval_log_odds_chosen": 0.3720725178718567, "eval_log_odds_ratio": -0.699243426322937, "eval_logits/chosen": 295.62017822265625, "eval_logits/rejected": 266.0260314941406, "eval_logps/chosen": -1.3004335165023804, "eval_logps/rejected": -1.5281699895858765, "eval_loss": 1.7558966875076294, "eval_nll_loss": 1.6941194534301758, "eval_rewards/accuracies": 0.597122311592102, "eval_rewards/chosen": -0.06502167135477066, "eval_rewards/margins": 0.011386833153665066, "eval_rewards/rejected": -0.0764085054397583, "eval_runtime": 91.0513, "eval_samples_per_second": 6.073, "eval_steps_per_second": 1.527, "step": 3777 }, { "epoch": 3.0, "step": 3777, "total_flos": 0.0, "train_loss": 0.968865410152301, "train_runtime": 16784.8411, "train_samples_per_second": 0.9, "train_steps_per_second": 0.225 } ], "logging_steps": 5, "max_steps": 3777, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }