{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 504, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02976190476190476, "grad_norm": 1776.2471923828125, "learning_rate": 2.5000000000000004e-07, "log_odds_chosen": -0.3503778874874115, "log_odds_ratio": -1.0639952421188354, "logits/chosen": 125.49534606933594, "logits/rejected": 180.3563232421875, "logps/chosen": -15.494331359863281, "logps/rejected": -15.143954277038574, "loss": 14.9629, "nll_loss": 15.042287826538086, "rewards/accuracies": 0.25, "rewards/chosen": -0.7747165560722351, "rewards/margins": -0.01751876249909401, "rewards/rejected": -0.7571978569030762, "step": 5 }, { "epoch": 0.05952380952380952, "grad_norm": 1193.9168701171875, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": 0.06253216415643692, "log_odds_ratio": -0.8492839932441711, "logits/chosen": 210.31631469726562, "logits/rejected": 245.1850128173828, "logps/chosen": -12.538459777832031, "logps/rejected": -12.600992202758789, "loss": 12.6111, "nll_loss": 12.388693809509277, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.6269229650497437, "rewards/margins": 0.0031266347505152225, "rewards/rejected": -0.6300495862960815, "step": 10 }, { "epoch": 0.08928571428571429, "grad_norm": 723.246826171875, "learning_rate": 7.5e-07, "log_odds_chosen": -0.26854413747787476, "log_odds_ratio": -0.9210551977157593, "logits/chosen": 300.58953857421875, "logits/rejected": 289.635009765625, "logps/chosen": -8.324029922485352, "logps/rejected": -8.055707931518555, "loss": 8.2823, "nll_loss": 8.378087997436523, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.41620150208473206, "rewards/margins": -0.013416108675301075, "rewards/rejected": -0.40278539061546326, "step": 15 }, { "epoch": 0.11904761904761904, "grad_norm": 216.30667114257812, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": -0.09513016790151596, "log_odds_ratio": -0.9796999096870422, "logits/chosen": 255.70156860351562, "logits/rejected": 235.23721313476562, "logps/chosen": -5.404101848602295, "logps/rejected": -5.310047149658203, "loss": 5.4458, "nll_loss": 5.430812835693359, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.2702050805091858, "rewards/margins": -0.004702714271843433, "rewards/rejected": -0.265502393245697, "step": 20 }, { "epoch": 0.1488095238095238, "grad_norm": 164.33297729492188, "learning_rate": 1.25e-06, "log_odds_chosen": 0.25551921129226685, "log_odds_ratio": -0.6588834524154663, "logits/chosen": 274.9960632324219, "logits/rejected": 302.8502502441406, "logps/chosen": -3.1612606048583984, "logps/rejected": -3.390745162963867, "loss": 3.5011, "nll_loss": 3.502755641937256, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.15806300938129425, "rewards/margins": 0.01147423591464758, "rewards/rejected": -0.1695372611284256, "step": 25 }, { "epoch": 0.17857142857142858, "grad_norm": 78.93386840820312, "learning_rate": 1.5e-06, "log_odds_chosen": 0.24210360646247864, "log_odds_ratio": -0.6352943181991577, "logits/chosen": 290.36334228515625, "logits/rejected": 385.3616638183594, "logps/chosen": -2.418250799179077, "logps/rejected": -2.630479097366333, "loss": 2.5553, "nll_loss": 2.509606122970581, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12091253697872162, "rewards/margins": 0.01061142049729824, "rewards/rejected": -0.1315239518880844, "step": 30 }, { "epoch": 0.20833333333333334, "grad_norm": 42.19651412963867, "learning_rate": 1.75e-06, "log_odds_chosen": 0.4038185477256775, "log_odds_ratio": -0.5650765299797058, "logits/chosen": 365.90753173828125, "logits/rejected": 389.90338134765625, "logps/chosen": -1.5057640075683594, "logps/rejected": -1.8460853099822998, "loss": 2.129, "nll_loss": 1.8491789102554321, "rewards/accuracies": 0.75, "rewards/chosen": -0.07528821378946304, "rewards/margins": 0.01701606810092926, "rewards/rejected": -0.0923042744398117, "step": 35 }, { "epoch": 0.23809523809523808, "grad_norm": 35.26750183105469, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.15938052535057068, "log_odds_ratio": -0.6910892724990845, "logits/chosen": 331.43438720703125, "logits/rejected": 372.8791198730469, "logps/chosen": -1.6345170736312866, "logps/rejected": -1.7539236545562744, "loss": 1.935, "nll_loss": 1.968824028968811, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08172585070133209, "rewards/margins": 0.005970318801701069, "rewards/rejected": -0.08769617974758148, "step": 40 }, { "epoch": 0.26785714285714285, "grad_norm": 87.45353698730469, "learning_rate": 2.25e-06, "log_odds_chosen": -0.15498922765254974, "log_odds_ratio": -0.8517929911613464, "logits/chosen": 386.3527526855469, "logits/rejected": 371.41851806640625, "logps/chosen": -1.4574557542800903, "logps/rejected": -1.3248956203460693, "loss": 1.8765, "nll_loss": 1.7655121088027954, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.07287278026342392, "rewards/margins": -0.006628001574426889, "rewards/rejected": -0.06624479591846466, "step": 45 }, { "epoch": 0.2976190476190476, "grad_norm": 41.48765563964844, "learning_rate": 2.5e-06, "log_odds_chosen": 0.043729472905397415, "log_odds_ratio": -0.7361106872558594, "logits/chosen": 437.00823974609375, "logits/rejected": 443.38232421875, "logps/chosen": -1.658603310585022, "logps/rejected": -1.6873445510864258, "loss": 1.8933, "nll_loss": 2.108198642730713, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08293016999959946, "rewards/margins": 0.0014370663557201624, "rewards/rejected": -0.08436723798513412, "step": 50 }, { "epoch": 0.3273809523809524, "grad_norm": 51.39496994018555, "learning_rate": 2.7500000000000004e-06, "log_odds_chosen": 0.10381323099136353, "log_odds_ratio": -0.7126073241233826, "logits/chosen": 365.14544677734375, "logits/rejected": 364.1676330566406, "logps/chosen": -1.4783101081848145, "logps/rejected": -1.550569772720337, "loss": 1.8734, "nll_loss": 1.8976672887802124, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.073915496468544, "rewards/margins": 0.003612985834479332, "rewards/rejected": -0.07752849161624908, "step": 55 }, { "epoch": 0.35714285714285715, "grad_norm": 47.07830810546875, "learning_rate": 3e-06, "log_odds_chosen": 0.20662184059619904, "log_odds_ratio": -0.6513880491256714, "logits/chosen": 386.7112731933594, "logits/rejected": 356.2147216796875, "logps/chosen": -1.1965901851654053, "logps/rejected": -1.3431804180145264, "loss": 1.6989, "nll_loss": 1.9430646896362305, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05982951074838638, "rewards/margins": 0.007329514715820551, "rewards/rejected": -0.0671590268611908, "step": 60 }, { "epoch": 0.3869047619047619, "grad_norm": 116.96641540527344, "learning_rate": 3.2500000000000002e-06, "log_odds_chosen": 0.029183167964220047, "log_odds_ratio": -0.7434911727905273, "logits/chosen": 385.2690734863281, "logits/rejected": 389.1334228515625, "logps/chosen": -1.3286149501800537, "logps/rejected": -1.3333098888397217, "loss": 1.636, "nll_loss": 1.6027374267578125, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06643076241016388, "rewards/margins": 0.00023474842600990087, "rewards/rejected": -0.06666550040245056, "step": 65 }, { "epoch": 0.4166666666666667, "grad_norm": 40.99021530151367, "learning_rate": 3.5e-06, "log_odds_chosen": 0.045442335307598114, "log_odds_ratio": -0.7358086705207825, "logits/chosen": 395.22906494140625, "logits/rejected": 390.57305908203125, "logps/chosen": -1.7554155588150024, "logps/rejected": -1.765885591506958, "loss": 1.6688, "nll_loss": 1.9699609279632568, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.08777078241109848, "rewards/margins": 0.0005234999698586762, "rewards/rejected": -0.08829428255558014, "step": 70 }, { "epoch": 0.44642857142857145, "grad_norm": 67.57577514648438, "learning_rate": 3.7500000000000005e-06, "log_odds_chosen": 0.24833233654499054, "log_odds_ratio": -0.6203548908233643, "logits/chosen": 386.412109375, "logits/rejected": 376.1787414550781, "logps/chosen": -1.1873780488967896, "logps/rejected": -1.3402230739593506, "loss": 1.5454, "nll_loss": 1.4503333568572998, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.059368908405303955, "rewards/margins": 0.0076422421261668205, "rewards/rejected": -0.06701115518808365, "step": 75 }, { "epoch": 0.47619047619047616, "grad_norm": 29.741676330566406, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.29179519414901733, "log_odds_ratio": -0.6332293748855591, "logits/chosen": 389.09295654296875, "logits/rejected": 405.5395202636719, "logps/chosen": -1.2367178201675415, "logps/rejected": -1.3924906253814697, "loss": 1.5966, "nll_loss": 1.5792012214660645, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.061835892498493195, "rewards/margins": 0.007788646034896374, "rewards/rejected": -0.06962453573942184, "step": 80 }, { "epoch": 0.5059523809523809, "grad_norm": 53.26546096801758, "learning_rate": 4.25e-06, "log_odds_chosen": -0.2370494306087494, "log_odds_ratio": -0.9606464505195618, "logits/chosen": 405.4431457519531, "logits/rejected": 386.801025390625, "logps/chosen": -1.5545951128005981, "logps/rejected": -1.2866960763931274, "loss": 1.6757, "nll_loss": 1.9027442932128906, "rewards/accuracies": 0.5, "rewards/chosen": -0.07772975414991379, "rewards/margins": -0.013394953683018684, "rewards/rejected": -0.06433480232954025, "step": 85 }, { "epoch": 0.5357142857142857, "grad_norm": 28.4445743560791, "learning_rate": 4.5e-06, "log_odds_chosen": 0.7336848378181458, "log_odds_ratio": -0.5354053378105164, "logits/chosen": 416.51580810546875, "logits/rejected": 425.767578125, "logps/chosen": -1.2461140155792236, "logps/rejected": -1.842694878578186, "loss": 1.4576, "nll_loss": 1.5203516483306885, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06230570003390312, "rewards/margins": 0.02982904389500618, "rewards/rejected": -0.0921347513794899, "step": 90 }, { "epoch": 0.5654761904761905, "grad_norm": 46.642581939697266, "learning_rate": 4.75e-06, "log_odds_chosen": 0.25576168298721313, "log_odds_ratio": -0.6427351832389832, "logits/chosen": 347.84832763671875, "logits/rejected": 380.0849609375, "logps/chosen": -1.119447946548462, "logps/rejected": -1.234407663345337, "loss": 1.523, "nll_loss": 1.489450454711914, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05597240477800369, "rewards/margins": 0.005747987423092127, "rewards/rejected": -0.06172039359807968, "step": 95 }, { "epoch": 0.5952380952380952, "grad_norm": 43.77986526489258, "learning_rate": 5e-06, "log_odds_chosen": 0.12196314334869385, "log_odds_ratio": -0.7031902074813843, "logits/chosen": 417.86639404296875, "logits/rejected": 436.10858154296875, "logps/chosen": -1.2873280048370361, "logps/rejected": -1.3734104633331299, "loss": 1.5418, "nll_loss": 1.5198986530303955, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0643664002418518, "rewards/margins": 0.004304117523133755, "rewards/rejected": -0.06867052614688873, "step": 100 }, { "epoch": 0.625, "grad_norm": 32.244384765625, "learning_rate": 4.8795003647426654e-06, "log_odds_chosen": 0.27784407138824463, "log_odds_ratio": -0.6329095959663391, "logits/chosen": 412.528564453125, "logits/rejected": 394.52130126953125, "logps/chosen": -1.0501761436462402, "logps/rejected": -1.182625412940979, "loss": 1.5085, "nll_loss": 1.6539300680160522, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05250881239771843, "rewards/margins": 0.006622466258704662, "rewards/rejected": -0.059131283313035965, "step": 105 }, { "epoch": 0.6547619047619048, "grad_norm": 69.666259765625, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 0.30888259410858154, "log_odds_ratio": -0.5859929919242859, "logits/chosen": 375.71612548828125, "logits/rejected": 356.3163146972656, "logps/chosen": -1.0360513925552368, "logps/rejected": -1.2427626848220825, "loss": 1.5381, "nll_loss": 1.5933644771575928, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05180257558822632, "rewards/margins": 0.01033556554466486, "rewards/rejected": -0.06213812902569771, "step": 110 }, { "epoch": 0.6845238095238095, "grad_norm": 34.408199310302734, "learning_rate": 4.662524041201569e-06, "log_odds_chosen": 0.2710966467857361, "log_odds_ratio": -0.6335155367851257, "logits/chosen": 389.82244873046875, "logits/rejected": 437.4444274902344, "logps/chosen": -0.9292726516723633, "logps/rejected": -1.0246301889419556, "loss": 1.4878, "nll_loss": 1.3399275541305542, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04646363481879234, "rewards/margins": 0.0047678761184215546, "rewards/rejected": -0.0512315109372139, "step": 115 }, { "epoch": 0.7142857142857143, "grad_norm": 41.90970230102539, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 0.2518194615840912, "log_odds_ratio": -0.6617739796638489, "logits/chosen": 393.5774841308594, "logits/rejected": 395.57147216796875, "logps/chosen": -1.0596468448638916, "logps/rejected": -1.1526801586151123, "loss": 1.5378, "nll_loss": 1.5159982442855835, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05298234894871712, "rewards/margins": 0.004651663359254599, "rewards/rejected": -0.05763401836156845, "step": 120 }, { "epoch": 0.7440476190476191, "grad_norm": 30.15009880065918, "learning_rate": 4.47213595499958e-06, "log_odds_chosen": -0.14496295154094696, "log_odds_ratio": -0.9012987017631531, "logits/chosen": 378.92242431640625, "logits/rejected": 388.03662109375, "logps/chosen": -1.3249809741973877, "logps/rejected": -1.1592341661453247, "loss": 1.4803, "nll_loss": 1.3684661388397217, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0662490501999855, "rewards/margins": -0.00828734040260315, "rewards/rejected": -0.05796170234680176, "step": 125 }, { "epoch": 0.7738095238095238, "grad_norm": 24.6285457611084, "learning_rate": 4.385290096535147e-06, "log_odds_chosen": 0.3137187361717224, "log_odds_ratio": -0.6695318818092346, "logits/chosen": 401.2060546875, "logits/rejected": 372.0254821777344, "logps/chosen": -1.065645456314087, "logps/rejected": -1.1312780380249023, "loss": 1.5122, "nll_loss": 1.4193568229675293, "rewards/accuracies": 0.5, "rewards/chosen": -0.053282272070646286, "rewards/margins": 0.003281626384705305, "rewards/rejected": -0.05656389519572258, "step": 130 }, { "epoch": 0.8035714285714286, "grad_norm": 22.474136352539062, "learning_rate": 4.303314829119352e-06, "log_odds_chosen": 0.08262600004673004, "log_odds_ratio": -0.7314961552619934, "logits/chosen": 373.8929138183594, "logits/rejected": 405.0035705566406, "logps/chosen": -1.1277202367782593, "logps/rejected": -1.2345101833343506, "loss": 1.5189, "nll_loss": 1.4631812572479248, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.056386012583971024, "rewards/margins": 0.005339500494301319, "rewards/rejected": -0.06172550842165947, "step": 135 }, { "epoch": 0.8333333333333334, "grad_norm": 23.9536190032959, "learning_rate": 4.2257712736425835e-06, "log_odds_chosen": -0.12259285151958466, "log_odds_ratio": -0.7952130436897278, "logits/chosen": 394.89117431640625, "logits/rejected": 361.0680847167969, "logps/chosen": -1.0518170595169067, "logps/rejected": -0.986822247505188, "loss": 1.5033, "nll_loss": 1.4649903774261475, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.052590854465961456, "rewards/margins": -0.003249742556363344, "rewards/rejected": -0.0493411123752594, "step": 140 }, { "epoch": 0.8630952380952381, "grad_norm": 22.85877227783203, "learning_rate": 4.1522739926869985e-06, "log_odds_chosen": -0.07754195481538773, "log_odds_ratio": -0.7574380040168762, "logits/chosen": 365.57147216796875, "logits/rejected": 380.41046142578125, "logps/chosen": -1.0989421606063843, "logps/rejected": -1.0417240858078003, "loss": 1.4892, "nll_loss": 1.3651247024536133, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05494711548089981, "rewards/margins": -0.0028609037399291992, "rewards/rejected": -0.052086204290390015, "step": 145 }, { "epoch": 0.8928571428571429, "grad_norm": 28.741422653198242, "learning_rate": 4.082482904638631e-06, "log_odds_chosen": 0.4838029742240906, "log_odds_ratio": -0.5155819654464722, "logits/chosen": 390.33367919921875, "logits/rejected": 395.64117431640625, "logps/chosen": -1.0308912992477417, "logps/rejected": -1.3601925373077393, "loss": 1.4181, "nll_loss": 1.4082247018814087, "rewards/accuracies": 0.75, "rewards/chosen": -0.051544565707445145, "rewards/margins": 0.016465062275528908, "rewards/rejected": -0.0680096298456192, "step": 150 }, { "epoch": 0.9226190476190477, "grad_norm": 29.028520584106445, "learning_rate": 4.016096644512495e-06, "log_odds_chosen": 0.07582991570234299, "log_odds_ratio": -0.7352056503295898, "logits/chosen": 384.6861572265625, "logits/rejected": 382.9585876464844, "logps/chosen": -1.1874030828475952, "logps/rejected": -1.2361079454421997, "loss": 1.4165, "nll_loss": 1.365952730178833, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05937015265226364, "rewards/margins": 0.0024352427572011948, "rewards/rejected": -0.06180540472269058, "step": 155 }, { "epoch": 0.9523809523809523, "grad_norm": 47.11198043823242, "learning_rate": 3.952847075210474e-06, "log_odds_chosen": 0.17520497739315033, "log_odds_ratio": -0.691135048866272, "logits/chosen": 389.0434265136719, "logits/rejected": 431.94989013671875, "logps/chosen": -0.9387935400009155, "logps/rejected": -1.0681307315826416, "loss": 1.3893, "nll_loss": 1.2998160123825073, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.046939678490161896, "rewards/margins": 0.006466855760663748, "rewards/rejected": -0.05340652912855148, "step": 160 }, { "epoch": 0.9821428571428571, "grad_norm": 16.736385345458984, "learning_rate": 3.892494720807615e-06, "log_odds_chosen": -0.01084871869534254, "log_odds_ratio": -0.759717583656311, "logits/chosen": 427.4103088378906, "logits/rejected": 431.9371032714844, "logps/chosen": -1.1341661214828491, "logps/rejected": -1.1380488872528076, "loss": 1.4218, "nll_loss": 1.4594279527664185, "rewards/accuracies": 0.5, "rewards/chosen": -0.05670831352472305, "rewards/margins": 0.00019413381232880056, "rewards/rejected": -0.0569024458527565, "step": 165 }, { "epoch": 1.0, "eval_log_odds_chosen": 0.23235350847244263, "eval_log_odds_ratio": -0.6711738109588623, "eval_logits/chosen": 326.8028564453125, "eval_logits/rejected": 273.7525634765625, "eval_logps/chosen": -1.007071614265442, "eval_logps/rejected": -1.159067988395691, "eval_loss": 1.4487849473953247, "eval_nll_loss": 1.455283284187317, "eval_rewards/accuracies": 0.5571428537368774, "eval_rewards/chosen": -0.050353582948446274, "eval_rewards/margins": 0.007599818520247936, "eval_rewards/rejected": -0.057953398674726486, "eval_runtime": 201.1698, "eval_samples_per_second": 2.749, "eval_steps_per_second": 0.348, "step": 168 }, { "epoch": 1.0119047619047619, "grad_norm": 19.479097366333008, "learning_rate": 3.834824944236852e-06, "log_odds_chosen": 0.4537879526615143, "log_odds_ratio": -0.5628765225410461, "logits/chosen": 367.5221252441406, "logits/rejected": 402.8450622558594, "logps/chosen": -0.841064453125, "logps/rejected": -1.091392159461975, "loss": 1.2782, "nll_loss": 1.1047321557998657, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0420532263815403, "rewards/margins": 0.012516376562416553, "rewards/rejected": -0.054569609463214874, "step": 170 }, { "epoch": 1.0416666666666667, "grad_norm": 18.193614959716797, "learning_rate": 3.7796447300922724e-06, "log_odds_chosen": 0.6258941292762756, "log_odds_ratio": -0.5433114171028137, "logits/chosen": 404.79443359375, "logits/rejected": 432.61151123046875, "logps/chosen": -0.7016412615776062, "logps/rejected": -1.0624569654464722, "loss": 1.05, "nll_loss": 1.2699940204620361, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03508206456899643, "rewards/margins": 0.018040789291262627, "rewards/rejected": -0.053122855722904205, "step": 175 }, { "epoch": 1.0714285714285714, "grad_norm": 15.39194393157959, "learning_rate": 3.72677996249965e-06, "log_odds_chosen": 0.7378727197647095, "log_odds_ratio": -0.47085660696029663, "logits/chosen": 343.8995056152344, "logits/rejected": 338.2045593261719, "logps/chosen": -0.8047459721565247, "logps/rejected": -1.2078239917755127, "loss": 1.1048, "nll_loss": 1.2225919961929321, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04023730009794235, "rewards/margins": 0.020153898745775223, "rewards/rejected": -0.060391198843717575, "step": 180 }, { "epoch": 1.1011904761904763, "grad_norm": 20.72310447692871, "learning_rate": 3.6760731104690393e-06, "log_odds_chosen": 1.557734727859497, "log_odds_ratio": -0.26492685079574585, "logits/chosen": 435.18865966796875, "logits/rejected": 362.96868896484375, "logps/chosen": -0.6113921403884888, "logps/rejected": -1.4927384853363037, "loss": 0.9894, "nll_loss": 1.086829423904419, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.03056960739195347, "rewards/margins": 0.044067323207855225, "rewards/rejected": -0.07463693618774414, "step": 185 }, { "epoch": 1.130952380952381, "grad_norm": 16.229610443115234, "learning_rate": 3.6273812505500587e-06, "log_odds_chosen": 0.5972079038619995, "log_odds_ratio": -0.5253661870956421, "logits/chosen": 386.9327087402344, "logits/rejected": 405.801513671875, "logps/chosen": -0.7007042169570923, "logps/rejected": -1.028989553451538, "loss": 1.1004, "nll_loss": 1.0136523246765137, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03503521531820297, "rewards/margins": 0.01641426980495453, "rewards/rejected": -0.0514494851231575, "step": 190 }, { "epoch": 1.1607142857142858, "grad_norm": 21.483884811401367, "learning_rate": 3.5805743701971648e-06, "log_odds_chosen": 0.9149399995803833, "log_odds_ratio": -0.4483817219734192, "logits/chosen": 371.6880187988281, "logits/rejected": 403.46356201171875, "logps/chosen": -0.7223442196846008, "logps/rejected": -1.1754904985427856, "loss": 1.0787, "nll_loss": 1.0912402868270874, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03611721470952034, "rewards/margins": 0.022657308727502823, "rewards/rejected": -0.05877452343702316, "step": 195 }, { "epoch": 1.1904761904761905, "grad_norm": 19.23330307006836, "learning_rate": 3.5355339059327378e-06, "log_odds_chosen": 1.305513620376587, "log_odds_ratio": -0.32568031549453735, "logits/chosen": 431.67156982421875, "logits/rejected": 391.12017822265625, "logps/chosen": -0.4990530014038086, "logps/rejected": -1.1961959600448608, "loss": 1.0519, "nll_loss": 0.9312452077865601, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.02495265007019043, "rewards/margins": 0.03485715016722679, "rewards/rejected": -0.05980980396270752, "step": 200 }, { "epoch": 1.2202380952380953, "grad_norm": 18.404104232788086, "learning_rate": 3.4921514788478916e-06, "log_odds_chosen": 1.2872587442398071, "log_odds_ratio": -0.3010835349559784, "logits/chosen": 335.2537841796875, "logits/rejected": 387.68621826171875, "logps/chosen": -0.6490974426269531, "logps/rejected": -1.4184123277664185, "loss": 1.0519, "nll_loss": 1.0444796085357666, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.032454878091812134, "rewards/margins": 0.03846573829650879, "rewards/rejected": -0.07092060893774033, "step": 205 }, { "epoch": 1.25, "grad_norm": 13.957087516784668, "learning_rate": 3.450327796711771e-06, "log_odds_chosen": 1.1957916021347046, "log_odds_ratio": -0.36826610565185547, "logits/chosen": 365.1176452636719, "logits/rejected": 363.3384094238281, "logps/chosen": -0.6256347298622131, "logps/rejected": -1.242398738861084, "loss": 0.9696, "nll_loss": 1.0164039134979248, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.031281743198633194, "rewards/margins": 0.03083820268511772, "rewards/rejected": -0.06211994215846062, "step": 210 }, { "epoch": 1.2797619047619047, "grad_norm": 25.830501556396484, "learning_rate": 3.409971697352368e-06, "log_odds_chosen": 1.1722948551177979, "log_odds_ratio": -0.3577140271663666, "logits/chosen": 410.3714904785156, "logits/rejected": 416.93621826171875, "logps/chosen": -0.7465067505836487, "logps/rejected": -1.4228246212005615, "loss": 1.0589, "nll_loss": 1.059061050415039, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.037325333803892136, "rewards/margins": 0.0338158942759037, "rewards/rejected": -0.07114122807979584, "step": 215 }, { "epoch": 1.3095238095238095, "grad_norm": 15.7648344039917, "learning_rate": 3.3709993123162106e-06, "log_odds_chosen": 0.9515246152877808, "log_odds_ratio": -0.4343441128730774, "logits/chosen": 352.2547607421875, "logits/rejected": 370.08245849609375, "logps/chosen": -0.6594604253768921, "logps/rejected": -1.1094262599945068, "loss": 1.0138, "nll_loss": 0.9946004748344421, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.032973017543554306, "rewards/margins": 0.022498302161693573, "rewards/rejected": -0.05547132343053818, "step": 220 }, { "epoch": 1.3392857142857144, "grad_norm": 21.596298217773438, "learning_rate": 3.3333333333333333e-06, "log_odds_chosen": 0.4564495086669922, "log_odds_ratio": -0.5201828479766846, "logits/chosen": 387.99542236328125, "logits/rejected": 365.8666076660156, "logps/chosen": -1.059605360031128, "logps/rejected": -1.3452465534210205, "loss": 1.0353, "nll_loss": 1.2666301727294922, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.05298026651144028, "rewards/margins": 0.014282059855759144, "rewards/rejected": -0.06726232916116714, "step": 225 }, { "epoch": 1.369047619047619, "grad_norm": 16.963186264038086, "learning_rate": 3.296902366978936e-06, "log_odds_chosen": 1.108178973197937, "log_odds_ratio": -0.3512019217014313, "logits/chosen": 355.7499084472656, "logits/rejected": 366.8659362792969, "logps/chosen": -0.6812053322792053, "logps/rejected": -1.3409600257873535, "loss": 1.0135, "nll_loss": 0.873543381690979, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.034060269594192505, "rewards/margins": 0.03298773616552353, "rewards/rejected": -0.06704800575971603, "step": 230 }, { "epoch": 1.3988095238095237, "grad_norm": 22.207305908203125, "learning_rate": 3.2616403652672114e-06, "log_odds_chosen": 1.4102222919464111, "log_odds_ratio": -0.3304731547832489, "logits/chosen": 382.0793151855469, "logits/rejected": 389.06683349609375, "logps/chosen": -0.5401080846786499, "logps/rejected": -1.397993803024292, "loss": 1.0737, "nll_loss": 0.9223471879959106, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.027005404233932495, "rewards/margins": 0.04289429262280464, "rewards/rejected": -0.06989969313144684, "step": 235 }, { "epoch": 1.4285714285714286, "grad_norm": 16.992250442504883, "learning_rate": 3.2274861218395142e-06, "log_odds_chosen": 0.807520866394043, "log_odds_ratio": -0.4530153274536133, "logits/chosen": 414.99261474609375, "logits/rejected": 423.21392822265625, "logps/chosen": -0.7907418012619019, "logps/rejected": -1.2757242918014526, "loss": 1.0285, "nll_loss": 1.0256245136260986, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03953709080815315, "rewards/margins": 0.0242491252720356, "rewards/rejected": -0.06378621608018875, "step": 240 }, { "epoch": 1.4583333333333333, "grad_norm": 17.553632736206055, "learning_rate": 3.1943828249997e-06, "log_odds_chosen": 0.7604994177818298, "log_odds_ratio": -0.42883071303367615, "logits/chosen": 386.42657470703125, "logits/rejected": 363.54931640625, "logps/chosen": -0.6057090163230896, "logps/rejected": -1.0264109373092651, "loss": 1.0502, "nll_loss": 1.1268291473388672, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03028545156121254, "rewards/margins": 0.021035097539424896, "rewards/rejected": -0.05132054537534714, "step": 245 }, { "epoch": 1.4880952380952381, "grad_norm": 22.123960494995117, "learning_rate": 3.1622776601683796e-06, "log_odds_chosen": 1.0444424152374268, "log_odds_ratio": -0.38615161180496216, "logits/chosen": 369.87274169921875, "logits/rejected": 389.29583740234375, "logps/chosen": -0.7160728573799133, "logps/rejected": -1.2788223028182983, "loss": 0.9816, "nll_loss": 0.8135589361190796, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.035803645849227905, "rewards/margins": 0.02813747525215149, "rewards/rejected": -0.0639411136507988, "step": 250 }, { "epoch": 1.5178571428571428, "grad_norm": 26.341325759887695, "learning_rate": 3.131121455425748e-06, "log_odds_chosen": 1.0527693033218384, "log_odds_ratio": -0.35840553045272827, "logits/chosen": 404.608154296875, "logits/rejected": 410.81854248046875, "logps/chosen": -0.6503961682319641, "logps/rejected": -1.250218152999878, "loss": 1.0367, "nll_loss": 0.9888055920600891, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.032519809901714325, "rewards/margins": 0.029991086572408676, "rewards/rejected": -0.0625109076499939, "step": 255 }, { "epoch": 1.5476190476190477, "grad_norm": 25.059885025024414, "learning_rate": 3.1008683647302113e-06, "log_odds_chosen": 1.0121057033538818, "log_odds_ratio": -0.4098740220069885, "logits/chosen": 375.2867126464844, "logits/rejected": 426.01507568359375, "logps/chosen": -0.8014397621154785, "logps/rejected": -1.4673702716827393, "loss": 1.0023, "nll_loss": 1.092798113822937, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0400719977915287, "rewards/margins": 0.03329651802778244, "rewards/rejected": -0.07336851954460144, "step": 260 }, { "epoch": 1.5773809523809523, "grad_norm": 17.973594665527344, "learning_rate": 3.0714755841697565e-06, "log_odds_chosen": 0.830208957195282, "log_odds_ratio": -0.5377876162528992, "logits/chosen": 421.629150390625, "logits/rejected": 414.95703125, "logps/chosen": -0.8159686923027039, "logps/rejected": -1.314146876335144, "loss": 1.0967, "nll_loss": 1.1257387399673462, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.040798433125019073, "rewards/margins": 0.024908915162086487, "rewards/rejected": -0.06570734083652496, "step": 265 }, { "epoch": 1.6071428571428572, "grad_norm": 17.566476821899414, "learning_rate": 3.0429030972509227e-06, "log_odds_chosen": 0.7149374485015869, "log_odds_ratio": -0.45594334602355957, "logits/chosen": 354.78692626953125, "logits/rejected": 364.1693115234375, "logps/chosen": -0.836654543876648, "logps/rejected": -1.2243211269378662, "loss": 1.0429, "nll_loss": 1.1530247926712036, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.041832733899354935, "rewards/margins": 0.019383331760764122, "rewards/rejected": -0.06121605634689331, "step": 270 }, { "epoch": 1.6369047619047619, "grad_norm": 16.325292587280273, "learning_rate": 3.0151134457776365e-06, "log_odds_chosen": 0.7199637293815613, "log_odds_ratio": -0.434516578912735, "logits/chosen": 322.65130615234375, "logits/rejected": 338.8601989746094, "logps/chosen": -0.5899932384490967, "logps/rejected": -0.9299219250679016, "loss": 1.076, "nll_loss": 0.9019113779067993, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.029499661177396774, "rewards/margins": 0.016996433958411217, "rewards/rejected": -0.04649610072374344, "step": 275 }, { "epoch": 1.6666666666666665, "grad_norm": 14.822625160217285, "learning_rate": 2.988071523335984e-06, "log_odds_chosen": 0.9259305000305176, "log_odds_ratio": -0.5428971648216248, "logits/chosen": 395.42230224609375, "logits/rejected": 360.65234375, "logps/chosen": -0.5504172444343567, "logps/rejected": -1.1162601709365845, "loss": 1.0232, "nll_loss": 0.8841646909713745, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.027520865201950073, "rewards/margins": 0.02829214558005333, "rewards/rejected": -0.0558130145072937, "step": 280 }, { "epoch": 1.6964285714285714, "grad_norm": 13.360857009887695, "learning_rate": 2.961744388795462e-06, "log_odds_chosen": 0.7002909183502197, "log_odds_ratio": -0.46074408292770386, "logits/chosen": 344.73712158203125, "logits/rejected": 373.6070556640625, "logps/chosen": -0.6551269292831421, "logps/rejected": -1.0137001276016235, "loss": 0.9926, "nll_loss": 0.9025999307632446, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.032756343483924866, "rewards/margins": 0.017928656190633774, "rewards/rejected": -0.05068499967455864, "step": 285 }, { "epoch": 1.7261904761904763, "grad_norm": 12.45747184753418, "learning_rate": 2.9361010975735177e-06, "log_odds_chosen": 1.1189637184143066, "log_odds_ratio": -0.3354729413986206, "logits/chosen": 367.6058349609375, "logits/rejected": 436.7281188964844, "logps/chosen": -0.7302691340446472, "logps/rejected": -1.3499362468719482, "loss": 1.0165, "nll_loss": 0.9672770500183105, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03651345893740654, "rewards/margins": 0.03098336234688759, "rewards/rejected": -0.06749682128429413, "step": 290 }, { "epoch": 1.755952380952381, "grad_norm": 20.8387508392334, "learning_rate": 2.9111125486979104e-06, "log_odds_chosen": 0.8264445066452026, "log_odds_ratio": -0.45267271995544434, "logits/chosen": 380.7887268066406, "logits/rejected": 392.38018798828125, "logps/chosen": -0.7333472967147827, "logps/rejected": -1.1829124689102173, "loss": 1.0473, "nll_loss": 1.2072994709014893, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.036667369306087494, "rewards/margins": 0.0224782545119524, "rewards/rejected": -0.059145621955394745, "step": 295 }, { "epoch": 1.7857142857142856, "grad_norm": 14.306224822998047, "learning_rate": 2.8867513459481293e-06, "log_odds_chosen": 1.0914583206176758, "log_odds_ratio": -0.3539409041404724, "logits/chosen": 376.03033447265625, "logits/rejected": 378.3951416015625, "logps/chosen": -0.58739173412323, "logps/rejected": -1.1421259641647339, "loss": 0.975, "nll_loss": 0.8565952181816101, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02936958707869053, "rewards/margins": 0.027736714109778404, "rewards/rejected": -0.05710630491375923, "step": 300 }, { "epoch": 1.8154761904761905, "grad_norm": 15.119916915893555, "learning_rate": 2.862991671569341e-06, "log_odds_chosen": 0.3856947720050812, "log_odds_ratio": -0.5676103830337524, "logits/chosen": 432.51239013671875, "logits/rejected": 413.9574279785156, "logps/chosen": -1.001999855041504, "logps/rejected": -1.1941574811935425, "loss": 1.026, "nll_loss": 1.176692247390747, "rewards/accuracies": 0.75, "rewards/chosen": -0.050099991261959076, "rewards/margins": 0.009607886895537376, "rewards/rejected": -0.0597078800201416, "step": 305 }, { "epoch": 1.8452380952380953, "grad_norm": 13.858372688293457, "learning_rate": 2.839809171235324e-06, "log_odds_chosen": 0.9851268529891968, "log_odds_ratio": -0.46969637274742126, "logits/chosen": 347.86224365234375, "logits/rejected": 354.42236328125, "logps/chosen": -0.7486366629600525, "logps/rejected": -1.323521375656128, "loss": 1.0778, "nll_loss": 1.036675214767456, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.037431832402944565, "rewards/margins": 0.028744244948029518, "rewards/rejected": -0.06617607176303864, "step": 310 }, { "epoch": 1.875, "grad_norm": 17.435771942138672, "learning_rate": 2.817180849095055e-06, "log_odds_chosen": 0.7195825576782227, "log_odds_ratio": -0.5751198530197144, "logits/chosen": 372.2769470214844, "logits/rejected": 380.11322021484375, "logps/chosen": -0.9793826937675476, "logps/rejected": -1.4906439781188965, "loss": 1.0867, "nll_loss": 1.2049812078475952, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04896913468837738, "rewards/margins": 0.025563066825270653, "rewards/rejected": -0.07453219592571259, "step": 315 }, { "epoch": 1.9047619047619047, "grad_norm": 13.72044563293457, "learning_rate": 2.7950849718747376e-06, "log_odds_chosen": 1.036712884902954, "log_odds_ratio": -0.36927738785743713, "logits/chosen": 385.5801086425781, "logits/rejected": 423.3285217285156, "logps/chosen": -0.7053465247154236, "logps/rejected": -1.304882526397705, "loss": 0.9866, "nll_loss": 0.9042797088623047, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.03526733070611954, "rewards/margins": 0.029976800084114075, "rewards/rejected": -0.06524413079023361, "step": 320 }, { "epoch": 1.9345238095238095, "grad_norm": 17.074472427368164, "learning_rate": 2.773500981126146e-06, "log_odds_chosen": 1.224524736404419, "log_odds_ratio": -0.35412582755088806, "logits/chosen": 397.96649169921875, "logits/rejected": 423.78094482421875, "logps/chosen": -0.6596122980117798, "logps/rejected": -1.2944166660308838, "loss": 0.9893, "nll_loss": 0.9460498690605164, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03298061341047287, "rewards/margins": 0.0317402258515358, "rewards/rejected": -0.06472083181142807, "step": 325 }, { "epoch": 1.9642857142857144, "grad_norm": 23.940942764282227, "learning_rate": 2.752409412815902e-06, "log_odds_chosen": 1.0460882186889648, "log_odds_ratio": -0.416635125875473, "logits/chosen": 391.8972473144531, "logits/rejected": 401.4590148925781, "logps/chosen": -0.7760294675827026, "logps/rejected": -1.4733283519744873, "loss": 1.0094, "nll_loss": 0.9179704785346985, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.038801468908786774, "rewards/margins": 0.034864943474531174, "rewards/rejected": -0.07366641610860825, "step": 330 }, { "epoch": 1.994047619047619, "grad_norm": 18.142581939697266, "learning_rate": 2.7317918235407652e-06, "log_odds_chosen": 0.5594000816345215, "log_odds_ratio": -0.544743537902832, "logits/chosen": 377.28729248046875, "logits/rejected": 394.92413330078125, "logps/chosen": -0.9491284489631653, "logps/rejected": -1.2867456674575806, "loss": 1.0804, "nll_loss": 1.2649915218353271, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.047456420958042145, "rewards/margins": 0.016880858689546585, "rewards/rejected": -0.06433728337287903, "step": 335 }, { "epoch": 2.0, "eval_log_odds_chosen": 0.21522486209869385, "eval_log_odds_ratio": -0.6897445917129517, "eval_logits/chosen": 330.50665283203125, "eval_logits/rejected": 278.2473449707031, "eval_logps/chosen": -1.022003173828125, "eval_logps/rejected": -1.1829901933670044, "eval_loss": 1.4225263595581055, "eval_nll_loss": 1.4082757234573364, "eval_rewards/accuracies": 0.5142857432365417, "eval_rewards/chosen": -0.05110016465187073, "eval_rewards/margins": 0.008049344643950462, "eval_rewards/rejected": -0.05914951115846634, "eval_runtime": 201.3887, "eval_samples_per_second": 2.746, "eval_steps_per_second": 0.348, "step": 336 }, { "epoch": 2.0238095238095237, "grad_norm": 25.43450927734375, "learning_rate": 2.711630722733202e-06, "log_odds_chosen": 1.8818715810775757, "log_odds_ratio": -0.2647668421268463, "logits/chosen": 383.9913330078125, "logits/rejected": 380.02801513671875, "logps/chosen": -0.47935551404953003, "logps/rejected": -1.454525351524353, "loss": 0.6307, "nll_loss": 0.6181924343109131, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.023967772722244263, "rewards/margins": 0.04875849559903145, "rewards/rejected": -0.07272626459598541, "step": 340 }, { "epoch": 2.0535714285714284, "grad_norm": 16.127397537231445, "learning_rate": 2.691909510290828e-06, "log_odds_chosen": 2.698786497116089, "log_odds_ratio": -0.14153851568698883, "logits/chosen": 388.04534912109375, "logits/rejected": 355.0823669433594, "logps/chosen": -0.29280781745910645, "logps/rejected": -1.475531816482544, "loss": 0.5566, "nll_loss": 0.6942065954208374, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.014640390872955322, "rewards/margins": 0.059136200696229935, "rewards/rejected": -0.07377658784389496, "step": 345 }, { "epoch": 2.0833333333333335, "grad_norm": 13.852057456970215, "learning_rate": 2.6726124191242444e-06, "log_odds_chosen": 2.8243956565856934, "log_odds_ratio": -0.09990663826465607, "logits/chosen": 354.74615478515625, "logits/rejected": 387.60528564453125, "logps/chosen": -0.37845298647880554, "logps/rejected": -2.111022472381592, "loss": 0.5438, "nll_loss": 0.5637595057487488, "rewards/accuracies": 1.0, "rewards/chosen": -0.018922649323940277, "rewards/margins": 0.08662847429513931, "rewards/rejected": -0.10555113852024078, "step": 350 }, { "epoch": 2.113095238095238, "grad_norm": 16.1456241607666, "learning_rate": 2.6537244621713765e-06, "log_odds_chosen": 2.1223108768463135, "log_odds_ratio": -0.14685922861099243, "logits/chosen": 359.0018005371094, "logits/rejected": 385.0552062988281, "logps/chosen": -0.425741970539093, "logps/rejected": -1.6288859844207764, "loss": 0.5366, "nll_loss": 0.5897840261459351, "rewards/accuracies": 1.0, "rewards/chosen": -0.0212871003895998, "rewards/margins": 0.060157209634780884, "rewards/rejected": -0.08144429326057434, "step": 355 }, { "epoch": 2.142857142857143, "grad_norm": 11.441978454589844, "learning_rate": 2.6352313834736496e-06, "log_odds_chosen": 2.8245463371276855, "log_odds_ratio": -0.12314938008785248, "logits/chosen": 369.9977722167969, "logits/rejected": 407.8392333984375, "logps/chosen": -0.3003098964691162, "logps/rejected": -1.5362733602523804, "loss": 0.5329, "nll_loss": 0.5958473086357117, "rewards/accuracies": 1.0, "rewards/chosen": -0.015015493147075176, "rewards/margins": 0.06179817393422127, "rewards/rejected": -0.07681366801261902, "step": 360 }, { "epoch": 2.1726190476190474, "grad_norm": 12.86784839630127, "learning_rate": 2.6171196129510684e-06, "log_odds_chosen": 2.503121852874756, "log_odds_ratio": -0.1363641321659088, "logits/chosen": 334.23468017578125, "logits/rejected": 311.822509765625, "logps/chosen": -0.3001948595046997, "logps/rejected": -1.4599236249923706, "loss": 0.5156, "nll_loss": 0.5041288137435913, "rewards/accuracies": 1.0, "rewards/chosen": -0.0150097431614995, "rewards/margins": 0.05798644572496414, "rewards/rejected": -0.07299618422985077, "step": 365 }, { "epoch": 2.2023809523809526, "grad_norm": 13.525810241699219, "learning_rate": 2.599376224550182e-06, "log_odds_chosen": 1.8203538656234741, "log_odds_ratio": -0.19545204937458038, "logits/chosen": 313.6293029785156, "logits/rejected": 326.6126403808594, "logps/chosen": -0.4296380877494812, "logps/rejected": -1.4019193649291992, "loss": 0.5492, "nll_loss": 0.6865583062171936, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.02148190513253212, "rewards/margins": 0.04861406981945038, "rewards/rejected": -0.0700959712266922, "step": 370 }, { "epoch": 2.232142857142857, "grad_norm": 14.674184799194336, "learning_rate": 2.5819888974716113e-06, "log_odds_chosen": 1.8546769618988037, "log_odds_ratio": -0.22947852313518524, "logits/chosen": 377.0060119628906, "logits/rejected": 399.48590087890625, "logps/chosen": -0.3884517252445221, "logps/rejected": -1.292641520500183, "loss": 0.5785, "nll_loss": 0.5873233079910278, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.019422587007284164, "rewards/margins": 0.045209504663944244, "rewards/rejected": -0.06463208049535751, "step": 375 }, { "epoch": 2.261904761904762, "grad_norm": 11.674169540405273, "learning_rate": 2.564945880212886e-06, "log_odds_chosen": 2.2116830348968506, "log_odds_ratio": -0.12257333844900131, "logits/chosen": 367.1352233886719, "logits/rejected": 336.3498840332031, "logps/chosen": -0.3219259977340698, "logps/rejected": -1.350303292274475, "loss": 0.5297, "nll_loss": 0.5648508071899414, "rewards/accuracies": 1.0, "rewards/chosen": -0.01609629951417446, "rewards/margins": 0.051418863236904144, "rewards/rejected": -0.06751517206430435, "step": 380 }, { "epoch": 2.2916666666666665, "grad_norm": 13.154436111450195, "learning_rate": 2.5482359571881276e-06, "log_odds_chosen": 2.5731985569000244, "log_odds_ratio": -0.1179199069738388, "logits/chosen": 340.62567138671875, "logits/rejected": 364.66888427734375, "logps/chosen": -0.27915579080581665, "logps/rejected": -1.4784090518951416, "loss": 0.5102, "nll_loss": 0.49411922693252563, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.013957786373794079, "rewards/margins": 0.059962667524814606, "rewards/rejected": -0.07392045855522156, "step": 385 }, { "epoch": 2.3214285714285716, "grad_norm": 11.973630905151367, "learning_rate": 2.5318484177091667e-06, "log_odds_chosen": 2.2964727878570557, "log_odds_ratio": -0.13063772022724152, "logits/chosen": 359.48345947265625, "logits/rejected": 400.7521667480469, "logps/chosen": -0.3307945430278778, "logps/rejected": -1.4601058959960938, "loss": 0.5685, "nll_loss": 0.5173304677009583, "rewards/accuracies": 1.0, "rewards/chosen": -0.01653972640633583, "rewards/margins": 0.056465573608875275, "rewards/rejected": -0.0730053037405014, "step": 390 }, { "epoch": 2.3511904761904763, "grad_norm": 10.832448959350586, "learning_rate": 2.515773027133138e-06, "log_odds_chosen": 2.685337543487549, "log_odds_ratio": -0.10510516166687012, "logits/chosen": 360.8955383300781, "logits/rejected": 359.0787048339844, "logps/chosen": -0.2764904201030731, "logps/rejected": -1.409620761871338, "loss": 0.511, "nll_loss": 0.500057578086853, "rewards/accuracies": 1.0, "rewards/chosen": -0.013824522495269775, "rewards/margins": 0.05665650963783264, "rewards/rejected": -0.07048103958368301, "step": 395 }, { "epoch": 2.380952380952381, "grad_norm": 12.639362335205078, "learning_rate": 2.5e-06, "log_odds_chosen": 2.6170766353607178, "log_odds_ratio": -0.1315409243106842, "logits/chosen": 342.3817138671875, "logits/rejected": 396.52410888671875, "logps/chosen": -0.328637957572937, "logps/rejected": -1.6732807159423828, "loss": 0.527, "nll_loss": 0.5181549787521362, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.01643189787864685, "rewards/margins": 0.067232146859169, "rewards/rejected": -0.08366404473781586, "step": 400 }, { "epoch": 2.4107142857142856, "grad_norm": 14.651439666748047, "learning_rate": 2.484519974999767e-06, "log_odds_chosen": 2.2201273441314697, "log_odds_ratio": -0.16681890189647675, "logits/chosen": 424.72918701171875, "logits/rejected": 371.58966064453125, "logps/chosen": -0.30684176087379456, "logps/rejected": -1.3665101528167725, "loss": 0.5749, "nll_loss": 0.5082268714904785, "rewards/accuracies": 1.0, "rewards/chosen": -0.015342088416218758, "rewards/margins": 0.05298342555761337, "rewards/rejected": -0.06832550466060638, "step": 405 }, { "epoch": 2.4404761904761907, "grad_norm": 11.325146675109863, "learning_rate": 2.4693239916239746e-06, "log_odds_chosen": 2.499159336090088, "log_odds_ratio": -0.21561208367347717, "logits/chosen": 356.08258056640625, "logits/rejected": 390.337890625, "logps/chosen": -0.372717022895813, "logps/rejected": -1.7423985004425049, "loss": 0.5384, "nll_loss": 0.5256937742233276, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.01863585226237774, "rewards/margins": 0.06848406046628952, "rewards/rejected": -0.087119922041893, "step": 410 }, { "epoch": 2.4702380952380953, "grad_norm": 12.71360969543457, "learning_rate": 2.4544034683690802e-06, "log_odds_chosen": 2.8460097312927246, "log_odds_ratio": -0.09599516540765762, "logits/chosen": 386.35302734375, "logits/rejected": 372.95367431640625, "logps/chosen": -0.26167118549346924, "logps/rejected": -1.7513694763183594, "loss": 0.551, "nll_loss": 0.4728317856788635, "rewards/accuracies": 1.0, "rewards/chosen": -0.013083559460937977, "rewards/margins": 0.0744849145412445, "rewards/rejected": -0.08756847679615021, "step": 415 }, { "epoch": 2.5, "grad_norm": 19.943899154663086, "learning_rate": 2.4397501823713327e-06, "log_odds_chosen": 2.2042346000671387, "log_odds_ratio": -0.17198964953422546, "logits/chosen": 370.604736328125, "logits/rejected": 320.15875244140625, "logps/chosen": -0.30728116631507874, "logps/rejected": -1.3875327110290527, "loss": 0.5247, "nll_loss": 0.6666563153266907, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.015364060178399086, "rewards/margins": 0.05401257798075676, "rewards/rejected": -0.0693766325712204, "step": 420 }, { "epoch": 2.5297619047619047, "grad_norm": 12.133928298950195, "learning_rate": 2.4253562503633297e-06, "log_odds_chosen": 2.90407133102417, "log_odds_ratio": -0.06831072270870209, "logits/chosen": 342.95733642578125, "logits/rejected": 367.10894775390625, "logps/chosen": -0.3182448446750641, "logps/rejected": -1.8485863208770752, "loss": 0.507, "nll_loss": 0.6053184270858765, "rewards/accuracies": 1.0, "rewards/chosen": -0.015912240371108055, "rewards/margins": 0.07651706039905548, "rewards/rejected": -0.09242931008338928, "step": 425 }, { "epoch": 2.5595238095238093, "grad_norm": 12.26125717163086, "learning_rate": 2.411214110852061e-06, "log_odds_chosen": 2.55293607711792, "log_odds_ratio": -0.10798110067844391, "logits/chosen": 391.63848876953125, "logits/rejected": 404.0960388183594, "logps/chosen": -0.27310800552368164, "logps/rejected": -1.4932405948638916, "loss": 0.5084, "nll_loss": 0.4452734887599945, "rewards/accuracies": 1.0, "rewards/chosen": -0.013655401766300201, "rewards/margins": 0.06100662797689438, "rewards/rejected": -0.07466202974319458, "step": 430 }, { "epoch": 2.5892857142857144, "grad_norm": 13.624907493591309, "learning_rate": 2.3973165074269213e-06, "log_odds_chosen": 2.3622078895568848, "log_odds_ratio": -0.2018093764781952, "logits/chosen": 356.6548767089844, "logits/rejected": 360.92474365234375, "logps/chosen": -0.36742302775382996, "logps/rejected": -1.6279608011245728, "loss": 0.5587, "nll_loss": 0.5465327501296997, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.018371151760220528, "rewards/margins": 0.06302689015865326, "rewards/rejected": -0.08139804750680923, "step": 435 }, { "epoch": 2.619047619047619, "grad_norm": 13.956061363220215, "learning_rate": 2.3836564731139807e-06, "log_odds_chosen": 2.7665021419525146, "log_odds_ratio": -0.10336035490036011, "logits/chosen": 352.8879089355469, "logits/rejected": 368.2629699707031, "logps/chosen": -0.2728215754032135, "logps/rejected": -1.5103857517242432, "loss": 0.5462, "nll_loss": 0.46932536363601685, "rewards/accuracies": 1.0, "rewards/chosen": -0.01364107709378004, "rewards/margins": 0.06187821552157402, "rewards/rejected": -0.07551928609609604, "step": 440 }, { "epoch": 2.6488095238095237, "grad_norm": 11.78285026550293, "learning_rate": 2.3702273156998867e-06, "log_odds_chosen": 2.3770580291748047, "log_odds_ratio": -0.116610586643219, "logits/chosen": 337.458740234375, "logits/rejected": 374.8058776855469, "logps/chosen": -0.3450239896774292, "logps/rejected": -1.6122970581054688, "loss": 0.5447, "nll_loss": 0.5509533882141113, "rewards/accuracies": 1.0, "rewards/chosen": -0.01725119911134243, "rewards/margins": 0.06336364895105362, "rewards/rejected": -0.0806148499250412, "step": 445 }, { "epoch": 2.678571428571429, "grad_norm": 14.717430114746094, "learning_rate": 2.357022603955159e-06, "log_odds_chosen": 2.9325308799743652, "log_odds_ratio": -0.08914806693792343, "logits/chosen": 375.6985778808594, "logits/rejected": 367.75787353515625, "logps/chosen": -0.35901179909706116, "logps/rejected": -2.006959915161133, "loss": 0.5752, "nll_loss": 0.49780726432800293, "rewards/accuracies": 1.0, "rewards/chosen": -0.01795058883726597, "rewards/margins": 0.08239741623401642, "rewards/rejected": -0.10034799575805664, "step": 450 }, { "epoch": 2.7083333333333335, "grad_norm": 18.606168746948242, "learning_rate": 2.3440361546924774e-06, "log_odds_chosen": 2.5973100662231445, "log_odds_ratio": -0.09789351373910904, "logits/chosen": 371.6011962890625, "logits/rejected": 356.3915710449219, "logps/chosen": -0.27857938408851624, "logps/rejected": -1.5046110153198242, "loss": 0.5909, "nll_loss": 0.5146271586418152, "rewards/accuracies": 1.0, "rewards/chosen": -0.013928967528045177, "rewards/margins": 0.061301589012145996, "rewards/rejected": -0.07523055374622345, "step": 455 }, { "epoch": 2.738095238095238, "grad_norm": 14.296780586242676, "learning_rate": 2.3312620206007847e-06, "log_odds_chosen": 2.4838976860046387, "log_odds_ratio": -0.13478006422519684, "logits/chosen": 412.72332763671875, "logits/rejected": 433.41070556640625, "logps/chosen": -0.3284691870212555, "logps/rejected": -1.5628798007965088, "loss": 0.5718, "nll_loss": 0.5497859716415405, "rewards/accuracies": 1.0, "rewards/chosen": -0.016423460096120834, "rewards/margins": 0.061720531433820724, "rewards/rejected": -0.07814399152994156, "step": 460 }, { "epoch": 2.767857142857143, "grad_norm": 12.61571216583252, "learning_rate": 2.3186944788008413e-06, "log_odds_chosen": 2.7270541191101074, "log_odds_ratio": -0.11491680145263672, "logits/chosen": 401.2771301269531, "logits/rejected": 386.7737121582031, "logps/chosen": -0.25855037569999695, "logps/rejected": -1.5313479900360107, "loss": 0.5771, "nll_loss": 0.6066937446594238, "rewards/accuracies": 1.0, "rewards/chosen": -0.012927519157528877, "rewards/margins": 0.06363988667726517, "rewards/rejected": -0.0765674039721489, "step": 465 }, { "epoch": 2.7976190476190474, "grad_norm": 11.338821411132812, "learning_rate": 2.3063280200722128e-06, "log_odds_chosen": 2.1205978393554688, "log_odds_ratio": -0.20822449028491974, "logits/chosen": 357.4671630859375, "logits/rejected": 335.1904296875, "logps/chosen": -0.36784037947654724, "logps/rejected": -1.465673804283142, "loss": 0.5175, "nll_loss": 0.5160819888114929, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.018392018973827362, "rewards/margins": 0.0548916831612587, "rewards/rejected": -0.07328370213508606, "step": 470 }, { "epoch": 2.8273809523809526, "grad_norm": 11.929247856140137, "learning_rate": 2.2941573387056174e-06, "log_odds_chosen": 2.6770148277282715, "log_odds_ratio": -0.0899493470788002, "logits/chosen": 325.32476806640625, "logits/rejected": 420.31280517578125, "logps/chosen": -0.3526560664176941, "logps/rejected": -1.848586082458496, "loss": 0.5393, "nll_loss": 0.48061132431030273, "rewards/accuracies": 1.0, "rewards/chosen": -0.017632806673645973, "rewards/margins": 0.07479649782180786, "rewards/rejected": -0.09242929518222809, "step": 475 }, { "epoch": 2.857142857142857, "grad_norm": 13.174921035766602, "learning_rate": 2.2821773229381924e-06, "log_odds_chosen": 2.5945560932159424, "log_odds_ratio": -0.09449335187673569, "logits/chosen": 369.5697326660156, "logits/rejected": 390.010009765625, "logps/chosen": -0.26720184087753296, "logps/rejected": -1.5164011716842651, "loss": 0.4922, "nll_loss": 0.5011499524116516, "rewards/accuracies": 1.0, "rewards/chosen": -0.013360092416405678, "rewards/margins": 0.06245996803045273, "rewards/rejected": -0.07582006603479385, "step": 480 }, { "epoch": 2.886904761904762, "grad_norm": 17.229890823364258, "learning_rate": 2.270383045932499e-06, "log_odds_chosen": 2.310241222381592, "log_odds_ratio": -0.1323787122964859, "logits/chosen": 352.09368896484375, "logits/rejected": 390.89129638671875, "logps/chosen": -0.41382861137390137, "logps/rejected": -1.641650915145874, "loss": 0.5059, "nll_loss": 0.5348228216171265, "rewards/accuracies": 1.0, "rewards/chosen": -0.020691432058811188, "rewards/margins": 0.06139112263917923, "rewards/rejected": -0.08208255469799042, "step": 485 }, { "epoch": 2.9166666666666665, "grad_norm": 11.300827026367188, "learning_rate": 2.2587697572631284e-06, "log_odds_chosen": 1.7844845056533813, "log_odds_ratio": -0.32421550154685974, "logits/chosen": 398.19256591796875, "logits/rejected": 312.98248291015625, "logps/chosen": -0.5920530557632446, "logps/rejected": -1.449924349784851, "loss": 0.6026, "nll_loss": 0.5389891862869263, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0296026524156332, "rewards/margins": 0.04289356991648674, "rewards/rejected": -0.07249622046947479, "step": 490 }, { "epoch": 2.946428571428571, "grad_norm": 13.277922630310059, "learning_rate": 2.2473328748774737e-06, "log_odds_chosen": 2.5535902976989746, "log_odds_ratio": -0.12692758440971375, "logits/chosen": 344.2357177734375, "logits/rejected": 398.01348876953125, "logps/chosen": -0.27592435479164124, "logps/rejected": -1.5041601657867432, "loss": 0.5166, "nll_loss": 0.4667043685913086, "rewards/accuracies": 1.0, "rewards/chosen": -0.013796217739582062, "rewards/margins": 0.06141179800033569, "rewards/rejected": -0.07520802319049835, "step": 495 }, { "epoch": 2.9761904761904763, "grad_norm": 10.682828903198242, "learning_rate": 2.23606797749979e-06, "log_odds_chosen": 2.8015060424804688, "log_odds_ratio": -0.12307514250278473, "logits/chosen": 391.4792785644531, "logits/rejected": 362.94403076171875, "logps/chosen": -0.2695181965827942, "logps/rejected": -1.6667454242706299, "loss": 0.5651, "nll_loss": 0.40066272020339966, "rewards/accuracies": 1.0, "rewards/chosen": -0.01347590796649456, "rewards/margins": 0.06986136734485626, "rewards/rejected": -0.08333728462457657, "step": 500 }, { "epoch": 3.0, "eval_log_odds_chosen": 0.3315908908843994, "eval_log_odds_ratio": -0.6744564771652222, "eval_logits/chosen": 304.2648620605469, "eval_logits/rejected": 249.09341430664062, "eval_logps/chosen": -1.1561349630355835, "eval_logps/rejected": -1.3795461654663086, "eval_loss": 1.6016675233840942, "eval_nll_loss": 1.5642986297607422, "eval_rewards/accuracies": 0.5714285969734192, "eval_rewards/chosen": -0.057806748896837234, "eval_rewards/margins": 0.011170565150678158, "eval_rewards/rejected": -0.06897731870412827, "eval_runtime": 201.3868, "eval_samples_per_second": 2.746, "eval_steps_per_second": 0.348, "step": 504 }, { "epoch": 3.0, "step": 504, "total_flos": 0.0, "train_loss": 1.4336541661667446, "train_runtime": 14833.1355, "train_samples_per_second": 1.085, "train_steps_per_second": 0.034 } ], "logging_steps": 5, "max_steps": 504, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }