diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3583 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.9927766541462, + "eval_steps": 500, + "global_step": 2160, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.023114706732158336, + "grad_norm": 68.88048553466797, + "learning_rate": 4.629629629629629e-08, + "logits/chosen": -0.3351331651210785, + "logits/rejected": -0.3151743412017822, + "logps/chosen": -269.4203796386719, + "logps/rejected": -267.72064208984375, + "loss": 2.9236, + "nll_loss": 1.0532859563827515, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": -26.94203758239746, + "rewards/margins": -0.1699729710817337, + "rewards/rejected": -26.77206802368164, + "step": 10 + }, + { + "epoch": 0.04622941346431667, + "grad_norm": 61.09861755371094, + "learning_rate": 9.259259259259258e-08, + "logits/chosen": -0.33865073323249817, + "logits/rejected": -0.3208921253681183, + "logps/chosen": -263.8262634277344, + "logps/rejected": -270.32977294921875, + "loss": 2.896, + "nll_loss": 0.9992793202400208, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -26.38262939453125, + "rewards/margins": 0.6503503918647766, + "rewards/rejected": -27.03297996520996, + "step": 20 + }, + { + "epoch": 0.06934412019647501, + "grad_norm": 64.75421142578125, + "learning_rate": 1.3888888888888888e-07, + "logits/chosen": -0.2800094485282898, + "logits/rejected": -0.2686631977558136, + "logps/chosen": -262.0818176269531, + "logps/rejected": -265.42999267578125, + "loss": 2.826, + "nll_loss": 1.124384880065918, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -26.20818519592285, + "rewards/margins": 0.33481523394584656, + "rewards/rejected": -26.54299545288086, + "step": 30 + }, + { + "epoch": 0.09245882692863334, + "grad_norm": 54.530216217041016, + "learning_rate": 1.8518518518518516e-07, + "logits/chosen": -0.328824520111084, + "logits/rejected": -0.3197949528694153, + "logps/chosen": -250.150146484375, + "logps/rejected": -252.0699005126953, + "loss": 2.7636, + "nll_loss": 1.1389970779418945, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -25.015010833740234, + "rewards/margins": 0.19197671115398407, + "rewards/rejected": -25.206989288330078, + "step": 40 + }, + { + "epoch": 0.11557353366079168, + "grad_norm": 54.73969650268555, + "learning_rate": 2.3148148148148148e-07, + "logits/chosen": -0.36699360609054565, + "logits/rejected": -0.344801664352417, + "logps/chosen": -259.365966796875, + "logps/rejected": -257.6177062988281, + "loss": 2.8769, + "nll_loss": 0.9557002782821655, + "rewards/accuracies": 0.5, + "rewards/chosen": -25.936599731445312, + "rewards/margins": -0.17483071982860565, + "rewards/rejected": -25.761768341064453, + "step": 50 + }, + { + "epoch": 0.13868824039295002, + "grad_norm": 61.527992248535156, + "learning_rate": 2.7777777777777776e-07, + "logits/chosen": -0.4444943368434906, + "logits/rejected": -0.43780913949012756, + "logps/chosen": -241.99569702148438, + "logps/rejected": -240.5470428466797, + "loss": 2.8199, + "nll_loss": 1.0306382179260254, + "rewards/accuracies": 0.528124988079071, + "rewards/chosen": -24.199569702148438, + "rewards/margins": -0.144865483045578, + "rewards/rejected": -24.054706573486328, + "step": 60 + }, + { + "epoch": 0.16180294712510834, + "grad_norm": 58.2850341796875, + "learning_rate": 3.2407407407407406e-07, + "logits/chosen": -0.5648446083068848, + "logits/rejected": -0.5444747805595398, + "logps/chosen": -224.255126953125, + "logps/rejected": -223.83773803710938, + "loss": 2.7692, + "nll_loss": 0.9458900690078735, + "rewards/accuracies": 0.4906249940395355, + "rewards/chosen": -22.425512313842773, + "rewards/margins": -0.04173760861158371, + "rewards/rejected": -22.383777618408203, + "step": 70 + }, + { + "epoch": 0.1849176538572667, + "grad_norm": 50.89101028442383, + "learning_rate": 3.703703703703703e-07, + "logits/chosen": -0.7499346733093262, + "logits/rejected": -0.7246556282043457, + "logps/chosen": -214.29019165039062, + "logps/rejected": -215.6709442138672, + "loss": 2.4664, + "nll_loss": 0.8191965222358704, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -21.429019927978516, + "rewards/margins": 0.13807573914527893, + "rewards/rejected": -21.567096710205078, + "step": 80 + }, + { + "epoch": 0.208032360589425, + "grad_norm": 51.08415222167969, + "learning_rate": 4.1666666666666667e-07, + "logits/chosen": -0.824606716632843, + "logits/rejected": -0.803991436958313, + "logps/chosen": -185.02096557617188, + "logps/rejected": -191.6359405517578, + "loss": 2.215, + "nll_loss": 0.6511534452438354, + "rewards/accuracies": 0.565625011920929, + "rewards/chosen": -18.50209617614746, + "rewards/margins": 0.6614967584609985, + "rewards/rejected": -19.163593292236328, + "step": 90 + }, + { + "epoch": 0.23114706732158335, + "grad_norm": 50.10819625854492, + "learning_rate": 4.6296296296296297e-07, + "logits/chosen": -0.7869374752044678, + "logits/rejected": -0.7605717778205872, + "logps/chosen": -172.6743927001953, + "logps/rejected": -173.7969512939453, + "loss": 2.2028, + "nll_loss": 0.5232411623001099, + "rewards/accuracies": 0.515625, + "rewards/chosen": -17.267436981201172, + "rewards/margins": 0.1122552752494812, + "rewards/rejected": -17.379695892333984, + "step": 100 + }, + { + "epoch": 0.2542617740537417, + "grad_norm": 49.00399398803711, + "learning_rate": 5.092592592592593e-07, + "logits/chosen": -0.6167671084403992, + "logits/rejected": -0.5838115811347961, + "logps/chosen": -156.83273315429688, + "logps/rejected": -159.6825408935547, + "loss": 1.8947, + "nll_loss": 0.3989648222923279, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -15.683273315429688, + "rewards/margins": 0.2849821150302887, + "rewards/rejected": -15.968255996704102, + "step": 110 + }, + { + "epoch": 0.27737648078590005, + "grad_norm": 48.19024658203125, + "learning_rate": 5.555555555555555e-07, + "logits/chosen": -0.48373740911483765, + "logits/rejected": -0.46102485060691833, + "logps/chosen": -161.04762268066406, + "logps/rejected": -159.78451538085938, + "loss": 1.8634, + "nll_loss": 0.3991420865058899, + "rewards/accuracies": 0.5218750238418579, + "rewards/chosen": -16.10476303100586, + "rewards/margins": -0.12630942463874817, + "rewards/rejected": -15.9784517288208, + "step": 120 + }, + { + "epoch": 0.30049118751805837, + "grad_norm": 63.570125579833984, + "learning_rate": 6.018518518518519e-07, + "logits/chosen": -0.5185505747795105, + "logits/rejected": -0.4863056242465973, + "logps/chosen": -154.00921630859375, + "logps/rejected": -161.2861785888672, + "loss": 1.8664, + "nll_loss": 0.3488847315311432, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -15.400922775268555, + "rewards/margins": 0.7276966571807861, + "rewards/rejected": -16.128618240356445, + "step": 130 + }, + { + "epoch": 0.3236058942502167, + "grad_norm": 55.390159606933594, + "learning_rate": 6.481481481481481e-07, + "logits/chosen": -0.5367673635482788, + "logits/rejected": -0.5227854251861572, + "logps/chosen": -144.9154815673828, + "logps/rejected": -148.911376953125, + "loss": 1.8519, + "nll_loss": 0.29890117049217224, + "rewards/accuracies": 0.546875, + "rewards/chosen": -14.491546630859375, + "rewards/margins": 0.39959025382995605, + "rewards/rejected": -14.891136169433594, + "step": 140 + }, + { + "epoch": 0.34672060098237506, + "grad_norm": 88.29100799560547, + "learning_rate": 6.944444444444444e-07, + "logits/chosen": -0.5234349370002747, + "logits/rejected": -0.5064178705215454, + "logps/chosen": -144.33682250976562, + "logps/rejected": -146.9467315673828, + "loss": 1.8867, + "nll_loss": 0.29581302404403687, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -14.433680534362793, + "rewards/margins": 0.2609911262989044, + "rewards/rejected": -14.694673538208008, + "step": 150 + }, + { + "epoch": 0.3698353077145334, + "grad_norm": 43.19578170776367, + "learning_rate": 7.407407407407406e-07, + "logits/chosen": -0.47395405173301697, + "logits/rejected": -0.4435350298881531, + "logps/chosen": -155.87083435058594, + "logps/rejected": -157.5062255859375, + "loss": 1.7061, + "nll_loss": 0.3032439351081848, + "rewards/accuracies": 0.5218750238418579, + "rewards/chosen": -15.58708381652832, + "rewards/margins": 0.16353729367256165, + "rewards/rejected": -15.750622749328613, + "step": 160 + }, + { + "epoch": 0.3929500144466917, + "grad_norm": 54.197662353515625, + "learning_rate": 7.870370370370371e-07, + "logits/chosen": -0.4344661235809326, + "logits/rejected": -0.4211999475955963, + "logps/chosen": -155.08998107910156, + "logps/rejected": -160.6627655029297, + "loss": 1.5591, + "nll_loss": 0.2847481667995453, + "rewards/accuracies": 0.546875, + "rewards/chosen": -15.508997917175293, + "rewards/margins": 0.5572806000709534, + "rewards/rejected": -16.066280364990234, + "step": 170 + }, + { + "epoch": 0.41606472117885, + "grad_norm": 48.73773956298828, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -0.42254990339279175, + "logits/rejected": -0.4155765473842621, + "logps/chosen": -149.37136840820312, + "logps/rejected": -154.17172241210938, + "loss": 1.61, + "nll_loss": 0.27371498942375183, + "rewards/accuracies": 0.546875, + "rewards/chosen": -14.93713665008545, + "rewards/margins": 0.48003578186035156, + "rewards/rejected": -15.4171724319458, + "step": 180 + }, + { + "epoch": 0.4391794279110084, + "grad_norm": 51.67360305786133, + "learning_rate": 8.796296296296296e-07, + "logits/chosen": -0.4299948811531067, + "logits/rejected": -0.4166909158229828, + "logps/chosen": -157.9515380859375, + "logps/rejected": -162.32485961914062, + "loss": 1.6692, + "nll_loss": 0.2900438606739044, + "rewards/accuracies": 0.5625, + "rewards/chosen": -15.795153617858887, + "rewards/margins": 0.4373341500759125, + "rewards/rejected": -16.232486724853516, + "step": 190 + }, + { + "epoch": 0.4622941346431667, + "grad_norm": 45.50596618652344, + "learning_rate": 9.259259259259259e-07, + "logits/chosen": -0.35690927505493164, + "logits/rejected": -0.34764981269836426, + "logps/chosen": -154.99716186523438, + "logps/rejected": -160.2298126220703, + "loss": 1.6466, + "nll_loss": 0.2945239543914795, + "rewards/accuracies": 0.590624988079071, + "rewards/chosen": -15.499715805053711, + "rewards/margins": 0.5232647061347961, + "rewards/rejected": -16.022979736328125, + "step": 200 + }, + { + "epoch": 0.48540884137532503, + "grad_norm": 52.31976318359375, + "learning_rate": 9.722222222222222e-07, + "logits/chosen": -0.4234965443611145, + "logits/rejected": -0.39612382650375366, + "logps/chosen": -154.9087371826172, + "logps/rejected": -155.92794799804688, + "loss": 1.6004, + "nll_loss": 0.2901446223258972, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -15.490873336791992, + "rewards/margins": 0.10192202031612396, + "rewards/rejected": -15.592794418334961, + "step": 210 + }, + { + "epoch": 0.5085235481074833, + "grad_norm": 54.61393737792969, + "learning_rate": 9.979423868312756e-07, + "logits/chosen": -0.4337913393974304, + "logits/rejected": -0.4053143560886383, + "logps/chosen": -168.09202575683594, + "logps/rejected": -172.47401428222656, + "loss": 1.6616, + "nll_loss": 0.30150192975997925, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -16.809206008911133, + "rewards/margins": 0.43819671869277954, + "rewards/rejected": -17.24740219116211, + "step": 220 + }, + { + "epoch": 0.5316382548396418, + "grad_norm": 46.82304000854492, + "learning_rate": 9.927983539094649e-07, + "logits/chosen": -0.41667041182518005, + "logits/rejected": -0.3951401710510254, + "logps/chosen": -165.96499633789062, + "logps/rejected": -171.3835906982422, + "loss": 1.6745, + "nll_loss": 0.30009427666664124, + "rewards/accuracies": 0.5718749761581421, + "rewards/chosen": -16.596498489379883, + "rewards/margins": 0.5418606996536255, + "rewards/rejected": -17.13835906982422, + "step": 230 + }, + { + "epoch": 0.5547529615718001, + "grad_norm": 51.5750846862793, + "learning_rate": 9.876543209876542e-07, + "logits/chosen": -0.3943902254104614, + "logits/rejected": -0.3833962082862854, + "logps/chosen": -163.68643188476562, + "logps/rejected": -167.90953063964844, + "loss": 1.4982, + "nll_loss": 0.2821606993675232, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -16.368642807006836, + "rewards/margins": 0.42231208086013794, + "rewards/rejected": -16.79095458984375, + "step": 240 + }, + { + "epoch": 0.5778676683039584, + "grad_norm": 54.075496673583984, + "learning_rate": 9.825102880658436e-07, + "logits/chosen": -0.4583554267883301, + "logits/rejected": -0.4463082253932953, + "logps/chosen": -160.63284301757812, + "logps/rejected": -163.09634399414062, + "loss": 1.639, + "nll_loss": 0.25729092955589294, + "rewards/accuracies": 0.528124988079071, + "rewards/chosen": -16.063283920288086, + "rewards/margins": 0.24634972214698792, + "rewards/rejected": -16.309635162353516, + "step": 250 + }, + { + "epoch": 0.6009823750361167, + "grad_norm": 50.17490768432617, + "learning_rate": 9.77366255144033e-07, + "logits/chosen": -0.4777965545654297, + "logits/rejected": -0.4631553292274475, + "logps/chosen": -154.1898956298828, + "logps/rejected": -162.0362091064453, + "loss": 1.4771, + "nll_loss": 0.27278777956962585, + "rewards/accuracies": 0.621874988079071, + "rewards/chosen": -15.418991088867188, + "rewards/margins": 0.7846304178237915, + "rewards/rejected": -16.2036190032959, + "step": 260 + }, + { + "epoch": 0.624097081768275, + "grad_norm": 44.40957260131836, + "learning_rate": 9.722222222222222e-07, + "logits/chosen": -0.48693957924842834, + "logits/rejected": -0.4778309762477875, + "logps/chosen": -162.27188110351562, + "logps/rejected": -169.07962036132812, + "loss": 1.5028, + "nll_loss": 0.2821035087108612, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -16.227190017700195, + "rewards/margins": 0.6807710528373718, + "rewards/rejected": -16.907960891723633, + "step": 270 + }, + { + "epoch": 0.6472117885004334, + "grad_norm": 50.629066467285156, + "learning_rate": 9.670781893004115e-07, + "logits/chosen": -0.39725005626678467, + "logits/rejected": -0.3660200238227844, + "logps/chosen": -158.48001098632812, + "logps/rejected": -167.71119689941406, + "loss": 1.4805, + "nll_loss": 0.2827926576137543, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -15.848001480102539, + "rewards/margins": 0.9231182932853699, + "rewards/rejected": -16.771120071411133, + "step": 280 + }, + { + "epoch": 0.6703264952325917, + "grad_norm": 55.39129638671875, + "learning_rate": 9.619341563786007e-07, + "logits/chosen": -0.5320179462432861, + "logits/rejected": -0.4930430054664612, + "logps/chosen": -166.970947265625, + "logps/rejected": -172.72909545898438, + "loss": 1.4575, + "nll_loss": 0.2989470362663269, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -16.697093963623047, + "rewards/margins": 0.5758152604103088, + "rewards/rejected": -17.272911071777344, + "step": 290 + }, + { + "epoch": 0.6934412019647501, + "grad_norm": 42.369606018066406, + "learning_rate": 9.567901234567902e-07, + "logits/chosen": -0.43348032236099243, + "logits/rejected": -0.4254017472267151, + "logps/chosen": -162.8667449951172, + "logps/rejected": -172.35897827148438, + "loss": 1.4884, + "nll_loss": 0.2910870611667633, + "rewards/accuracies": 0.59375, + "rewards/chosen": -16.286678314208984, + "rewards/margins": 0.9492223858833313, + "rewards/rejected": -17.235897064208984, + "step": 300 + }, + { + "epoch": 0.7165559086969084, + "grad_norm": 48.293399810791016, + "learning_rate": 9.516460905349794e-07, + "logits/chosen": -0.509886622428894, + "logits/rejected": -0.49991345405578613, + "logps/chosen": -173.03567504882812, + "logps/rejected": -176.65750122070312, + "loss": 1.5401, + "nll_loss": 0.30316367745399475, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -17.30356788635254, + "rewards/margins": 0.36218342185020447, + "rewards/rejected": -17.665752410888672, + "step": 310 + }, + { + "epoch": 0.7396706154290668, + "grad_norm": 45.7746467590332, + "learning_rate": 9.465020576131687e-07, + "logits/chosen": -0.503333568572998, + "logits/rejected": -0.4878058433532715, + "logps/chosen": -163.34519958496094, + "logps/rejected": -172.25938415527344, + "loss": 1.5247, + "nll_loss": 0.29550039768218994, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -16.33452033996582, + "rewards/margins": 0.89141845703125, + "rewards/rejected": -17.225940704345703, + "step": 320 + }, + { + "epoch": 0.7627853221612251, + "grad_norm": 48.05742645263672, + "learning_rate": 9.413580246913579e-07, + "logits/chosen": -0.5755558609962463, + "logits/rejected": -0.5767273902893066, + "logps/chosen": -158.17958068847656, + "logps/rejected": -165.14163208007812, + "loss": 1.4969, + "nll_loss": 0.2938057780265808, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -15.817957878112793, + "rewards/margins": 0.6962078809738159, + "rewards/rejected": -16.5141658782959, + "step": 330 + }, + { + "epoch": 0.7859000288933834, + "grad_norm": 45.862648010253906, + "learning_rate": 9.362139917695473e-07, + "logits/chosen": -0.6315797567367554, + "logits/rejected": -0.6231464147567749, + "logps/chosen": -164.8571014404297, + "logps/rejected": -170.53570556640625, + "loss": 1.3908, + "nll_loss": 0.28307533264160156, + "rewards/accuracies": 0.5843750238418579, + "rewards/chosen": -16.48571014404297, + "rewards/margins": 0.567859947681427, + "rewards/rejected": -17.053571701049805, + "step": 340 + }, + { + "epoch": 0.8090147356255417, + "grad_norm": 45.217002868652344, + "learning_rate": 9.310699588477366e-07, + "logits/chosen": -0.5783101320266724, + "logits/rejected": -0.5816030502319336, + "logps/chosen": -167.26516723632812, + "logps/rejected": -176.68746948242188, + "loss": 1.5036, + "nll_loss": 0.2909998297691345, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -16.726520538330078, + "rewards/margins": 0.9422298669815063, + "rewards/rejected": -17.66874885559082, + "step": 350 + }, + { + "epoch": 0.8321294423577, + "grad_norm": 56.84000778198242, + "learning_rate": 9.259259259259259e-07, + "logits/chosen": -0.5195820927619934, + "logits/rejected": -0.5026860237121582, + "logps/chosen": -171.53640747070312, + "logps/rejected": -177.3377227783203, + "loss": 1.5078, + "nll_loss": 0.29021695256233215, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -17.153636932373047, + "rewards/margins": 0.5801342725753784, + "rewards/rejected": -17.733774185180664, + "step": 360 + }, + { + "epoch": 0.8552441490898585, + "grad_norm": 50.610069274902344, + "learning_rate": 9.207818930041152e-07, + "logits/chosen": -0.49760836362838745, + "logits/rejected": -0.4677702784538269, + "logps/chosen": -161.1763153076172, + "logps/rejected": -171.69003295898438, + "loss": 1.3722, + "nll_loss": 0.26248103380203247, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -16.117631912231445, + "rewards/margins": 1.051371693611145, + "rewards/rejected": -17.169002532958984, + "step": 370 + }, + { + "epoch": 0.8783588558220168, + "grad_norm": 54.772438049316406, + "learning_rate": 9.156378600823045e-07, + "logits/chosen": -0.42570480704307556, + "logits/rejected": -0.4065491259098053, + "logps/chosen": -168.25025939941406, + "logps/rejected": -176.4032440185547, + "loss": 1.3843, + "nll_loss": 0.313023179769516, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -16.825023651123047, + "rewards/margins": 0.8152991533279419, + "rewards/rejected": -17.64032554626465, + "step": 380 + }, + { + "epoch": 0.9014735625541751, + "grad_norm": 50.42124557495117, + "learning_rate": 9.104938271604939e-07, + "logits/chosen": -0.43410390615463257, + "logits/rejected": -0.4136204719543457, + "logps/chosen": -165.08279418945312, + "logps/rejected": -176.14059448242188, + "loss": 1.4235, + "nll_loss": 0.27761662006378174, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -16.50827980041504, + "rewards/margins": 1.105778455734253, + "rewards/rejected": -17.614057540893555, + "step": 390 + }, + { + "epoch": 0.9245882692863334, + "grad_norm": 51.66304016113281, + "learning_rate": 9.053497942386831e-07, + "logits/chosen": -0.40831509232521057, + "logits/rejected": -0.3836323916912079, + "logps/chosen": -162.02064514160156, + "logps/rejected": -169.6013946533203, + "loss": 1.3933, + "nll_loss": 0.28827401995658875, + "rewards/accuracies": 0.6156250238418579, + "rewards/chosen": -16.20206642150879, + "rewards/margins": 0.7580735087394714, + "rewards/rejected": -16.960140228271484, + "step": 400 + }, + { + "epoch": 0.9477029760184917, + "grad_norm": 48.54574966430664, + "learning_rate": 9.002057613168724e-07, + "logits/chosen": -0.36130112409591675, + "logits/rejected": -0.35345903038978577, + "logps/chosen": -159.15536499023438, + "logps/rejected": -170.9656524658203, + "loss": 1.3593, + "nll_loss": 0.2898252308368683, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -15.915536880493164, + "rewards/margins": 1.181027889251709, + "rewards/rejected": -17.09656524658203, + "step": 410 + }, + { + "epoch": 0.9708176827506501, + "grad_norm": 43.59242248535156, + "learning_rate": 8.950617283950617e-07, + "logits/chosen": -0.4918903410434723, + "logits/rejected": -0.4697975516319275, + "logps/chosen": -165.565673828125, + "logps/rejected": -174.68519592285156, + "loss": 1.3598, + "nll_loss": 0.30875933170318604, + "rewards/accuracies": 0.596875011920929, + "rewards/chosen": -16.556568145751953, + "rewards/margins": 0.9119526147842407, + "rewards/rejected": -17.468521118164062, + "step": 420 + }, + { + "epoch": 0.9939323894828085, + "grad_norm": 50.116798400878906, + "learning_rate": 8.89917695473251e-07, + "logits/chosen": -0.49847784638404846, + "logits/rejected": -0.5088882446289062, + "logps/chosen": -167.231201171875, + "logps/rejected": -177.6866455078125, + "loss": 1.4367, + "nll_loss": 0.28403669595718384, + "rewards/accuracies": 0.6343749761581421, + "rewards/chosen": -16.723121643066406, + "rewards/margins": 1.0455443859100342, + "rewards/rejected": -17.768667221069336, + "step": 430 + }, + { + "epoch": 0.9985553308292401, + "eval_logits/chosen": -0.4373142123222351, + "eval_logits/rejected": -0.40795600414276123, + "eval_logps/chosen": -170.67918395996094, + "eval_logps/rejected": -180.96241760253906, + "eval_loss": 1.392618179321289, + "eval_nll_loss": 0.3199608623981476, + "eval_rewards/accuracies": 0.656521737575531, + "eval_rewards/chosen": -17.067920684814453, + "eval_rewards/margins": 1.0283225774765015, + "eval_rewards/rejected": -18.096242904663086, + "eval_runtime": 77.5612, + "eval_samples_per_second": 23.543, + "eval_steps_per_second": 1.483, + "step": 432 + }, + { + "epoch": 1.0170470962149667, + "grad_norm": 35.45933151245117, + "learning_rate": 8.847736625514403e-07, + "logits/chosen": -0.45173630118370056, + "logits/rejected": -0.4663858413696289, + "logps/chosen": -160.457275390625, + "logps/rejected": -179.97222900390625, + "loss": 0.9484, + "nll_loss": 0.30594602227211, + "rewards/accuracies": 0.784375011920929, + "rewards/chosen": -16.045726776123047, + "rewards/margins": 1.9514964818954468, + "rewards/rejected": -17.997224807739258, + "step": 440 + }, + { + "epoch": 1.0401618029471251, + "grad_norm": 27.835773468017578, + "learning_rate": 8.796296296296296e-07, + "logits/chosen": -0.3361106514930725, + "logits/rejected": -0.3292810022830963, + "logps/chosen": -149.01544189453125, + "logps/rejected": -169.8839111328125, + "loss": 0.7764, + "nll_loss": 0.25240465998649597, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -14.901544570922852, + "rewards/margins": 2.086846351623535, + "rewards/rejected": -16.988391876220703, + "step": 450 + }, + { + "epoch": 1.0632765096792833, + "grad_norm": 32.76046371459961, + "learning_rate": 8.744855967078189e-07, + "logits/chosen": -0.4512772560119629, + "logits/rejected": -0.4271810054779053, + "logps/chosen": -152.64132690429688, + "logps/rejected": -174.70986938476562, + "loss": 0.7216, + "nll_loss": 0.25062257051467896, + "rewards/accuracies": 0.831250011920929, + "rewards/chosen": -15.264132499694824, + "rewards/margins": 2.206853151321411, + "rewards/rejected": -17.470985412597656, + "step": 460 + }, + { + "epoch": 1.0863912164114418, + "grad_norm": 46.92816162109375, + "learning_rate": 8.693415637860082e-07, + "logits/chosen": -0.510484516620636, + "logits/rejected": -0.4754946827888489, + "logps/chosen": -151.33753967285156, + "logps/rejected": -175.41604614257812, + "loss": 0.7542, + "nll_loss": 0.2625353932380676, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -15.133753776550293, + "rewards/margins": 2.4078497886657715, + "rewards/rejected": -17.54160499572754, + "step": 470 + }, + { + "epoch": 1.1095059231436002, + "grad_norm": 45.01936721801758, + "learning_rate": 8.641975308641974e-07, + "logits/chosen": -0.5488854646682739, + "logits/rejected": -0.534773588180542, + "logps/chosen": -158.13259887695312, + "logps/rejected": -183.81103515625, + "loss": 0.7397, + "nll_loss": 0.23221275210380554, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -15.813260078430176, + "rewards/margins": 2.5678436756134033, + "rewards/rejected": -18.381103515625, + "step": 480 + }, + { + "epoch": 1.1326206298757584, + "grad_norm": 29.731250762939453, + "learning_rate": 8.590534979423868e-07, + "logits/chosen": -0.4209683835506439, + "logits/rejected": -0.40175366401672363, + "logps/chosen": -148.5663604736328, + "logps/rejected": -172.50228881835938, + "loss": 0.6839, + "nll_loss": 0.2801415026187897, + "rewards/accuracies": 0.846875011920929, + "rewards/chosen": -14.856637954711914, + "rewards/margins": 2.3935940265655518, + "rewards/rejected": -17.250232696533203, + "step": 490 + }, + { + "epoch": 1.1557353366079168, + "grad_norm": 35.19107437133789, + "learning_rate": 8.539094650205761e-07, + "logits/chosen": -0.5119351148605347, + "logits/rejected": -0.48603877425193787, + "logps/chosen": -147.54727172851562, + "logps/rejected": -172.57888793945312, + "loss": 0.7342, + "nll_loss": 0.24299657344818115, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -14.754727363586426, + "rewards/margins": 2.503164529800415, + "rewards/rejected": -17.257890701293945, + "step": 500 + }, + { + "epoch": 1.178850043340075, + "grad_norm": 36.37306213378906, + "learning_rate": 8.487654320987654e-07, + "logits/chosen": -0.5116412043571472, + "logits/rejected": -0.5097488164901733, + "logps/chosen": -152.76693725585938, + "logps/rejected": -173.20547485351562, + "loss": 0.7418, + "nll_loss": 0.2616187334060669, + "rewards/accuracies": 0.796875, + "rewards/chosen": -15.276693344116211, + "rewards/margins": 2.0438523292541504, + "rewards/rejected": -17.320547103881836, + "step": 510 + }, + { + "epoch": 1.2019647500722335, + "grad_norm": 32.158714294433594, + "learning_rate": 8.436213991769548e-07, + "logits/chosen": -0.41989222168922424, + "logits/rejected": -0.40580207109451294, + "logps/chosen": -160.35772705078125, + "logps/rejected": -186.72616577148438, + "loss": 0.7297, + "nll_loss": 0.2849249839782715, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -16.0357723236084, + "rewards/margins": 2.636845111846924, + "rewards/rejected": -18.672618865966797, + "step": 520 + }, + { + "epoch": 1.2250794568043917, + "grad_norm": 38.98585510253906, + "learning_rate": 8.38477366255144e-07, + "logits/chosen": -0.43002861738204956, + "logits/rejected": -0.43659868836402893, + "logps/chosen": -149.89114379882812, + "logps/rejected": -177.4897918701172, + "loss": 0.7001, + "nll_loss": 0.25785765051841736, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -14.989115715026855, + "rewards/margins": 2.7598659992218018, + "rewards/rejected": -17.748981475830078, + "step": 530 + }, + { + "epoch": 1.24819416353655, + "grad_norm": 33.50174331665039, + "learning_rate": 8.333333333333333e-07, + "logits/chosen": -0.5792837142944336, + "logits/rejected": -0.5748234987258911, + "logps/chosen": -154.1841278076172, + "logps/rejected": -175.39093017578125, + "loss": 0.77, + "nll_loss": 0.28076162934303284, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -15.418413162231445, + "rewards/margins": 2.120678424835205, + "rewards/rejected": -17.539093017578125, + "step": 540 + }, + { + "epoch": 1.2713088702687085, + "grad_norm": 35.51890182495117, + "learning_rate": 8.281893004115226e-07, + "logits/chosen": -0.6797876358032227, + "logits/rejected": -0.6701671481132507, + "logps/chosen": -164.1734619140625, + "logps/rejected": -189.96820068359375, + "loss": 0.6452, + "nll_loss": 0.2875816822052002, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -16.417346954345703, + "rewards/margins": 2.579475164413452, + "rewards/rejected": -18.9968204498291, + "step": 550 + }, + { + "epoch": 1.2944235770008667, + "grad_norm": 36.58209228515625, + "learning_rate": 8.23045267489712e-07, + "logits/chosen": -0.6092251539230347, + "logits/rejected": -0.5988754630088806, + "logps/chosen": -150.59115600585938, + "logps/rejected": -178.7034149169922, + "loss": 0.7005, + "nll_loss": 0.26352283358573914, + "rewards/accuracies": 0.856249988079071, + "rewards/chosen": -15.059117317199707, + "rewards/margins": 2.811225652694702, + "rewards/rejected": -17.870342254638672, + "step": 560 + }, + { + "epoch": 1.3175382837330252, + "grad_norm": 38.884254455566406, + "learning_rate": 8.179012345679011e-07, + "logits/chosen": -0.5773380994796753, + "logits/rejected": -0.5545040369033813, + "logps/chosen": -159.92147827148438, + "logps/rejected": -186.68997192382812, + "loss": 0.7401, + "nll_loss": 0.26087266206741333, + "rewards/accuracies": 0.84375, + "rewards/chosen": -15.992147445678711, + "rewards/margins": 2.6768481731414795, + "rewards/rejected": -18.668996810913086, + "step": 570 + }, + { + "epoch": 1.3406529904651836, + "grad_norm": 43.70725631713867, + "learning_rate": 8.127572016460905e-07, + "logits/chosen": -0.5863763093948364, + "logits/rejected": -0.5670869946479797, + "logps/chosen": -157.2144012451172, + "logps/rejected": -184.8651123046875, + "loss": 0.72, + "nll_loss": 0.2669151723384857, + "rewards/accuracies": 0.8531249761581421, + "rewards/chosen": -15.721441268920898, + "rewards/margins": 2.7650701999664307, + "rewards/rejected": -18.486513137817383, + "step": 580 + }, + { + "epoch": 1.3637676971973418, + "grad_norm": 39.63798904418945, + "learning_rate": 8.076131687242798e-07, + "logits/chosen": -0.529544472694397, + "logits/rejected": -0.5398887395858765, + "logps/chosen": -148.3323974609375, + "logps/rejected": -174.19955444335938, + "loss": 0.6607, + "nll_loss": 0.24997957050800323, + "rewards/accuracies": 0.8343750238418579, + "rewards/chosen": -14.833239555358887, + "rewards/margins": 2.586716890335083, + "rewards/rejected": -17.419958114624023, + "step": 590 + }, + { + "epoch": 1.3868824039295, + "grad_norm": 36.14802169799805, + "learning_rate": 8.024691358024691e-07, + "logits/chosen": -0.441204309463501, + "logits/rejected": -0.4048687815666199, + "logps/chosen": -156.30531311035156, + "logps/rejected": -183.83956909179688, + "loss": 0.733, + "nll_loss": 0.2541951537132263, + "rewards/accuracies": 0.8343750238418579, + "rewards/chosen": -15.630529403686523, + "rewards/margins": 2.753427743911743, + "rewards/rejected": -18.38395881652832, + "step": 600 + }, + { + "epoch": 1.4099971106616584, + "grad_norm": 40.05307388305664, + "learning_rate": 7.973251028806583e-07, + "logits/chosen": -0.41722431778907776, + "logits/rejected": -0.4100796580314636, + "logps/chosen": -151.99453735351562, + "logps/rejected": -175.85577392578125, + "loss": 0.7682, + "nll_loss": 0.25730782747268677, + "rewards/accuracies": 0.831250011920929, + "rewards/chosen": -15.199453353881836, + "rewards/margins": 2.3861212730407715, + "rewards/rejected": -17.585575103759766, + "step": 610 + }, + { + "epoch": 1.4331118173938169, + "grad_norm": 24.526100158691406, + "learning_rate": 7.921810699588477e-07, + "logits/chosen": -0.5749002695083618, + "logits/rejected": -0.5751099586486816, + "logps/chosen": -157.60520935058594, + "logps/rejected": -185.5096893310547, + "loss": 0.5956, + "nll_loss": 0.24547366797924042, + "rewards/accuracies": 0.8843749761581421, + "rewards/chosen": -15.760522842407227, + "rewards/margins": 2.790447473526001, + "rewards/rejected": -18.55097007751465, + "step": 620 + }, + { + "epoch": 1.456226524125975, + "grad_norm": 36.09085464477539, + "learning_rate": 7.870370370370371e-07, + "logits/chosen": -0.5282450914382935, + "logits/rejected": -0.5175204873085022, + "logps/chosen": -146.50106811523438, + "logps/rejected": -173.6673126220703, + "loss": 0.6405, + "nll_loss": 0.24812671542167664, + "rewards/accuracies": 0.859375, + "rewards/chosen": -14.650106430053711, + "rewards/margins": 2.7166221141815186, + "rewards/rejected": -17.366729736328125, + "step": 630 + }, + { + "epoch": 1.4793412308581335, + "grad_norm": 41.768348693847656, + "learning_rate": 7.818930041152262e-07, + "logits/chosen": -0.45312589406967163, + "logits/rejected": -0.4504320025444031, + "logps/chosen": -142.28053283691406, + "logps/rejected": -170.82095336914062, + "loss": 0.6841, + "nll_loss": 0.23785972595214844, + "rewards/accuracies": 0.871874988079071, + "rewards/chosen": -14.228052139282227, + "rewards/margins": 2.8540425300598145, + "rewards/rejected": -17.082096099853516, + "step": 640 + }, + { + "epoch": 1.502455937590292, + "grad_norm": 34.300228118896484, + "learning_rate": 7.767489711934156e-07, + "logits/chosen": -0.5092964172363281, + "logits/rejected": -0.5271193981170654, + "logps/chosen": -155.85000610351562, + "logps/rejected": -186.28884887695312, + "loss": 0.6303, + "nll_loss": 0.24494795501232147, + "rewards/accuracies": 0.878125011920929, + "rewards/chosen": -15.584999084472656, + "rewards/margins": 3.0438854694366455, + "rewards/rejected": -18.62888526916504, + "step": 650 + }, + { + "epoch": 1.5255706443224502, + "grad_norm": 33.022884368896484, + "learning_rate": 7.716049382716049e-07, + "logits/chosen": -0.5350406169891357, + "logits/rejected": -0.5363395810127258, + "logps/chosen": -147.15267944335938, + "logps/rejected": -174.66571044921875, + "loss": 0.7096, + "nll_loss": 0.24733343720436096, + "rewards/accuracies": 0.8062499761581421, + "rewards/chosen": -14.7152681350708, + "rewards/margins": 2.751302480697632, + "rewards/rejected": -17.466571807861328, + "step": 660 + }, + { + "epoch": 1.5486853510546084, + "grad_norm": 53.42652130126953, + "learning_rate": 7.664609053497943e-07, + "logits/chosen": -0.6187707781791687, + "logits/rejected": -0.6232476234436035, + "logps/chosen": -158.1448211669922, + "logps/rejected": -187.09014892578125, + "loss": 0.6173, + "nll_loss": 0.22900207340717316, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -15.814483642578125, + "rewards/margins": 2.8945329189300537, + "rewards/rejected": -18.709014892578125, + "step": 670 + }, + { + "epoch": 1.5718000577867668, + "grad_norm": 40.11577606201172, + "learning_rate": 7.613168724279834e-07, + "logits/chosen": -0.5888317227363586, + "logits/rejected": -0.600538432598114, + "logps/chosen": -149.23678588867188, + "logps/rejected": -175.3176727294922, + "loss": 0.7099, + "nll_loss": 0.21695959568023682, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": -14.923675537109375, + "rewards/margins": 2.6080896854400635, + "rewards/rejected": -17.531766891479492, + "step": 680 + }, + { + "epoch": 1.5949147645189252, + "grad_norm": 26.918350219726562, + "learning_rate": 7.561728395061728e-07, + "logits/chosen": -0.6150851845741272, + "logits/rejected": -0.6231178045272827, + "logps/chosen": -164.5893096923828, + "logps/rejected": -196.3010711669922, + "loss": 0.6595, + "nll_loss": 0.23331816494464874, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -16.45893096923828, + "rewards/margins": 3.171175479888916, + "rewards/rejected": -19.630107879638672, + "step": 690 + }, + { + "epoch": 1.6180294712510834, + "grad_norm": 33.39554214477539, + "learning_rate": 7.510288065843621e-07, + "logits/chosen": -0.5018739700317383, + "logits/rejected": -0.4825282692909241, + "logps/chosen": -149.8149871826172, + "logps/rejected": -177.98583984375, + "loss": 0.6348, + "nll_loss": 0.2212187498807907, + "rewards/accuracies": 0.8656250238418579, + "rewards/chosen": -14.981498718261719, + "rewards/margins": 2.817084789276123, + "rewards/rejected": -17.798583984375, + "step": 700 + }, + { + "epoch": 1.6411441779832419, + "grad_norm": 29.109973907470703, + "learning_rate": 7.458847736625515e-07, + "logits/chosen": -0.47257423400878906, + "logits/rejected": -0.4691304564476013, + "logps/chosen": -138.67837524414062, + "logps/rejected": -164.54855346679688, + "loss": 0.6175, + "nll_loss": 0.1982104480266571, + "rewards/accuracies": 0.831250011920929, + "rewards/chosen": -13.867838859558105, + "rewards/margins": 2.5870203971862793, + "rewards/rejected": -16.454858779907227, + "step": 710 + }, + { + "epoch": 1.6642588847154003, + "grad_norm": 38.35542678833008, + "learning_rate": 7.407407407407406e-07, + "logits/chosen": -0.6042996644973755, + "logits/rejected": -0.6067830324172974, + "logps/chosen": -144.49464416503906, + "logps/rejected": -169.24853515625, + "loss": 0.5938, + "nll_loss": 0.23023180663585663, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -14.449464797973633, + "rewards/margins": 2.4753904342651367, + "rewards/rejected": -16.924854278564453, + "step": 720 + }, + { + "epoch": 1.6873735914475585, + "grad_norm": 32.6804084777832, + "learning_rate": 7.3559670781893e-07, + "logits/chosen": -0.6318911910057068, + "logits/rejected": -0.623616099357605, + "logps/chosen": -151.0692596435547, + "logps/rejected": -178.22621154785156, + "loss": 0.6287, + "nll_loss": 0.20305195450782776, + "rewards/accuracies": 0.84375, + "rewards/chosen": -15.106924057006836, + "rewards/margins": 2.7156949043273926, + "rewards/rejected": -17.822620391845703, + "step": 730 + }, + { + "epoch": 1.7104882981797167, + "grad_norm": 33.47980499267578, + "learning_rate": 7.304526748971193e-07, + "logits/chosen": -0.5788182020187378, + "logits/rejected": -0.5648819208145142, + "logps/chosen": -162.39569091796875, + "logps/rejected": -193.59268188476562, + "loss": 0.5942, + "nll_loss": 0.21426251530647278, + "rewards/accuracies": 0.84375, + "rewards/chosen": -16.23956871032715, + "rewards/margins": 3.1196982860565186, + "rewards/rejected": -19.359268188476562, + "step": 740 + }, + { + "epoch": 1.7336030049118751, + "grad_norm": 37.14680099487305, + "learning_rate": 7.253086419753086e-07, + "logits/chosen": -0.5623105764389038, + "logits/rejected": -0.5381472110748291, + "logps/chosen": -139.84085083007812, + "logps/rejected": -167.0809326171875, + "loss": 0.598, + "nll_loss": 0.18970206379890442, + "rewards/accuracies": 0.8656250238418579, + "rewards/chosen": -13.984085083007812, + "rewards/margins": 2.7240078449249268, + "rewards/rejected": -16.708093643188477, + "step": 750 + }, + { + "epoch": 1.7567177116440336, + "grad_norm": 35.07746124267578, + "learning_rate": 7.201646090534979e-07, + "logits/chosen": -0.5330817103385925, + "logits/rejected": -0.540014386177063, + "logps/chosen": -153.24600219726562, + "logps/rejected": -185.0384063720703, + "loss": 0.6322, + "nll_loss": 0.198031947016716, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -15.324602127075195, + "rewards/margins": 3.1792402267456055, + "rewards/rejected": -18.503841400146484, + "step": 760 + }, + { + "epoch": 1.7798324183761918, + "grad_norm": 34.26885986328125, + "learning_rate": 7.150205761316872e-07, + "logits/chosen": -0.6087044477462769, + "logits/rejected": -0.599485456943512, + "logps/chosen": -145.72488403320312, + "logps/rejected": -171.98873901367188, + "loss": 0.6407, + "nll_loss": 0.18888258934020996, + "rewards/accuracies": 0.8187500238418579, + "rewards/chosen": -14.572488784790039, + "rewards/margins": 2.626385450363159, + "rewards/rejected": -17.19887351989746, + "step": 770 + }, + { + "epoch": 1.8029471251083502, + "grad_norm": 33.3639030456543, + "learning_rate": 7.098765432098766e-07, + "logits/chosen": -0.6275098323822021, + "logits/rejected": -0.6126091480255127, + "logps/chosen": -149.48826599121094, + "logps/rejected": -179.92613220214844, + "loss": 0.6014, + "nll_loss": 0.2067473828792572, + "rewards/accuracies": 0.890625, + "rewards/chosen": -14.948827743530273, + "rewards/margins": 3.0437865257263184, + "rewards/rejected": -17.99261474609375, + "step": 780 + }, + { + "epoch": 1.8260618318405086, + "grad_norm": 34.436153411865234, + "learning_rate": 7.047325102880658e-07, + "logits/chosen": -0.6325902938842773, + "logits/rejected": -0.6320141553878784, + "logps/chosen": -149.53546142578125, + "logps/rejected": -177.4294891357422, + "loss": 0.5987, + "nll_loss": 0.21218529343605042, + "rewards/accuracies": 0.840624988079071, + "rewards/chosen": -14.953544616699219, + "rewards/margins": 2.7894036769866943, + "rewards/rejected": -17.742948532104492, + "step": 790 + }, + { + "epoch": 1.8491765385726668, + "grad_norm": 41.68962097167969, + "learning_rate": 6.995884773662551e-07, + "logits/chosen": -0.5112544298171997, + "logits/rejected": -0.5018970370292664, + "logps/chosen": -139.74612426757812, + "logps/rejected": -170.65365600585938, + "loss": 0.5737, + "nll_loss": 0.18416205048561096, + "rewards/accuracies": 0.8812500238418579, + "rewards/chosen": -13.97461223602295, + "rewards/margins": 3.0907552242279053, + "rewards/rejected": -17.065366744995117, + "step": 800 + }, + { + "epoch": 1.872291245304825, + "grad_norm": 34.62812423706055, + "learning_rate": 6.944444444444444e-07, + "logits/chosen": -0.5771014094352722, + "logits/rejected": -0.5736783146858215, + "logps/chosen": -149.42527770996094, + "logps/rejected": -179.3314666748047, + "loss": 0.6492, + "nll_loss": 0.19857726991176605, + "rewards/accuracies": 0.8687499761581421, + "rewards/chosen": -14.942527770996094, + "rewards/margins": 2.990619421005249, + "rewards/rejected": -17.933147430419922, + "step": 810 + }, + { + "epoch": 1.8954059520369835, + "grad_norm": 27.703113555908203, + "learning_rate": 6.893004115226337e-07, + "logits/chosen": -0.6073204278945923, + "logits/rejected": -0.6056413054466248, + "logps/chosen": -151.15286254882812, + "logps/rejected": -184.02236938476562, + "loss": 0.5758, + "nll_loss": 0.20334260165691376, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -15.115285873413086, + "rewards/margins": 3.2869529724121094, + "rewards/rejected": -18.402238845825195, + "step": 820 + }, + { + "epoch": 1.918520658769142, + "grad_norm": 38.63829040527344, + "learning_rate": 6.84156378600823e-07, + "logits/chosen": -0.564698338508606, + "logits/rejected": -0.5553814172744751, + "logps/chosen": -141.9647216796875, + "logps/rejected": -167.49462890625, + "loss": 0.604, + "nll_loss": 0.19638094305992126, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": -14.19647216796875, + "rewards/margins": 2.552992343902588, + "rewards/rejected": -16.74946403503418, + "step": 830 + }, + { + "epoch": 1.9416353655013001, + "grad_norm": 37.33395767211914, + "learning_rate": 6.790123456790123e-07, + "logits/chosen": -0.6794390678405762, + "logits/rejected": -0.6817184686660767, + "logps/chosen": -150.2278289794922, + "logps/rejected": -178.04473876953125, + "loss": 0.6078, + "nll_loss": 0.18291929364204407, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": -15.022783279418945, + "rewards/margins": 2.781691074371338, + "rewards/rejected": -17.804473876953125, + "step": 840 + }, + { + "epoch": 1.9647500722334585, + "grad_norm": 33.96713638305664, + "learning_rate": 6.738683127572016e-07, + "logits/chosen": -0.716331422328949, + "logits/rejected": -0.7188450694084167, + "logps/chosen": -147.86050415039062, + "logps/rejected": -174.76864624023438, + "loss": 0.5987, + "nll_loss": 0.19556212425231934, + "rewards/accuracies": 0.878125011920929, + "rewards/chosen": -14.786050796508789, + "rewards/margins": 2.6908116340637207, + "rewards/rejected": -17.476863861083984, + "step": 850 + }, + { + "epoch": 1.987864778965617, + "grad_norm": 35.31864929199219, + "learning_rate": 6.687242798353909e-07, + "logits/chosen": -0.6668294668197632, + "logits/rejected": -0.6580954790115356, + "logps/chosen": -149.87158203125, + "logps/rejected": -180.49496459960938, + "loss": 0.5472, + "nll_loss": 0.1864423006772995, + "rewards/accuracies": 0.8656250238418579, + "rewards/chosen": -14.987157821655273, + "rewards/margins": 3.06233811378479, + "rewards/rejected": -18.049495697021484, + "step": 860 + }, + { + "epoch": 1.999422132331696, + "eval_logits/chosen": -0.5687969923019409, + "eval_logits/rejected": -0.5434355139732361, + "eval_logps/chosen": -162.90855407714844, + "eval_logps/rejected": -175.85232543945312, + "eval_loss": 1.2972584962844849, + "eval_nll_loss": 0.2148308902978897, + "eval_rewards/accuracies": 0.658695638179779, + "eval_rewards/chosen": -16.290855407714844, + "eval_rewards/margins": 1.2943781614303589, + "eval_rewards/rejected": -17.585235595703125, + "eval_runtime": 77.3685, + "eval_samples_per_second": 23.601, + "eval_steps_per_second": 1.486, + "step": 865 + }, + { + "epoch": 2.010979485697775, + "grad_norm": 11.489439964294434, + "learning_rate": 6.635802469135802e-07, + "logits/chosen": -0.6154376864433289, + "logits/rejected": -0.581082820892334, + "logps/chosen": -146.31674194335938, + "logps/rejected": -183.1867218017578, + "loss": 0.4233, + "nll_loss": 0.17745935916900635, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -14.631675720214844, + "rewards/margins": 3.6869969367980957, + "rewards/rejected": -18.318674087524414, + "step": 870 + }, + { + "epoch": 2.0340941924299334, + "grad_norm": 8.267936706542969, + "learning_rate": 6.584362139917695e-07, + "logits/chosen": -0.5296713709831238, + "logits/rejected": -0.5492919683456421, + "logps/chosen": -135.2528839111328, + "logps/rejected": -184.4834747314453, + "loss": 0.2554, + "nll_loss": 0.17692770063877106, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -13.525288581848145, + "rewards/margins": 4.923060417175293, + "rewards/rejected": -18.448348999023438, + "step": 880 + }, + { + "epoch": 2.057208899162092, + "grad_norm": 17.753084182739258, + "learning_rate": 6.532921810699589e-07, + "logits/chosen": -0.4458081126213074, + "logits/rejected": -0.45663532614707947, + "logps/chosen": -132.5780792236328, + "logps/rejected": -181.31776428222656, + "loss": 0.2358, + "nll_loss": 0.1446482390165329, + "rewards/accuracies": 0.971875011920929, + "rewards/chosen": -13.257807731628418, + "rewards/margins": 4.87396764755249, + "rewards/rejected": -18.13177490234375, + "step": 890 + }, + { + "epoch": 2.0803236058942502, + "grad_norm": 9.170333862304688, + "learning_rate": 6.481481481481481e-07, + "logits/chosen": -0.4914008677005768, + "logits/rejected": -0.4894467890262604, + "logps/chosen": -139.57400512695312, + "logps/rejected": -189.27447509765625, + "loss": 0.2373, + "nll_loss": 0.1590987890958786, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -13.95740032196045, + "rewards/margins": 4.970045566558838, + "rewards/rejected": -18.927448272705078, + "step": 900 + }, + { + "epoch": 2.1034383126264085, + "grad_norm": 16.0671329498291, + "learning_rate": 6.430041152263375e-07, + "logits/chosen": -0.29768380522727966, + "logits/rejected": -0.3132530450820923, + "logps/chosen": -133.86160278320312, + "logps/rejected": -184.111083984375, + "loss": 0.2528, + "nll_loss": 0.1800731122493744, + "rewards/accuracies": 0.984375, + "rewards/chosen": -13.386159896850586, + "rewards/margins": 5.024949073791504, + "rewards/rejected": -18.411109924316406, + "step": 910 + }, + { + "epoch": 2.1265530193585667, + "grad_norm": 11.169416427612305, + "learning_rate": 6.378600823045267e-07, + "logits/chosen": -0.25930145382881165, + "logits/rejected": -0.2452802211046219, + "logps/chosen": -138.69859313964844, + "logps/rejected": -188.9458465576172, + "loss": 0.2369, + "nll_loss": 0.15493367612361908, + "rewards/accuracies": 0.9781249761581421, + "rewards/chosen": -13.86985969543457, + "rewards/margins": 5.024728298187256, + "rewards/rejected": -18.894588470458984, + "step": 920 + }, + { + "epoch": 2.1496677260907253, + "grad_norm": 20.787609100341797, + "learning_rate": 6.32716049382716e-07, + "logits/chosen": -0.4232078194618225, + "logits/rejected": -0.4213971197605133, + "logps/chosen": -133.97911071777344, + "logps/rejected": -183.1697540283203, + "loss": 0.2526, + "nll_loss": 0.17497238516807556, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -13.397911071777344, + "rewards/margins": 4.919064998626709, + "rewards/rejected": -18.31697654724121, + "step": 930 + }, + { + "epoch": 2.1727824328228835, + "grad_norm": 16.55530548095703, + "learning_rate": 6.275720164609053e-07, + "logits/chosen": -0.5225564241409302, + "logits/rejected": -0.5253915190696716, + "logps/chosen": -147.48667907714844, + "logps/rejected": -200.44107055664062, + "loss": 0.2383, + "nll_loss": 0.16094490885734558, + "rewards/accuracies": 0.9906250238418579, + "rewards/chosen": -14.748669624328613, + "rewards/margins": 5.295438766479492, + "rewards/rejected": -20.044105529785156, + "step": 940 + }, + { + "epoch": 2.1958971395550417, + "grad_norm": 25.473421096801758, + "learning_rate": 6.224279835390947e-07, + "logits/chosen": -0.6133296489715576, + "logits/rejected": -0.6065386533737183, + "logps/chosen": -147.1841583251953, + "logps/rejected": -198.28070068359375, + "loss": 0.2342, + "nll_loss": 0.17038078606128693, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -14.718416213989258, + "rewards/margins": 5.109654903411865, + "rewards/rejected": -19.82806968688965, + "step": 950 + }, + { + "epoch": 2.2190118462872004, + "grad_norm": 28.808799743652344, + "learning_rate": 6.172839506172839e-07, + "logits/chosen": -0.566586971282959, + "logits/rejected": -0.5580301284790039, + "logps/chosen": -141.78317260742188, + "logps/rejected": -189.71841430664062, + "loss": 0.2432, + "nll_loss": 0.16720861196517944, + "rewards/accuracies": 0.9781249761581421, + "rewards/chosen": -14.178317070007324, + "rewards/margins": 4.793524265289307, + "rewards/rejected": -18.97184181213379, + "step": 960 + }, + { + "epoch": 2.2421265530193586, + "grad_norm": 15.181388854980469, + "learning_rate": 6.121399176954732e-07, + "logits/chosen": -0.5153671503067017, + "logits/rejected": -0.49234214425086975, + "logps/chosen": -142.28048706054688, + "logps/rejected": -192.72178649902344, + "loss": 0.2565, + "nll_loss": 0.173838809132576, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -14.228050231933594, + "rewards/margins": 5.044127464294434, + "rewards/rejected": -19.272180557250977, + "step": 970 + }, + { + "epoch": 2.265241259751517, + "grad_norm": 10.162031173706055, + "learning_rate": 6.069958847736625e-07, + "logits/chosen": -0.3831091523170471, + "logits/rejected": -0.3817598521709442, + "logps/chosen": -142.67413330078125, + "logps/rejected": -191.6265106201172, + "loss": 0.2239, + "nll_loss": 0.15289117395877838, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -14.267412185668945, + "rewards/margins": 4.89523983001709, + "rewards/rejected": -19.16265296936035, + "step": 980 + }, + { + "epoch": 2.2883559664836755, + "grad_norm": 11.667806625366211, + "learning_rate": 6.018518518518519e-07, + "logits/chosen": -0.37663665413856506, + "logits/rejected": -0.36168596148490906, + "logps/chosen": -134.7302703857422, + "logps/rejected": -181.87161254882812, + "loss": 0.2179, + "nll_loss": 0.14360648393630981, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -13.473027229309082, + "rewards/margins": 4.714133262634277, + "rewards/rejected": -18.18716049194336, + "step": 990 + }, + { + "epoch": 2.3114706732158337, + "grad_norm": 13.98948860168457, + "learning_rate": 5.96707818930041e-07, + "logits/chosen": -0.35517022013664246, + "logits/rejected": -0.3607296645641327, + "logps/chosen": -143.46397399902344, + "logps/rejected": -196.64694213867188, + "loss": 0.2393, + "nll_loss": 0.16406962275505066, + "rewards/accuracies": 0.9906250238418579, + "rewards/chosen": -14.346399307250977, + "rewards/margins": 5.318297863006592, + "rewards/rejected": -19.664695739746094, + "step": 1000 + }, + { + "epoch": 2.334585379947992, + "grad_norm": 13.17771053314209, + "learning_rate": 5.915637860082304e-07, + "logits/chosen": -0.3597460389137268, + "logits/rejected": -0.36051079630851746, + "logps/chosen": -138.61643981933594, + "logps/rejected": -192.05581665039062, + "loss": 0.2306, + "nll_loss": 0.16202880442142487, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -13.86164379119873, + "rewards/margins": 5.343939304351807, + "rewards/rejected": -19.205581665039062, + "step": 1010 + }, + { + "epoch": 2.35770008668015, + "grad_norm": 13.457245826721191, + "learning_rate": 5.864197530864198e-07, + "logits/chosen": -0.4916199743747711, + "logits/rejected": -0.5020965933799744, + "logps/chosen": -147.89541625976562, + "logps/rejected": -199.31967163085938, + "loss": 0.2374, + "nll_loss": 0.16406235098838806, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -14.789543151855469, + "rewards/margins": 5.142425060272217, + "rewards/rejected": -19.931964874267578, + "step": 1020 + }, + { + "epoch": 2.3808147934123087, + "grad_norm": 13.335782051086426, + "learning_rate": 5.812757201646091e-07, + "logits/chosen": -0.39383864402770996, + "logits/rejected": -0.40474215149879456, + "logps/chosen": -133.04669189453125, + "logps/rejected": -180.41250610351562, + "loss": 0.242, + "nll_loss": 0.1537107676267624, + "rewards/accuracies": 0.965624988079071, + "rewards/chosen": -13.304669380187988, + "rewards/margins": 4.736581802368164, + "rewards/rejected": -18.041250228881836, + "step": 1030 + }, + { + "epoch": 2.403929500144467, + "grad_norm": 6.159650802612305, + "learning_rate": 5.761316872427983e-07, + "logits/chosen": -0.6221314668655396, + "logits/rejected": -0.5792278051376343, + "logps/chosen": -147.80052185058594, + "logps/rejected": -199.4378662109375, + "loss": 0.2262, + "nll_loss": 0.151776522397995, + "rewards/accuracies": 0.984375, + "rewards/chosen": -14.780054092407227, + "rewards/margins": 5.163733005523682, + "rewards/rejected": -19.943782806396484, + "step": 1040 + }, + { + "epoch": 2.427044206876625, + "grad_norm": 12.739320755004883, + "learning_rate": 5.709876543209876e-07, + "logits/chosen": -0.5569005012512207, + "logits/rejected": -0.5471926927566528, + "logps/chosen": -150.28656005859375, + "logps/rejected": -203.32809448242188, + "loss": 0.2392, + "nll_loss": 0.15395130217075348, + "rewards/accuracies": 0.9906250238418579, + "rewards/chosen": -15.028657913208008, + "rewards/margins": 5.304154872894287, + "rewards/rejected": -20.332813262939453, + "step": 1050 + }, + { + "epoch": 2.4501589136087834, + "grad_norm": 10.99962329864502, + "learning_rate": 5.65843621399177e-07, + "logits/chosen": -0.6100250482559204, + "logits/rejected": -0.6070842146873474, + "logps/chosen": -144.28292846679688, + "logps/rejected": -192.26254272460938, + "loss": 0.2358, + "nll_loss": 0.16113388538360596, + "rewards/accuracies": 0.996874988079071, + "rewards/chosen": -14.42829418182373, + "rewards/margins": 4.797961235046387, + "rewards/rejected": -19.226253509521484, + "step": 1060 + }, + { + "epoch": 2.473273620340942, + "grad_norm": 14.381885528564453, + "learning_rate": 5.606995884773662e-07, + "logits/chosen": -0.4229808747768402, + "logits/rejected": -0.4043405055999756, + "logps/chosen": -135.27508544921875, + "logps/rejected": -184.1940460205078, + "loss": 0.2726, + "nll_loss": 0.16423283517360687, + "rewards/accuracies": 0.965624988079071, + "rewards/chosen": -13.527506828308105, + "rewards/margins": 4.8918962478637695, + "rewards/rejected": -18.419404983520508, + "step": 1070 + }, + { + "epoch": 2.4963883270731, + "grad_norm": 11.742487907409668, + "learning_rate": 5.555555555555555e-07, + "logits/chosen": -0.4398534297943115, + "logits/rejected": -0.43547695875167847, + "logps/chosen": -134.5975341796875, + "logps/rejected": -182.41848754882812, + "loss": 0.2452, + "nll_loss": 0.16178709268569946, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -13.459753036499023, + "rewards/margins": 4.782095909118652, + "rewards/rejected": -18.24184799194336, + "step": 1080 + }, + { + "epoch": 2.5195030338052584, + "grad_norm": 12.080589294433594, + "learning_rate": 5.504115226337448e-07, + "logits/chosen": -0.45496922731399536, + "logits/rejected": -0.45996856689453125, + "logps/chosen": -132.09829711914062, + "logps/rejected": -180.12393188476562, + "loss": 0.2284, + "nll_loss": 0.1582447737455368, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -13.209829330444336, + "rewards/margins": 4.80256462097168, + "rewards/rejected": -18.012393951416016, + "step": 1090 + }, + { + "epoch": 2.542617740537417, + "grad_norm": 24.479488372802734, + "learning_rate": 5.452674897119342e-07, + "logits/chosen": -0.36444956064224243, + "logits/rejected": -0.3619704842567444, + "logps/chosen": -141.44894409179688, + "logps/rejected": -194.81773376464844, + "loss": 0.2364, + "nll_loss": 0.17286133766174316, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -14.14489459991455, + "rewards/margins": 5.336878776550293, + "rewards/rejected": -19.48177146911621, + "step": 1100 + }, + { + "epoch": 2.5657324472695753, + "grad_norm": 12.051857948303223, + "learning_rate": 5.401234567901234e-07, + "logits/chosen": -0.45673027634620667, + "logits/rejected": -0.4733441472053528, + "logps/chosen": -136.0276336669922, + "logps/rejected": -188.5570068359375, + "loss": 0.2305, + "nll_loss": 0.1618407666683197, + "rewards/accuracies": 0.9781249761581421, + "rewards/chosen": -13.602763175964355, + "rewards/margins": 5.252939224243164, + "rewards/rejected": -18.855701446533203, + "step": 1110 + }, + { + "epoch": 2.5888471540017335, + "grad_norm": 10.467662811279297, + "learning_rate": 5.349794238683127e-07, + "logits/chosen": -0.4598791003227234, + "logits/rejected": -0.4583801329135895, + "logps/chosen": -137.6591033935547, + "logps/rejected": -189.61471557617188, + "loss": 0.2583, + "nll_loss": 0.16606256365776062, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -13.765910148620605, + "rewards/margins": 5.195560932159424, + "rewards/rejected": -18.961471557617188, + "step": 1120 + }, + { + "epoch": 2.611961860733892, + "grad_norm": 17.334087371826172, + "learning_rate": 5.29835390946502e-07, + "logits/chosen": -0.45638832449913025, + "logits/rejected": -0.4596933424472809, + "logps/chosen": -134.4242401123047, + "logps/rejected": -185.4617156982422, + "loss": 0.231, + "nll_loss": 0.15201494097709656, + "rewards/accuracies": 0.971875011920929, + "rewards/chosen": -13.442425727844238, + "rewards/margins": 5.1037468910217285, + "rewards/rejected": -18.546171188354492, + "step": 1130 + }, + { + "epoch": 2.6350765674660503, + "grad_norm": 9.82776927947998, + "learning_rate": 5.246913580246914e-07, + "logits/chosen": -0.4979328513145447, + "logits/rejected": -0.4829026758670807, + "logps/chosen": -142.7810516357422, + "logps/rejected": -195.93936157226562, + "loss": 0.2197, + "nll_loss": 0.14758186042308807, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -14.278106689453125, + "rewards/margins": 5.315831184387207, + "rewards/rejected": -19.593936920166016, + "step": 1140 + }, + { + "epoch": 2.6581912741982086, + "grad_norm": 21.076847076416016, + "learning_rate": 5.195473251028807e-07, + "logits/chosen": -0.4889853894710541, + "logits/rejected": -0.4779161810874939, + "logps/chosen": -147.04873657226562, + "logps/rejected": -195.0872802734375, + "loss": 0.2223, + "nll_loss": 0.155166894197464, + "rewards/accuracies": 0.984375, + "rewards/chosen": -14.704874038696289, + "rewards/margins": 4.803854942321777, + "rewards/rejected": -19.50872802734375, + "step": 1150 + }, + { + "epoch": 2.681305980930367, + "grad_norm": 19.175827026367188, + "learning_rate": 5.144032921810699e-07, + "logits/chosen": -0.4997631013393402, + "logits/rejected": -0.4868396818637848, + "logps/chosen": -132.46238708496094, + "logps/rejected": -182.9662322998047, + "loss": 0.2392, + "nll_loss": 0.15937396883964539, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -13.246240615844727, + "rewards/margins": 5.050384521484375, + "rewards/rejected": -18.29662322998047, + "step": 1160 + }, + { + "epoch": 2.7044206876625254, + "grad_norm": 13.847294807434082, + "learning_rate": 5.092592592592593e-07, + "logits/chosen": -0.42537322640419006, + "logits/rejected": -0.40758857131004333, + "logps/chosen": -132.64317321777344, + "logps/rejected": -185.53622436523438, + "loss": 0.2315, + "nll_loss": 0.1639558970928192, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -13.264317512512207, + "rewards/margins": 5.289304733276367, + "rewards/rejected": -18.55362319946289, + "step": 1170 + }, + { + "epoch": 2.7275353943946836, + "grad_norm": 17.215343475341797, + "learning_rate": 5.041152263374485e-07, + "logits/chosen": -0.4605620503425598, + "logits/rejected": -0.47386521100997925, + "logps/chosen": -142.31393432617188, + "logps/rejected": -201.610107421875, + "loss": 0.2355, + "nll_loss": 0.1665884107351303, + "rewards/accuracies": 0.984375, + "rewards/chosen": -14.231393814086914, + "rewards/margins": 5.929617881774902, + "rewards/rejected": -20.161012649536133, + "step": 1180 + }, + { + "epoch": 2.750650101126842, + "grad_norm": 11.339929580688477, + "learning_rate": 4.989711934156378e-07, + "logits/chosen": -0.5646448731422424, + "logits/rejected": -0.5591720342636108, + "logps/chosen": -144.7230987548828, + "logps/rejected": -198.4960479736328, + "loss": 0.2296, + "nll_loss": 0.17730608582496643, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -14.472311019897461, + "rewards/margins": 5.377293109893799, + "rewards/rejected": -19.8496036529541, + "step": 1190 + }, + { + "epoch": 2.773764807859, + "grad_norm": 10.567920684814453, + "learning_rate": 4.938271604938271e-07, + "logits/chosen": -0.5628112554550171, + "logits/rejected": -0.5627862215042114, + "logps/chosen": -134.7103271484375, + "logps/rejected": -181.05490112304688, + "loss": 0.2401, + "nll_loss": 0.16600725054740906, + "rewards/accuracies": 0.984375, + "rewards/chosen": -13.471035957336426, + "rewards/margins": 4.634454250335693, + "rewards/rejected": -18.105487823486328, + "step": 1200 + }, + { + "epoch": 2.7968795145911587, + "grad_norm": 11.1284818649292, + "learning_rate": 4.886831275720165e-07, + "logits/chosen": -0.5333854556083679, + "logits/rejected": -0.5228737592697144, + "logps/chosen": -129.60784912109375, + "logps/rejected": -179.29922485351562, + "loss": 0.2237, + "nll_loss": 0.15326835215091705, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -12.960784912109375, + "rewards/margins": 4.969139099121094, + "rewards/rejected": -17.929922103881836, + "step": 1210 + }, + { + "epoch": 2.819994221323317, + "grad_norm": 10.869100570678711, + "learning_rate": 4.835390946502057e-07, + "logits/chosen": -0.4685629904270172, + "logits/rejected": -0.4411331117153168, + "logps/chosen": -137.3936767578125, + "logps/rejected": -190.50975036621094, + "loss": 0.2258, + "nll_loss": 0.16754138469696045, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -13.739367485046387, + "rewards/margins": 5.311608791351318, + "rewards/rejected": -19.050975799560547, + "step": 1220 + }, + { + "epoch": 2.843108928055475, + "grad_norm": 11.171156883239746, + "learning_rate": 4.783950617283951e-07, + "logits/chosen": -0.39593321084976196, + "logits/rejected": -0.3724592328071594, + "logps/chosen": -129.14064025878906, + "logps/rejected": -181.44851684570312, + "loss": 0.2196, + "nll_loss": 0.15831029415130615, + "rewards/accuracies": 0.9781249761581421, + "rewards/chosen": -12.914064407348633, + "rewards/margins": 5.230786323547363, + "rewards/rejected": -18.14484977722168, + "step": 1230 + }, + { + "epoch": 2.8662236347876338, + "grad_norm": 16.257095336914062, + "learning_rate": 4.732510288065844e-07, + "logits/chosen": -0.41909652948379517, + "logits/rejected": -0.4289626479148865, + "logps/chosen": -137.97906494140625, + "logps/rejected": -189.48602294921875, + "loss": 0.2401, + "nll_loss": 0.15598097443580627, + "rewards/accuracies": 0.9781249761581421, + "rewards/chosen": -13.797907829284668, + "rewards/margins": 5.15069580078125, + "rewards/rejected": -18.9486026763916, + "step": 1240 + }, + { + "epoch": 2.889338341519792, + "grad_norm": 24.864940643310547, + "learning_rate": 4.6810699588477364e-07, + "logits/chosen": -0.36290091276168823, + "logits/rejected": -0.34600576758384705, + "logps/chosen": -136.03607177734375, + "logps/rejected": -185.31668090820312, + "loss": 0.2201, + "nll_loss": 0.14870640635490417, + "rewards/accuracies": 0.9781249761581421, + "rewards/chosen": -13.603607177734375, + "rewards/margins": 4.9280619621276855, + "rewards/rejected": -18.53166961669922, + "step": 1250 + }, + { + "epoch": 2.91245304825195, + "grad_norm": 9.861152648925781, + "learning_rate": 4.6296296296296297e-07, + "logits/chosen": -0.43973201513290405, + "logits/rejected": -0.44227686524391174, + "logps/chosen": -139.79000854492188, + "logps/rejected": -191.3979949951172, + "loss": 0.2338, + "nll_loss": 0.15694692730903625, + "rewards/accuracies": 0.981249988079071, + "rewards/chosen": -13.97900104522705, + "rewards/margins": 5.160799026489258, + "rewards/rejected": -19.139801025390625, + "step": 1260 + }, + { + "epoch": 2.935567754984109, + "grad_norm": 11.536057472229004, + "learning_rate": 4.5781893004115224e-07, + "logits/chosen": -0.4365859925746918, + "logits/rejected": -0.43007755279541016, + "logps/chosen": -143.85635375976562, + "logps/rejected": -197.02879333496094, + "loss": 0.2355, + "nll_loss": 0.15321387350559235, + "rewards/accuracies": 0.9906250238418579, + "rewards/chosen": -14.385635375976562, + "rewards/margins": 5.317243576049805, + "rewards/rejected": -19.702880859375, + "step": 1270 + }, + { + "epoch": 2.958682461716267, + "grad_norm": 18.637239456176758, + "learning_rate": 4.5267489711934156e-07, + "logits/chosen": -0.47489842772483826, + "logits/rejected": -0.4829436242580414, + "logps/chosen": -140.48260498046875, + "logps/rejected": -196.2875213623047, + "loss": 0.2461, + "nll_loss": 0.16315388679504395, + "rewards/accuracies": 0.984375, + "rewards/chosen": -14.048260688781738, + "rewards/margins": 5.5804924964904785, + "rewards/rejected": -19.628753662109375, + "step": 1280 + }, + { + "epoch": 2.9817971684484252, + "grad_norm": 13.219135284423828, + "learning_rate": 4.4753086419753083e-07, + "logits/chosen": -0.45336833596229553, + "logits/rejected": -0.44670405983924866, + "logps/chosen": -141.3701934814453, + "logps/rejected": -192.05670166015625, + "loss": 0.2244, + "nll_loss": 0.16718199849128723, + "rewards/accuracies": 0.984375, + "rewards/chosen": -14.137018203735352, + "rewards/margins": 5.0686516761779785, + "rewards/rejected": -19.205671310424805, + "step": 1290 + }, + { + "epoch": 2.997977463160936, + "eval_logits/chosen": -0.3714839220046997, + "eval_logits/rejected": -0.3428020179271698, + "eval_logps/chosen": -157.10519409179688, + "eval_logps/rejected": -172.1945343017578, + "eval_loss": 1.3861061334609985, + "eval_nll_loss": 0.20338018238544464, + "eval_rewards/accuracies": 0.656521737575531, + "eval_rewards/chosen": -15.710522651672363, + "eval_rewards/margins": 1.5089313983917236, + "eval_rewards/rejected": -17.219451904296875, + "eval_runtime": 77.2394, + "eval_samples_per_second": 23.641, + "eval_steps_per_second": 1.489, + "step": 1297 + }, + { + "epoch": 3.0049118751805834, + "grad_norm": 5.132666110992432, + "learning_rate": 4.4238683127572015e-07, + "logits/chosen": -0.44278082251548767, + "logits/rejected": -0.44281044602394104, + "logps/chosen": -141.17550659179688, + "logps/rejected": -196.56248474121094, + "loss": 0.2016, + "nll_loss": 0.15163448452949524, + "rewards/accuracies": 0.996874988079071, + "rewards/chosen": -14.117551803588867, + "rewards/margins": 5.538697719573975, + "rewards/rejected": -19.656248092651367, + "step": 1300 + }, + { + "epoch": 3.028026581912742, + "grad_norm": 3.1660420894622803, + "learning_rate": 4.372427983539094e-07, + "logits/chosen": -0.40755367279052734, + "logits/rejected": -0.3970012962818146, + "logps/chosen": -125.93168640136719, + "logps/rejected": -186.09402465820312, + "loss": 0.1537, + "nll_loss": 0.13879674673080444, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.593169212341309, + "rewards/margins": 6.016233921051025, + "rewards/rejected": -18.609403610229492, + "step": 1310 + }, + { + "epoch": 3.0511412886449003, + "grad_norm": 3.5848960876464844, + "learning_rate": 4.320987654320987e-07, + "logits/chosen": -0.44615453481674194, + "logits/rejected": -0.43949246406555176, + "logps/chosen": -126.3210220336914, + "logps/rejected": -184.44094848632812, + "loss": 0.1556, + "nll_loss": 0.1318623572587967, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.632102012634277, + "rewards/margins": 5.811993598937988, + "rewards/rejected": -18.444095611572266, + "step": 1320 + }, + { + "epoch": 3.0742559953770585, + "grad_norm": 3.971622943878174, + "learning_rate": 4.2695473251028807e-07, + "logits/chosen": -0.34509214758872986, + "logits/rejected": -0.3416140079498291, + "logps/chosen": -130.82965087890625, + "logps/rejected": -189.31130981445312, + "loss": 0.1539, + "nll_loss": 0.13816341757774353, + "rewards/accuracies": 0.996874988079071, + "rewards/chosen": -13.082966804504395, + "rewards/margins": 5.848166465759277, + "rewards/rejected": -18.931133270263672, + "step": 1330 + }, + { + "epoch": 3.097370702109217, + "grad_norm": 3.245117664337158, + "learning_rate": 4.218106995884774e-07, + "logits/chosen": -0.263519287109375, + "logits/rejected": -0.25365307927131653, + "logps/chosen": -128.29852294921875, + "logps/rejected": -189.9366455078125, + "loss": 0.1518, + "nll_loss": 0.13781467080116272, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.829852104187012, + "rewards/margins": 6.16381311416626, + "rewards/rejected": -18.99366569519043, + "step": 1340 + }, + { + "epoch": 3.1204854088413754, + "grad_norm": 4.314767837524414, + "learning_rate": 4.1666666666666667e-07, + "logits/chosen": -0.2783138155937195, + "logits/rejected": -0.3006114363670349, + "logps/chosen": -128.49453735351562, + "logps/rejected": -187.8452606201172, + "loss": 0.1516, + "nll_loss": 0.14406827092170715, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.849452018737793, + "rewards/margins": 5.935072898864746, + "rewards/rejected": -18.784526824951172, + "step": 1350 + }, + { + "epoch": 3.1436001155735336, + "grad_norm": 2.8442511558532715, + "learning_rate": 4.11522633744856e-07, + "logits/chosen": -0.19675478339195251, + "logits/rejected": -0.18994562327861786, + "logps/chosen": -130.37368774414062, + "logps/rejected": -191.08071899414062, + "loss": 0.1502, + "nll_loss": 0.14177414774894714, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.037368774414062, + "rewards/margins": 6.070704936981201, + "rewards/rejected": -19.10807228088379, + "step": 1360 + }, + { + "epoch": 3.166714822305692, + "grad_norm": 4.321190357208252, + "learning_rate": 4.0637860082304526e-07, + "logits/chosen": -0.29594722390174866, + "logits/rejected": -0.2727283537387848, + "logps/chosen": -126.78936767578125, + "logps/rejected": -183.8494873046875, + "loss": 0.1495, + "nll_loss": 0.13010382652282715, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.678936958312988, + "rewards/margins": 5.706011772155762, + "rewards/rejected": -18.38494873046875, + "step": 1370 + }, + { + "epoch": 3.1898295290378504, + "grad_norm": 3.650377035140991, + "learning_rate": 4.0123456790123453e-07, + "logits/chosen": -0.37024635076522827, + "logits/rejected": -0.36072778701782227, + "logps/chosen": -134.62948608398438, + "logps/rejected": -194.2451171875, + "loss": 0.1556, + "nll_loss": 0.1394232213497162, + "rewards/accuracies": 0.996874988079071, + "rewards/chosen": -13.4629487991333, + "rewards/margins": 5.9615631103515625, + "rewards/rejected": -19.42451286315918, + "step": 1380 + }, + { + "epoch": 3.2129442357700086, + "grad_norm": 5.636937141418457, + "learning_rate": 3.9609053497942385e-07, + "logits/chosen": -0.27522599697113037, + "logits/rejected": -0.27910444140434265, + "logps/chosen": -124.5965805053711, + "logps/rejected": -187.5218505859375, + "loss": 0.1484, + "nll_loss": 0.12636372447013855, + "rewards/accuracies": 0.996874988079071, + "rewards/chosen": -12.459661483764648, + "rewards/margins": 6.292525768280029, + "rewards/rejected": -18.752187728881836, + "step": 1390 + }, + { + "epoch": 3.236058942502167, + "grad_norm": 3.8186678886413574, + "learning_rate": 3.909465020576131e-07, + "logits/chosen": -0.2928979992866516, + "logits/rejected": -0.2864636480808258, + "logps/chosen": -124.09950256347656, + "logps/rejected": -181.70155334472656, + "loss": 0.1549, + "nll_loss": 0.13333001732826233, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -12.409948348999023, + "rewards/margins": 5.7602057456970215, + "rewards/rejected": -18.170154571533203, + "step": 1400 + }, + { + "epoch": 3.2591736492343255, + "grad_norm": 3.9708776473999023, + "learning_rate": 3.8580246913580245e-07, + "logits/chosen": -0.3393842577934265, + "logits/rejected": -0.32439425587654114, + "logps/chosen": -130.1053009033203, + "logps/rejected": -188.5397491455078, + "loss": 0.1556, + "nll_loss": 0.13221554458141327, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.010530471801758, + "rewards/margins": 5.843444347381592, + "rewards/rejected": -18.853975296020508, + "step": 1410 + }, + { + "epoch": 3.2822883559664837, + "grad_norm": 3.5606882572174072, + "learning_rate": 3.806584362139917e-07, + "logits/chosen": -0.31585693359375, + "logits/rejected": -0.26836958527565, + "logps/chosen": -120.08418273925781, + "logps/rejected": -180.00120544433594, + "loss": 0.1471, + "nll_loss": 0.12899354100227356, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.008419036865234, + "rewards/margins": 5.991702079772949, + "rewards/rejected": -18.000120162963867, + "step": 1420 + }, + { + "epoch": 3.305403062698642, + "grad_norm": 3.3717777729034424, + "learning_rate": 3.7551440329218104e-07, + "logits/chosen": -0.23174750804901123, + "logits/rejected": -0.2522903382778168, + "logps/chosen": -131.6839599609375, + "logps/rejected": -198.05081176757812, + "loss": 0.1565, + "nll_loss": 0.13706137239933014, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.16839599609375, + "rewards/margins": 6.636684417724609, + "rewards/rejected": -19.80508041381836, + "step": 1430 + }, + { + "epoch": 3.3285177694308006, + "grad_norm": 3.782886028289795, + "learning_rate": 3.703703703703703e-07, + "logits/chosen": -0.3117191195487976, + "logits/rejected": -0.31785351037979126, + "logps/chosen": -131.83470153808594, + "logps/rejected": -189.18441772460938, + "loss": 0.1492, + "nll_loss": 0.12388783693313599, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.183469772338867, + "rewards/margins": 5.734971046447754, + "rewards/rejected": -18.918439865112305, + "step": 1440 + }, + { + "epoch": 3.351632476162959, + "grad_norm": 3.158254384994507, + "learning_rate": 3.6522633744855963e-07, + "logits/chosen": -0.3361268639564514, + "logits/rejected": -0.3252175748348236, + "logps/chosen": -128.30125427246094, + "logps/rejected": -186.31838989257812, + "loss": 0.1539, + "nll_loss": 0.13049830496311188, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -12.830126762390137, + "rewards/margins": 5.801713943481445, + "rewards/rejected": -18.631839752197266, + "step": 1450 + }, + { + "epoch": 3.374747182895117, + "grad_norm": 4.768058776855469, + "learning_rate": 3.6008230452674896e-07, + "logits/chosen": -0.23867249488830566, + "logits/rejected": -0.20122122764587402, + "logps/chosen": -123.92413330078125, + "logps/rejected": -186.30250549316406, + "loss": 0.1616, + "nll_loss": 0.14071312546730042, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.392415046691895, + "rewards/margins": 6.2378363609313965, + "rewards/rejected": -18.630252838134766, + "step": 1460 + }, + { + "epoch": 3.397861889627275, + "grad_norm": 3.911938428878784, + "learning_rate": 3.549382716049383e-07, + "logits/chosen": -0.2685008943080902, + "logits/rejected": -0.23969027400016785, + "logps/chosen": -127.1446304321289, + "logps/rejected": -186.02838134765625, + "loss": 0.1486, + "nll_loss": 0.12472818791866302, + "rewards/accuracies": 0.996874988079071, + "rewards/chosen": -12.714462280273438, + "rewards/margins": 5.888378143310547, + "rewards/rejected": -18.602840423583984, + "step": 1470 + }, + { + "epoch": 3.420976596359434, + "grad_norm": 3.9447271823883057, + "learning_rate": 3.4979423868312755e-07, + "logits/chosen": -0.28780004382133484, + "logits/rejected": -0.290294349193573, + "logps/chosen": -127.8751449584961, + "logps/rejected": -189.95578002929688, + "loss": 0.1473, + "nll_loss": 0.13437309861183167, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.787514686584473, + "rewards/margins": 6.208063125610352, + "rewards/rejected": -18.99557876586914, + "step": 1480 + }, + { + "epoch": 3.444091303091592, + "grad_norm": 6.313704490661621, + "learning_rate": 3.446502057613169e-07, + "logits/chosen": -0.23013484477996826, + "logits/rejected": -0.23306229710578918, + "logps/chosen": -122.0789566040039, + "logps/rejected": -185.14695739746094, + "loss": 0.1478, + "nll_loss": 0.13203728199005127, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.207897186279297, + "rewards/margins": 6.306800842285156, + "rewards/rejected": -18.514698028564453, + "step": 1490 + }, + { + "epoch": 3.4672060098237503, + "grad_norm": 2.906285524368286, + "learning_rate": 3.3950617283950614e-07, + "logits/chosen": -0.3435348868370056, + "logits/rejected": -0.33539697527885437, + "logps/chosen": -123.60890197753906, + "logps/rejected": -183.1199493408203, + "loss": 0.1513, + "nll_loss": 0.13879191875457764, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.36089038848877, + "rewards/margins": 5.951104640960693, + "rewards/rejected": -18.311994552612305, + "step": 1500 + }, + { + "epoch": 3.4903207165559085, + "grad_norm": 2.990963935852051, + "learning_rate": 3.3436213991769547e-07, + "logits/chosen": -0.26741576194763184, + "logits/rejected": -0.273776650428772, + "logps/chosen": -129.36013793945312, + "logps/rejected": -186.50009155273438, + "loss": 0.1465, + "nll_loss": 0.14070597290992737, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -12.936014175415039, + "rewards/margins": 5.713995933532715, + "rewards/rejected": -18.650009155273438, + "step": 1510 + }, + { + "epoch": 3.513435423288067, + "grad_norm": 5.473604679107666, + "learning_rate": 3.2921810699588474e-07, + "logits/chosen": -0.28439709544181824, + "logits/rejected": -0.2706482410430908, + "logps/chosen": -123.5947265625, + "logps/rejected": -185.80001831054688, + "loss": 0.1509, + "nll_loss": 0.1402612030506134, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.359472274780273, + "rewards/margins": 6.220528602600098, + "rewards/rejected": -18.580001831054688, + "step": 1520 + }, + { + "epoch": 3.5365501300202253, + "grad_norm": 6.9896626472473145, + "learning_rate": 3.2407407407407406e-07, + "logits/chosen": -0.3721368908882141, + "logits/rejected": -0.3583984673023224, + "logps/chosen": -128.07249450683594, + "logps/rejected": -187.01959228515625, + "loss": 0.1538, + "nll_loss": 0.13780102133750916, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.807249069213867, + "rewards/margins": 5.894709587097168, + "rewards/rejected": -18.701961517333984, + "step": 1530 + }, + { + "epoch": 3.5596648367523835, + "grad_norm": 2.910080671310425, + "learning_rate": 3.1893004115226333e-07, + "logits/chosen": -0.3633486330509186, + "logits/rejected": -0.34488505125045776, + "logps/chosen": -125.72395324707031, + "logps/rejected": -184.29405212402344, + "loss": 0.1547, + "nll_loss": 0.1316194236278534, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -12.572395324707031, + "rewards/margins": 5.857010841369629, + "rewards/rejected": -18.429405212402344, + "step": 1540 + }, + { + "epoch": 3.582779543484542, + "grad_norm": 3.2864928245544434, + "learning_rate": 3.1378600823045266e-07, + "logits/chosen": -0.36337172985076904, + "logits/rejected": -0.3896876871585846, + "logps/chosen": -130.9540252685547, + "logps/rejected": -192.02456665039062, + "loss": 0.143, + "nll_loss": 0.12916973233222961, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.095403671264648, + "rewards/margins": 6.107052803039551, + "rewards/rejected": -19.202457427978516, + "step": 1550 + }, + { + "epoch": 3.6058942502167004, + "grad_norm": 9.098392486572266, + "learning_rate": 3.086419753086419e-07, + "logits/chosen": -0.26420459151268005, + "logits/rejected": -0.30124431848526, + "logps/chosen": -132.1412353515625, + "logps/rejected": -196.06668090820312, + "loss": 0.1472, + "nll_loss": 0.12210263311862946, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.214123725891113, + "rewards/margins": 6.392544269561768, + "rewards/rejected": -19.60666847229004, + "step": 1560 + }, + { + "epoch": 3.6290089569488586, + "grad_norm": 3.135023593902588, + "learning_rate": 3.0349794238683125e-07, + "logits/chosen": -0.2870226800441742, + "logits/rejected": -0.32922470569610596, + "logps/chosen": -127.20719909667969, + "logps/rejected": -187.71414184570312, + "loss": 0.1606, + "nll_loss": 0.13571253418922424, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.720720291137695, + "rewards/margins": 6.050693511962891, + "rewards/rejected": -18.771413803100586, + "step": 1570 + }, + { + "epoch": 3.6521236636810173, + "grad_norm": 2.965545892715454, + "learning_rate": 2.983539094650205e-07, + "logits/chosen": -0.2955471873283386, + "logits/rejected": -0.29221171140670776, + "logps/chosen": -120.03623962402344, + "logps/rejected": -177.8092041015625, + "loss": 0.141, + "nll_loss": 0.12610065937042236, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.003625869750977, + "rewards/margins": 5.777295112609863, + "rewards/rejected": -17.780920028686523, + "step": 1580 + }, + { + "epoch": 3.6752383704131755, + "grad_norm": 3.8427724838256836, + "learning_rate": 2.932098765432099e-07, + "logits/chosen": -0.294664204120636, + "logits/rejected": -0.315548837184906, + "logps/chosen": -126.55033874511719, + "logps/rejected": -186.32962036132812, + "loss": 0.1472, + "nll_loss": 0.1299527883529663, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.655034065246582, + "rewards/margins": 5.977927207946777, + "rewards/rejected": -18.63296127319336, + "step": 1590 + }, + { + "epoch": 3.6983530771453337, + "grad_norm": 3.386413335800171, + "learning_rate": 2.8806584362139917e-07, + "logits/chosen": -0.21596117317676544, + "logits/rejected": -0.20901863276958466, + "logps/chosen": -118.6823959350586, + "logps/rejected": -177.80654907226562, + "loss": 0.1584, + "nll_loss": 0.14362338185310364, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.86823844909668, + "rewards/margins": 5.912415504455566, + "rewards/rejected": -17.780656814575195, + "step": 1600 + }, + { + "epoch": 3.7214677838774923, + "grad_norm": 3.672924518585205, + "learning_rate": 2.829218106995885e-07, + "logits/chosen": -0.26348841190338135, + "logits/rejected": -0.262240469455719, + "logps/chosen": -124.21568298339844, + "logps/rejected": -183.1221466064453, + "loss": 0.1513, + "nll_loss": 0.11891283839941025, + "rewards/accuracies": 0.996874988079071, + "rewards/chosen": -12.421568870544434, + "rewards/margins": 5.8906474113464355, + "rewards/rejected": -18.31221580505371, + "step": 1610 + }, + { + "epoch": 3.7445824906096505, + "grad_norm": 3.7650656700134277, + "learning_rate": 2.7777777777777776e-07, + "logits/chosen": -0.278475821018219, + "logits/rejected": -0.2345239669084549, + "logps/chosen": -123.59881591796875, + "logps/rejected": -183.743896484375, + "loss": 0.1518, + "nll_loss": 0.12711484730243683, + "rewards/accuracies": 0.9937499761581421, + "rewards/chosen": -12.359882354736328, + "rewards/margins": 6.014508247375488, + "rewards/rejected": -18.3743896484375, + "step": 1620 + }, + { + "epoch": 3.7676971973418087, + "grad_norm": 3.11409592628479, + "learning_rate": 2.726337448559671e-07, + "logits/chosen": -0.29814380407333374, + "logits/rejected": -0.28927913308143616, + "logps/chosen": -127.12947082519531, + "logps/rejected": -183.96328735351562, + "loss": 0.1502, + "nll_loss": 0.11745184659957886, + "rewards/accuracies": 0.996874988079071, + "rewards/chosen": -12.712945938110352, + "rewards/margins": 5.683382987976074, + "rewards/rejected": -18.396331787109375, + "step": 1630 + }, + { + "epoch": 3.790811904073967, + "grad_norm": 4.140903949737549, + "learning_rate": 2.6748971193415635e-07, + "logits/chosen": -0.29099392890930176, + "logits/rejected": -0.3041759133338928, + "logps/chosen": -130.06552124023438, + "logps/rejected": -191.20046997070312, + "loss": 0.1509, + "nll_loss": 0.14280778169631958, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.006550788879395, + "rewards/margins": 6.11349630355835, + "rewards/rejected": -19.120046615600586, + "step": 1640 + }, + { + "epoch": 3.813926610806125, + "grad_norm": 8.86196231842041, + "learning_rate": 2.623456790123457e-07, + "logits/chosen": -0.2659907341003418, + "logits/rejected": -0.27678874135017395, + "logps/chosen": -126.56221008300781, + "logps/rejected": -185.51071166992188, + "loss": 0.1458, + "nll_loss": 0.1296006143093109, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.656221389770508, + "rewards/margins": 5.894850730895996, + "rewards/rejected": -18.551071166992188, + "step": 1650 + }, + { + "epoch": 3.837041317538284, + "grad_norm": 7.074207305908203, + "learning_rate": 2.5720164609053495e-07, + "logits/chosen": -0.2648230195045471, + "logits/rejected": -0.2591935098171234, + "logps/chosen": -117.26505279541016, + "logps/rejected": -177.61654663085938, + "loss": 0.1454, + "nll_loss": 0.13034331798553467, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.726505279541016, + "rewards/margins": 6.03515100479126, + "rewards/rejected": -17.761655807495117, + "step": 1660 + }, + { + "epoch": 3.860156024270442, + "grad_norm": 3.6986083984375, + "learning_rate": 2.5205761316872427e-07, + "logits/chosen": -0.3297143876552582, + "logits/rejected": -0.31857237219810486, + "logps/chosen": -133.59078979492188, + "logps/rejected": -194.1522979736328, + "loss": 0.156, + "nll_loss": 0.1323135942220688, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.359077453613281, + "rewards/margins": 6.056151390075684, + "rewards/rejected": -19.41522789001465, + "step": 1670 + }, + { + "epoch": 3.8832707310026002, + "grad_norm": 3.5342583656311035, + "learning_rate": 2.4691358024691354e-07, + "logits/chosen": -0.3504456877708435, + "logits/rejected": -0.3491267263889313, + "logps/chosen": -125.02303314208984, + "logps/rejected": -186.25491333007812, + "loss": 0.1414, + "nll_loss": 0.1284278929233551, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.502302169799805, + "rewards/margins": 6.123185157775879, + "rewards/rejected": -18.62548828125, + "step": 1680 + }, + { + "epoch": 3.906385437734759, + "grad_norm": 9.769820213317871, + "learning_rate": 2.4176954732510286e-07, + "logits/chosen": -0.3653779923915863, + "logits/rejected": -0.3362106382846832, + "logps/chosen": -135.67111206054688, + "logps/rejected": -198.921142578125, + "loss": 0.1563, + "nll_loss": 0.1389894187450409, + "rewards/accuracies": 1.0, + "rewards/chosen": -13.567111015319824, + "rewards/margins": 6.325002193450928, + "rewards/rejected": -19.892114639282227, + "step": 1690 + }, + { + "epoch": 3.929500144466917, + "grad_norm": 12.724737167358398, + "learning_rate": 2.366255144032922e-07, + "logits/chosen": -0.3556443452835083, + "logits/rejected": -0.33838778734207153, + "logps/chosen": -126.82794189453125, + "logps/rejected": -187.8473663330078, + "loss": 0.1457, + "nll_loss": 0.13801956176757812, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.682792663574219, + "rewards/margins": 6.101943016052246, + "rewards/rejected": -18.784738540649414, + "step": 1700 + }, + { + "epoch": 3.9526148511990753, + "grad_norm": 2.656416654586792, + "learning_rate": 2.3148148148148148e-07, + "logits/chosen": -0.3134855329990387, + "logits/rejected": -0.305325984954834, + "logps/chosen": -128.65797424316406, + "logps/rejected": -188.01309204101562, + "loss": 0.1369, + "nll_loss": 0.12594002485275269, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.865796089172363, + "rewards/margins": 5.935511589050293, + "rewards/rejected": -18.80130958557129, + "step": 1710 + }, + { + "epoch": 3.975729557931234, + "grad_norm": 12.101499557495117, + "learning_rate": 2.2633744855967078e-07, + "logits/chosen": -0.4090637266635895, + "logits/rejected": -0.3877164423465729, + "logps/chosen": -134.76638793945312, + "logps/rejected": -194.9758758544922, + "loss": 0.1532, + "nll_loss": 0.14175161719322205, + "rewards/accuracies": 0.996874988079071, + "rewards/chosen": -13.476638793945312, + "rewards/margins": 6.020949840545654, + "rewards/rejected": -19.497589111328125, + "step": 1720 + }, + { + "epoch": 3.998844264663392, + "grad_norm": 6.0831708908081055, + "learning_rate": 2.2119341563786008e-07, + "logits/chosen": -0.3833851218223572, + "logits/rejected": -0.39498597383499146, + "logps/chosen": -129.8985595703125, + "logps/rejected": -187.89739990234375, + "loss": 0.1472, + "nll_loss": 0.12770399451255798, + "rewards/accuracies": 0.996874988079071, + "rewards/chosen": -12.98985481262207, + "rewards/margins": 5.799884796142578, + "rewards/rejected": -18.78973960876465, + "step": 1730 + }, + { + "epoch": 3.998844264663392, + "eval_logits/chosen": -0.3029468059539795, + "eval_logits/rejected": -0.270137220621109, + "eval_logps/chosen": -146.46226501464844, + "eval_logps/rejected": -161.38487243652344, + "eval_loss": 1.4029475450515747, + "eval_nll_loss": 0.1876361072063446, + "eval_rewards/accuracies": 0.6521739363670349, + "eval_rewards/chosen": -14.646224975585938, + "eval_rewards/margins": 1.4922590255737305, + "eval_rewards/rejected": -16.138486862182617, + "eval_runtime": 77.4371, + "eval_samples_per_second": 23.58, + "eval_steps_per_second": 1.485, + "step": 1730 + }, + { + "epoch": 4.02195897139555, + "grad_norm": 1.9679253101348877, + "learning_rate": 2.1604938271604935e-07, + "logits/chosen": -0.3585730195045471, + "logits/rejected": -0.3200622498989105, + "logps/chosen": -118.93489074707031, + "logps/rejected": -183.91061401367188, + "loss": 0.1179, + "nll_loss": 0.1184120774269104, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.893487930297852, + "rewards/margins": 6.497572422027588, + "rewards/rejected": -18.391061782836914, + "step": 1740 + }, + { + "epoch": 4.045073678127709, + "grad_norm": 1.426239252090454, + "learning_rate": 2.109053497942387e-07, + "logits/chosen": -0.3198128640651703, + "logits/rejected": -0.3108198940753937, + "logps/chosen": -119.95533752441406, + "logps/rejected": -182.93043518066406, + "loss": 0.1218, + "nll_loss": 0.10763946920633316, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.99553394317627, + "rewards/margins": 6.29750919342041, + "rewards/rejected": -18.293041229248047, + "step": 1750 + }, + { + "epoch": 4.068188384859867, + "grad_norm": 1.8550798892974854, + "learning_rate": 2.05761316872428e-07, + "logits/chosen": -0.28298747539520264, + "logits/rejected": -0.2920450270175934, + "logps/chosen": -117.935791015625, + "logps/rejected": -186.0088653564453, + "loss": 0.1233, + "nll_loss": 0.11667722463607788, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.7935791015625, + "rewards/margins": 6.807305812835693, + "rewards/rejected": -18.60088539123535, + "step": 1760 + }, + { + "epoch": 4.091303091592025, + "grad_norm": 1.947771668434143, + "learning_rate": 2.0061728395061726e-07, + "logits/chosen": -0.21840214729309082, + "logits/rejected": -0.2067776620388031, + "logps/chosen": -115.0444564819336, + "logps/rejected": -179.38697814941406, + "loss": 0.1213, + "nll_loss": 0.1122204065322876, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.504446029663086, + "rewards/margins": 6.4342522621154785, + "rewards/rejected": -17.938695907592773, + "step": 1770 + }, + { + "epoch": 4.114417798324184, + "grad_norm": 1.8407361507415771, + "learning_rate": 1.9547325102880656e-07, + "logits/chosen": -0.29772254824638367, + "logits/rejected": -0.2754737138748169, + "logps/chosen": -113.61384582519531, + "logps/rejected": -177.0957489013672, + "loss": 0.1227, + "nll_loss": 0.10529961436986923, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.361384391784668, + "rewards/margins": 6.348191738128662, + "rewards/rejected": -17.709575653076172, + "step": 1780 + }, + { + "epoch": 4.137532505056342, + "grad_norm": 1.4201513528823853, + "learning_rate": 1.9032921810699586e-07, + "logits/chosen": -0.30481767654418945, + "logits/rejected": -0.2908991277217865, + "logps/chosen": -119.33686828613281, + "logps/rejected": -184.93646240234375, + "loss": 0.1227, + "nll_loss": 0.1168881431221962, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.933687210083008, + "rewards/margins": 6.559959411621094, + "rewards/rejected": -18.493648529052734, + "step": 1790 + }, + { + "epoch": 4.1606472117885005, + "grad_norm": 1.8120708465576172, + "learning_rate": 1.8518518518518516e-07, + "logits/chosen": -0.3080504834651947, + "logits/rejected": -0.30417922139167786, + "logps/chosen": -122.6048812866211, + "logps/rejected": -185.8119659423828, + "loss": 0.126, + "nll_loss": 0.12334553897380829, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.26048755645752, + "rewards/margins": 6.320708751678467, + "rewards/rejected": -18.581195831298828, + "step": 1800 + }, + { + "epoch": 4.183761918520659, + "grad_norm": 2.5624470710754395, + "learning_rate": 1.8004115226337448e-07, + "logits/chosen": -0.24937394261360168, + "logits/rejected": -0.2712889313697815, + "logps/chosen": -124.1614761352539, + "logps/rejected": -188.57559204101562, + "loss": 0.1226, + "nll_loss": 0.1163693517446518, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.41614818572998, + "rewards/margins": 6.44141149520874, + "rewards/rejected": -18.857561111450195, + "step": 1810 + }, + { + "epoch": 4.206876625252817, + "grad_norm": 1.5446466207504272, + "learning_rate": 1.7489711934156378e-07, + "logits/chosen": -0.23896384239196777, + "logits/rejected": -0.2415800839662552, + "logps/chosen": -119.49736022949219, + "logps/rejected": -185.11898803710938, + "loss": 0.1212, + "nll_loss": 0.11859021335840225, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.949737548828125, + "rewards/margins": 6.56216287612915, + "rewards/rejected": -18.511898040771484, + "step": 1820 + }, + { + "epoch": 4.229991331984976, + "grad_norm": 1.7995822429656982, + "learning_rate": 1.6975308641975307e-07, + "logits/chosen": -0.24105176329612732, + "logits/rejected": -0.21960768103599548, + "logps/chosen": -113.63651275634766, + "logps/rejected": -176.64730834960938, + "loss": 0.1216, + "nll_loss": 0.11322028934955597, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.363652229309082, + "rewards/margins": 6.301081657409668, + "rewards/rejected": -17.66473388671875, + "step": 1830 + }, + { + "epoch": 4.253106038717133, + "grad_norm": 1.7273714542388916, + "learning_rate": 1.6460905349794237e-07, + "logits/chosen": -0.253646582365036, + "logits/rejected": -0.26175594329833984, + "logps/chosen": -118.37306213378906, + "logps/rejected": -184.26153564453125, + "loss": 0.1206, + "nll_loss": 0.11956053972244263, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.837307929992676, + "rewards/margins": 6.5888471603393555, + "rewards/rejected": -18.4261531829834, + "step": 1840 + }, + { + "epoch": 4.276220745449292, + "grad_norm": 4.887149810791016, + "learning_rate": 1.5946502057613167e-07, + "logits/chosen": -0.2122907191514969, + "logits/rejected": -0.2090766876935959, + "logps/chosen": -113.57759094238281, + "logps/rejected": -174.99594116210938, + "loss": 0.1184, + "nll_loss": 0.10560585558414459, + "rewards/accuracies": 0.996874988079071, + "rewards/chosen": -11.357759475708008, + "rewards/margins": 6.141837120056152, + "rewards/rejected": -17.499595642089844, + "step": 1850 + }, + { + "epoch": 4.299335452181451, + "grad_norm": 1.5595005750656128, + "learning_rate": 1.5432098765432096e-07, + "logits/chosen": -0.13843365013599396, + "logits/rejected": -0.1982315182685852, + "logps/chosen": -118.16423034667969, + "logps/rejected": -182.03799438476562, + "loss": 0.1211, + "nll_loss": 0.11699899286031723, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.816422462463379, + "rewards/margins": 6.387377738952637, + "rewards/rejected": -18.203800201416016, + "step": 1860 + }, + { + "epoch": 4.322450158913608, + "grad_norm": 2.2779886722564697, + "learning_rate": 1.4917695473251026e-07, + "logits/chosen": -0.265516459941864, + "logits/rejected": -0.2614438533782959, + "logps/chosen": -124.3641128540039, + "logps/rejected": -187.52963256835938, + "loss": 0.1261, + "nll_loss": 0.11983609199523926, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.43641185760498, + "rewards/margins": 6.316550254821777, + "rewards/rejected": -18.75296401977539, + "step": 1870 + }, + { + "epoch": 4.345564865645767, + "grad_norm": 2.2859365940093994, + "learning_rate": 1.4403292181069958e-07, + "logits/chosen": -0.25305554270744324, + "logits/rejected": -0.2473808228969574, + "logps/chosen": -124.98432922363281, + "logps/rejected": -187.47373962402344, + "loss": 0.1245, + "nll_loss": 0.12777109444141388, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.498431205749512, + "rewards/margins": 6.2489423751831055, + "rewards/rejected": -18.74737548828125, + "step": 1880 + }, + { + "epoch": 4.368679572377926, + "grad_norm": 1.4982426166534424, + "learning_rate": 1.3888888888888888e-07, + "logits/chosen": -0.2519396245479584, + "logits/rejected": -0.24396154284477234, + "logps/chosen": -117.44911193847656, + "logps/rejected": -182.5988006591797, + "loss": 0.1127, + "nll_loss": 0.1127076968550682, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.744911193847656, + "rewards/margins": 6.5149688720703125, + "rewards/rejected": -18.25988006591797, + "step": 1890 + }, + { + "epoch": 4.3917942791100835, + "grad_norm": 2.1417200565338135, + "learning_rate": 1.3374485596707818e-07, + "logits/chosen": -0.19052667915821075, + "logits/rejected": -0.1665157973766327, + "logps/chosen": -116.32462310791016, + "logps/rejected": -181.2820587158203, + "loss": 0.1205, + "nll_loss": 0.11788536608219147, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.632462501525879, + "rewards/margins": 6.495743751525879, + "rewards/rejected": -18.12820816040039, + "step": 1900 + }, + { + "epoch": 4.414908985842242, + "grad_norm": 1.5730674266815186, + "learning_rate": 1.2860082304526747e-07, + "logits/chosen": -0.28410059213638306, + "logits/rejected": -0.24584396183490753, + "logps/chosen": -126.806884765625, + "logps/rejected": -191.36875915527344, + "loss": 0.1188, + "nll_loss": 0.11963550001382828, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.680688858032227, + "rewards/margins": 6.456188201904297, + "rewards/rejected": -19.13687515258789, + "step": 1910 + }, + { + "epoch": 4.438023692574401, + "grad_norm": 2.536539077758789, + "learning_rate": 1.2345679012345677e-07, + "logits/chosen": -0.2129761278629303, + "logits/rejected": -0.1930898129940033, + "logps/chosen": -117.23963928222656, + "logps/rejected": -180.9163818359375, + "loss": 0.1262, + "nll_loss": 0.11052282154560089, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.72396469116211, + "rewards/margins": 6.367676258087158, + "rewards/rejected": -18.09164047241211, + "step": 1920 + }, + { + "epoch": 4.4611383993065585, + "grad_norm": 1.6419086456298828, + "learning_rate": 1.183127572016461e-07, + "logits/chosen": -0.18322396278381348, + "logits/rejected": -0.15920376777648926, + "logps/chosen": -116.58353424072266, + "logps/rejected": -184.9496307373047, + "loss": 0.114, + "nll_loss": 0.10174567997455597, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.658352851867676, + "rewards/margins": 6.836610317230225, + "rewards/rejected": -18.494962692260742, + "step": 1930 + }, + { + "epoch": 4.484253106038717, + "grad_norm": 2.5254459381103516, + "learning_rate": 1.1316872427983539e-07, + "logits/chosen": -0.20438556373119354, + "logits/rejected": -0.19316819310188293, + "logps/chosen": -111.71683502197266, + "logps/rejected": -176.36444091796875, + "loss": 0.1143, + "nll_loss": 0.10253375768661499, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.171684265136719, + "rewards/margins": 6.464761257171631, + "rewards/rejected": -17.636444091796875, + "step": 1940 + }, + { + "epoch": 4.507367812770876, + "grad_norm": 4.048756122589111, + "learning_rate": 1.0802469135802467e-07, + "logits/chosen": -0.20184461772441864, + "logits/rejected": -0.20470590889453888, + "logps/chosen": -112.52592468261719, + "logps/rejected": -176.77975463867188, + "loss": 0.122, + "nll_loss": 0.10450093448162079, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.252592086791992, + "rewards/margins": 6.4253830909729, + "rewards/rejected": -17.677974700927734, + "step": 1950 + }, + { + "epoch": 4.530482519503034, + "grad_norm": 1.5695422887802124, + "learning_rate": 1.02880658436214e-07, + "logits/chosen": -0.15921801328659058, + "logits/rejected": -0.16545803844928741, + "logps/chosen": -116.6390151977539, + "logps/rejected": -182.0139617919922, + "loss": 0.123, + "nll_loss": 0.11899758875370026, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.66390323638916, + "rewards/margins": 6.537497043609619, + "rewards/rejected": -18.201400756835938, + "step": 1960 + }, + { + "epoch": 4.553597226235192, + "grad_norm": 1.8795533180236816, + "learning_rate": 9.773662551440328e-08, + "logits/chosen": -0.21856431663036346, + "logits/rejected": -0.22739803791046143, + "logps/chosen": -111.40470123291016, + "logps/rejected": -175.14663696289062, + "loss": 0.1173, + "nll_loss": 0.10676850378513336, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.140469551086426, + "rewards/margins": 6.374191761016846, + "rewards/rejected": -17.514663696289062, + "step": 1970 + }, + { + "epoch": 4.576711932967351, + "grad_norm": 2.4999828338623047, + "learning_rate": 9.259259259259258e-08, + "logits/chosen": -0.16077259182929993, + "logits/rejected": -0.15148191154003143, + "logps/chosen": -112.52552795410156, + "logps/rejected": -175.3218994140625, + "loss": 0.122, + "nll_loss": 0.11213432252407074, + "rewards/accuracies": 0.996874988079071, + "rewards/chosen": -11.25255298614502, + "rewards/margins": 6.279637336730957, + "rewards/rejected": -17.532190322875977, + "step": 1980 + }, + { + "epoch": 4.599826639699509, + "grad_norm": 2.170232057571411, + "learning_rate": 8.744855967078189e-08, + "logits/chosen": -0.20790553092956543, + "logits/rejected": -0.19387516379356384, + "logps/chosen": -117.14433288574219, + "logps/rejected": -181.39340209960938, + "loss": 0.1185, + "nll_loss": 0.11259637773036957, + "rewards/accuracies": 0.996874988079071, + "rewards/chosen": -11.714433670043945, + "rewards/margins": 6.424906253814697, + "rewards/rejected": -18.139341354370117, + "step": 1990 + }, + { + "epoch": 4.622941346431667, + "grad_norm": 2.0322587490081787, + "learning_rate": 8.230452674897118e-08, + "logits/chosen": -0.1339203268289566, + "logits/rejected": -0.14758563041687012, + "logps/chosen": -109.77425384521484, + "logps/rejected": -176.02438354492188, + "loss": 0.1248, + "nll_loss": 0.11588220298290253, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.977426528930664, + "rewards/margins": 6.6250104904174805, + "rewards/rejected": -17.602436065673828, + "step": 2000 + }, + { + "epoch": 4.646056053163825, + "grad_norm": 3.8062565326690674, + "learning_rate": 7.716049382716048e-08, + "logits/chosen": -0.25674083828926086, + "logits/rejected": -0.23061016201972961, + "logps/chosen": -122.008056640625, + "logps/rejected": -186.86663818359375, + "loss": 0.1177, + "nll_loss": 0.11457221210002899, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.200803756713867, + "rewards/margins": 6.485858917236328, + "rewards/rejected": -18.686664581298828, + "step": 2010 + }, + { + "epoch": 4.669170759895984, + "grad_norm": 1.300473928451538, + "learning_rate": 7.201646090534979e-08, + "logits/chosen": -0.12542086839675903, + "logits/rejected": -0.12564246356487274, + "logps/chosen": -112.2677993774414, + "logps/rejected": -177.23947143554688, + "loss": 0.1197, + "nll_loss": 0.10939665883779526, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.226778984069824, + "rewards/margins": 6.497168064117432, + "rewards/rejected": -17.723949432373047, + "step": 2020 + }, + { + "epoch": 4.692285466628142, + "grad_norm": 3.699575901031494, + "learning_rate": 6.687242798353909e-08, + "logits/chosen": -0.15934507548809052, + "logits/rejected": -0.15075993537902832, + "logps/chosen": -116.63383483886719, + "logps/rejected": -181.26510620117188, + "loss": 0.1222, + "nll_loss": 0.13159163296222687, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.663382530212402, + "rewards/margins": 6.463127136230469, + "rewards/rejected": -18.126508712768555, + "step": 2030 + }, + { + "epoch": 4.7154001733603, + "grad_norm": 3.081348180770874, + "learning_rate": 6.172839506172839e-08, + "logits/chosen": -0.2664518356323242, + "logits/rejected": -0.24538561701774597, + "logps/chosen": -122.5953140258789, + "logps/rejected": -189.40269470214844, + "loss": 0.122, + "nll_loss": 0.11068514734506607, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.259531021118164, + "rewards/margins": 6.680737495422363, + "rewards/rejected": -18.940269470214844, + "step": 2040 + }, + { + "epoch": 4.738514880092459, + "grad_norm": 1.9295371770858765, + "learning_rate": 5.6584362139917695e-08, + "logits/chosen": -0.3057961165904999, + "logits/rejected": -0.2679705023765564, + "logps/chosen": -119.34764099121094, + "logps/rejected": -184.24545288085938, + "loss": 0.1254, + "nll_loss": 0.11074963957071304, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.934765815734863, + "rewards/margins": 6.489781379699707, + "rewards/rejected": -18.424545288085938, + "step": 2050 + }, + { + "epoch": 4.7616295868246175, + "grad_norm": 1.486010193824768, + "learning_rate": 5.1440329218107e-08, + "logits/chosen": -0.17464767396450043, + "logits/rejected": -0.17597734928131104, + "logps/chosen": -118.97342681884766, + "logps/rejected": -184.82752990722656, + "loss": 0.116, + "nll_loss": 0.11164693534374237, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.89734172821045, + "rewards/margins": 6.585410118103027, + "rewards/rejected": -18.48275375366211, + "step": 2060 + }, + { + "epoch": 4.784744293556775, + "grad_norm": 1.5164188146591187, + "learning_rate": 4.629629629629629e-08, + "logits/chosen": -0.1697818785905838, + "logits/rejected": -0.17655737698078156, + "logps/chosen": -123.55452728271484, + "logps/rejected": -191.81411743164062, + "loss": 0.1175, + "nll_loss": 0.10650823265314102, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.355452537536621, + "rewards/margins": 6.8259596824646, + "rewards/rejected": -19.18140983581543, + "step": 2070 + }, + { + "epoch": 4.807859000288934, + "grad_norm": 2.9849853515625, + "learning_rate": 4.115226337448559e-08, + "logits/chosen": -0.1795181930065155, + "logits/rejected": -0.19433379173278809, + "logps/chosen": -118.71900939941406, + "logps/rejected": -185.427734375, + "loss": 0.1176, + "nll_loss": 0.11160220950841904, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.87190055847168, + "rewards/margins": 6.670874118804932, + "rewards/rejected": -18.542774200439453, + "step": 2080 + }, + { + "epoch": 4.8309737070210925, + "grad_norm": 1.8896292448043823, + "learning_rate": 3.6008230452674896e-08, + "logits/chosen": -0.20320720970630646, + "logits/rejected": -0.21179303526878357, + "logps/chosen": -121.1741714477539, + "logps/rejected": -189.61380004882812, + "loss": 0.1197, + "nll_loss": 0.12176340818405151, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.117416381835938, + "rewards/margins": 6.843962669372559, + "rewards/rejected": -18.961380004882812, + "step": 2090 + }, + { + "epoch": 4.85408841375325, + "grad_norm": 2.13209867477417, + "learning_rate": 3.086419753086419e-08, + "logits/chosen": -0.202679842710495, + "logits/rejected": -0.19807621836662292, + "logps/chosen": -121.65214538574219, + "logps/rejected": -187.36184692382812, + "loss": 0.1117, + "nll_loss": 0.1065160408616066, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.165216445922852, + "rewards/margins": 6.570970058441162, + "rewards/rejected": -18.736186981201172, + "step": 2100 + }, + { + "epoch": 4.877203120485409, + "grad_norm": 2.2168078422546387, + "learning_rate": 2.57201646090535e-08, + "logits/chosen": -0.20957596600055695, + "logits/rejected": -0.19148316979408264, + "logps/chosen": -112.11415100097656, + "logps/rejected": -176.92153930664062, + "loss": 0.12, + "nll_loss": 0.12247494608163834, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.211416244506836, + "rewards/margins": 6.480741024017334, + "rewards/rejected": -17.692157745361328, + "step": 2110 + }, + { + "epoch": 4.900317827217567, + "grad_norm": 1.704630970954895, + "learning_rate": 2.0576131687242796e-08, + "logits/chosen": -0.21424663066864014, + "logits/rejected": -0.24735161662101746, + "logps/chosen": -128.74917602539062, + "logps/rejected": -195.53347778320312, + "loss": 0.1239, + "nll_loss": 0.14099851250648499, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.874917984008789, + "rewards/margins": 6.678428649902344, + "rewards/rejected": -19.553346633911133, + "step": 2120 + }, + { + "epoch": 4.923432533949725, + "grad_norm": 2.0087478160858154, + "learning_rate": 1.5432098765432096e-08, + "logits/chosen": -0.1421460658311844, + "logits/rejected": -0.1667608767747879, + "logps/chosen": -113.1841812133789, + "logps/rejected": -177.54026794433594, + "loss": 0.1199, + "nll_loss": 0.12012244760990143, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.318418502807617, + "rewards/margins": 6.435610771179199, + "rewards/rejected": -17.7540283203125, + "step": 2130 + }, + { + "epoch": 4.946547240681884, + "grad_norm": 3.1608433723449707, + "learning_rate": 1.0288065843621398e-08, + "logits/chosen": -0.20297956466674805, + "logits/rejected": -0.18899144232273102, + "logps/chosen": -118.35282897949219, + "logps/rejected": -183.97708129882812, + "loss": 0.1174, + "nll_loss": 0.10903529822826385, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.835283279418945, + "rewards/margins": 6.562425136566162, + "rewards/rejected": -18.397706985473633, + "step": 2140 + }, + { + "epoch": 4.969661947414043, + "grad_norm": 1.8710432052612305, + "learning_rate": 5.144032921810699e-09, + "logits/chosen": -0.2684074640274048, + "logits/rejected": -0.22125348448753357, + "logps/chosen": -129.27749633789062, + "logps/rejected": -194.1197967529297, + "loss": 0.1183, + "nll_loss": 0.11009220033884048, + "rewards/accuracies": 1.0, + "rewards/chosen": -12.927749633789062, + "rewards/margins": 6.484231472015381, + "rewards/rejected": -19.4119815826416, + "step": 2150 + }, + { + "epoch": 4.9927766541462, + "grad_norm": 1.882362961769104, + "learning_rate": 0.0, + "logits/chosen": -0.15148359537124634, + "logits/rejected": -0.1361338496208191, + "logps/chosen": -107.99227142333984, + "logps/rejected": -173.1522979736328, + "loss": 0.1143, + "nll_loss": 0.10342558473348618, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.799227714538574, + "rewards/margins": 6.516002655029297, + "rewards/rejected": -17.315229415893555, + "step": 2160 + }, + { + "epoch": 4.9927766541462, + "eval_logits/chosen": -0.14757364988327026, + "eval_logits/rejected": -0.11364421248435974, + "eval_logps/chosen": -141.49264526367188, + "eval_logps/rejected": -155.7095184326172, + "eval_loss": 1.4373149871826172, + "eval_nll_loss": 0.17254449427127838, + "eval_rewards/accuracies": 0.654347836971283, + "eval_rewards/chosen": -14.149263381958008, + "eval_rewards/margins": 1.4216874837875366, + "eval_rewards/rejected": -15.570951461791992, + "eval_runtime": 76.6761, + "eval_samples_per_second": 23.814, + "eval_steps_per_second": 1.5, + "step": 2160 + }, + { + "epoch": 4.9927766541462, + "step": 2160, + "total_flos": 0.0, + "train_loss": 0.5995175864961412, + "train_runtime": 46944.6998, + "train_samples_per_second": 5.898, + "train_steps_per_second": 0.046 + } + ], + "logging_steps": 10, + "max_steps": 2160, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}