|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.9927766541462, |
|
"eval_steps": 500, |
|
"global_step": 2160, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.023114706732158336, |
|
"grad_norm": 68.88048553466797, |
|
"learning_rate": 4.629629629629629e-08, |
|
"logits/chosen": -0.3351331651210785, |
|
"logits/rejected": -0.3151743412017822, |
|
"logps/chosen": -269.4203796386719, |
|
"logps/rejected": -267.72064208984375, |
|
"loss": 2.9236, |
|
"nll_loss": 1.0532859563827515, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -26.94203758239746, |
|
"rewards/margins": -0.1699729710817337, |
|
"rewards/rejected": -26.77206802368164, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04622941346431667, |
|
"grad_norm": 61.09861755371094, |
|
"learning_rate": 9.259259259259258e-08, |
|
"logits/chosen": -0.33865073323249817, |
|
"logits/rejected": -0.3208921253681183, |
|
"logps/chosen": -263.8262634277344, |
|
"logps/rejected": -270.32977294921875, |
|
"loss": 2.896, |
|
"nll_loss": 0.9992793202400208, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -26.38262939453125, |
|
"rewards/margins": 0.6503503918647766, |
|
"rewards/rejected": -27.03297996520996, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06934412019647501, |
|
"grad_norm": 64.75421142578125, |
|
"learning_rate": 1.3888888888888888e-07, |
|
"logits/chosen": -0.2800094485282898, |
|
"logits/rejected": -0.2686631977558136, |
|
"logps/chosen": -262.0818176269531, |
|
"logps/rejected": -265.42999267578125, |
|
"loss": 2.826, |
|
"nll_loss": 1.124384880065918, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -26.20818519592285, |
|
"rewards/margins": 0.33481523394584656, |
|
"rewards/rejected": -26.54299545288086, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09245882692863334, |
|
"grad_norm": 54.530216217041016, |
|
"learning_rate": 1.8518518518518516e-07, |
|
"logits/chosen": -0.328824520111084, |
|
"logits/rejected": -0.3197949528694153, |
|
"logps/chosen": -250.150146484375, |
|
"logps/rejected": -252.0699005126953, |
|
"loss": 2.7636, |
|
"nll_loss": 1.1389970779418945, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -25.015010833740234, |
|
"rewards/margins": 0.19197671115398407, |
|
"rewards/rejected": -25.206989288330078, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11557353366079168, |
|
"grad_norm": 54.73969650268555, |
|
"learning_rate": 2.3148148148148148e-07, |
|
"logits/chosen": -0.36699360609054565, |
|
"logits/rejected": -0.344801664352417, |
|
"logps/chosen": -259.365966796875, |
|
"logps/rejected": -257.6177062988281, |
|
"loss": 2.8769, |
|
"nll_loss": 0.9557002782821655, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -25.936599731445312, |
|
"rewards/margins": -0.17483071982860565, |
|
"rewards/rejected": -25.761768341064453, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13868824039295002, |
|
"grad_norm": 61.527992248535156, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"logits/chosen": -0.4444943368434906, |
|
"logits/rejected": -0.43780913949012756, |
|
"logps/chosen": -241.99569702148438, |
|
"logps/rejected": -240.5470428466797, |
|
"loss": 2.8199, |
|
"nll_loss": 1.0306382179260254, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": -24.199569702148438, |
|
"rewards/margins": -0.144865483045578, |
|
"rewards/rejected": -24.054706573486328, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16180294712510834, |
|
"grad_norm": 58.2850341796875, |
|
"learning_rate": 3.2407407407407406e-07, |
|
"logits/chosen": -0.5648446083068848, |
|
"logits/rejected": -0.5444747805595398, |
|
"logps/chosen": -224.255126953125, |
|
"logps/rejected": -223.83773803710938, |
|
"loss": 2.7692, |
|
"nll_loss": 0.9458900690078735, |
|
"rewards/accuracies": 0.4906249940395355, |
|
"rewards/chosen": -22.425512313842773, |
|
"rewards/margins": -0.04173760861158371, |
|
"rewards/rejected": -22.383777618408203, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1849176538572667, |
|
"grad_norm": 50.89101028442383, |
|
"learning_rate": 3.703703703703703e-07, |
|
"logits/chosen": -0.7499346733093262, |
|
"logits/rejected": -0.7246556282043457, |
|
"logps/chosen": -214.29019165039062, |
|
"logps/rejected": -215.6709442138672, |
|
"loss": 2.4664, |
|
"nll_loss": 0.8191965222358704, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -21.429019927978516, |
|
"rewards/margins": 0.13807573914527893, |
|
"rewards/rejected": -21.567096710205078, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.208032360589425, |
|
"grad_norm": 51.08415222167969, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -0.824606716632843, |
|
"logits/rejected": -0.803991436958313, |
|
"logps/chosen": -185.02096557617188, |
|
"logps/rejected": -191.6359405517578, |
|
"loss": 2.215, |
|
"nll_loss": 0.6511534452438354, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -18.50209617614746, |
|
"rewards/margins": 0.6614967584609985, |
|
"rewards/rejected": -19.163593292236328, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.23114706732158335, |
|
"grad_norm": 50.10819625854492, |
|
"learning_rate": 4.6296296296296297e-07, |
|
"logits/chosen": -0.7869374752044678, |
|
"logits/rejected": -0.7605717778205872, |
|
"logps/chosen": -172.6743927001953, |
|
"logps/rejected": -173.7969512939453, |
|
"loss": 2.2028, |
|
"nll_loss": 0.5232411623001099, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -17.267436981201172, |
|
"rewards/margins": 0.1122552752494812, |
|
"rewards/rejected": -17.379695892333984, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2542617740537417, |
|
"grad_norm": 49.00399398803711, |
|
"learning_rate": 5.092592592592593e-07, |
|
"logits/chosen": -0.6167671084403992, |
|
"logits/rejected": -0.5838115811347961, |
|
"logps/chosen": -156.83273315429688, |
|
"logps/rejected": -159.6825408935547, |
|
"loss": 1.8947, |
|
"nll_loss": 0.3989648222923279, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -15.683273315429688, |
|
"rewards/margins": 0.2849821150302887, |
|
"rewards/rejected": -15.968255996704102, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27737648078590005, |
|
"grad_norm": 48.19024658203125, |
|
"learning_rate": 5.555555555555555e-07, |
|
"logits/chosen": -0.48373740911483765, |
|
"logits/rejected": -0.46102485060691833, |
|
"logps/chosen": -161.04762268066406, |
|
"logps/rejected": -159.78451538085938, |
|
"loss": 1.8634, |
|
"nll_loss": 0.3991420865058899, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": -16.10476303100586, |
|
"rewards/margins": -0.12630942463874817, |
|
"rewards/rejected": -15.9784517288208, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.30049118751805837, |
|
"grad_norm": 63.570125579833984, |
|
"learning_rate": 6.018518518518519e-07, |
|
"logits/chosen": -0.5185505747795105, |
|
"logits/rejected": -0.4863056242465973, |
|
"logps/chosen": -154.00921630859375, |
|
"logps/rejected": -161.2861785888672, |
|
"loss": 1.8664, |
|
"nll_loss": 0.3488847315311432, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -15.400922775268555, |
|
"rewards/margins": 0.7276966571807861, |
|
"rewards/rejected": -16.128618240356445, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3236058942502167, |
|
"grad_norm": 55.390159606933594, |
|
"learning_rate": 6.481481481481481e-07, |
|
"logits/chosen": -0.5367673635482788, |
|
"logits/rejected": -0.5227854251861572, |
|
"logps/chosen": -144.9154815673828, |
|
"logps/rejected": -148.911376953125, |
|
"loss": 1.8519, |
|
"nll_loss": 0.29890117049217224, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -14.491546630859375, |
|
"rewards/margins": 0.39959025382995605, |
|
"rewards/rejected": -14.891136169433594, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34672060098237506, |
|
"grad_norm": 88.29100799560547, |
|
"learning_rate": 6.944444444444444e-07, |
|
"logits/chosen": -0.5234349370002747, |
|
"logits/rejected": -0.5064178705215454, |
|
"logps/chosen": -144.33682250976562, |
|
"logps/rejected": -146.9467315673828, |
|
"loss": 1.8867, |
|
"nll_loss": 0.29581302404403687, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -14.433680534362793, |
|
"rewards/margins": 0.2609911262989044, |
|
"rewards/rejected": -14.694673538208008, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3698353077145334, |
|
"grad_norm": 43.19578170776367, |
|
"learning_rate": 7.407407407407406e-07, |
|
"logits/chosen": -0.47395405173301697, |
|
"logits/rejected": -0.4435350298881531, |
|
"logps/chosen": -155.87083435058594, |
|
"logps/rejected": -157.5062255859375, |
|
"loss": 1.7061, |
|
"nll_loss": 0.3032439351081848, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": -15.58708381652832, |
|
"rewards/margins": 0.16353729367256165, |
|
"rewards/rejected": -15.750622749328613, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3929500144466917, |
|
"grad_norm": 54.197662353515625, |
|
"learning_rate": 7.870370370370371e-07, |
|
"logits/chosen": -0.4344661235809326, |
|
"logits/rejected": -0.4211999475955963, |
|
"logps/chosen": -155.08998107910156, |
|
"logps/rejected": -160.6627655029297, |
|
"loss": 1.5591, |
|
"nll_loss": 0.2847481667995453, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -15.508997917175293, |
|
"rewards/margins": 0.5572806000709534, |
|
"rewards/rejected": -16.066280364990234, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41606472117885, |
|
"grad_norm": 48.73773956298828, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": -0.42254990339279175, |
|
"logits/rejected": -0.4155765473842621, |
|
"logps/chosen": -149.37136840820312, |
|
"logps/rejected": -154.17172241210938, |
|
"loss": 1.61, |
|
"nll_loss": 0.27371498942375183, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -14.93713665008545, |
|
"rewards/margins": 0.48003578186035156, |
|
"rewards/rejected": -15.4171724319458, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4391794279110084, |
|
"grad_norm": 51.67360305786133, |
|
"learning_rate": 8.796296296296296e-07, |
|
"logits/chosen": -0.4299948811531067, |
|
"logits/rejected": -0.4166909158229828, |
|
"logps/chosen": -157.9515380859375, |
|
"logps/rejected": -162.32485961914062, |
|
"loss": 1.6692, |
|
"nll_loss": 0.2900438606739044, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -15.795153617858887, |
|
"rewards/margins": 0.4373341500759125, |
|
"rewards/rejected": -16.232486724853516, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4622941346431667, |
|
"grad_norm": 45.50596618652344, |
|
"learning_rate": 9.259259259259259e-07, |
|
"logits/chosen": -0.35690927505493164, |
|
"logits/rejected": -0.34764981269836426, |
|
"logps/chosen": -154.99716186523438, |
|
"logps/rejected": -160.2298126220703, |
|
"loss": 1.6466, |
|
"nll_loss": 0.2945239543914795, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -15.499715805053711, |
|
"rewards/margins": 0.5232647061347961, |
|
"rewards/rejected": -16.022979736328125, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.48540884137532503, |
|
"grad_norm": 52.31976318359375, |
|
"learning_rate": 9.722222222222222e-07, |
|
"logits/chosen": -0.4234965443611145, |
|
"logits/rejected": -0.39612382650375366, |
|
"logps/chosen": -154.9087371826172, |
|
"logps/rejected": -155.92794799804688, |
|
"loss": 1.6004, |
|
"nll_loss": 0.2901446223258972, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -15.490873336791992, |
|
"rewards/margins": 0.10192202031612396, |
|
"rewards/rejected": -15.592794418334961, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5085235481074833, |
|
"grad_norm": 54.61393737792969, |
|
"learning_rate": 9.979423868312756e-07, |
|
"logits/chosen": -0.4337913393974304, |
|
"logits/rejected": -0.4053143560886383, |
|
"logps/chosen": -168.09202575683594, |
|
"logps/rejected": -172.47401428222656, |
|
"loss": 1.6616, |
|
"nll_loss": 0.30150192975997925, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -16.809206008911133, |
|
"rewards/margins": 0.43819671869277954, |
|
"rewards/rejected": -17.24740219116211, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5316382548396418, |
|
"grad_norm": 46.82304000854492, |
|
"learning_rate": 9.927983539094649e-07, |
|
"logits/chosen": -0.41667041182518005, |
|
"logits/rejected": -0.3951401710510254, |
|
"logps/chosen": -165.96499633789062, |
|
"logps/rejected": -171.3835906982422, |
|
"loss": 1.6745, |
|
"nll_loss": 0.30009427666664124, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -16.596498489379883, |
|
"rewards/margins": 0.5418606996536255, |
|
"rewards/rejected": -17.13835906982422, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5547529615718001, |
|
"grad_norm": 51.5750846862793, |
|
"learning_rate": 9.876543209876542e-07, |
|
"logits/chosen": -0.3943902254104614, |
|
"logits/rejected": -0.3833962082862854, |
|
"logps/chosen": -163.68643188476562, |
|
"logps/rejected": -167.90953063964844, |
|
"loss": 1.4982, |
|
"nll_loss": 0.2821606993675232, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -16.368642807006836, |
|
"rewards/margins": 0.42231208086013794, |
|
"rewards/rejected": -16.79095458984375, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5778676683039584, |
|
"grad_norm": 54.075496673583984, |
|
"learning_rate": 9.825102880658436e-07, |
|
"logits/chosen": -0.4583554267883301, |
|
"logits/rejected": -0.4463082253932953, |
|
"logps/chosen": -160.63284301757812, |
|
"logps/rejected": -163.09634399414062, |
|
"loss": 1.639, |
|
"nll_loss": 0.25729092955589294, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": -16.063283920288086, |
|
"rewards/margins": 0.24634972214698792, |
|
"rewards/rejected": -16.309635162353516, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6009823750361167, |
|
"grad_norm": 50.17490768432617, |
|
"learning_rate": 9.77366255144033e-07, |
|
"logits/chosen": -0.4777965545654297, |
|
"logits/rejected": -0.4631553292274475, |
|
"logps/chosen": -154.1898956298828, |
|
"logps/rejected": -162.0362091064453, |
|
"loss": 1.4771, |
|
"nll_loss": 0.27278777956962585, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -15.418991088867188, |
|
"rewards/margins": 0.7846304178237915, |
|
"rewards/rejected": -16.2036190032959, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.624097081768275, |
|
"grad_norm": 44.40957260131836, |
|
"learning_rate": 9.722222222222222e-07, |
|
"logits/chosen": -0.48693957924842834, |
|
"logits/rejected": -0.4778309762477875, |
|
"logps/chosen": -162.27188110351562, |
|
"logps/rejected": -169.07962036132812, |
|
"loss": 1.5028, |
|
"nll_loss": 0.2821035087108612, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -16.227190017700195, |
|
"rewards/margins": 0.6807710528373718, |
|
"rewards/rejected": -16.907960891723633, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6472117885004334, |
|
"grad_norm": 50.629066467285156, |
|
"learning_rate": 9.670781893004115e-07, |
|
"logits/chosen": -0.39725005626678467, |
|
"logits/rejected": -0.3660200238227844, |
|
"logps/chosen": -158.48001098632812, |
|
"logps/rejected": -167.71119689941406, |
|
"loss": 1.4805, |
|
"nll_loss": 0.2827926576137543, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -15.848001480102539, |
|
"rewards/margins": 0.9231182932853699, |
|
"rewards/rejected": -16.771120071411133, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6703264952325917, |
|
"grad_norm": 55.39129638671875, |
|
"learning_rate": 9.619341563786007e-07, |
|
"logits/chosen": -0.5320179462432861, |
|
"logits/rejected": -0.4930430054664612, |
|
"logps/chosen": -166.970947265625, |
|
"logps/rejected": -172.72909545898438, |
|
"loss": 1.4575, |
|
"nll_loss": 0.2989470362663269, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -16.697093963623047, |
|
"rewards/margins": 0.5758152604103088, |
|
"rewards/rejected": -17.272911071777344, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6934412019647501, |
|
"grad_norm": 42.369606018066406, |
|
"learning_rate": 9.567901234567902e-07, |
|
"logits/chosen": -0.43348032236099243, |
|
"logits/rejected": -0.4254017472267151, |
|
"logps/chosen": -162.8667449951172, |
|
"logps/rejected": -172.35897827148438, |
|
"loss": 1.4884, |
|
"nll_loss": 0.2910870611667633, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -16.286678314208984, |
|
"rewards/margins": 0.9492223858833313, |
|
"rewards/rejected": -17.235897064208984, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7165559086969084, |
|
"grad_norm": 48.293399810791016, |
|
"learning_rate": 9.516460905349794e-07, |
|
"logits/chosen": -0.509886622428894, |
|
"logits/rejected": -0.49991345405578613, |
|
"logps/chosen": -173.03567504882812, |
|
"logps/rejected": -176.65750122070312, |
|
"loss": 1.5401, |
|
"nll_loss": 0.30316367745399475, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -17.30356788635254, |
|
"rewards/margins": 0.36218342185020447, |
|
"rewards/rejected": -17.665752410888672, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7396706154290668, |
|
"grad_norm": 45.7746467590332, |
|
"learning_rate": 9.465020576131687e-07, |
|
"logits/chosen": -0.503333568572998, |
|
"logits/rejected": -0.4878058433532715, |
|
"logps/chosen": -163.34519958496094, |
|
"logps/rejected": -172.25938415527344, |
|
"loss": 1.5247, |
|
"nll_loss": 0.29550039768218994, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -16.33452033996582, |
|
"rewards/margins": 0.89141845703125, |
|
"rewards/rejected": -17.225940704345703, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7627853221612251, |
|
"grad_norm": 48.05742645263672, |
|
"learning_rate": 9.413580246913579e-07, |
|
"logits/chosen": -0.5755558609962463, |
|
"logits/rejected": -0.5767273902893066, |
|
"logps/chosen": -158.17958068847656, |
|
"logps/rejected": -165.14163208007812, |
|
"loss": 1.4969, |
|
"nll_loss": 0.2938057780265808, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -15.817957878112793, |
|
"rewards/margins": 0.6962078809738159, |
|
"rewards/rejected": -16.5141658782959, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7859000288933834, |
|
"grad_norm": 45.862648010253906, |
|
"learning_rate": 9.362139917695473e-07, |
|
"logits/chosen": -0.6315797567367554, |
|
"logits/rejected": -0.6231464147567749, |
|
"logps/chosen": -164.8571014404297, |
|
"logps/rejected": -170.53570556640625, |
|
"loss": 1.3908, |
|
"nll_loss": 0.28307533264160156, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -16.48571014404297, |
|
"rewards/margins": 0.567859947681427, |
|
"rewards/rejected": -17.053571701049805, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8090147356255417, |
|
"grad_norm": 45.217002868652344, |
|
"learning_rate": 9.310699588477366e-07, |
|
"logits/chosen": -0.5783101320266724, |
|
"logits/rejected": -0.5816030502319336, |
|
"logps/chosen": -167.26516723632812, |
|
"logps/rejected": -176.68746948242188, |
|
"loss": 1.5036, |
|
"nll_loss": 0.2909998297691345, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -16.726520538330078, |
|
"rewards/margins": 0.9422298669815063, |
|
"rewards/rejected": -17.66874885559082, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8321294423577, |
|
"grad_norm": 56.84000778198242, |
|
"learning_rate": 9.259259259259259e-07, |
|
"logits/chosen": -0.5195820927619934, |
|
"logits/rejected": -0.5026860237121582, |
|
"logps/chosen": -171.53640747070312, |
|
"logps/rejected": -177.3377227783203, |
|
"loss": 1.5078, |
|
"nll_loss": 0.29021695256233215, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -17.153636932373047, |
|
"rewards/margins": 0.5801342725753784, |
|
"rewards/rejected": -17.733774185180664, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8552441490898585, |
|
"grad_norm": 50.610069274902344, |
|
"learning_rate": 9.207818930041152e-07, |
|
"logits/chosen": -0.49760836362838745, |
|
"logits/rejected": -0.4677702784538269, |
|
"logps/chosen": -161.1763153076172, |
|
"logps/rejected": -171.69003295898438, |
|
"loss": 1.3722, |
|
"nll_loss": 0.26248103380203247, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -16.117631912231445, |
|
"rewards/margins": 1.051371693611145, |
|
"rewards/rejected": -17.169002532958984, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8783588558220168, |
|
"grad_norm": 54.772438049316406, |
|
"learning_rate": 9.156378600823045e-07, |
|
"logits/chosen": -0.42570480704307556, |
|
"logits/rejected": -0.4065491259098053, |
|
"logps/chosen": -168.25025939941406, |
|
"logps/rejected": -176.4032440185547, |
|
"loss": 1.3843, |
|
"nll_loss": 0.313023179769516, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -16.825023651123047, |
|
"rewards/margins": 0.8152991533279419, |
|
"rewards/rejected": -17.64032554626465, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9014735625541751, |
|
"grad_norm": 50.42124557495117, |
|
"learning_rate": 9.104938271604939e-07, |
|
"logits/chosen": -0.43410390615463257, |
|
"logits/rejected": -0.4136204719543457, |
|
"logps/chosen": -165.08279418945312, |
|
"logps/rejected": -176.14059448242188, |
|
"loss": 1.4235, |
|
"nll_loss": 0.27761662006378174, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -16.50827980041504, |
|
"rewards/margins": 1.105778455734253, |
|
"rewards/rejected": -17.614057540893555, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9245882692863334, |
|
"grad_norm": 51.66304016113281, |
|
"learning_rate": 9.053497942386831e-07, |
|
"logits/chosen": -0.40831509232521057, |
|
"logits/rejected": -0.3836323916912079, |
|
"logps/chosen": -162.02064514160156, |
|
"logps/rejected": -169.6013946533203, |
|
"loss": 1.3933, |
|
"nll_loss": 0.28827401995658875, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -16.20206642150879, |
|
"rewards/margins": 0.7580735087394714, |
|
"rewards/rejected": -16.960140228271484, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9477029760184917, |
|
"grad_norm": 48.54574966430664, |
|
"learning_rate": 9.002057613168724e-07, |
|
"logits/chosen": -0.36130112409591675, |
|
"logits/rejected": -0.35345903038978577, |
|
"logps/chosen": -159.15536499023438, |
|
"logps/rejected": -170.9656524658203, |
|
"loss": 1.3593, |
|
"nll_loss": 0.2898252308368683, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -15.915536880493164, |
|
"rewards/margins": 1.181027889251709, |
|
"rewards/rejected": -17.09656524658203, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9708176827506501, |
|
"grad_norm": 43.59242248535156, |
|
"learning_rate": 8.950617283950617e-07, |
|
"logits/chosen": -0.4918903410434723, |
|
"logits/rejected": -0.4697975516319275, |
|
"logps/chosen": -165.565673828125, |
|
"logps/rejected": -174.68519592285156, |
|
"loss": 1.3598, |
|
"nll_loss": 0.30875933170318604, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -16.556568145751953, |
|
"rewards/margins": 0.9119526147842407, |
|
"rewards/rejected": -17.468521118164062, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9939323894828085, |
|
"grad_norm": 50.116798400878906, |
|
"learning_rate": 8.89917695473251e-07, |
|
"logits/chosen": -0.49847784638404846, |
|
"logits/rejected": -0.5088882446289062, |
|
"logps/chosen": -167.231201171875, |
|
"logps/rejected": -177.6866455078125, |
|
"loss": 1.4367, |
|
"nll_loss": 0.28403669595718384, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -16.723121643066406, |
|
"rewards/margins": 1.0455443859100342, |
|
"rewards/rejected": -17.768667221069336, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9985553308292401, |
|
"eval_logits/chosen": -0.4373142123222351, |
|
"eval_logits/rejected": -0.40795600414276123, |
|
"eval_logps/chosen": -170.67918395996094, |
|
"eval_logps/rejected": -180.96241760253906, |
|
"eval_loss": 1.392618179321289, |
|
"eval_nll_loss": 0.3199608623981476, |
|
"eval_rewards/accuracies": 0.656521737575531, |
|
"eval_rewards/chosen": -17.067920684814453, |
|
"eval_rewards/margins": 1.0283225774765015, |
|
"eval_rewards/rejected": -18.096242904663086, |
|
"eval_runtime": 77.5612, |
|
"eval_samples_per_second": 23.543, |
|
"eval_steps_per_second": 1.483, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.0170470962149667, |
|
"grad_norm": 35.45933151245117, |
|
"learning_rate": 8.847736625514403e-07, |
|
"logits/chosen": -0.45173630118370056, |
|
"logits/rejected": -0.4663858413696289, |
|
"logps/chosen": -160.457275390625, |
|
"logps/rejected": -179.97222900390625, |
|
"loss": 0.9484, |
|
"nll_loss": 0.30594602227211, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -16.045726776123047, |
|
"rewards/margins": 1.9514964818954468, |
|
"rewards/rejected": -17.997224807739258, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0401618029471251, |
|
"grad_norm": 27.835773468017578, |
|
"learning_rate": 8.796296296296296e-07, |
|
"logits/chosen": -0.3361106514930725, |
|
"logits/rejected": -0.3292810022830963, |
|
"logps/chosen": -149.01544189453125, |
|
"logps/rejected": -169.8839111328125, |
|
"loss": 0.7764, |
|
"nll_loss": 0.25240465998649597, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -14.901544570922852, |
|
"rewards/margins": 2.086846351623535, |
|
"rewards/rejected": -16.988391876220703, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.0632765096792833, |
|
"grad_norm": 32.76046371459961, |
|
"learning_rate": 8.744855967078189e-07, |
|
"logits/chosen": -0.4512772560119629, |
|
"logits/rejected": -0.4271810054779053, |
|
"logps/chosen": -152.64132690429688, |
|
"logps/rejected": -174.70986938476562, |
|
"loss": 0.7216, |
|
"nll_loss": 0.25062257051467896, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -15.264132499694824, |
|
"rewards/margins": 2.206853151321411, |
|
"rewards/rejected": -17.470985412597656, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.0863912164114418, |
|
"grad_norm": 46.92816162109375, |
|
"learning_rate": 8.693415637860082e-07, |
|
"logits/chosen": -0.510484516620636, |
|
"logits/rejected": -0.4754946827888489, |
|
"logps/chosen": -151.33753967285156, |
|
"logps/rejected": -175.41604614257812, |
|
"loss": 0.7542, |
|
"nll_loss": 0.2625353932380676, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -15.133753776550293, |
|
"rewards/margins": 2.4078497886657715, |
|
"rewards/rejected": -17.54160499572754, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.1095059231436002, |
|
"grad_norm": 45.01936721801758, |
|
"learning_rate": 8.641975308641974e-07, |
|
"logits/chosen": -0.5488854646682739, |
|
"logits/rejected": -0.534773588180542, |
|
"logps/chosen": -158.13259887695312, |
|
"logps/rejected": -183.81103515625, |
|
"loss": 0.7397, |
|
"nll_loss": 0.23221275210380554, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -15.813260078430176, |
|
"rewards/margins": 2.5678436756134033, |
|
"rewards/rejected": -18.381103515625, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.1326206298757584, |
|
"grad_norm": 29.731250762939453, |
|
"learning_rate": 8.590534979423868e-07, |
|
"logits/chosen": -0.4209683835506439, |
|
"logits/rejected": -0.40175366401672363, |
|
"logps/chosen": -148.5663604736328, |
|
"logps/rejected": -172.50228881835938, |
|
"loss": 0.6839, |
|
"nll_loss": 0.2801415026187897, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -14.856637954711914, |
|
"rewards/margins": 2.3935940265655518, |
|
"rewards/rejected": -17.250232696533203, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.1557353366079168, |
|
"grad_norm": 35.19107437133789, |
|
"learning_rate": 8.539094650205761e-07, |
|
"logits/chosen": -0.5119351148605347, |
|
"logits/rejected": -0.48603877425193787, |
|
"logps/chosen": -147.54727172851562, |
|
"logps/rejected": -172.57888793945312, |
|
"loss": 0.7342, |
|
"nll_loss": 0.24299657344818115, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -14.754727363586426, |
|
"rewards/margins": 2.503164529800415, |
|
"rewards/rejected": -17.257890701293945, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.178850043340075, |
|
"grad_norm": 36.37306213378906, |
|
"learning_rate": 8.487654320987654e-07, |
|
"logits/chosen": -0.5116412043571472, |
|
"logits/rejected": -0.5097488164901733, |
|
"logps/chosen": -152.76693725585938, |
|
"logps/rejected": -173.20547485351562, |
|
"loss": 0.7418, |
|
"nll_loss": 0.2616187334060669, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -15.276693344116211, |
|
"rewards/margins": 2.0438523292541504, |
|
"rewards/rejected": -17.320547103881836, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.2019647500722335, |
|
"grad_norm": 32.158714294433594, |
|
"learning_rate": 8.436213991769548e-07, |
|
"logits/chosen": -0.41989222168922424, |
|
"logits/rejected": -0.40580207109451294, |
|
"logps/chosen": -160.35772705078125, |
|
"logps/rejected": -186.72616577148438, |
|
"loss": 0.7297, |
|
"nll_loss": 0.2849249839782715, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -16.0357723236084, |
|
"rewards/margins": 2.636845111846924, |
|
"rewards/rejected": -18.672618865966797, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.2250794568043917, |
|
"grad_norm": 38.98585510253906, |
|
"learning_rate": 8.38477366255144e-07, |
|
"logits/chosen": -0.43002861738204956, |
|
"logits/rejected": -0.43659868836402893, |
|
"logps/chosen": -149.89114379882812, |
|
"logps/rejected": -177.4897918701172, |
|
"loss": 0.7001, |
|
"nll_loss": 0.25785765051841736, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -14.989115715026855, |
|
"rewards/margins": 2.7598659992218018, |
|
"rewards/rejected": -17.748981475830078, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.24819416353655, |
|
"grad_norm": 33.50174331665039, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": -0.5792837142944336, |
|
"logits/rejected": -0.5748234987258911, |
|
"logps/chosen": -154.1841278076172, |
|
"logps/rejected": -175.39093017578125, |
|
"loss": 0.77, |
|
"nll_loss": 0.28076162934303284, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -15.418413162231445, |
|
"rewards/margins": 2.120678424835205, |
|
"rewards/rejected": -17.539093017578125, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.2713088702687085, |
|
"grad_norm": 35.51890182495117, |
|
"learning_rate": 8.281893004115226e-07, |
|
"logits/chosen": -0.6797876358032227, |
|
"logits/rejected": -0.6701671481132507, |
|
"logps/chosen": -164.1734619140625, |
|
"logps/rejected": -189.96820068359375, |
|
"loss": 0.6452, |
|
"nll_loss": 0.2875816822052002, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -16.417346954345703, |
|
"rewards/margins": 2.579475164413452, |
|
"rewards/rejected": -18.9968204498291, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.2944235770008667, |
|
"grad_norm": 36.58209228515625, |
|
"learning_rate": 8.23045267489712e-07, |
|
"logits/chosen": -0.6092251539230347, |
|
"logits/rejected": -0.5988754630088806, |
|
"logps/chosen": -150.59115600585938, |
|
"logps/rejected": -178.7034149169922, |
|
"loss": 0.7005, |
|
"nll_loss": 0.26352283358573914, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.059117317199707, |
|
"rewards/margins": 2.811225652694702, |
|
"rewards/rejected": -17.870342254638672, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.3175382837330252, |
|
"grad_norm": 38.884254455566406, |
|
"learning_rate": 8.179012345679011e-07, |
|
"logits/chosen": -0.5773380994796753, |
|
"logits/rejected": -0.5545040369033813, |
|
"logps/chosen": -159.92147827148438, |
|
"logps/rejected": -186.68997192382812, |
|
"loss": 0.7401, |
|
"nll_loss": 0.26087266206741333, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -15.992147445678711, |
|
"rewards/margins": 2.6768481731414795, |
|
"rewards/rejected": -18.668996810913086, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.3406529904651836, |
|
"grad_norm": 43.70725631713867, |
|
"learning_rate": 8.127572016460905e-07, |
|
"logits/chosen": -0.5863763093948364, |
|
"logits/rejected": -0.5670869946479797, |
|
"logps/chosen": -157.2144012451172, |
|
"logps/rejected": -184.8651123046875, |
|
"loss": 0.72, |
|
"nll_loss": 0.2669151723384857, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -15.721441268920898, |
|
"rewards/margins": 2.7650701999664307, |
|
"rewards/rejected": -18.486513137817383, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.3637676971973418, |
|
"grad_norm": 39.63798904418945, |
|
"learning_rate": 8.076131687242798e-07, |
|
"logits/chosen": -0.529544472694397, |
|
"logits/rejected": -0.5398887395858765, |
|
"logps/chosen": -148.3323974609375, |
|
"logps/rejected": -174.19955444335938, |
|
"loss": 0.6607, |
|
"nll_loss": 0.24997957050800323, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -14.833239555358887, |
|
"rewards/margins": 2.586716890335083, |
|
"rewards/rejected": -17.419958114624023, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.3868824039295, |
|
"grad_norm": 36.14802169799805, |
|
"learning_rate": 8.024691358024691e-07, |
|
"logits/chosen": -0.441204309463501, |
|
"logits/rejected": -0.4048687815666199, |
|
"logps/chosen": -156.30531311035156, |
|
"logps/rejected": -183.83956909179688, |
|
"loss": 0.733, |
|
"nll_loss": 0.2541951537132263, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -15.630529403686523, |
|
"rewards/margins": 2.753427743911743, |
|
"rewards/rejected": -18.38395881652832, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.4099971106616584, |
|
"grad_norm": 40.05307388305664, |
|
"learning_rate": 7.973251028806583e-07, |
|
"logits/chosen": -0.41722431778907776, |
|
"logits/rejected": -0.4100796580314636, |
|
"logps/chosen": -151.99453735351562, |
|
"logps/rejected": -175.85577392578125, |
|
"loss": 0.7682, |
|
"nll_loss": 0.25730782747268677, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -15.199453353881836, |
|
"rewards/margins": 2.3861212730407715, |
|
"rewards/rejected": -17.585575103759766, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.4331118173938169, |
|
"grad_norm": 24.526100158691406, |
|
"learning_rate": 7.921810699588477e-07, |
|
"logits/chosen": -0.5749002695083618, |
|
"logits/rejected": -0.5751099586486816, |
|
"logps/chosen": -157.60520935058594, |
|
"logps/rejected": -185.5096893310547, |
|
"loss": 0.5956, |
|
"nll_loss": 0.24547366797924042, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -15.760522842407227, |
|
"rewards/margins": 2.790447473526001, |
|
"rewards/rejected": -18.55097007751465, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.456226524125975, |
|
"grad_norm": 36.09085464477539, |
|
"learning_rate": 7.870370370370371e-07, |
|
"logits/chosen": -0.5282450914382935, |
|
"logits/rejected": -0.5175204873085022, |
|
"logps/chosen": -146.50106811523438, |
|
"logps/rejected": -173.6673126220703, |
|
"loss": 0.6405, |
|
"nll_loss": 0.24812671542167664, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -14.650106430053711, |
|
"rewards/margins": 2.7166221141815186, |
|
"rewards/rejected": -17.366729736328125, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.4793412308581335, |
|
"grad_norm": 41.768348693847656, |
|
"learning_rate": 7.818930041152262e-07, |
|
"logits/chosen": -0.45312589406967163, |
|
"logits/rejected": -0.4504320025444031, |
|
"logps/chosen": -142.28053283691406, |
|
"logps/rejected": -170.82095336914062, |
|
"loss": 0.6841, |
|
"nll_loss": 0.23785972595214844, |
|
"rewards/accuracies": 0.871874988079071, |
|
"rewards/chosen": -14.228052139282227, |
|
"rewards/margins": 2.8540425300598145, |
|
"rewards/rejected": -17.082096099853516, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.502455937590292, |
|
"grad_norm": 34.300228118896484, |
|
"learning_rate": 7.767489711934156e-07, |
|
"logits/chosen": -0.5092964172363281, |
|
"logits/rejected": -0.5271193981170654, |
|
"logps/chosen": -155.85000610351562, |
|
"logps/rejected": -186.28884887695312, |
|
"loss": 0.6303, |
|
"nll_loss": 0.24494795501232147, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -15.584999084472656, |
|
"rewards/margins": 3.0438854694366455, |
|
"rewards/rejected": -18.62888526916504, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.5255706443224502, |
|
"grad_norm": 33.022884368896484, |
|
"learning_rate": 7.716049382716049e-07, |
|
"logits/chosen": -0.5350406169891357, |
|
"logits/rejected": -0.5363395810127258, |
|
"logps/chosen": -147.15267944335938, |
|
"logps/rejected": -174.66571044921875, |
|
"loss": 0.7096, |
|
"nll_loss": 0.24733343720436096, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -14.7152681350708, |
|
"rewards/margins": 2.751302480697632, |
|
"rewards/rejected": -17.466571807861328, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.5486853510546084, |
|
"grad_norm": 53.42652130126953, |
|
"learning_rate": 7.664609053497943e-07, |
|
"logits/chosen": -0.6187707781791687, |
|
"logits/rejected": -0.6232476234436035, |
|
"logps/chosen": -158.1448211669922, |
|
"logps/rejected": -187.09014892578125, |
|
"loss": 0.6173, |
|
"nll_loss": 0.22900207340717316, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -15.814483642578125, |
|
"rewards/margins": 2.8945329189300537, |
|
"rewards/rejected": -18.709014892578125, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.5718000577867668, |
|
"grad_norm": 40.11577606201172, |
|
"learning_rate": 7.613168724279834e-07, |
|
"logits/chosen": -0.5888317227363586, |
|
"logits/rejected": -0.600538432598114, |
|
"logps/chosen": -149.23678588867188, |
|
"logps/rejected": -175.3176727294922, |
|
"loss": 0.7099, |
|
"nll_loss": 0.21695959568023682, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -14.923675537109375, |
|
"rewards/margins": 2.6080896854400635, |
|
"rewards/rejected": -17.531766891479492, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.5949147645189252, |
|
"grad_norm": 26.918350219726562, |
|
"learning_rate": 7.561728395061728e-07, |
|
"logits/chosen": -0.6150851845741272, |
|
"logits/rejected": -0.6231178045272827, |
|
"logps/chosen": -164.5893096923828, |
|
"logps/rejected": -196.3010711669922, |
|
"loss": 0.6595, |
|
"nll_loss": 0.23331816494464874, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -16.45893096923828, |
|
"rewards/margins": 3.171175479888916, |
|
"rewards/rejected": -19.630107879638672, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.6180294712510834, |
|
"grad_norm": 33.39554214477539, |
|
"learning_rate": 7.510288065843621e-07, |
|
"logits/chosen": -0.5018739700317383, |
|
"logits/rejected": -0.4825282692909241, |
|
"logps/chosen": -149.8149871826172, |
|
"logps/rejected": -177.98583984375, |
|
"loss": 0.6348, |
|
"nll_loss": 0.2212187498807907, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -14.981498718261719, |
|
"rewards/margins": 2.817084789276123, |
|
"rewards/rejected": -17.798583984375, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.6411441779832419, |
|
"grad_norm": 29.109973907470703, |
|
"learning_rate": 7.458847736625515e-07, |
|
"logits/chosen": -0.47257423400878906, |
|
"logits/rejected": -0.4691304564476013, |
|
"logps/chosen": -138.67837524414062, |
|
"logps/rejected": -164.54855346679688, |
|
"loss": 0.6175, |
|
"nll_loss": 0.1982104480266571, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -13.867838859558105, |
|
"rewards/margins": 2.5870203971862793, |
|
"rewards/rejected": -16.454858779907227, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.6642588847154003, |
|
"grad_norm": 38.35542678833008, |
|
"learning_rate": 7.407407407407406e-07, |
|
"logits/chosen": -0.6042996644973755, |
|
"logits/rejected": -0.6067830324172974, |
|
"logps/chosen": -144.49464416503906, |
|
"logps/rejected": -169.24853515625, |
|
"loss": 0.5938, |
|
"nll_loss": 0.23023180663585663, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -14.449464797973633, |
|
"rewards/margins": 2.4753904342651367, |
|
"rewards/rejected": -16.924854278564453, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.6873735914475585, |
|
"grad_norm": 32.6804084777832, |
|
"learning_rate": 7.3559670781893e-07, |
|
"logits/chosen": -0.6318911910057068, |
|
"logits/rejected": -0.623616099357605, |
|
"logps/chosen": -151.0692596435547, |
|
"logps/rejected": -178.22621154785156, |
|
"loss": 0.6287, |
|
"nll_loss": 0.20305195450782776, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -15.106924057006836, |
|
"rewards/margins": 2.7156949043273926, |
|
"rewards/rejected": -17.822620391845703, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.7104882981797167, |
|
"grad_norm": 33.47980499267578, |
|
"learning_rate": 7.304526748971193e-07, |
|
"logits/chosen": -0.5788182020187378, |
|
"logits/rejected": -0.5648819208145142, |
|
"logps/chosen": -162.39569091796875, |
|
"logps/rejected": -193.59268188476562, |
|
"loss": 0.5942, |
|
"nll_loss": 0.21426251530647278, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.23956871032715, |
|
"rewards/margins": 3.1196982860565186, |
|
"rewards/rejected": -19.359268188476562, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.7336030049118751, |
|
"grad_norm": 37.14680099487305, |
|
"learning_rate": 7.253086419753086e-07, |
|
"logits/chosen": -0.5623105764389038, |
|
"logits/rejected": -0.5381472110748291, |
|
"logps/chosen": -139.84085083007812, |
|
"logps/rejected": -167.0809326171875, |
|
"loss": 0.598, |
|
"nll_loss": 0.18970206379890442, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -13.984085083007812, |
|
"rewards/margins": 2.7240078449249268, |
|
"rewards/rejected": -16.708093643188477, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.7567177116440336, |
|
"grad_norm": 35.07746124267578, |
|
"learning_rate": 7.201646090534979e-07, |
|
"logits/chosen": -0.5330817103385925, |
|
"logits/rejected": -0.540014386177063, |
|
"logps/chosen": -153.24600219726562, |
|
"logps/rejected": -185.0384063720703, |
|
"loss": 0.6322, |
|
"nll_loss": 0.198031947016716, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -15.324602127075195, |
|
"rewards/margins": 3.1792402267456055, |
|
"rewards/rejected": -18.503841400146484, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.7798324183761918, |
|
"grad_norm": 34.26885986328125, |
|
"learning_rate": 7.150205761316872e-07, |
|
"logits/chosen": -0.6087044477462769, |
|
"logits/rejected": -0.599485456943512, |
|
"logps/chosen": -145.72488403320312, |
|
"logps/rejected": -171.98873901367188, |
|
"loss": 0.6407, |
|
"nll_loss": 0.18888258934020996, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -14.572488784790039, |
|
"rewards/margins": 2.626385450363159, |
|
"rewards/rejected": -17.19887351989746, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.8029471251083502, |
|
"grad_norm": 33.3639030456543, |
|
"learning_rate": 7.098765432098766e-07, |
|
"logits/chosen": -0.6275098323822021, |
|
"logits/rejected": -0.6126091480255127, |
|
"logps/chosen": -149.48826599121094, |
|
"logps/rejected": -179.92613220214844, |
|
"loss": 0.6014, |
|
"nll_loss": 0.2067473828792572, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -14.948827743530273, |
|
"rewards/margins": 3.0437865257263184, |
|
"rewards/rejected": -17.99261474609375, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.8260618318405086, |
|
"grad_norm": 34.436153411865234, |
|
"learning_rate": 7.047325102880658e-07, |
|
"logits/chosen": -0.6325902938842773, |
|
"logits/rejected": -0.6320141553878784, |
|
"logps/chosen": -149.53546142578125, |
|
"logps/rejected": -177.4294891357422, |
|
"loss": 0.5987, |
|
"nll_loss": 0.21218529343605042, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -14.953544616699219, |
|
"rewards/margins": 2.7894036769866943, |
|
"rewards/rejected": -17.742948532104492, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.8491765385726668, |
|
"grad_norm": 41.68962097167969, |
|
"learning_rate": 6.995884773662551e-07, |
|
"logits/chosen": -0.5112544298171997, |
|
"logits/rejected": -0.5018970370292664, |
|
"logps/chosen": -139.74612426757812, |
|
"logps/rejected": -170.65365600585938, |
|
"loss": 0.5737, |
|
"nll_loss": 0.18416205048561096, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -13.97461223602295, |
|
"rewards/margins": 3.0907552242279053, |
|
"rewards/rejected": -17.065366744995117, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.872291245304825, |
|
"grad_norm": 34.62812423706055, |
|
"learning_rate": 6.944444444444444e-07, |
|
"logits/chosen": -0.5771014094352722, |
|
"logits/rejected": -0.5736783146858215, |
|
"logps/chosen": -149.42527770996094, |
|
"logps/rejected": -179.3314666748047, |
|
"loss": 0.6492, |
|
"nll_loss": 0.19857726991176605, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -14.942527770996094, |
|
"rewards/margins": 2.990619421005249, |
|
"rewards/rejected": -17.933147430419922, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.8954059520369835, |
|
"grad_norm": 27.703113555908203, |
|
"learning_rate": 6.893004115226337e-07, |
|
"logits/chosen": -0.6073204278945923, |
|
"logits/rejected": -0.6056413054466248, |
|
"logps/chosen": -151.15286254882812, |
|
"logps/rejected": -184.02236938476562, |
|
"loss": 0.5758, |
|
"nll_loss": 0.20334260165691376, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -15.115285873413086, |
|
"rewards/margins": 3.2869529724121094, |
|
"rewards/rejected": -18.402238845825195, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.918520658769142, |
|
"grad_norm": 38.63829040527344, |
|
"learning_rate": 6.84156378600823e-07, |
|
"logits/chosen": -0.564698338508606, |
|
"logits/rejected": -0.5553814172744751, |
|
"logps/chosen": -141.9647216796875, |
|
"logps/rejected": -167.49462890625, |
|
"loss": 0.604, |
|
"nll_loss": 0.19638094305992126, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -14.19647216796875, |
|
"rewards/margins": 2.552992343902588, |
|
"rewards/rejected": -16.74946403503418, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.9416353655013001, |
|
"grad_norm": 37.33395767211914, |
|
"learning_rate": 6.790123456790123e-07, |
|
"logits/chosen": -0.6794390678405762, |
|
"logits/rejected": -0.6817184686660767, |
|
"logps/chosen": -150.2278289794922, |
|
"logps/rejected": -178.04473876953125, |
|
"loss": 0.6078, |
|
"nll_loss": 0.18291929364204407, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -15.022783279418945, |
|
"rewards/margins": 2.781691074371338, |
|
"rewards/rejected": -17.804473876953125, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.9647500722334585, |
|
"grad_norm": 33.96713638305664, |
|
"learning_rate": 6.738683127572016e-07, |
|
"logits/chosen": -0.716331422328949, |
|
"logits/rejected": -0.7188450694084167, |
|
"logps/chosen": -147.86050415039062, |
|
"logps/rejected": -174.76864624023438, |
|
"loss": 0.5987, |
|
"nll_loss": 0.19556212425231934, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -14.786050796508789, |
|
"rewards/margins": 2.6908116340637207, |
|
"rewards/rejected": -17.476863861083984, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.987864778965617, |
|
"grad_norm": 35.31864929199219, |
|
"learning_rate": 6.687242798353909e-07, |
|
"logits/chosen": -0.6668294668197632, |
|
"logits/rejected": -0.6580954790115356, |
|
"logps/chosen": -149.87158203125, |
|
"logps/rejected": -180.49496459960938, |
|
"loss": 0.5472, |
|
"nll_loss": 0.1864423006772995, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -14.987157821655273, |
|
"rewards/margins": 3.06233811378479, |
|
"rewards/rejected": -18.049495697021484, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.999422132331696, |
|
"eval_logits/chosen": -0.5687969923019409, |
|
"eval_logits/rejected": -0.5434355139732361, |
|
"eval_logps/chosen": -162.90855407714844, |
|
"eval_logps/rejected": -175.85232543945312, |
|
"eval_loss": 1.2972584962844849, |
|
"eval_nll_loss": 0.2148308902978897, |
|
"eval_rewards/accuracies": 0.658695638179779, |
|
"eval_rewards/chosen": -16.290855407714844, |
|
"eval_rewards/margins": 1.2943781614303589, |
|
"eval_rewards/rejected": -17.585235595703125, |
|
"eval_runtime": 77.3685, |
|
"eval_samples_per_second": 23.601, |
|
"eval_steps_per_second": 1.486, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.010979485697775, |
|
"grad_norm": 11.489439964294434, |
|
"learning_rate": 6.635802469135802e-07, |
|
"logits/chosen": -0.6154376864433289, |
|
"logits/rejected": -0.581082820892334, |
|
"logps/chosen": -146.31674194335938, |
|
"logps/rejected": -183.1867218017578, |
|
"loss": 0.4233, |
|
"nll_loss": 0.17745935916900635, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -14.631675720214844, |
|
"rewards/margins": 3.6869969367980957, |
|
"rewards/rejected": -18.318674087524414, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.0340941924299334, |
|
"grad_norm": 8.267936706542969, |
|
"learning_rate": 6.584362139917695e-07, |
|
"logits/chosen": -0.5296713709831238, |
|
"logits/rejected": -0.5492919683456421, |
|
"logps/chosen": -135.2528839111328, |
|
"logps/rejected": -184.4834747314453, |
|
"loss": 0.2554, |
|
"nll_loss": 0.17692770063877106, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -13.525288581848145, |
|
"rewards/margins": 4.923060417175293, |
|
"rewards/rejected": -18.448348999023438, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.057208899162092, |
|
"grad_norm": 17.753084182739258, |
|
"learning_rate": 6.532921810699589e-07, |
|
"logits/chosen": -0.4458081126213074, |
|
"logits/rejected": -0.45663532614707947, |
|
"logps/chosen": -132.5780792236328, |
|
"logps/rejected": -181.31776428222656, |
|
"loss": 0.2358, |
|
"nll_loss": 0.1446482390165329, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -13.257807731628418, |
|
"rewards/margins": 4.87396764755249, |
|
"rewards/rejected": -18.13177490234375, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.0803236058942502, |
|
"grad_norm": 9.170333862304688, |
|
"learning_rate": 6.481481481481481e-07, |
|
"logits/chosen": -0.4914008677005768, |
|
"logits/rejected": -0.4894467890262604, |
|
"logps/chosen": -139.57400512695312, |
|
"logps/rejected": -189.27447509765625, |
|
"loss": 0.2373, |
|
"nll_loss": 0.1590987890958786, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -13.95740032196045, |
|
"rewards/margins": 4.970045566558838, |
|
"rewards/rejected": -18.927448272705078, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.1034383126264085, |
|
"grad_norm": 16.0671329498291, |
|
"learning_rate": 6.430041152263375e-07, |
|
"logits/chosen": -0.29768380522727966, |
|
"logits/rejected": -0.3132530450820923, |
|
"logps/chosen": -133.86160278320312, |
|
"logps/rejected": -184.111083984375, |
|
"loss": 0.2528, |
|
"nll_loss": 0.1800731122493744, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -13.386159896850586, |
|
"rewards/margins": 5.024949073791504, |
|
"rewards/rejected": -18.411109924316406, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.1265530193585667, |
|
"grad_norm": 11.169416427612305, |
|
"learning_rate": 6.378600823045267e-07, |
|
"logits/chosen": -0.25930145382881165, |
|
"logits/rejected": -0.2452802211046219, |
|
"logps/chosen": -138.69859313964844, |
|
"logps/rejected": -188.9458465576172, |
|
"loss": 0.2369, |
|
"nll_loss": 0.15493367612361908, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -13.86985969543457, |
|
"rewards/margins": 5.024728298187256, |
|
"rewards/rejected": -18.894588470458984, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.1496677260907253, |
|
"grad_norm": 20.787609100341797, |
|
"learning_rate": 6.32716049382716e-07, |
|
"logits/chosen": -0.4232078194618225, |
|
"logits/rejected": -0.4213971197605133, |
|
"logps/chosen": -133.97911071777344, |
|
"logps/rejected": -183.1697540283203, |
|
"loss": 0.2526, |
|
"nll_loss": 0.17497238516807556, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -13.397911071777344, |
|
"rewards/margins": 4.919064998626709, |
|
"rewards/rejected": -18.31697654724121, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.1727824328228835, |
|
"grad_norm": 16.55530548095703, |
|
"learning_rate": 6.275720164609053e-07, |
|
"logits/chosen": -0.5225564241409302, |
|
"logits/rejected": -0.5253915190696716, |
|
"logps/chosen": -147.48667907714844, |
|
"logps/rejected": -200.44107055664062, |
|
"loss": 0.2383, |
|
"nll_loss": 0.16094490885734558, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -14.748669624328613, |
|
"rewards/margins": 5.295438766479492, |
|
"rewards/rejected": -20.044105529785156, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.1958971395550417, |
|
"grad_norm": 25.473421096801758, |
|
"learning_rate": 6.224279835390947e-07, |
|
"logits/chosen": -0.6133296489715576, |
|
"logits/rejected": -0.6065386533737183, |
|
"logps/chosen": -147.1841583251953, |
|
"logps/rejected": -198.28070068359375, |
|
"loss": 0.2342, |
|
"nll_loss": 0.17038078606128693, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -14.718416213989258, |
|
"rewards/margins": 5.109654903411865, |
|
"rewards/rejected": -19.82806968688965, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.2190118462872004, |
|
"grad_norm": 28.808799743652344, |
|
"learning_rate": 6.172839506172839e-07, |
|
"logits/chosen": -0.566586971282959, |
|
"logits/rejected": -0.5580301284790039, |
|
"logps/chosen": -141.78317260742188, |
|
"logps/rejected": -189.71841430664062, |
|
"loss": 0.2432, |
|
"nll_loss": 0.16720861196517944, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -14.178317070007324, |
|
"rewards/margins": 4.793524265289307, |
|
"rewards/rejected": -18.97184181213379, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.2421265530193586, |
|
"grad_norm": 15.181388854980469, |
|
"learning_rate": 6.121399176954732e-07, |
|
"logits/chosen": -0.5153671503067017, |
|
"logits/rejected": -0.49234214425086975, |
|
"logps/chosen": -142.28048706054688, |
|
"logps/rejected": -192.72178649902344, |
|
"loss": 0.2565, |
|
"nll_loss": 0.173838809132576, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -14.228050231933594, |
|
"rewards/margins": 5.044127464294434, |
|
"rewards/rejected": -19.272180557250977, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.265241259751517, |
|
"grad_norm": 10.162031173706055, |
|
"learning_rate": 6.069958847736625e-07, |
|
"logits/chosen": -0.3831091523170471, |
|
"logits/rejected": -0.3817598521709442, |
|
"logps/chosen": -142.67413330078125, |
|
"logps/rejected": -191.6265106201172, |
|
"loss": 0.2239, |
|
"nll_loss": 0.15289117395877838, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -14.267412185668945, |
|
"rewards/margins": 4.89523983001709, |
|
"rewards/rejected": -19.16265296936035, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.2883559664836755, |
|
"grad_norm": 11.667806625366211, |
|
"learning_rate": 6.018518518518519e-07, |
|
"logits/chosen": -0.37663665413856506, |
|
"logits/rejected": -0.36168596148490906, |
|
"logps/chosen": -134.7302703857422, |
|
"logps/rejected": -181.87161254882812, |
|
"loss": 0.2179, |
|
"nll_loss": 0.14360648393630981, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -13.473027229309082, |
|
"rewards/margins": 4.714133262634277, |
|
"rewards/rejected": -18.18716049194336, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.3114706732158337, |
|
"grad_norm": 13.98948860168457, |
|
"learning_rate": 5.96707818930041e-07, |
|
"logits/chosen": -0.35517022013664246, |
|
"logits/rejected": -0.3607296645641327, |
|
"logps/chosen": -143.46397399902344, |
|
"logps/rejected": -196.64694213867188, |
|
"loss": 0.2393, |
|
"nll_loss": 0.16406962275505066, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -14.346399307250977, |
|
"rewards/margins": 5.318297863006592, |
|
"rewards/rejected": -19.664695739746094, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.334585379947992, |
|
"grad_norm": 13.17771053314209, |
|
"learning_rate": 5.915637860082304e-07, |
|
"logits/chosen": -0.3597460389137268, |
|
"logits/rejected": -0.36051079630851746, |
|
"logps/chosen": -138.61643981933594, |
|
"logps/rejected": -192.05581665039062, |
|
"loss": 0.2306, |
|
"nll_loss": 0.16202880442142487, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -13.86164379119873, |
|
"rewards/margins": 5.343939304351807, |
|
"rewards/rejected": -19.205581665039062, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.35770008668015, |
|
"grad_norm": 13.457245826721191, |
|
"learning_rate": 5.864197530864198e-07, |
|
"logits/chosen": -0.4916199743747711, |
|
"logits/rejected": -0.5020965933799744, |
|
"logps/chosen": -147.89541625976562, |
|
"logps/rejected": -199.31967163085938, |
|
"loss": 0.2374, |
|
"nll_loss": 0.16406235098838806, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -14.789543151855469, |
|
"rewards/margins": 5.142425060272217, |
|
"rewards/rejected": -19.931964874267578, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.3808147934123087, |
|
"grad_norm": 13.335782051086426, |
|
"learning_rate": 5.812757201646091e-07, |
|
"logits/chosen": -0.39383864402770996, |
|
"logits/rejected": -0.40474215149879456, |
|
"logps/chosen": -133.04669189453125, |
|
"logps/rejected": -180.41250610351562, |
|
"loss": 0.242, |
|
"nll_loss": 0.1537107676267624, |
|
"rewards/accuracies": 0.965624988079071, |
|
"rewards/chosen": -13.304669380187988, |
|
"rewards/margins": 4.736581802368164, |
|
"rewards/rejected": -18.041250228881836, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.403929500144467, |
|
"grad_norm": 6.159650802612305, |
|
"learning_rate": 5.761316872427983e-07, |
|
"logits/chosen": -0.6221314668655396, |
|
"logits/rejected": -0.5792278051376343, |
|
"logps/chosen": -147.80052185058594, |
|
"logps/rejected": -199.4378662109375, |
|
"loss": 0.2262, |
|
"nll_loss": 0.151776522397995, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -14.780054092407227, |
|
"rewards/margins": 5.163733005523682, |
|
"rewards/rejected": -19.943782806396484, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.427044206876625, |
|
"grad_norm": 12.739320755004883, |
|
"learning_rate": 5.709876543209876e-07, |
|
"logits/chosen": -0.5569005012512207, |
|
"logits/rejected": -0.5471926927566528, |
|
"logps/chosen": -150.28656005859375, |
|
"logps/rejected": -203.32809448242188, |
|
"loss": 0.2392, |
|
"nll_loss": 0.15395130217075348, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -15.028657913208008, |
|
"rewards/margins": 5.304154872894287, |
|
"rewards/rejected": -20.332813262939453, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.4501589136087834, |
|
"grad_norm": 10.99962329864502, |
|
"learning_rate": 5.65843621399177e-07, |
|
"logits/chosen": -0.6100250482559204, |
|
"logits/rejected": -0.6070842146873474, |
|
"logps/chosen": -144.28292846679688, |
|
"logps/rejected": -192.26254272460938, |
|
"loss": 0.2358, |
|
"nll_loss": 0.16113388538360596, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -14.42829418182373, |
|
"rewards/margins": 4.797961235046387, |
|
"rewards/rejected": -19.226253509521484, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.473273620340942, |
|
"grad_norm": 14.381885528564453, |
|
"learning_rate": 5.606995884773662e-07, |
|
"logits/chosen": -0.4229808747768402, |
|
"logits/rejected": -0.4043405055999756, |
|
"logps/chosen": -135.27508544921875, |
|
"logps/rejected": -184.1940460205078, |
|
"loss": 0.2726, |
|
"nll_loss": 0.16423283517360687, |
|
"rewards/accuracies": 0.965624988079071, |
|
"rewards/chosen": -13.527506828308105, |
|
"rewards/margins": 4.8918962478637695, |
|
"rewards/rejected": -18.419404983520508, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.4963883270731, |
|
"grad_norm": 11.742487907409668, |
|
"learning_rate": 5.555555555555555e-07, |
|
"logits/chosen": -0.4398534297943115, |
|
"logits/rejected": -0.43547695875167847, |
|
"logps/chosen": -134.5975341796875, |
|
"logps/rejected": -182.41848754882812, |
|
"loss": 0.2452, |
|
"nll_loss": 0.16178709268569946, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -13.459753036499023, |
|
"rewards/margins": 4.782095909118652, |
|
"rewards/rejected": -18.24184799194336, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.5195030338052584, |
|
"grad_norm": 12.080589294433594, |
|
"learning_rate": 5.504115226337448e-07, |
|
"logits/chosen": -0.45496922731399536, |
|
"logits/rejected": -0.45996856689453125, |
|
"logps/chosen": -132.09829711914062, |
|
"logps/rejected": -180.12393188476562, |
|
"loss": 0.2284, |
|
"nll_loss": 0.1582447737455368, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -13.209829330444336, |
|
"rewards/margins": 4.80256462097168, |
|
"rewards/rejected": -18.012393951416016, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.542617740537417, |
|
"grad_norm": 24.479488372802734, |
|
"learning_rate": 5.452674897119342e-07, |
|
"logits/chosen": -0.36444956064224243, |
|
"logits/rejected": -0.3619704842567444, |
|
"logps/chosen": -141.44894409179688, |
|
"logps/rejected": -194.81773376464844, |
|
"loss": 0.2364, |
|
"nll_loss": 0.17286133766174316, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -14.14489459991455, |
|
"rewards/margins": 5.336878776550293, |
|
"rewards/rejected": -19.48177146911621, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.5657324472695753, |
|
"grad_norm": 12.051857948303223, |
|
"learning_rate": 5.401234567901234e-07, |
|
"logits/chosen": -0.45673027634620667, |
|
"logits/rejected": -0.4733441472053528, |
|
"logps/chosen": -136.0276336669922, |
|
"logps/rejected": -188.5570068359375, |
|
"loss": 0.2305, |
|
"nll_loss": 0.1618407666683197, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -13.602763175964355, |
|
"rewards/margins": 5.252939224243164, |
|
"rewards/rejected": -18.855701446533203, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.5888471540017335, |
|
"grad_norm": 10.467662811279297, |
|
"learning_rate": 5.349794238683127e-07, |
|
"logits/chosen": -0.4598791003227234, |
|
"logits/rejected": -0.4583801329135895, |
|
"logps/chosen": -137.6591033935547, |
|
"logps/rejected": -189.61471557617188, |
|
"loss": 0.2583, |
|
"nll_loss": 0.16606256365776062, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -13.765910148620605, |
|
"rewards/margins": 5.195560932159424, |
|
"rewards/rejected": -18.961471557617188, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.611961860733892, |
|
"grad_norm": 17.334087371826172, |
|
"learning_rate": 5.29835390946502e-07, |
|
"logits/chosen": -0.45638832449913025, |
|
"logits/rejected": -0.4596933424472809, |
|
"logps/chosen": -134.4242401123047, |
|
"logps/rejected": -185.4617156982422, |
|
"loss": 0.231, |
|
"nll_loss": 0.15201494097709656, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -13.442425727844238, |
|
"rewards/margins": 5.1037468910217285, |
|
"rewards/rejected": -18.546171188354492, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.6350765674660503, |
|
"grad_norm": 9.82776927947998, |
|
"learning_rate": 5.246913580246914e-07, |
|
"logits/chosen": -0.4979328513145447, |
|
"logits/rejected": -0.4829026758670807, |
|
"logps/chosen": -142.7810516357422, |
|
"logps/rejected": -195.93936157226562, |
|
"loss": 0.2197, |
|
"nll_loss": 0.14758186042308807, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -14.278106689453125, |
|
"rewards/margins": 5.315831184387207, |
|
"rewards/rejected": -19.593936920166016, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.6581912741982086, |
|
"grad_norm": 21.076847076416016, |
|
"learning_rate": 5.195473251028807e-07, |
|
"logits/chosen": -0.4889853894710541, |
|
"logits/rejected": -0.4779161810874939, |
|
"logps/chosen": -147.04873657226562, |
|
"logps/rejected": -195.0872802734375, |
|
"loss": 0.2223, |
|
"nll_loss": 0.155166894197464, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -14.704874038696289, |
|
"rewards/margins": 4.803854942321777, |
|
"rewards/rejected": -19.50872802734375, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.681305980930367, |
|
"grad_norm": 19.175827026367188, |
|
"learning_rate": 5.144032921810699e-07, |
|
"logits/chosen": -0.4997631013393402, |
|
"logits/rejected": -0.4868396818637848, |
|
"logps/chosen": -132.46238708496094, |
|
"logps/rejected": -182.9662322998047, |
|
"loss": 0.2392, |
|
"nll_loss": 0.15937396883964539, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -13.246240615844727, |
|
"rewards/margins": 5.050384521484375, |
|
"rewards/rejected": -18.29662322998047, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.7044206876625254, |
|
"grad_norm": 13.847294807434082, |
|
"learning_rate": 5.092592592592593e-07, |
|
"logits/chosen": -0.42537322640419006, |
|
"logits/rejected": -0.40758857131004333, |
|
"logps/chosen": -132.64317321777344, |
|
"logps/rejected": -185.53622436523438, |
|
"loss": 0.2315, |
|
"nll_loss": 0.1639558970928192, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -13.264317512512207, |
|
"rewards/margins": 5.289304733276367, |
|
"rewards/rejected": -18.55362319946289, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.7275353943946836, |
|
"grad_norm": 17.215343475341797, |
|
"learning_rate": 5.041152263374485e-07, |
|
"logits/chosen": -0.4605620503425598, |
|
"logits/rejected": -0.47386521100997925, |
|
"logps/chosen": -142.31393432617188, |
|
"logps/rejected": -201.610107421875, |
|
"loss": 0.2355, |
|
"nll_loss": 0.1665884107351303, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -14.231393814086914, |
|
"rewards/margins": 5.929617881774902, |
|
"rewards/rejected": -20.161012649536133, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.750650101126842, |
|
"grad_norm": 11.339929580688477, |
|
"learning_rate": 4.989711934156378e-07, |
|
"logits/chosen": -0.5646448731422424, |
|
"logits/rejected": -0.5591720342636108, |
|
"logps/chosen": -144.7230987548828, |
|
"logps/rejected": -198.4960479736328, |
|
"loss": 0.2296, |
|
"nll_loss": 0.17730608582496643, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -14.472311019897461, |
|
"rewards/margins": 5.377293109893799, |
|
"rewards/rejected": -19.8496036529541, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.773764807859, |
|
"grad_norm": 10.567920684814453, |
|
"learning_rate": 4.938271604938271e-07, |
|
"logits/chosen": -0.5628112554550171, |
|
"logits/rejected": -0.5627862215042114, |
|
"logps/chosen": -134.7103271484375, |
|
"logps/rejected": -181.05490112304688, |
|
"loss": 0.2401, |
|
"nll_loss": 0.16600725054740906, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -13.471035957336426, |
|
"rewards/margins": 4.634454250335693, |
|
"rewards/rejected": -18.105487823486328, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.7968795145911587, |
|
"grad_norm": 11.1284818649292, |
|
"learning_rate": 4.886831275720165e-07, |
|
"logits/chosen": -0.5333854556083679, |
|
"logits/rejected": -0.5228737592697144, |
|
"logps/chosen": -129.60784912109375, |
|
"logps/rejected": -179.29922485351562, |
|
"loss": 0.2237, |
|
"nll_loss": 0.15326835215091705, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -12.960784912109375, |
|
"rewards/margins": 4.969139099121094, |
|
"rewards/rejected": -17.929922103881836, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.819994221323317, |
|
"grad_norm": 10.869100570678711, |
|
"learning_rate": 4.835390946502057e-07, |
|
"logits/chosen": -0.4685629904270172, |
|
"logits/rejected": -0.4411331117153168, |
|
"logps/chosen": -137.3936767578125, |
|
"logps/rejected": -190.50975036621094, |
|
"loss": 0.2258, |
|
"nll_loss": 0.16754138469696045, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -13.739367485046387, |
|
"rewards/margins": 5.311608791351318, |
|
"rewards/rejected": -19.050975799560547, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.843108928055475, |
|
"grad_norm": 11.171156883239746, |
|
"learning_rate": 4.783950617283951e-07, |
|
"logits/chosen": -0.39593321084976196, |
|
"logits/rejected": -0.3724592328071594, |
|
"logps/chosen": -129.14064025878906, |
|
"logps/rejected": -181.44851684570312, |
|
"loss": 0.2196, |
|
"nll_loss": 0.15831029415130615, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -12.914064407348633, |
|
"rewards/margins": 5.230786323547363, |
|
"rewards/rejected": -18.14484977722168, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.8662236347876338, |
|
"grad_norm": 16.257095336914062, |
|
"learning_rate": 4.732510288065844e-07, |
|
"logits/chosen": -0.41909652948379517, |
|
"logits/rejected": -0.4289626479148865, |
|
"logps/chosen": -137.97906494140625, |
|
"logps/rejected": -189.48602294921875, |
|
"loss": 0.2401, |
|
"nll_loss": 0.15598097443580627, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -13.797907829284668, |
|
"rewards/margins": 5.15069580078125, |
|
"rewards/rejected": -18.9486026763916, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.889338341519792, |
|
"grad_norm": 24.864940643310547, |
|
"learning_rate": 4.6810699588477364e-07, |
|
"logits/chosen": -0.36290091276168823, |
|
"logits/rejected": -0.34600576758384705, |
|
"logps/chosen": -136.03607177734375, |
|
"logps/rejected": -185.31668090820312, |
|
"loss": 0.2201, |
|
"nll_loss": 0.14870640635490417, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -13.603607177734375, |
|
"rewards/margins": 4.9280619621276855, |
|
"rewards/rejected": -18.53166961669922, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.91245304825195, |
|
"grad_norm": 9.861152648925781, |
|
"learning_rate": 4.6296296296296297e-07, |
|
"logits/chosen": -0.43973201513290405, |
|
"logits/rejected": -0.44227686524391174, |
|
"logps/chosen": -139.79000854492188, |
|
"logps/rejected": -191.3979949951172, |
|
"loss": 0.2338, |
|
"nll_loss": 0.15694692730903625, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -13.97900104522705, |
|
"rewards/margins": 5.160799026489258, |
|
"rewards/rejected": -19.139801025390625, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.935567754984109, |
|
"grad_norm": 11.536057472229004, |
|
"learning_rate": 4.5781893004115224e-07, |
|
"logits/chosen": -0.4365859925746918, |
|
"logits/rejected": -0.43007755279541016, |
|
"logps/chosen": -143.85635375976562, |
|
"logps/rejected": -197.02879333496094, |
|
"loss": 0.2355, |
|
"nll_loss": 0.15321387350559235, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -14.385635375976562, |
|
"rewards/margins": 5.317243576049805, |
|
"rewards/rejected": -19.702880859375, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.958682461716267, |
|
"grad_norm": 18.637239456176758, |
|
"learning_rate": 4.5267489711934156e-07, |
|
"logits/chosen": -0.47489842772483826, |
|
"logits/rejected": -0.4829436242580414, |
|
"logps/chosen": -140.48260498046875, |
|
"logps/rejected": -196.2875213623047, |
|
"loss": 0.2461, |
|
"nll_loss": 0.16315388679504395, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -14.048260688781738, |
|
"rewards/margins": 5.5804924964904785, |
|
"rewards/rejected": -19.628753662109375, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.9817971684484252, |
|
"grad_norm": 13.219135284423828, |
|
"learning_rate": 4.4753086419753083e-07, |
|
"logits/chosen": -0.45336833596229553, |
|
"logits/rejected": -0.44670405983924866, |
|
"logps/chosen": -141.3701934814453, |
|
"logps/rejected": -192.05670166015625, |
|
"loss": 0.2244, |
|
"nll_loss": 0.16718199849128723, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -14.137018203735352, |
|
"rewards/margins": 5.0686516761779785, |
|
"rewards/rejected": -19.205671310424805, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.997977463160936, |
|
"eval_logits/chosen": -0.3714839220046997, |
|
"eval_logits/rejected": -0.3428020179271698, |
|
"eval_logps/chosen": -157.10519409179688, |
|
"eval_logps/rejected": -172.1945343017578, |
|
"eval_loss": 1.3861061334609985, |
|
"eval_nll_loss": 0.20338018238544464, |
|
"eval_rewards/accuracies": 0.656521737575531, |
|
"eval_rewards/chosen": -15.710522651672363, |
|
"eval_rewards/margins": 1.5089313983917236, |
|
"eval_rewards/rejected": -17.219451904296875, |
|
"eval_runtime": 77.2394, |
|
"eval_samples_per_second": 23.641, |
|
"eval_steps_per_second": 1.489, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 3.0049118751805834, |
|
"grad_norm": 5.132666110992432, |
|
"learning_rate": 4.4238683127572015e-07, |
|
"logits/chosen": -0.44278082251548767, |
|
"logits/rejected": -0.44281044602394104, |
|
"logps/chosen": -141.17550659179688, |
|
"logps/rejected": -196.56248474121094, |
|
"loss": 0.2016, |
|
"nll_loss": 0.15163448452949524, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -14.117551803588867, |
|
"rewards/margins": 5.538697719573975, |
|
"rewards/rejected": -19.656248092651367, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.028026581912742, |
|
"grad_norm": 3.1660420894622803, |
|
"learning_rate": 4.372427983539094e-07, |
|
"logits/chosen": -0.40755367279052734, |
|
"logits/rejected": -0.3970012962818146, |
|
"logps/chosen": -125.93168640136719, |
|
"logps/rejected": -186.09402465820312, |
|
"loss": 0.1537, |
|
"nll_loss": 0.13879674673080444, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.593169212341309, |
|
"rewards/margins": 6.016233921051025, |
|
"rewards/rejected": -18.609403610229492, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 3.0511412886449003, |
|
"grad_norm": 3.5848960876464844, |
|
"learning_rate": 4.320987654320987e-07, |
|
"logits/chosen": -0.44615453481674194, |
|
"logits/rejected": -0.43949246406555176, |
|
"logps/chosen": -126.3210220336914, |
|
"logps/rejected": -184.44094848632812, |
|
"loss": 0.1556, |
|
"nll_loss": 0.1318623572587967, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.632102012634277, |
|
"rewards/margins": 5.811993598937988, |
|
"rewards/rejected": -18.444095611572266, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.0742559953770585, |
|
"grad_norm": 3.971622943878174, |
|
"learning_rate": 4.2695473251028807e-07, |
|
"logits/chosen": -0.34509214758872986, |
|
"logits/rejected": -0.3416140079498291, |
|
"logps/chosen": -130.82965087890625, |
|
"logps/rejected": -189.31130981445312, |
|
"loss": 0.1539, |
|
"nll_loss": 0.13816341757774353, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -13.082966804504395, |
|
"rewards/margins": 5.848166465759277, |
|
"rewards/rejected": -18.931133270263672, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 3.097370702109217, |
|
"grad_norm": 3.245117664337158, |
|
"learning_rate": 4.218106995884774e-07, |
|
"logits/chosen": -0.263519287109375, |
|
"logits/rejected": -0.25365307927131653, |
|
"logps/chosen": -128.29852294921875, |
|
"logps/rejected": -189.9366455078125, |
|
"loss": 0.1518, |
|
"nll_loss": 0.13781467080116272, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.829852104187012, |
|
"rewards/margins": 6.16381311416626, |
|
"rewards/rejected": -18.99366569519043, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 3.1204854088413754, |
|
"grad_norm": 4.314767837524414, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -0.2783138155937195, |
|
"logits/rejected": -0.3006114363670349, |
|
"logps/chosen": -128.49453735351562, |
|
"logps/rejected": -187.8452606201172, |
|
"loss": 0.1516, |
|
"nll_loss": 0.14406827092170715, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.849452018737793, |
|
"rewards/margins": 5.935072898864746, |
|
"rewards/rejected": -18.784526824951172, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.1436001155735336, |
|
"grad_norm": 2.8442511558532715, |
|
"learning_rate": 4.11522633744856e-07, |
|
"logits/chosen": -0.19675478339195251, |
|
"logits/rejected": -0.18994562327861786, |
|
"logps/chosen": -130.37368774414062, |
|
"logps/rejected": -191.08071899414062, |
|
"loss": 0.1502, |
|
"nll_loss": 0.14177414774894714, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.037368774414062, |
|
"rewards/margins": 6.070704936981201, |
|
"rewards/rejected": -19.10807228088379, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 3.166714822305692, |
|
"grad_norm": 4.321190357208252, |
|
"learning_rate": 4.0637860082304526e-07, |
|
"logits/chosen": -0.29594722390174866, |
|
"logits/rejected": -0.2727283537387848, |
|
"logps/chosen": -126.78936767578125, |
|
"logps/rejected": -183.8494873046875, |
|
"loss": 0.1495, |
|
"nll_loss": 0.13010382652282715, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.678936958312988, |
|
"rewards/margins": 5.706011772155762, |
|
"rewards/rejected": -18.38494873046875, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 3.1898295290378504, |
|
"grad_norm": 3.650377035140991, |
|
"learning_rate": 4.0123456790123453e-07, |
|
"logits/chosen": -0.37024635076522827, |
|
"logits/rejected": -0.36072778701782227, |
|
"logps/chosen": -134.62948608398438, |
|
"logps/rejected": -194.2451171875, |
|
"loss": 0.1556, |
|
"nll_loss": 0.1394232213497162, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -13.4629487991333, |
|
"rewards/margins": 5.9615631103515625, |
|
"rewards/rejected": -19.42451286315918, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.2129442357700086, |
|
"grad_norm": 5.636937141418457, |
|
"learning_rate": 3.9609053497942385e-07, |
|
"logits/chosen": -0.27522599697113037, |
|
"logits/rejected": -0.27910444140434265, |
|
"logps/chosen": -124.5965805053711, |
|
"logps/rejected": -187.5218505859375, |
|
"loss": 0.1484, |
|
"nll_loss": 0.12636372447013855, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -12.459661483764648, |
|
"rewards/margins": 6.292525768280029, |
|
"rewards/rejected": -18.752187728881836, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 3.236058942502167, |
|
"grad_norm": 3.8186678886413574, |
|
"learning_rate": 3.909465020576131e-07, |
|
"logits/chosen": -0.2928979992866516, |
|
"logits/rejected": -0.2864636480808258, |
|
"logps/chosen": -124.09950256347656, |
|
"logps/rejected": -181.70155334472656, |
|
"loss": 0.1549, |
|
"nll_loss": 0.13333001732826233, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -12.409948348999023, |
|
"rewards/margins": 5.7602057456970215, |
|
"rewards/rejected": -18.170154571533203, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.2591736492343255, |
|
"grad_norm": 3.9708776473999023, |
|
"learning_rate": 3.8580246913580245e-07, |
|
"logits/chosen": -0.3393842577934265, |
|
"logits/rejected": -0.32439425587654114, |
|
"logps/chosen": -130.1053009033203, |
|
"logps/rejected": -188.5397491455078, |
|
"loss": 0.1556, |
|
"nll_loss": 0.13221554458141327, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.010530471801758, |
|
"rewards/margins": 5.843444347381592, |
|
"rewards/rejected": -18.853975296020508, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 3.2822883559664837, |
|
"grad_norm": 3.5606882572174072, |
|
"learning_rate": 3.806584362139917e-07, |
|
"logits/chosen": -0.31585693359375, |
|
"logits/rejected": -0.26836958527565, |
|
"logps/chosen": -120.08418273925781, |
|
"logps/rejected": -180.00120544433594, |
|
"loss": 0.1471, |
|
"nll_loss": 0.12899354100227356, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.008419036865234, |
|
"rewards/margins": 5.991702079772949, |
|
"rewards/rejected": -18.000120162963867, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 3.305403062698642, |
|
"grad_norm": 3.3717777729034424, |
|
"learning_rate": 3.7551440329218104e-07, |
|
"logits/chosen": -0.23174750804901123, |
|
"logits/rejected": -0.2522903382778168, |
|
"logps/chosen": -131.6839599609375, |
|
"logps/rejected": -198.05081176757812, |
|
"loss": 0.1565, |
|
"nll_loss": 0.13706137239933014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.16839599609375, |
|
"rewards/margins": 6.636684417724609, |
|
"rewards/rejected": -19.80508041381836, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 3.3285177694308006, |
|
"grad_norm": 3.782886028289795, |
|
"learning_rate": 3.703703703703703e-07, |
|
"logits/chosen": -0.3117191195487976, |
|
"logits/rejected": -0.31785351037979126, |
|
"logps/chosen": -131.83470153808594, |
|
"logps/rejected": -189.18441772460938, |
|
"loss": 0.1492, |
|
"nll_loss": 0.12388783693313599, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.183469772338867, |
|
"rewards/margins": 5.734971046447754, |
|
"rewards/rejected": -18.918439865112305, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.351632476162959, |
|
"grad_norm": 3.158254384994507, |
|
"learning_rate": 3.6522633744855963e-07, |
|
"logits/chosen": -0.3361268639564514, |
|
"logits/rejected": -0.3252175748348236, |
|
"logps/chosen": -128.30125427246094, |
|
"logps/rejected": -186.31838989257812, |
|
"loss": 0.1539, |
|
"nll_loss": 0.13049830496311188, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -12.830126762390137, |
|
"rewards/margins": 5.801713943481445, |
|
"rewards/rejected": -18.631839752197266, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.374747182895117, |
|
"grad_norm": 4.768058776855469, |
|
"learning_rate": 3.6008230452674896e-07, |
|
"logits/chosen": -0.23867249488830566, |
|
"logits/rejected": -0.20122122764587402, |
|
"logps/chosen": -123.92413330078125, |
|
"logps/rejected": -186.30250549316406, |
|
"loss": 0.1616, |
|
"nll_loss": 0.14071312546730042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.392415046691895, |
|
"rewards/margins": 6.2378363609313965, |
|
"rewards/rejected": -18.630252838134766, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 3.397861889627275, |
|
"grad_norm": 3.911938428878784, |
|
"learning_rate": 3.549382716049383e-07, |
|
"logits/chosen": -0.2685008943080902, |
|
"logits/rejected": -0.23969027400016785, |
|
"logps/chosen": -127.1446304321289, |
|
"logps/rejected": -186.02838134765625, |
|
"loss": 0.1486, |
|
"nll_loss": 0.12472818791866302, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -12.714462280273438, |
|
"rewards/margins": 5.888378143310547, |
|
"rewards/rejected": -18.602840423583984, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 3.420976596359434, |
|
"grad_norm": 3.9447271823883057, |
|
"learning_rate": 3.4979423868312755e-07, |
|
"logits/chosen": -0.28780004382133484, |
|
"logits/rejected": -0.290294349193573, |
|
"logps/chosen": -127.8751449584961, |
|
"logps/rejected": -189.95578002929688, |
|
"loss": 0.1473, |
|
"nll_loss": 0.13437309861183167, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.787514686584473, |
|
"rewards/margins": 6.208063125610352, |
|
"rewards/rejected": -18.99557876586914, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 3.444091303091592, |
|
"grad_norm": 6.313704490661621, |
|
"learning_rate": 3.446502057613169e-07, |
|
"logits/chosen": -0.23013484477996826, |
|
"logits/rejected": -0.23306229710578918, |
|
"logps/chosen": -122.0789566040039, |
|
"logps/rejected": -185.14695739746094, |
|
"loss": 0.1478, |
|
"nll_loss": 0.13203728199005127, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.207897186279297, |
|
"rewards/margins": 6.306800842285156, |
|
"rewards/rejected": -18.514698028564453, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 3.4672060098237503, |
|
"grad_norm": 2.906285524368286, |
|
"learning_rate": 3.3950617283950614e-07, |
|
"logits/chosen": -0.3435348868370056, |
|
"logits/rejected": -0.33539697527885437, |
|
"logps/chosen": -123.60890197753906, |
|
"logps/rejected": -183.1199493408203, |
|
"loss": 0.1513, |
|
"nll_loss": 0.13879191875457764, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.36089038848877, |
|
"rewards/margins": 5.951104640960693, |
|
"rewards/rejected": -18.311994552612305, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.4903207165559085, |
|
"grad_norm": 2.990963935852051, |
|
"learning_rate": 3.3436213991769547e-07, |
|
"logits/chosen": -0.26741576194763184, |
|
"logits/rejected": -0.273776650428772, |
|
"logps/chosen": -129.36013793945312, |
|
"logps/rejected": -186.50009155273438, |
|
"loss": 0.1465, |
|
"nll_loss": 0.14070597290992737, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -12.936014175415039, |
|
"rewards/margins": 5.713995933532715, |
|
"rewards/rejected": -18.650009155273438, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.513435423288067, |
|
"grad_norm": 5.473604679107666, |
|
"learning_rate": 3.2921810699588474e-07, |
|
"logits/chosen": -0.28439709544181824, |
|
"logits/rejected": -0.2706482410430908, |
|
"logps/chosen": -123.5947265625, |
|
"logps/rejected": -185.80001831054688, |
|
"loss": 0.1509, |
|
"nll_loss": 0.1402612030506134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.359472274780273, |
|
"rewards/margins": 6.220528602600098, |
|
"rewards/rejected": -18.580001831054688, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.5365501300202253, |
|
"grad_norm": 6.9896626472473145, |
|
"learning_rate": 3.2407407407407406e-07, |
|
"logits/chosen": -0.3721368908882141, |
|
"logits/rejected": -0.3583984673023224, |
|
"logps/chosen": -128.07249450683594, |
|
"logps/rejected": -187.01959228515625, |
|
"loss": 0.1538, |
|
"nll_loss": 0.13780102133750916, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.807249069213867, |
|
"rewards/margins": 5.894709587097168, |
|
"rewards/rejected": -18.701961517333984, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 3.5596648367523835, |
|
"grad_norm": 2.910080671310425, |
|
"learning_rate": 3.1893004115226333e-07, |
|
"logits/chosen": -0.3633486330509186, |
|
"logits/rejected": -0.34488505125045776, |
|
"logps/chosen": -125.72395324707031, |
|
"logps/rejected": -184.29405212402344, |
|
"loss": 0.1547, |
|
"nll_loss": 0.1316194236278534, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -12.572395324707031, |
|
"rewards/margins": 5.857010841369629, |
|
"rewards/rejected": -18.429405212402344, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 3.582779543484542, |
|
"grad_norm": 3.2864928245544434, |
|
"learning_rate": 3.1378600823045266e-07, |
|
"logits/chosen": -0.36337172985076904, |
|
"logits/rejected": -0.3896876871585846, |
|
"logps/chosen": -130.9540252685547, |
|
"logps/rejected": -192.02456665039062, |
|
"loss": 0.143, |
|
"nll_loss": 0.12916973233222961, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.095403671264648, |
|
"rewards/margins": 6.107052803039551, |
|
"rewards/rejected": -19.202457427978516, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.6058942502167004, |
|
"grad_norm": 9.098392486572266, |
|
"learning_rate": 3.086419753086419e-07, |
|
"logits/chosen": -0.26420459151268005, |
|
"logits/rejected": -0.30124431848526, |
|
"logps/chosen": -132.1412353515625, |
|
"logps/rejected": -196.06668090820312, |
|
"loss": 0.1472, |
|
"nll_loss": 0.12210263311862946, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.214123725891113, |
|
"rewards/margins": 6.392544269561768, |
|
"rewards/rejected": -19.60666847229004, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 3.6290089569488586, |
|
"grad_norm": 3.135023593902588, |
|
"learning_rate": 3.0349794238683125e-07, |
|
"logits/chosen": -0.2870226800441742, |
|
"logits/rejected": -0.32922470569610596, |
|
"logps/chosen": -127.20719909667969, |
|
"logps/rejected": -187.71414184570312, |
|
"loss": 0.1606, |
|
"nll_loss": 0.13571253418922424, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.720720291137695, |
|
"rewards/margins": 6.050693511962891, |
|
"rewards/rejected": -18.771413803100586, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 3.6521236636810173, |
|
"grad_norm": 2.965545892715454, |
|
"learning_rate": 2.983539094650205e-07, |
|
"logits/chosen": -0.2955471873283386, |
|
"logits/rejected": -0.29221171140670776, |
|
"logps/chosen": -120.03623962402344, |
|
"logps/rejected": -177.8092041015625, |
|
"loss": 0.141, |
|
"nll_loss": 0.12610065937042236, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.003625869750977, |
|
"rewards/margins": 5.777295112609863, |
|
"rewards/rejected": -17.780920028686523, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 3.6752383704131755, |
|
"grad_norm": 3.8427724838256836, |
|
"learning_rate": 2.932098765432099e-07, |
|
"logits/chosen": -0.294664204120636, |
|
"logits/rejected": -0.315548837184906, |
|
"logps/chosen": -126.55033874511719, |
|
"logps/rejected": -186.32962036132812, |
|
"loss": 0.1472, |
|
"nll_loss": 0.1299527883529663, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.655034065246582, |
|
"rewards/margins": 5.977927207946777, |
|
"rewards/rejected": -18.63296127319336, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 3.6983530771453337, |
|
"grad_norm": 3.386413335800171, |
|
"learning_rate": 2.8806584362139917e-07, |
|
"logits/chosen": -0.21596117317676544, |
|
"logits/rejected": -0.20901863276958466, |
|
"logps/chosen": -118.6823959350586, |
|
"logps/rejected": -177.80654907226562, |
|
"loss": 0.1584, |
|
"nll_loss": 0.14362338185310364, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.86823844909668, |
|
"rewards/margins": 5.912415504455566, |
|
"rewards/rejected": -17.780656814575195, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.7214677838774923, |
|
"grad_norm": 3.672924518585205, |
|
"learning_rate": 2.829218106995885e-07, |
|
"logits/chosen": -0.26348841190338135, |
|
"logits/rejected": -0.262240469455719, |
|
"logps/chosen": -124.21568298339844, |
|
"logps/rejected": -183.1221466064453, |
|
"loss": 0.1513, |
|
"nll_loss": 0.11891283839941025, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -12.421568870544434, |
|
"rewards/margins": 5.8906474113464355, |
|
"rewards/rejected": -18.31221580505371, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 3.7445824906096505, |
|
"grad_norm": 3.7650656700134277, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"logits/chosen": -0.278475821018219, |
|
"logits/rejected": -0.2345239669084549, |
|
"logps/chosen": -123.59881591796875, |
|
"logps/rejected": -183.743896484375, |
|
"loss": 0.1518, |
|
"nll_loss": 0.12711484730243683, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -12.359882354736328, |
|
"rewards/margins": 6.014508247375488, |
|
"rewards/rejected": -18.3743896484375, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 3.7676971973418087, |
|
"grad_norm": 3.11409592628479, |
|
"learning_rate": 2.726337448559671e-07, |
|
"logits/chosen": -0.29814380407333374, |
|
"logits/rejected": -0.28927913308143616, |
|
"logps/chosen": -127.12947082519531, |
|
"logps/rejected": -183.96328735351562, |
|
"loss": 0.1502, |
|
"nll_loss": 0.11745184659957886, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -12.712945938110352, |
|
"rewards/margins": 5.683382987976074, |
|
"rewards/rejected": -18.396331787109375, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 3.790811904073967, |
|
"grad_norm": 4.140903949737549, |
|
"learning_rate": 2.6748971193415635e-07, |
|
"logits/chosen": -0.29099392890930176, |
|
"logits/rejected": -0.3041759133338928, |
|
"logps/chosen": -130.06552124023438, |
|
"logps/rejected": -191.20046997070312, |
|
"loss": 0.1509, |
|
"nll_loss": 0.14280778169631958, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.006550788879395, |
|
"rewards/margins": 6.11349630355835, |
|
"rewards/rejected": -19.120046615600586, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 3.813926610806125, |
|
"grad_norm": 8.86196231842041, |
|
"learning_rate": 2.623456790123457e-07, |
|
"logits/chosen": -0.2659907341003418, |
|
"logits/rejected": -0.27678874135017395, |
|
"logps/chosen": -126.56221008300781, |
|
"logps/rejected": -185.51071166992188, |
|
"loss": 0.1458, |
|
"nll_loss": 0.1296006143093109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.656221389770508, |
|
"rewards/margins": 5.894850730895996, |
|
"rewards/rejected": -18.551071166992188, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.837041317538284, |
|
"grad_norm": 7.074207305908203, |
|
"learning_rate": 2.5720164609053495e-07, |
|
"logits/chosen": -0.2648230195045471, |
|
"logits/rejected": -0.2591935098171234, |
|
"logps/chosen": -117.26505279541016, |
|
"logps/rejected": -177.61654663085938, |
|
"loss": 0.1454, |
|
"nll_loss": 0.13034331798553467, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.726505279541016, |
|
"rewards/margins": 6.03515100479126, |
|
"rewards/rejected": -17.761655807495117, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 3.860156024270442, |
|
"grad_norm": 3.6986083984375, |
|
"learning_rate": 2.5205761316872427e-07, |
|
"logits/chosen": -0.3297143876552582, |
|
"logits/rejected": -0.31857237219810486, |
|
"logps/chosen": -133.59078979492188, |
|
"logps/rejected": -194.1522979736328, |
|
"loss": 0.156, |
|
"nll_loss": 0.1323135942220688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.359077453613281, |
|
"rewards/margins": 6.056151390075684, |
|
"rewards/rejected": -19.41522789001465, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 3.8832707310026002, |
|
"grad_norm": 3.5342583656311035, |
|
"learning_rate": 2.4691358024691354e-07, |
|
"logits/chosen": -0.3504456877708435, |
|
"logits/rejected": -0.3491267263889313, |
|
"logps/chosen": -125.02303314208984, |
|
"logps/rejected": -186.25491333007812, |
|
"loss": 0.1414, |
|
"nll_loss": 0.1284278929233551, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.502302169799805, |
|
"rewards/margins": 6.123185157775879, |
|
"rewards/rejected": -18.62548828125, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 3.906385437734759, |
|
"grad_norm": 9.769820213317871, |
|
"learning_rate": 2.4176954732510286e-07, |
|
"logits/chosen": -0.3653779923915863, |
|
"logits/rejected": -0.3362106382846832, |
|
"logps/chosen": -135.67111206054688, |
|
"logps/rejected": -198.921142578125, |
|
"loss": 0.1563, |
|
"nll_loss": 0.1389894187450409, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.567111015319824, |
|
"rewards/margins": 6.325002193450928, |
|
"rewards/rejected": -19.892114639282227, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 3.929500144466917, |
|
"grad_norm": 12.724737167358398, |
|
"learning_rate": 2.366255144032922e-07, |
|
"logits/chosen": -0.3556443452835083, |
|
"logits/rejected": -0.33838778734207153, |
|
"logps/chosen": -126.82794189453125, |
|
"logps/rejected": -187.8473663330078, |
|
"loss": 0.1457, |
|
"nll_loss": 0.13801956176757812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.682792663574219, |
|
"rewards/margins": 6.101943016052246, |
|
"rewards/rejected": -18.784738540649414, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.9526148511990753, |
|
"grad_norm": 2.656416654586792, |
|
"learning_rate": 2.3148148148148148e-07, |
|
"logits/chosen": -0.3134855329990387, |
|
"logits/rejected": -0.305325984954834, |
|
"logps/chosen": -128.65797424316406, |
|
"logps/rejected": -188.01309204101562, |
|
"loss": 0.1369, |
|
"nll_loss": 0.12594002485275269, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.865796089172363, |
|
"rewards/margins": 5.935511589050293, |
|
"rewards/rejected": -18.80130958557129, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 3.975729557931234, |
|
"grad_norm": 12.101499557495117, |
|
"learning_rate": 2.2633744855967078e-07, |
|
"logits/chosen": -0.4090637266635895, |
|
"logits/rejected": -0.3877164423465729, |
|
"logps/chosen": -134.76638793945312, |
|
"logps/rejected": -194.9758758544922, |
|
"loss": 0.1532, |
|
"nll_loss": 0.14175161719322205, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -13.476638793945312, |
|
"rewards/margins": 6.020949840545654, |
|
"rewards/rejected": -19.497589111328125, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 3.998844264663392, |
|
"grad_norm": 6.0831708908081055, |
|
"learning_rate": 2.2119341563786008e-07, |
|
"logits/chosen": -0.3833851218223572, |
|
"logits/rejected": -0.39498597383499146, |
|
"logps/chosen": -129.8985595703125, |
|
"logps/rejected": -187.89739990234375, |
|
"loss": 0.1472, |
|
"nll_loss": 0.12770399451255798, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -12.98985481262207, |
|
"rewards/margins": 5.799884796142578, |
|
"rewards/rejected": -18.78973960876465, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 3.998844264663392, |
|
"eval_logits/chosen": -0.3029468059539795, |
|
"eval_logits/rejected": -0.270137220621109, |
|
"eval_logps/chosen": -146.46226501464844, |
|
"eval_logps/rejected": -161.38487243652344, |
|
"eval_loss": 1.4029475450515747, |
|
"eval_nll_loss": 0.1876361072063446, |
|
"eval_rewards/accuracies": 0.6521739363670349, |
|
"eval_rewards/chosen": -14.646224975585938, |
|
"eval_rewards/margins": 1.4922590255737305, |
|
"eval_rewards/rejected": -16.138486862182617, |
|
"eval_runtime": 77.4371, |
|
"eval_samples_per_second": 23.58, |
|
"eval_steps_per_second": 1.485, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 4.02195897139555, |
|
"grad_norm": 1.9679253101348877, |
|
"learning_rate": 2.1604938271604935e-07, |
|
"logits/chosen": -0.3585730195045471, |
|
"logits/rejected": -0.3200622498989105, |
|
"logps/chosen": -118.93489074707031, |
|
"logps/rejected": -183.91061401367188, |
|
"loss": 0.1179, |
|
"nll_loss": 0.1184120774269104, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.893487930297852, |
|
"rewards/margins": 6.497572422027588, |
|
"rewards/rejected": -18.391061782836914, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 4.045073678127709, |
|
"grad_norm": 1.426239252090454, |
|
"learning_rate": 2.109053497942387e-07, |
|
"logits/chosen": -0.3198128640651703, |
|
"logits/rejected": -0.3108198940753937, |
|
"logps/chosen": -119.95533752441406, |
|
"logps/rejected": -182.93043518066406, |
|
"loss": 0.1218, |
|
"nll_loss": 0.10763946920633316, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.99553394317627, |
|
"rewards/margins": 6.29750919342041, |
|
"rewards/rejected": -18.293041229248047, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 4.068188384859867, |
|
"grad_norm": 1.8550798892974854, |
|
"learning_rate": 2.05761316872428e-07, |
|
"logits/chosen": -0.28298747539520264, |
|
"logits/rejected": -0.2920450270175934, |
|
"logps/chosen": -117.935791015625, |
|
"logps/rejected": -186.0088653564453, |
|
"loss": 0.1233, |
|
"nll_loss": 0.11667722463607788, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.7935791015625, |
|
"rewards/margins": 6.807305812835693, |
|
"rewards/rejected": -18.60088539123535, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 4.091303091592025, |
|
"grad_norm": 1.947771668434143, |
|
"learning_rate": 2.0061728395061726e-07, |
|
"logits/chosen": -0.21840214729309082, |
|
"logits/rejected": -0.2067776620388031, |
|
"logps/chosen": -115.0444564819336, |
|
"logps/rejected": -179.38697814941406, |
|
"loss": 0.1213, |
|
"nll_loss": 0.1122204065322876, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.504446029663086, |
|
"rewards/margins": 6.4342522621154785, |
|
"rewards/rejected": -17.938695907592773, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 4.114417798324184, |
|
"grad_norm": 1.8407361507415771, |
|
"learning_rate": 1.9547325102880656e-07, |
|
"logits/chosen": -0.29772254824638367, |
|
"logits/rejected": -0.2754737138748169, |
|
"logps/chosen": -113.61384582519531, |
|
"logps/rejected": -177.0957489013672, |
|
"loss": 0.1227, |
|
"nll_loss": 0.10529961436986923, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.361384391784668, |
|
"rewards/margins": 6.348191738128662, |
|
"rewards/rejected": -17.709575653076172, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 4.137532505056342, |
|
"grad_norm": 1.4201513528823853, |
|
"learning_rate": 1.9032921810699586e-07, |
|
"logits/chosen": -0.30481767654418945, |
|
"logits/rejected": -0.2908991277217865, |
|
"logps/chosen": -119.33686828613281, |
|
"logps/rejected": -184.93646240234375, |
|
"loss": 0.1227, |
|
"nll_loss": 0.1168881431221962, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.933687210083008, |
|
"rewards/margins": 6.559959411621094, |
|
"rewards/rejected": -18.493648529052734, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 4.1606472117885005, |
|
"grad_norm": 1.8120708465576172, |
|
"learning_rate": 1.8518518518518516e-07, |
|
"logits/chosen": -0.3080504834651947, |
|
"logits/rejected": -0.30417922139167786, |
|
"logps/chosen": -122.6048812866211, |
|
"logps/rejected": -185.8119659423828, |
|
"loss": 0.126, |
|
"nll_loss": 0.12334553897380829, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.26048755645752, |
|
"rewards/margins": 6.320708751678467, |
|
"rewards/rejected": -18.581195831298828, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.183761918520659, |
|
"grad_norm": 2.5624470710754395, |
|
"learning_rate": 1.8004115226337448e-07, |
|
"logits/chosen": -0.24937394261360168, |
|
"logits/rejected": -0.2712889313697815, |
|
"logps/chosen": -124.1614761352539, |
|
"logps/rejected": -188.57559204101562, |
|
"loss": 0.1226, |
|
"nll_loss": 0.1163693517446518, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.41614818572998, |
|
"rewards/margins": 6.44141149520874, |
|
"rewards/rejected": -18.857561111450195, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 4.206876625252817, |
|
"grad_norm": 1.5446466207504272, |
|
"learning_rate": 1.7489711934156378e-07, |
|
"logits/chosen": -0.23896384239196777, |
|
"logits/rejected": -0.2415800839662552, |
|
"logps/chosen": -119.49736022949219, |
|
"logps/rejected": -185.11898803710938, |
|
"loss": 0.1212, |
|
"nll_loss": 0.11859021335840225, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.949737548828125, |
|
"rewards/margins": 6.56216287612915, |
|
"rewards/rejected": -18.511898040771484, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 4.229991331984976, |
|
"grad_norm": 1.7995822429656982, |
|
"learning_rate": 1.6975308641975307e-07, |
|
"logits/chosen": -0.24105176329612732, |
|
"logits/rejected": -0.21960768103599548, |
|
"logps/chosen": -113.63651275634766, |
|
"logps/rejected": -176.64730834960938, |
|
"loss": 0.1216, |
|
"nll_loss": 0.11322028934955597, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.363652229309082, |
|
"rewards/margins": 6.301081657409668, |
|
"rewards/rejected": -17.66473388671875, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 4.253106038717133, |
|
"grad_norm": 1.7273714542388916, |
|
"learning_rate": 1.6460905349794237e-07, |
|
"logits/chosen": -0.253646582365036, |
|
"logits/rejected": -0.26175594329833984, |
|
"logps/chosen": -118.37306213378906, |
|
"logps/rejected": -184.26153564453125, |
|
"loss": 0.1206, |
|
"nll_loss": 0.11956053972244263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.837307929992676, |
|
"rewards/margins": 6.5888471603393555, |
|
"rewards/rejected": -18.4261531829834, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 4.276220745449292, |
|
"grad_norm": 4.887149810791016, |
|
"learning_rate": 1.5946502057613167e-07, |
|
"logits/chosen": -0.2122907191514969, |
|
"logits/rejected": -0.2090766876935959, |
|
"logps/chosen": -113.57759094238281, |
|
"logps/rejected": -174.99594116210938, |
|
"loss": 0.1184, |
|
"nll_loss": 0.10560585558414459, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -11.357759475708008, |
|
"rewards/margins": 6.141837120056152, |
|
"rewards/rejected": -17.499595642089844, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 4.299335452181451, |
|
"grad_norm": 1.5595005750656128, |
|
"learning_rate": 1.5432098765432096e-07, |
|
"logits/chosen": -0.13843365013599396, |
|
"logits/rejected": -0.1982315182685852, |
|
"logps/chosen": -118.16423034667969, |
|
"logps/rejected": -182.03799438476562, |
|
"loss": 0.1211, |
|
"nll_loss": 0.11699899286031723, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.816422462463379, |
|
"rewards/margins": 6.387377738952637, |
|
"rewards/rejected": -18.203800201416016, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 4.322450158913608, |
|
"grad_norm": 2.2779886722564697, |
|
"learning_rate": 1.4917695473251026e-07, |
|
"logits/chosen": -0.265516459941864, |
|
"logits/rejected": -0.2614438533782959, |
|
"logps/chosen": -124.3641128540039, |
|
"logps/rejected": -187.52963256835938, |
|
"loss": 0.1261, |
|
"nll_loss": 0.11983609199523926, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.43641185760498, |
|
"rewards/margins": 6.316550254821777, |
|
"rewards/rejected": -18.75296401977539, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 4.345564865645767, |
|
"grad_norm": 2.2859365940093994, |
|
"learning_rate": 1.4403292181069958e-07, |
|
"logits/chosen": -0.25305554270744324, |
|
"logits/rejected": -0.2473808228969574, |
|
"logps/chosen": -124.98432922363281, |
|
"logps/rejected": -187.47373962402344, |
|
"loss": 0.1245, |
|
"nll_loss": 0.12777109444141388, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.498431205749512, |
|
"rewards/margins": 6.2489423751831055, |
|
"rewards/rejected": -18.74737548828125, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 4.368679572377926, |
|
"grad_norm": 1.4982426166534424, |
|
"learning_rate": 1.3888888888888888e-07, |
|
"logits/chosen": -0.2519396245479584, |
|
"logits/rejected": -0.24396154284477234, |
|
"logps/chosen": -117.44911193847656, |
|
"logps/rejected": -182.5988006591797, |
|
"loss": 0.1127, |
|
"nll_loss": 0.1127076968550682, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.744911193847656, |
|
"rewards/margins": 6.5149688720703125, |
|
"rewards/rejected": -18.25988006591797, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 4.3917942791100835, |
|
"grad_norm": 2.1417200565338135, |
|
"learning_rate": 1.3374485596707818e-07, |
|
"logits/chosen": -0.19052667915821075, |
|
"logits/rejected": -0.1665157973766327, |
|
"logps/chosen": -116.32462310791016, |
|
"logps/rejected": -181.2820587158203, |
|
"loss": 0.1205, |
|
"nll_loss": 0.11788536608219147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.632462501525879, |
|
"rewards/margins": 6.495743751525879, |
|
"rewards/rejected": -18.12820816040039, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.414908985842242, |
|
"grad_norm": 1.5730674266815186, |
|
"learning_rate": 1.2860082304526747e-07, |
|
"logits/chosen": -0.28410059213638306, |
|
"logits/rejected": -0.24584396183490753, |
|
"logps/chosen": -126.806884765625, |
|
"logps/rejected": -191.36875915527344, |
|
"loss": 0.1188, |
|
"nll_loss": 0.11963550001382828, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.680688858032227, |
|
"rewards/margins": 6.456188201904297, |
|
"rewards/rejected": -19.13687515258789, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 4.438023692574401, |
|
"grad_norm": 2.536539077758789, |
|
"learning_rate": 1.2345679012345677e-07, |
|
"logits/chosen": -0.2129761278629303, |
|
"logits/rejected": -0.1930898129940033, |
|
"logps/chosen": -117.23963928222656, |
|
"logps/rejected": -180.9163818359375, |
|
"loss": 0.1262, |
|
"nll_loss": 0.11052282154560089, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.72396469116211, |
|
"rewards/margins": 6.367676258087158, |
|
"rewards/rejected": -18.09164047241211, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 4.4611383993065585, |
|
"grad_norm": 1.6419086456298828, |
|
"learning_rate": 1.183127572016461e-07, |
|
"logits/chosen": -0.18322396278381348, |
|
"logits/rejected": -0.15920376777648926, |
|
"logps/chosen": -116.58353424072266, |
|
"logps/rejected": -184.9496307373047, |
|
"loss": 0.114, |
|
"nll_loss": 0.10174567997455597, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.658352851867676, |
|
"rewards/margins": 6.836610317230225, |
|
"rewards/rejected": -18.494962692260742, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 4.484253106038717, |
|
"grad_norm": 2.5254459381103516, |
|
"learning_rate": 1.1316872427983539e-07, |
|
"logits/chosen": -0.20438556373119354, |
|
"logits/rejected": -0.19316819310188293, |
|
"logps/chosen": -111.71683502197266, |
|
"logps/rejected": -176.36444091796875, |
|
"loss": 0.1143, |
|
"nll_loss": 0.10253375768661499, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.171684265136719, |
|
"rewards/margins": 6.464761257171631, |
|
"rewards/rejected": -17.636444091796875, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 4.507367812770876, |
|
"grad_norm": 4.048756122589111, |
|
"learning_rate": 1.0802469135802467e-07, |
|
"logits/chosen": -0.20184461772441864, |
|
"logits/rejected": -0.20470590889453888, |
|
"logps/chosen": -112.52592468261719, |
|
"logps/rejected": -176.77975463867188, |
|
"loss": 0.122, |
|
"nll_loss": 0.10450093448162079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.252592086791992, |
|
"rewards/margins": 6.4253830909729, |
|
"rewards/rejected": -17.677974700927734, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 4.530482519503034, |
|
"grad_norm": 1.5695422887802124, |
|
"learning_rate": 1.02880658436214e-07, |
|
"logits/chosen": -0.15921801328659058, |
|
"logits/rejected": -0.16545803844928741, |
|
"logps/chosen": -116.6390151977539, |
|
"logps/rejected": -182.0139617919922, |
|
"loss": 0.123, |
|
"nll_loss": 0.11899758875370026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.66390323638916, |
|
"rewards/margins": 6.537497043609619, |
|
"rewards/rejected": -18.201400756835938, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 4.553597226235192, |
|
"grad_norm": 1.8795533180236816, |
|
"learning_rate": 9.773662551440328e-08, |
|
"logits/chosen": -0.21856431663036346, |
|
"logits/rejected": -0.22739803791046143, |
|
"logps/chosen": -111.40470123291016, |
|
"logps/rejected": -175.14663696289062, |
|
"loss": 0.1173, |
|
"nll_loss": 0.10676850378513336, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.140469551086426, |
|
"rewards/margins": 6.374191761016846, |
|
"rewards/rejected": -17.514663696289062, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 4.576711932967351, |
|
"grad_norm": 2.4999828338623047, |
|
"learning_rate": 9.259259259259258e-08, |
|
"logits/chosen": -0.16077259182929993, |
|
"logits/rejected": -0.15148191154003143, |
|
"logps/chosen": -112.52552795410156, |
|
"logps/rejected": -175.3218994140625, |
|
"loss": 0.122, |
|
"nll_loss": 0.11213432252407074, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -11.25255298614502, |
|
"rewards/margins": 6.279637336730957, |
|
"rewards/rejected": -17.532190322875977, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 4.599826639699509, |
|
"grad_norm": 2.170232057571411, |
|
"learning_rate": 8.744855967078189e-08, |
|
"logits/chosen": -0.20790553092956543, |
|
"logits/rejected": -0.19387516379356384, |
|
"logps/chosen": -117.14433288574219, |
|
"logps/rejected": -181.39340209960938, |
|
"loss": 0.1185, |
|
"nll_loss": 0.11259637773036957, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -11.714433670043945, |
|
"rewards/margins": 6.424906253814697, |
|
"rewards/rejected": -18.139341354370117, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 4.622941346431667, |
|
"grad_norm": 2.0322587490081787, |
|
"learning_rate": 8.230452674897118e-08, |
|
"logits/chosen": -0.1339203268289566, |
|
"logits/rejected": -0.14758563041687012, |
|
"logps/chosen": -109.77425384521484, |
|
"logps/rejected": -176.02438354492188, |
|
"loss": 0.1248, |
|
"nll_loss": 0.11588220298290253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.977426528930664, |
|
"rewards/margins": 6.6250104904174805, |
|
"rewards/rejected": -17.602436065673828, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.646056053163825, |
|
"grad_norm": 3.8062565326690674, |
|
"learning_rate": 7.716049382716048e-08, |
|
"logits/chosen": -0.25674083828926086, |
|
"logits/rejected": -0.23061016201972961, |
|
"logps/chosen": -122.008056640625, |
|
"logps/rejected": -186.86663818359375, |
|
"loss": 0.1177, |
|
"nll_loss": 0.11457221210002899, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.200803756713867, |
|
"rewards/margins": 6.485858917236328, |
|
"rewards/rejected": -18.686664581298828, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 4.669170759895984, |
|
"grad_norm": 1.300473928451538, |
|
"learning_rate": 7.201646090534979e-08, |
|
"logits/chosen": -0.12542086839675903, |
|
"logits/rejected": -0.12564246356487274, |
|
"logps/chosen": -112.2677993774414, |
|
"logps/rejected": -177.23947143554688, |
|
"loss": 0.1197, |
|
"nll_loss": 0.10939665883779526, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.226778984069824, |
|
"rewards/margins": 6.497168064117432, |
|
"rewards/rejected": -17.723949432373047, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 4.692285466628142, |
|
"grad_norm": 3.699575901031494, |
|
"learning_rate": 6.687242798353909e-08, |
|
"logits/chosen": -0.15934507548809052, |
|
"logits/rejected": -0.15075993537902832, |
|
"logps/chosen": -116.63383483886719, |
|
"logps/rejected": -181.26510620117188, |
|
"loss": 0.1222, |
|
"nll_loss": 0.13159163296222687, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.663382530212402, |
|
"rewards/margins": 6.463127136230469, |
|
"rewards/rejected": -18.126508712768555, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 4.7154001733603, |
|
"grad_norm": 3.081348180770874, |
|
"learning_rate": 6.172839506172839e-08, |
|
"logits/chosen": -0.2664518356323242, |
|
"logits/rejected": -0.24538561701774597, |
|
"logps/chosen": -122.5953140258789, |
|
"logps/rejected": -189.40269470214844, |
|
"loss": 0.122, |
|
"nll_loss": 0.11068514734506607, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.259531021118164, |
|
"rewards/margins": 6.680737495422363, |
|
"rewards/rejected": -18.940269470214844, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 4.738514880092459, |
|
"grad_norm": 1.9295371770858765, |
|
"learning_rate": 5.6584362139917695e-08, |
|
"logits/chosen": -0.3057961165904999, |
|
"logits/rejected": -0.2679705023765564, |
|
"logps/chosen": -119.34764099121094, |
|
"logps/rejected": -184.24545288085938, |
|
"loss": 0.1254, |
|
"nll_loss": 0.11074963957071304, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.934765815734863, |
|
"rewards/margins": 6.489781379699707, |
|
"rewards/rejected": -18.424545288085938, |
|
"step": 2050 |
|
}, |
|
    {
      "epoch": 4.7616295868246175,
      "grad_norm": 1.486010193824768,
      "learning_rate": 5.1440329218107e-08,
      "logits/chosen": -0.17464767396450043,
      "logits/rejected": -0.17597734928131104,
      "logps/chosen": -118.97342681884766,
      "logps/rejected": -184.82752990722656,
      "loss": 0.116,
      "nll_loss": 0.11164693534374237,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -11.89734172821045,
      "rewards/margins": 6.585410118103027,
      "rewards/rejected": -18.48275375366211,
      "step": 2060
    },
    {
      "epoch": 4.784744293556775,
      "grad_norm": 1.5164188146591187,
      "learning_rate": 4.629629629629629e-08,
      "logits/chosen": -0.1697818785905838,
      "logits/rejected": -0.17655737698078156,
      "logps/chosen": -123.55452728271484,
      "logps/rejected": -191.81411743164062,
      "loss": 0.1175,
      "nll_loss": 0.10650823265314102,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -12.355452537536621,
      "rewards/margins": 6.8259596824646,
      "rewards/rejected": -19.18140983581543,
      "step": 2070
    },
    {
      "epoch": 4.807859000288934,
      "grad_norm": 2.9849853515625,
      "learning_rate": 4.115226337448559e-08,
      "logits/chosen": -0.1795181930065155,
      "logits/rejected": -0.19433379173278809,
      "logps/chosen": -118.71900939941406,
      "logps/rejected": -185.427734375,
      "loss": 0.1176,
      "nll_loss": 0.11160220950841904,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -11.87190055847168,
      "rewards/margins": 6.670874118804932,
      "rewards/rejected": -18.542774200439453,
      "step": 2080
    },
    {
      "epoch": 4.8309737070210925,
      "grad_norm": 1.8896292448043823,
      "learning_rate": 3.6008230452674896e-08,
      "logits/chosen": -0.20320720970630646,
      "logits/rejected": -0.21179303526878357,
      "logps/chosen": -121.1741714477539,
      "logps/rejected": -189.61380004882812,
      "loss": 0.1197,
      "nll_loss": 0.12176340818405151,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -12.117416381835938,
      "rewards/margins": 6.843962669372559,
      "rewards/rejected": -18.961380004882812,
      "step": 2090
    },
    {
      "epoch": 4.85408841375325,
      "grad_norm": 2.13209867477417,
      "learning_rate": 3.086419753086419e-08,
      "logits/chosen": -0.202679842710495,
      "logits/rejected": -0.19807621836662292,
      "logps/chosen": -121.65214538574219,
      "logps/rejected": -187.36184692382812,
      "loss": 0.1117,
      "nll_loss": 0.1065160408616066,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -12.165216445922852,
      "rewards/margins": 6.570970058441162,
      "rewards/rejected": -18.736186981201172,
      "step": 2100
    },
    {
      "epoch": 4.877203120485409,
      "grad_norm": 2.2168078422546387,
      "learning_rate": 2.57201646090535e-08,
      "logits/chosen": -0.20957596600055695,
      "logits/rejected": -0.19148316979408264,
      "logps/chosen": -112.11415100097656,
      "logps/rejected": -176.92153930664062,
      "loss": 0.12,
      "nll_loss": 0.12247494608163834,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -11.211416244506836,
      "rewards/margins": 6.480741024017334,
      "rewards/rejected": -17.692157745361328,
      "step": 2110
    },
    {
      "epoch": 4.900317827217567,
      "grad_norm": 1.704630970954895,
      "learning_rate": 2.0576131687242796e-08,
      "logits/chosen": -0.21424663066864014,
      "logits/rejected": -0.24735161662101746,
      "logps/chosen": -128.74917602539062,
      "logps/rejected": -195.53347778320312,
      "loss": 0.1239,
      "nll_loss": 0.14099851250648499,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -12.874917984008789,
      "rewards/margins": 6.678428649902344,
      "rewards/rejected": -19.553346633911133,
      "step": 2120
    },
    {
      "epoch": 4.923432533949725,
      "grad_norm": 2.0087478160858154,
      "learning_rate": 1.5432098765432096e-08,
      "logits/chosen": -0.1421460658311844,
      "logits/rejected": -0.1667608767747879,
      "logps/chosen": -113.1841812133789,
      "logps/rejected": -177.54026794433594,
      "loss": 0.1199,
      "nll_loss": 0.12012244760990143,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -11.318418502807617,
      "rewards/margins": 6.435610771179199,
      "rewards/rejected": -17.7540283203125,
      "step": 2130
    },
    {
      "epoch": 4.946547240681884,
      "grad_norm": 3.1608433723449707,
      "learning_rate": 1.0288065843621398e-08,
      "logits/chosen": -0.20297956466674805,
      "logits/rejected": -0.18899144232273102,
      "logps/chosen": -118.35282897949219,
      "logps/rejected": -183.97708129882812,
      "loss": 0.1174,
      "nll_loss": 0.10903529822826385,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -11.835283279418945,
      "rewards/margins": 6.562425136566162,
      "rewards/rejected": -18.397706985473633,
      "step": 2140
    },
    {
      "epoch": 4.969661947414043,
      "grad_norm": 1.8710432052612305,
      "learning_rate": 5.144032921810699e-09,
      "logits/chosen": -0.2684074640274048,
      "logits/rejected": -0.22125348448753357,
      "logps/chosen": -129.27749633789062,
      "logps/rejected": -194.1197967529297,
      "loss": 0.1183,
      "nll_loss": 0.11009220033884048,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -12.927749633789062,
      "rewards/margins": 6.484231472015381,
      "rewards/rejected": -19.4119815826416,
      "step": 2150
    },
    {
      "epoch": 4.9927766541462,
      "grad_norm": 1.882362961769104,
      "learning_rate": 0.0,
      "logits/chosen": -0.15148359537124634,
      "logits/rejected": -0.1361338496208191,
      "logps/chosen": -107.99227142333984,
      "logps/rejected": -173.1522979736328,
      "loss": 0.1143,
      "nll_loss": 0.10342558473348618,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -10.799227714538574,
      "rewards/margins": 6.516002655029297,
      "rewards/rejected": -17.315229415893555,
      "step": 2160
    },
    {
      "epoch": 4.9927766541462,
      "eval_logits/chosen": -0.14757364988327026,
      "eval_logits/rejected": -0.11364421248435974,
      "eval_logps/chosen": -141.49264526367188,
      "eval_logps/rejected": -155.7095184326172,
      "eval_loss": 1.4373149871826172,
      "eval_nll_loss": 0.17254449427127838,
      "eval_rewards/accuracies": 0.654347836971283,
      "eval_rewards/chosen": -14.149263381958008,
      "eval_rewards/margins": 1.4216874837875366,
      "eval_rewards/rejected": -15.570951461791992,
      "eval_runtime": 76.6761,
      "eval_samples_per_second": 23.814,
      "eval_steps_per_second": 1.5,
      "step": 2160
    },
    {
      "epoch": 4.9927766541462,
      "step": 2160,
      "total_flos": 0.0,
      "train_loss": 0.5995175864961412,
      "train_runtime": 46944.6998,
      "train_samples_per_second": 5.898,
      "train_steps_per_second": 0.046
    }
  ],
  "logging_steps": 10,
  "max_steps": 2160,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}