|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.491866769945778, |
|
"eval_steps": 500, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.061967467079783116, |
|
"grad_norm": 0.6070870757102966, |
|
"learning_rate": 4e-05, |
|
"logits/chosen": -2.0001754760742188, |
|
"logits/rejected": -1.449440598487854, |
|
"logps/chosen": -374.65521240234375, |
|
"logps/rejected": -215.3085479736328, |
|
"loss": 1.007, |
|
"rewards/accuracies": 0.44062501192092896, |
|
"rewards/chosen": -0.3046182096004486, |
|
"rewards/margins": -0.20184263586997986, |
|
"rewards/rejected": -0.10277555137872696, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12393493415956623, |
|
"grad_norm": 0.5136411190032959, |
|
"learning_rate": 8e-05, |
|
"logits/chosen": -2.083824872970581, |
|
"logits/rejected": -1.584017038345337, |
|
"logps/chosen": -341.329833984375, |
|
"logps/rejected": -208.3067169189453, |
|
"loss": 0.1907, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.0636544227600098, |
|
"rewards/margins": 2.9626474380493164, |
|
"rewards/rejected": -0.8989933133125305, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.18590240123934934, |
|
"grad_norm": 0.18788862228393555, |
|
"learning_rate": 0.00012, |
|
"logits/chosen": -2.0708529949188232, |
|
"logits/rejected": -1.5524569749832153, |
|
"logps/chosen": -329.73193359375, |
|
"logps/rejected": -221.080078125, |
|
"loss": 0.0732, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.1646170616149902, |
|
"rewards/margins": 4.800443649291992, |
|
"rewards/rejected": -2.635826826095581, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.24786986831913246, |
|
"grad_norm": 0.2149907350540161, |
|
"learning_rate": 0.00016, |
|
"logits/chosen": -1.964525580406189, |
|
"logits/rejected": -1.425443172454834, |
|
"logps/chosen": -337.01165771484375, |
|
"logps/rejected": -236.92935180664062, |
|
"loss": 0.0384, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 1.6233104467391968, |
|
"rewards/margins": 6.25473690032959, |
|
"rewards/rejected": -4.631426811218262, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.30983733539891556, |
|
"grad_norm": 0.13132674992084503, |
|
"learning_rate": 0.0002, |
|
"logits/chosen": -1.8194172382354736, |
|
"logits/rejected": -1.3340699672698975, |
|
"logps/chosen": -329.0172424316406, |
|
"logps/rejected": -260.6822814941406, |
|
"loss": 0.024, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 0.2860015332698822, |
|
"rewards/margins": 7.288111686706543, |
|
"rewards/rejected": -7.002110958099365, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3718048024786987, |
|
"grad_norm": 0.06768889725208282, |
|
"learning_rate": 0.00019999177886783194, |
|
"logits/chosen": -1.818981409072876, |
|
"logits/rejected": -1.3484697341918945, |
|
"logps/chosen": -359.87005615234375, |
|
"logps/rejected": -294.05047607421875, |
|
"loss": 0.021, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 0.2960149049758911, |
|
"rewards/margins": 8.185277938842773, |
|
"rewards/rejected": -7.889264106750488, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4337722695584818, |
|
"grad_norm": 0.00373012013733387, |
|
"learning_rate": 0.000199967116823068, |
|
"logits/chosen": -1.747314453125, |
|
"logits/rejected": -1.209826946258545, |
|
"logps/chosen": -356.72686767578125, |
|
"logps/rejected": -287.92205810546875, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20157980918884277, |
|
"rewards/margins": 8.92736530303955, |
|
"rewards/rejected": -8.725785255432129, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.4957397366382649, |
|
"grad_norm": 0.08832018822431564, |
|
"learning_rate": 0.00019992601792070679, |
|
"logits/chosen": -1.760593056678772, |
|
"logits/rejected": -1.227081060409546, |
|
"logps/chosen": -359.7059326171875, |
|
"logps/rejected": -307.3652648925781, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.5851167440414429, |
|
"rewards/margins": 9.88296890258789, |
|
"rewards/rejected": -10.468085289001465, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.557707203718048, |
|
"grad_norm": 0.12635135650634766, |
|
"learning_rate": 0.00019986848891833845, |
|
"logits/chosen": -1.6951453685760498, |
|
"logits/rejected": -1.1247837543487549, |
|
"logps/chosen": -369.36383056640625, |
|
"logps/rejected": -313.21380615234375, |
|
"loss": 0.0159, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.165026903152466, |
|
"rewards/margins": 9.382209777832031, |
|
"rewards/rejected": -11.547235488891602, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.6196746707978311, |
|
"grad_norm": 0.5119428038597107, |
|
"learning_rate": 0.00019979453927503364, |
|
"logits/chosen": -1.5557712316513062, |
|
"logits/rejected": -0.9883753657341003, |
|
"logps/chosen": -378.3529357910156, |
|
"logps/rejected": -338.2301330566406, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -1.9073299169540405, |
|
"rewards/margins": 10.27137565612793, |
|
"rewards/rejected": -12.178706169128418, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6816421378776143, |
|
"grad_norm": 0.012499742209911346, |
|
"learning_rate": 0.0001997041811497882, |
|
"logits/chosen": -1.639301061630249, |
|
"logits/rejected": -1.059734582901001, |
|
"logps/chosen": -403.56439208984375, |
|
"logps/rejected": -362.4933776855469, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.50722599029541, |
|
"rewards/margins": 11.781638145446777, |
|
"rewards/rejected": -16.288862228393555, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.7436096049573974, |
|
"grad_norm": 0.015822602435946465, |
|
"learning_rate": 0.00019959742939952392, |
|
"logits/chosen": -1.801640510559082, |
|
"logits/rejected": -1.2558636665344238, |
|
"logps/chosen": -358.8158264160156, |
|
"logps/rejected": -329.281494140625, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -1.591296911239624, |
|
"rewards/margins": 11.404090881347656, |
|
"rewards/rejected": -12.995388984680176, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8055770720371804, |
|
"grad_norm": 0.06576687842607498, |
|
"learning_rate": 0.00019947430157664576, |
|
"logits/chosen": -1.816361427307129, |
|
"logits/rejected": -1.3142831325531006, |
|
"logps/chosen": -375.107421875, |
|
"logps/rejected": -361.25567626953125, |
|
"loss": 0.0121, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -2.420842409133911, |
|
"rewards/margins": 11.270395278930664, |
|
"rewards/rejected": -13.691238403320312, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.8675445391169636, |
|
"grad_norm": 0.01211523823440075, |
|
"learning_rate": 0.00019933481792615583, |
|
"logits/chosen": -1.7951005697250366, |
|
"logits/rejected": -1.256089448928833, |
|
"logps/chosen": -363.334228515625, |
|
"logps/rejected": -335.49615478515625, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.6655162572860718, |
|
"rewards/margins": 11.434516906738281, |
|
"rewards/rejected": -13.1000337600708, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.9295120061967467, |
|
"grad_norm": 0.005867226514965296, |
|
"learning_rate": 0.0001991790013823246, |
|
"logits/chosen": -1.8247705698013306, |
|
"logits/rejected": -1.2836697101593018, |
|
"logps/chosen": -373.73175048828125, |
|
"logps/rejected": -328.99371337890625, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.960078239440918, |
|
"rewards/margins": 11.281866073608398, |
|
"rewards/rejected": -13.241943359375, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9914794732765299, |
|
"grad_norm": 0.11168529838323593, |
|
"learning_rate": 0.0001990068775649202, |
|
"logits/chosen": -1.8314838409423828, |
|
"logits/rejected": -1.3281538486480713, |
|
"logps/chosen": -362.94549560546875, |
|
"logps/rejected": -310.90692138671875, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.7653158903121948, |
|
"rewards/margins": 10.92064094543457, |
|
"rewards/rejected": -11.685956001281738, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.053446940356313, |
|
"grad_norm": 0.053166139870882034, |
|
"learning_rate": 0.00019881847477499557, |
|
"logits/chosen": -1.8288739919662476, |
|
"logits/rejected": -1.2687069177627563, |
|
"logps/chosen": -379.93914794921875, |
|
"logps/rejected": -346.6662902832031, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3435510993003845, |
|
"rewards/margins": 12.371174812316895, |
|
"rewards/rejected": -12.714726448059082, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.115414407436096, |
|
"grad_norm": 0.007846315391361713, |
|
"learning_rate": 0.0001986138239902355, |
|
"logits/chosen": -1.8146957159042358, |
|
"logits/rejected": -1.1931467056274414, |
|
"logps/chosen": -361.128173828125, |
|
"logps/rejected": -333.5379333496094, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.7167718410491943, |
|
"rewards/margins": 13.46613597869873, |
|
"rewards/rejected": -14.182907104492188, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.1773818745158793, |
|
"grad_norm": 0.0029342020861804485, |
|
"learning_rate": 0.00019839295885986296, |
|
"logits/chosen": -1.8402125835418701, |
|
"logits/rejected": -1.3026095628738403, |
|
"logps/chosen": -367.6770935058594, |
|
"logps/rejected": -334.61505126953125, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.48788753151893616, |
|
"rewards/margins": 12.355894088745117, |
|
"rewards/rejected": -12.843780517578125, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.2393493415956622, |
|
"grad_norm": 0.0005422068061307073, |
|
"learning_rate": 0.00019815591569910654, |
|
"logits/chosen": -1.781711220741272, |
|
"logits/rejected": -1.2187694311141968, |
|
"logps/chosen": -368.02130126953125, |
|
"logps/rejected": -336.0605163574219, |
|
"loss": 0.004, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -0.474712073802948, |
|
"rewards/margins": 13.070175170898438, |
|
"rewards/rejected": -13.544886589050293, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3013168086754454, |
|
"grad_norm": 0.004247570876032114, |
|
"learning_rate": 0.0001979027334832293, |
|
"logits/chosen": -1.729142189025879, |
|
"logits/rejected": -1.1420295238494873, |
|
"logps/chosen": -363.62261962890625, |
|
"logps/rejected": -350.509765625, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9223267436027527, |
|
"rewards/margins": 14.022272109985352, |
|
"rewards/rejected": -14.944600105285645, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.3632842757552286, |
|
"grad_norm": 0.025411546230316162, |
|
"learning_rate": 0.00019763345384112043, |
|
"logits/chosen": -1.6916519403457642, |
|
"logits/rejected": -1.1293952465057373, |
|
"logps/chosen": -368.69122314453125, |
|
"logps/rejected": -357.363037109375, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3415491580963135, |
|
"rewards/margins": 13.441192626953125, |
|
"rewards/rejected": -14.782742500305176, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.4252517428350115, |
|
"grad_norm": 0.023552559316158295, |
|
"learning_rate": 0.00019734812104845047, |
|
"logits/chosen": -1.6404588222503662, |
|
"logits/rejected": -1.0976492166519165, |
|
"logps/chosen": -358.5830993652344, |
|
"logps/rejected": -323.82977294921875, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1879071146249771, |
|
"rewards/margins": 11.893779754638672, |
|
"rewards/rejected": -12.081686019897461, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.4872192099147947, |
|
"grad_norm": 0.04839726537466049, |
|
"learning_rate": 0.0001970467820203915, |
|
"logits/chosen": -1.4514319896697998, |
|
"logits/rejected": -0.7945712208747864, |
|
"logps/chosen": -395.62109375, |
|
"logps/rejected": -361.99224853515625, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -2.660977602005005, |
|
"rewards/margins": 13.56675910949707, |
|
"rewards/rejected": -16.227737426757812, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.549186676994578, |
|
"grad_norm": 0.04717102646827698, |
|
"learning_rate": 0.00019672948630390294, |
|
"logits/chosen": -1.6030662059783936, |
|
"logits/rejected": -1.008603811264038, |
|
"logps/chosen": -382.2178955078125, |
|
"logps/rejected": -384.981201171875, |
|
"loss": 0.0185, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -3.418046474456787, |
|
"rewards/margins": 14.233471870422363, |
|
"rewards/rejected": -17.65151596069336, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.6111541440743609, |
|
"grad_norm": 0.022282173857092857, |
|
"learning_rate": 0.00019639628606958533, |
|
"logits/chosen": -1.943267822265625, |
|
"logits/rejected": -1.5064051151275635, |
|
"logps/chosen": -350.5743408203125, |
|
"logps/rejected": -292.48321533203125, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -0.508022129535675, |
|
"rewards/margins": 10.412274360656738, |
|
"rewards/rejected": -10.920295715332031, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.673121611154144, |
|
"grad_norm": 0.009392939507961273, |
|
"learning_rate": 0.00019604723610310194, |
|
"logits/chosen": -1.932124376296997, |
|
"logits/rejected": -1.507216215133667, |
|
"logps/chosen": -366.7988586425781, |
|
"logps/rejected": -342.846923828125, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8672822713851929, |
|
"rewards/margins": 11.667869567871094, |
|
"rewards/rejected": -12.535151481628418, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.7350890782339272, |
|
"grad_norm": 0.008884243667125702, |
|
"learning_rate": 0.00019568239379617088, |
|
"logits/chosen": -1.8822323083877563, |
|
"logits/rejected": -1.4790470600128174, |
|
"logps/chosen": -364.321044921875, |
|
"logps/rejected": -341.40081787109375, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -1.8236000537872314, |
|
"rewards/margins": 12.299530982971191, |
|
"rewards/rejected": -14.123130798339844, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.7970565453137102, |
|
"grad_norm": 0.0044061969965696335, |
|
"learning_rate": 0.00019530181913712872, |
|
"logits/chosen": -1.926490068435669, |
|
"logits/rejected": -1.4624470472335815, |
|
"logps/chosen": -372.48468017578125, |
|
"logps/rejected": -331.5034484863281, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -1.4063794612884521, |
|
"rewards/margins": 12.16389274597168, |
|
"rewards/rejected": -13.570272445678711, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.8590240123934936, |
|
"grad_norm": 0.028566114604473114, |
|
"learning_rate": 0.00019490557470106686, |
|
"logits/chosen": -1.92436945438385, |
|
"logits/rejected": -1.499299168586731, |
|
"logps/chosen": -355.2225646972656, |
|
"logps/rejected": -351.27313232421875, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.2374690771102905, |
|
"rewards/margins": 13.03515338897705, |
|
"rewards/rejected": -14.272623062133789, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.9209914794732765, |
|
"grad_norm": 0.006185224745422602, |
|
"learning_rate": 0.00019449372563954293, |
|
"logits/chosen": -1.9587417840957642, |
|
"logits/rejected": -1.4495702981948853, |
|
"logps/chosen": -383.0813903808594, |
|
"logps/rejected": -355.744873046875, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4742207527160645, |
|
"rewards/margins": 13.494425773620605, |
|
"rewards/rejected": -15.968646049499512, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.9829589465530595, |
|
"grad_norm": 0.006004327442497015, |
|
"learning_rate": 0.00019406633966986828, |
|
"logits/chosen": -1.9453758001327515, |
|
"logits/rejected": -1.512027621269226, |
|
"logps/chosen": -392.6808166503906, |
|
"logps/rejected": -378.18316650390625, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.3614554405212402, |
|
"rewards/margins": 13.526113510131836, |
|
"rewards/rejected": -15.88757038116455, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.044926413632843, |
|
"grad_norm": 0.013266593217849731, |
|
"learning_rate": 0.00019362348706397373, |
|
"logits/chosen": -1.9494597911834717, |
|
"logits/rejected": -1.4765260219573975, |
|
"logps/chosen": -373.5834045410156, |
|
"logps/rejected": -355.810546875, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -2.2433362007141113, |
|
"rewards/margins": 13.272119522094727, |
|
"rewards/rejected": -15.51545524597168, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.106893880712626, |
|
"grad_norm": 0.0013421621406450868, |
|
"learning_rate": 0.0001931652406368554, |
|
"logits/chosen": -1.879929542541504, |
|
"logits/rejected": -1.4265925884246826, |
|
"logps/chosen": -377.5626220703125, |
|
"logps/rejected": -365.1024475097656, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -2.054849624633789, |
|
"rewards/margins": 14.068676948547363, |
|
"rewards/rejected": -16.123525619506836, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.168861347792409, |
|
"grad_norm": 0.0016059954650700092, |
|
"learning_rate": 0.0001926916757346022, |
|
"logits/chosen": -1.8783481121063232, |
|
"logits/rejected": -1.4017314910888672, |
|
"logps/chosen": -375.7680969238281, |
|
"logps/rejected": -356.9335021972656, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -2.0512871742248535, |
|
"rewards/margins": 14.513456344604492, |
|
"rewards/rejected": -16.564743041992188, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.230828814872192, |
|
"grad_norm": 0.0020687805954366922, |
|
"learning_rate": 0.00019220287022200707, |
|
"logits/chosen": -1.8722127676010132, |
|
"logits/rejected": -1.4170135259628296, |
|
"logps/chosen": -360.9228515625, |
|
"logps/rejected": -376.93304443359375, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -2.443851947784424, |
|
"rewards/margins": 15.007545471191406, |
|
"rewards/rejected": -17.451396942138672, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.292796281951975, |
|
"grad_norm": 0.03182324767112732, |
|
"learning_rate": 0.00019169890446976454, |
|
"logits/chosen": -1.8520162105560303, |
|
"logits/rejected": -1.316450834274292, |
|
"logps/chosen": -392.74285888671875, |
|
"logps/rejected": -379.98138427734375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -2.4773547649383545, |
|
"rewards/margins": 15.281835556030273, |
|
"rewards/rejected": -17.75918960571289, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.3547637490317586, |
|
"grad_norm": 0.015935391187667847, |
|
"learning_rate": 0.0001911798613412557, |
|
"logits/chosen": -1.8732004165649414, |
|
"logits/rejected": -1.374529480934143, |
|
"logps/chosen": -386.89178466796875, |
|
"logps/rejected": -386.22894287109375, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -2.536558151245117, |
|
"rewards/margins": 15.137763977050781, |
|
"rewards/rejected": -17.6743221282959, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.4167312161115415, |
|
"grad_norm": 0.00028358056442812085, |
|
"learning_rate": 0.0001906458261789238, |
|
"logits/chosen": -1.8395631313323975, |
|
"logits/rejected": -1.3308550119400024, |
|
"logps/chosen": -388.93792724609375, |
|
"logps/rejected": -391.17559814453125, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -2.6551461219787598, |
|
"rewards/margins": 15.461560249328613, |
|
"rewards/rejected": -18.116708755493164, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.4786986831913245, |
|
"grad_norm": 0.001103501650504768, |
|
"learning_rate": 0.0001900968867902419, |
|
"logits/chosen": -1.8540499210357666, |
|
"logits/rejected": -1.3438807725906372, |
|
"logps/chosen": -397.89093017578125, |
|
"logps/rejected": -393.6608581542969, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -2.684976100921631, |
|
"rewards/margins": 15.562596321105957, |
|
"rewards/rejected": -18.247573852539062, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.5406661502711074, |
|
"grad_norm": 0.05029486119747162, |
|
"learning_rate": 0.0001895331334332753, |
|
"logits/chosen": -1.8151705265045166, |
|
"logits/rejected": -1.3103126287460327, |
|
"logps/chosen": -396.3746643066406, |
|
"logps/rejected": -391.5860900878906, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -3.1363155841827393, |
|
"rewards/margins": 15.38147258758545, |
|
"rewards/rejected": -18.51778793334961, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.602633617350891, |
|
"grad_norm": 0.0015266811242327094, |
|
"learning_rate": 0.0001889546588018412, |
|
"logits/chosen": -1.850388765335083, |
|
"logits/rejected": -1.3118959665298462, |
|
"logps/chosen": -381.0390319824219, |
|
"logps/rejected": -371.218505859375, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.7308974266052246, |
|
"rewards/margins": 15.474958419799805, |
|
"rewards/rejected": -18.205854415893555, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.664601084430674, |
|
"grad_norm": 0.010239909403026104, |
|
"learning_rate": 0.00018836155801026753, |
|
"logits/chosen": -1.8376766443252563, |
|
"logits/rejected": -1.337482213973999, |
|
"logps/chosen": -380.15032958984375, |
|
"logps/rejected": -385.6625061035156, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.8081612586975098, |
|
"rewards/margins": 15.317975997924805, |
|
"rewards/rejected": -18.12613868713379, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.726568551510457, |
|
"grad_norm": 0.005239796359091997, |
|
"learning_rate": 0.00018775392857775432, |
|
"logits/chosen": -1.8260116577148438, |
|
"logits/rejected": -1.3371708393096924, |
|
"logps/chosen": -386.72052001953125, |
|
"logps/rejected": -393.1973571777344, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.4543259143829346, |
|
"rewards/margins": 15.405393600463867, |
|
"rewards/rejected": -18.859722137451172, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.78853601859024, |
|
"grad_norm": 0.0014312748098745942, |
|
"learning_rate": 0.00018713187041233896, |
|
"logits/chosen": -1.8437349796295166, |
|
"logits/rejected": -1.295083999633789, |
|
"logps/chosen": -396.12713623046875, |
|
"logps/rejected": -400.5750427246094, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.4062328338623047, |
|
"rewards/margins": 17.027809143066406, |
|
"rewards/rejected": -20.434043884277344, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.850503485670023, |
|
"grad_norm": 0.03151211887598038, |
|
"learning_rate": 0.00018649548579446936, |
|
"logits/chosen": -1.8418632745742798, |
|
"logits/rejected": -1.3832991123199463, |
|
"logps/chosen": -387.4415588378906, |
|
"logps/rejected": -418.4268493652344, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -3.485564708709717, |
|
"rewards/margins": 15.658266067504883, |
|
"rewards/rejected": -19.14383316040039, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.9124709527498065, |
|
"grad_norm": 0.003437014762312174, |
|
"learning_rate": 0.00018584487936018661, |
|
"logits/chosen": -1.957241415977478, |
|
"logits/rejected": -1.4707096815109253, |
|
"logps/chosen": -370.52734375, |
|
"logps/rejected": -367.0068054199219, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7640680074691772, |
|
"rewards/margins": 14.591270446777344, |
|
"rewards/rejected": -16.3553409576416, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.9744384198295895, |
|
"grad_norm": 0.0018515066476538777, |
|
"learning_rate": 0.00018518015808392045, |
|
"logits/chosen": -1.8616878986358643, |
|
"logits/rejected": -1.3850669860839844, |
|
"logps/chosen": -370.74847412109375, |
|
"logps/rejected": -395.7770690917969, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.756985664367676, |
|
"rewards/margins": 15.77873420715332, |
|
"rewards/rejected": -18.53571891784668, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.0364058869093724, |
|
"grad_norm": 0.0055403695441782475, |
|
"learning_rate": 0.00018450143126090015, |
|
"logits/chosen": -1.9129266738891602, |
|
"logits/rejected": -1.4352341890335083, |
|
"logps/chosen": -378.54547119140625, |
|
"logps/rejected": -389.22955322265625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.454970359802246, |
|
"rewards/margins": 15.567869186401367, |
|
"rewards/rejected": -18.022838592529297, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.098373353989156, |
|
"grad_norm": 0.0003845282772090286, |
|
"learning_rate": 0.00018380881048918405, |
|
"logits/chosen": -1.955512285232544, |
|
"logits/rejected": -1.4428436756134033, |
|
"logps/chosen": -375.7381286621094, |
|
"logps/rejected": -373.1043701171875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9304916858673096, |
|
"rewards/margins": 15.572137832641602, |
|
"rewards/rejected": -17.502628326416016, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.1603408210689388, |
|
"grad_norm": 0.000813652528449893, |
|
"learning_rate": 0.00018310240965131041, |
|
"logits/chosen": -1.9499313831329346, |
|
"logits/rejected": -1.4106732606887817, |
|
"logps/chosen": -363.78314208984375, |
|
"logps/rejected": -364.62835693359375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.788172721862793, |
|
"rewards/margins": 15.584823608398438, |
|
"rewards/rejected": -17.372997283935547, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.2223082881487217, |
|
"grad_norm": 0.0015642641810700297, |
|
"learning_rate": 0.00018238234489557215, |
|
"logits/chosen": -1.9376710653305054, |
|
"logits/rejected": -1.4058828353881836, |
|
"logps/chosen": -391.0188903808594, |
|
"logps/rejected": -384.52716064453125, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -1.709324598312378, |
|
"rewards/margins": 16.003910064697266, |
|
"rewards/rejected": -17.713237762451172, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.284275755228505, |
|
"grad_norm": 0.013190961442887783, |
|
"learning_rate": 0.00018164873461691986, |
|
"logits/chosen": -1.9225285053253174, |
|
"logits/rejected": -1.4039231538772583, |
|
"logps/chosen": -389.7248840332031, |
|
"logps/rejected": -403.44891357421875, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.2535457611083984, |
|
"rewards/margins": 17.14788818359375, |
|
"rewards/rejected": -19.401432037353516, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.346243222308288, |
|
"grad_norm": 0.0009441258735023439, |
|
"learning_rate": 0.00018090169943749476, |
|
"logits/chosen": -1.9266620874404907, |
|
"logits/rejected": -1.3820419311523438, |
|
"logps/chosen": -377.3229064941406, |
|
"logps/rejected": -394.3813171386719, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -2.6834962368011475, |
|
"rewards/margins": 16.853666305541992, |
|
"rewards/rejected": -19.537160873413086, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 3.4082106893880715, |
|
"grad_norm": 0.000891213770955801, |
|
"learning_rate": 0.00018014136218679567, |
|
"logits/chosen": -1.8898261785507202, |
|
"logits/rejected": -1.3582581281661987, |
|
"logps/chosen": -367.8475341796875, |
|
"logps/rejected": -381.94219970703125, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.8650197982788086, |
|
"rewards/margins": 16.576953887939453, |
|
"rewards/rejected": -19.441974639892578, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.4701781564678544, |
|
"grad_norm": 0.0021270292345434427, |
|
"learning_rate": 0.00017936784788148328, |
|
"logits/chosen": -1.9054046869277954, |
|
"logits/rejected": -1.3137685060501099, |
|
"logps/chosen": -396.55718994140625, |
|
"logps/rejected": -399.8603515625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.9427146911621094, |
|
"rewards/margins": 17.294252395629883, |
|
"rewards/rejected": -20.236968994140625, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 3.5321456235476374, |
|
"grad_norm": 0.0006443614838644862, |
|
"learning_rate": 0.00017858128370482426, |
|
"logits/chosen": -1.8784294128417969, |
|
"logits/rejected": -1.3266098499298096, |
|
"logps/chosen": -376.5830993652344, |
|
"logps/rejected": -384.6981506347656, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.870404005050659, |
|
"rewards/margins": 17.322202682495117, |
|
"rewards/rejected": -20.192609786987305, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 3.5941130906274203, |
|
"grad_norm": 0.0011427829740568995, |
|
"learning_rate": 0.00017778179898577973, |
|
"logits/chosen": -1.8605209589004517, |
|
"logits/rejected": -1.3551753759384155, |
|
"logps/chosen": -393.83099365234375, |
|
"logps/rejected": -431.01824951171875, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.188037872314453, |
|
"rewards/margins": 17.58969497680664, |
|
"rewards/rejected": -21.77773094177246, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.6560805577072037, |
|
"grad_norm": 0.00015023932792246342, |
|
"learning_rate": 0.00017696952517774062, |
|
"logits/chosen": -1.8713442087173462, |
|
"logits/rejected": -1.2884734869003296, |
|
"logps/chosen": -389.5274658203125, |
|
"logps/rejected": -406.44696044921875, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -3.2542433738708496, |
|
"rewards/margins": 18.175609588623047, |
|
"rewards/rejected": -21.429855346679688, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.7180480247869867, |
|
"grad_norm": 0.0034171934239566326, |
|
"learning_rate": 0.00017614459583691346, |
|
"logits/chosen": -1.8342435359954834, |
|
"logits/rejected": -1.33168625831604, |
|
"logps/chosen": -392.7457275390625, |
|
"logps/rejected": -424.7430725097656, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.033926963806152, |
|
"rewards/margins": 17.532773971557617, |
|
"rewards/rejected": -21.566701889038086, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.78001549186677, |
|
"grad_norm": 0.00014497939264401793, |
|
"learning_rate": 0.00017530714660036112, |
|
"logits/chosen": -1.8120412826538086, |
|
"logits/rejected": -1.2837426662445068, |
|
"logps/chosen": -400.38055419921875, |
|
"logps/rejected": -432.98175048828125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.632486343383789, |
|
"rewards/margins": 18.09763526916504, |
|
"rewards/rejected": -21.730119705200195, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.841982958946553, |
|
"grad_norm": 0.00035277256392873824, |
|
"learning_rate": 0.0001744573151637007, |
|
"logits/chosen": -1.7961149215698242, |
|
"logits/rejected": -1.2880661487579346, |
|
"logps/chosen": -389.3721618652344, |
|
"logps/rejected": -458.435546875, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.226949214935303, |
|
"rewards/margins": 18.70314598083496, |
|
"rewards/rejected": -22.930095672607422, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.903950426026336, |
|
"grad_norm": 0.0018203147919848561, |
|
"learning_rate": 0.0001735952412584635, |
|
"logits/chosen": -1.8189284801483154, |
|
"logits/rejected": -1.2755413055419922, |
|
"logps/chosen": -403.92608642578125, |
|
"logps/rejected": -437.57470703125, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.302323818206787, |
|
"rewards/margins": 18.439044952392578, |
|
"rewards/rejected": -22.741369247436523, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.9659178931061194, |
|
"grad_norm": 0.000810753321275115, |
|
"learning_rate": 0.00017272106662911973, |
|
"logits/chosen": -1.8001739978790283, |
|
"logits/rejected": -1.2190439701080322, |
|
"logps/chosen": -392.6038513183594, |
|
"logps/rejected": -409.79754638671875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.5174388885498047, |
|
"rewards/margins": 18.15955924987793, |
|
"rewards/rejected": -21.676998138427734, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.027885360185903, |
|
"grad_norm": 0.0008877617656253278, |
|
"learning_rate": 0.00017183493500977278, |
|
"logits/chosen": -1.7996867895126343, |
|
"logits/rejected": -1.2403078079223633, |
|
"logps/chosen": -376.8688659667969, |
|
"logps/rejected": -401.3122863769531, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -3.8793225288391113, |
|
"rewards/margins": 17.706012725830078, |
|
"rewards/rejected": -21.58533477783203, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.089852827265686, |
|
"grad_norm": 0.0007201443077065051, |
|
"learning_rate": 0.0001709369921005258, |
|
"logits/chosen": -1.7817294597625732, |
|
"logits/rejected": -1.3144575357437134, |
|
"logps/chosen": -362.8156433105469, |
|
"logps/rejected": -421.5276794433594, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -3.907405376434326, |
|
"rewards/margins": 17.486907958984375, |
|
"rewards/rejected": -21.394317626953125, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 4.151820294345469, |
|
"grad_norm": 0.0004134229675401002, |
|
"learning_rate": 0.00017002738554352552, |
|
"logits/chosen": -1.7647602558135986, |
|
"logits/rejected": -1.2397964000701904, |
|
"logps/chosen": -400.63525390625, |
|
"logps/rejected": -434.27734375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.650538444519043, |
|
"rewards/margins": 17.86612319946289, |
|
"rewards/rejected": -22.516660690307617, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 4.213787761425252, |
|
"grad_norm": 0.0018414207734167576, |
|
"learning_rate": 0.00016910626489868649, |
|
"logits/chosen": -1.8098886013031006, |
|
"logits/rejected": -1.2557048797607422, |
|
"logps/chosen": -403.9068908691406, |
|
"logps/rejected": -441.5738220214844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.8825366497039795, |
|
"rewards/margins": 19.2824764251709, |
|
"rewards/rejected": -23.165014266967773, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 4.275755228505035, |
|
"grad_norm": 0.000604189292062074, |
|
"learning_rate": 0.00016817378161909996, |
|
"logits/chosen": -1.7331501245498657, |
|
"logits/rejected": -1.1988348960876465, |
|
"logps/chosen": -379.48004150390625, |
|
"logps/rejected": -416.23504638671875, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -4.858603477478027, |
|
"rewards/margins": 17.692523956298828, |
|
"rewards/rejected": -22.551128387451172, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 4.337722695584818, |
|
"grad_norm": 0.0018184883520007133, |
|
"learning_rate": 0.0001672300890261317, |
|
"logits/chosen": -1.786969780921936, |
|
"logits/rejected": -1.1631317138671875, |
|
"logps/chosen": -399.63836669921875, |
|
"logps/rejected": -406.0413513183594, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.497194766998291, |
|
"rewards/margins": 17.606014251708984, |
|
"rewards/rejected": -22.103206634521484, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.3996901626646014, |
|
"grad_norm": 0.0004817396984435618, |
|
"learning_rate": 0.0001662753422842123, |
|
"logits/chosen": -1.803607702255249, |
|
"logits/rejected": -1.2023392915725708, |
|
"logps/chosen": -397.8926086425781, |
|
"logps/rejected": -415.9464416503906, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.3522844314575195, |
|
"rewards/margins": 18.28469467163086, |
|
"rewards/rejected": -22.636978149414062, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 4.461657629744384, |
|
"grad_norm": 0.0003521572216413915, |
|
"learning_rate": 0.00016530969837532487, |
|
"logits/chosen": -1.745550513267517, |
|
"logits/rejected": -1.2345880270004272, |
|
"logps/chosen": -398.3353271484375, |
|
"logps/rejected": -455.84991455078125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.580657005310059, |
|
"rewards/margins": 18.520645141601562, |
|
"rewards/rejected": -23.101301193237305, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 4.523625096824167, |
|
"grad_norm": 0.001398236840032041, |
|
"learning_rate": 0.00016433331607319343, |
|
"logits/chosen": -1.7653003931045532, |
|
"logits/rejected": -1.2409374713897705, |
|
"logps/chosen": -390.4782409667969, |
|
"logps/rejected": -445.02203369140625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.244819641113281, |
|
"rewards/margins": 19.066150665283203, |
|
"rewards/rejected": -23.31096839904785, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 4.58559256390395, |
|
"grad_norm": 0.0006393153453245759, |
|
"learning_rate": 0.00016334635591717703, |
|
"logits/chosen": -1.7738897800445557, |
|
"logits/rejected": -1.2459341287612915, |
|
"logps/chosen": -405.1599426269531, |
|
"logps/rejected": -465.34796142578125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.869115114212036, |
|
"rewards/margins": 20.113529205322266, |
|
"rewards/rejected": -23.98264503479004, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 4.647560030983733, |
|
"grad_norm": 0.0002729636325966567, |
|
"learning_rate": 0.00016234898018587337, |
|
"logits/chosen": -1.7716586589813232, |
|
"logits/rejected": -1.156842589378357, |
|
"logps/chosen": -400.9200439453125, |
|
"logps/rejected": -419.4234924316406, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.553537368774414, |
|
"rewards/margins": 18.427448272705078, |
|
"rewards/rejected": -22.980987548828125, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.709527498063517, |
|
"grad_norm": 0.0016045222291722894, |
|
"learning_rate": 0.00016134135287043669, |
|
"logits/chosen": -1.7796188592910767, |
|
"logits/rejected": -1.1779518127441406, |
|
"logps/chosen": -407.48773193359375, |
|
"logps/rejected": -439.03143310546875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.231381416320801, |
|
"rewards/margins": 19.530107498168945, |
|
"rewards/rejected": -23.761486053466797, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 4.7714949651433, |
|
"grad_norm": 0.0001898371265269816, |
|
"learning_rate": 0.00016032363964761363, |
|
"logits/chosen": -1.7506084442138672, |
|
"logits/rejected": -1.1158758401870728, |
|
"logps/chosen": -412.0704650878906, |
|
"logps/rejected": -419.58477783203125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.735566139221191, |
|
"rewards/margins": 18.557144165039062, |
|
"rewards/rejected": -23.292709350585938, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 4.833462432223083, |
|
"grad_norm": 0.0011102559510618448, |
|
"learning_rate": 0.00015929600785250257, |
|
"logits/chosen": -1.772351861000061, |
|
"logits/rejected": -1.199371576309204, |
|
"logps/chosen": -411.6983337402344, |
|
"logps/rejected": -456.08526611328125, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.344552516937256, |
|
"rewards/margins": 19.66854476928711, |
|
"rewards/rejected": -24.01309585571289, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 4.895429899302866, |
|
"grad_norm": 0.0002147419872926548, |
|
"learning_rate": 0.0001582586264510396, |
|
"logits/chosen": -1.7624610662460327, |
|
"logits/rejected": -1.1555306911468506, |
|
"logps/chosen": -392.86846923828125, |
|
"logps/rejected": -411.6356506347656, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.8805503845214844, |
|
"rewards/margins": 18.482906341552734, |
|
"rewards/rejected": -22.36345672607422, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 4.957397366382649, |
|
"grad_norm": 0.00014843855751678348, |
|
"learning_rate": 0.00015721166601221698, |
|
"logits/chosen": -1.7433449029922485, |
|
"logits/rejected": -1.1605427265167236, |
|
"logps/chosen": -402.5615539550781, |
|
"logps/rejected": -437.72601318359375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.379772186279297, |
|
"rewards/margins": 19.26140022277832, |
|
"rewards/rejected": -23.641170501708984, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.019364833462432, |
|
"grad_norm": 9.896748815663159e-05, |
|
"learning_rate": 0.0001561552986800375, |
|
"logits/chosen": -1.7666635513305664, |
|
"logits/rejected": -1.2081592082977295, |
|
"logps/chosen": -409.02685546875, |
|
"logps/rejected": -462.6644592285156, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.824324607849121, |
|
"rewards/margins": 19.418132781982422, |
|
"rewards/rejected": -24.242456436157227, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 5.081332300542216, |
|
"grad_norm": 6.193404988152906e-05, |
|
"learning_rate": 0.00015508969814521025, |
|
"logits/chosen": -1.7530428171157837, |
|
"logits/rejected": -1.2155699729919434, |
|
"logps/chosen": -396.701171875, |
|
"logps/rejected": -438.2998046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.757896423339844, |
|
"rewards/margins": 18.720035552978516, |
|
"rewards/rejected": -23.47793197631836, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 5.143299767621999, |
|
"grad_norm": 0.0005012938636355102, |
|
"learning_rate": 0.00015401503961659204, |
|
"logits/chosen": -1.76808762550354, |
|
"logits/rejected": -1.2039562463760376, |
|
"logps/chosen": -416.18133544921875, |
|
"logps/rejected": -471.65032958984375, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -4.3714799880981445, |
|
"rewards/margins": 20.104217529296875, |
|
"rewards/rejected": -24.475696563720703, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 5.205267234701782, |
|
"grad_norm": 0.0007204354042187333, |
|
"learning_rate": 0.00015293149979237876, |
|
"logits/chosen": -1.700727105140686, |
|
"logits/rejected": -1.1688693761825562, |
|
"logps/chosen": -395.04620361328125, |
|
"logps/rejected": -459.3890686035156, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.376019477844238, |
|
"rewards/margins": 19.267929077148438, |
|
"rewards/rejected": -24.643945693969727, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 5.267234701781565, |
|
"grad_norm": 0.00012067196075804532, |
|
"learning_rate": 0.00015183925683105254, |
|
"logits/chosen": -1.7348114252090454, |
|
"logits/rejected": -1.1479172706604004, |
|
"logps/chosen": -411.1114807128906, |
|
"logps/rejected": -467.02777099609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.358091354370117, |
|
"rewards/margins": 20.035839080810547, |
|
"rewards/rejected": -24.393932342529297, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.329202168861348, |
|
"grad_norm": 0.0015901889419183135, |
|
"learning_rate": 0.00015073849032208822, |
|
"logits/chosen": -1.7161178588867188, |
|
"logits/rejected": -1.1550828218460083, |
|
"logps/chosen": -408.5069885253906, |
|
"logps/rejected": -455.2245178222656, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.192176342010498, |
|
"rewards/margins": 19.474624633789062, |
|
"rewards/rejected": -24.66680145263672, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 5.3911696359411305, |
|
"grad_norm": 2.9804143196088262e-05, |
|
"learning_rate": 0.00014962938125642503, |
|
"logits/chosen": -1.7266225814819336, |
|
"logits/rejected": -1.1720420122146606, |
|
"logps/chosen": -404.70721435546875, |
|
"logps/rejected": -468.11956787109375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.117176532745361, |
|
"rewards/margins": 19.882728576660156, |
|
"rewards/rejected": -24.99990463256836, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 5.453137103020914, |
|
"grad_norm": 0.001581120421178639, |
|
"learning_rate": 0.00014851211199670721, |
|
"logits/chosen": -1.7630701065063477, |
|
"logits/rejected": -1.1630027294158936, |
|
"logps/chosen": -387.80364990234375, |
|
"logps/rejected": -445.5340270996094, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -4.650803565979004, |
|
"rewards/margins": 19.620697021484375, |
|
"rewards/rejected": -24.271501541137695, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 5.515104570100697, |
|
"grad_norm": 7.492147415177897e-05, |
|
"learning_rate": 0.00014738686624729986, |
|
"logits/chosen": -1.7199184894561768, |
|
"logits/rejected": -1.1519477367401123, |
|
"logps/chosen": -398.6278991699219, |
|
"logps/rejected": -449.28826904296875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.650136947631836, |
|
"rewards/margins": 19.344139099121094, |
|
"rewards/rejected": -23.99427604675293, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 5.57707203718048, |
|
"grad_norm": 0.0007189544849097729, |
|
"learning_rate": 0.00014625382902408356, |
|
"logits/chosen": -1.7485740184783936, |
|
"logits/rejected": -1.15171217918396, |
|
"logps/chosen": -413.4642639160156, |
|
"logps/rejected": -454.82623291015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.690885543823242, |
|
"rewards/margins": 19.775279998779297, |
|
"rewards/rejected": -24.466161727905273, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.639039504260263, |
|
"grad_norm": 9.353666246170178e-05, |
|
"learning_rate": 0.00014511318662403347, |
|
"logits/chosen": -1.7578392028808594, |
|
"logits/rejected": -1.1830543279647827, |
|
"logps/chosen": -395.25433349609375, |
|
"logps/rejected": -461.00128173828125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.259980201721191, |
|
"rewards/margins": 20.097646713256836, |
|
"rewards/rejected": -24.35762596130371, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 5.701006971340046, |
|
"grad_norm": 0.00011017426731996238, |
|
"learning_rate": 0.00014396512659458824, |
|
"logits/chosen": -1.718340277671814, |
|
"logits/rejected": -1.1603585481643677, |
|
"logps/chosen": -397.50201416015625, |
|
"logps/rejected": -441.17120361328125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.123129844665527, |
|
"rewards/margins": 18.981271743774414, |
|
"rewards/rejected": -24.104402542114258, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 5.76297443841983, |
|
"grad_norm": 0.0007490446441806853, |
|
"learning_rate": 0.0001428098377028126, |
|
"logits/chosen": -1.7352231740951538, |
|
"logits/rejected": -1.1633882522583008, |
|
"logps/chosen": -395.93719482421875, |
|
"logps/rejected": -450.5420837402344, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.510663032531738, |
|
"rewards/margins": 20.08230972290039, |
|
"rewards/rejected": -24.59296989440918, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 5.824941905499613, |
|
"grad_norm": 0.002562998328357935, |
|
"learning_rate": 0.0001416475099043599, |
|
"logits/chosen": -1.7280263900756836, |
|
"logits/rejected": -1.0888252258300781, |
|
"logps/chosen": -383.5231628417969, |
|
"logps/rejected": -423.22735595703125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.361128330230713, |
|
"rewards/margins": 19.707561492919922, |
|
"rewards/rejected": -24.06869125366211, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 5.886909372579396, |
|
"grad_norm": 0.0003409655182622373, |
|
"learning_rate": 0.00014047833431223938, |
|
"logits/chosen": -1.7228466272354126, |
|
"logits/rejected": -1.1678210496902466, |
|
"logps/chosen": -427.7156677246094, |
|
"logps/rejected": -484.9002990722656, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.072082996368408, |
|
"rewards/margins": 19.94878387451172, |
|
"rewards/rejected": -25.0208683013916, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.948876839659179, |
|
"grad_norm": 3.485321212792769e-05, |
|
"learning_rate": 0.00013930250316539238, |
|
"logits/chosen": -1.7439708709716797, |
|
"logits/rejected": -1.1591265201568604, |
|
"logps/chosen": -409.28485107421875, |
|
"logps/rejected": -464.5729064941406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.592177867889404, |
|
"rewards/margins": 20.056758880615234, |
|
"rewards/rejected": -24.64893913269043, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 6.010844306738962, |
|
"grad_norm": 0.0024052930530160666, |
|
"learning_rate": 0.00013812020979708418, |
|
"logits/chosen": -1.766571044921875, |
|
"logits/rejected": -1.1335632801055908, |
|
"logps/chosen": -409.98095703125, |
|
"logps/rejected": -432.7437438964844, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.963695526123047, |
|
"rewards/margins": 19.679019927978516, |
|
"rewards/rejected": -24.642715454101562, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 6.072811773818745, |
|
"grad_norm": 7.735176041023806e-05, |
|
"learning_rate": 0.00013693164860311565, |
|
"logits/chosen": -1.7631984949111938, |
|
"logits/rejected": -1.1198147535324097, |
|
"logps/chosen": -398.9923400878906, |
|
"logps/rejected": -429.88861083984375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.000827312469482, |
|
"rewards/margins": 20.33033561706543, |
|
"rewards/rejected": -24.331165313720703, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 6.134779240898529, |
|
"grad_norm": 0.0003688503638841212, |
|
"learning_rate": 0.0001357370150098601, |
|
"logits/chosen": -1.7265870571136475, |
|
"logits/rejected": -1.1435579061508179, |
|
"logps/chosen": -390.2747497558594, |
|
"logps/rejected": -457.9873962402344, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.666455268859863, |
|
"rewards/margins": 20.30272102355957, |
|
"rewards/rejected": -24.969173431396484, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 6.196746707978312, |
|
"grad_norm": 0.0016685057198628783, |
|
"learning_rate": 0.00013453650544213076, |
|
"logits/chosen": -1.7364275455474854, |
|
"logits/rejected": -1.1212728023529053, |
|
"logps/chosen": -404.72869873046875, |
|
"logps/rejected": -440.9786071777344, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -4.581490993499756, |
|
"rewards/margins": 19.78643035888672, |
|
"rewards/rejected": -24.367919921875, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.258714175058095, |
|
"grad_norm": 0.00023198116105049849, |
|
"learning_rate": 0.00013333031729088419, |
|
"logits/chosen": -1.7448314428329468, |
|
"logits/rejected": -1.1462557315826416, |
|
"logps/chosen": -401.00048828125, |
|
"logps/rejected": -452.0621032714844, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.24946928024292, |
|
"rewards/margins": 20.46927833557129, |
|
"rewards/rejected": -24.718748092651367, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 6.3206816421378775, |
|
"grad_norm": 0.00022464637004304677, |
|
"learning_rate": 0.00013211864888076457, |
|
"logits/chosen": -1.691931962966919, |
|
"logits/rejected": -1.16156005859375, |
|
"logps/chosen": -417.93585205078125, |
|
"logps/rejected": -468.42791748046875, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -5.851279258728027, |
|
"rewards/margins": 19.037456512451172, |
|
"rewards/rejected": -24.888734817504883, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 6.3826491092176605, |
|
"grad_norm": 0.0001370076060993597, |
|
"learning_rate": 0.00013090169943749476, |
|
"logits/chosen": -1.7306629419326782, |
|
"logits/rejected": -1.16789972782135, |
|
"logps/chosen": -400.44989013671875, |
|
"logps/rejected": -461.5997009277344, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.793812274932861, |
|
"rewards/margins": 20.2277889251709, |
|
"rewards/rejected": -25.02159881591797, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 6.4446165762974434, |
|
"grad_norm": 0.0007584911654703319, |
|
"learning_rate": 0.00012967966905511906, |
|
"logits/chosen": -1.7538254261016846, |
|
"logits/rejected": -1.1523357629776, |
|
"logps/chosen": -400.55078125, |
|
"logps/rejected": -457.19439697265625, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.016867637634277, |
|
"rewards/margins": 20.043991088867188, |
|
"rewards/rejected": -25.06085777282715, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 6.506584043377227, |
|
"grad_norm": 0.00025258222012780607, |
|
"learning_rate": 0.00012845275866310324, |
|
"logits/chosen": -1.709283471107483, |
|
"logits/rejected": -1.1272356510162354, |
|
"logps/chosen": -393.4644775390625, |
|
"logps/rejected": -445.11932373046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.725881576538086, |
|
"rewards/margins": 20.157442092895508, |
|
"rewards/rejected": -24.88332176208496, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.56855151045701, |
|
"grad_norm": 0.0005373629392124712, |
|
"learning_rate": 0.00012722116999329712, |
|
"logits/chosen": -1.7319450378417969, |
|
"logits/rejected": -1.146323323249817, |
|
"logps/chosen": -400.94219970703125, |
|
"logps/rejected": -457.70294189453125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.707498073577881, |
|
"rewards/margins": 19.930648803710938, |
|
"rewards/rejected": -24.638147354125977, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 6.630518977536793, |
|
"grad_norm": 3.2575491786701605e-05, |
|
"learning_rate": 0.0001259851055467653, |
|
"logits/chosen": -1.7204310894012451, |
|
"logits/rejected": -1.1470435857772827, |
|
"logps/chosen": -407.14794921875, |
|
"logps/rejected": -463.16937255859375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.401209831237793, |
|
"rewards/margins": 19.731382369995117, |
|
"rewards/rejected": -25.132593154907227, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 6.692486444616576, |
|
"grad_norm": 4.120891753700562e-05, |
|
"learning_rate": 0.00012474476856049144, |
|
"logits/chosen": -1.758186936378479, |
|
"logits/rejected": -1.0516242980957031, |
|
"logps/chosen": -422.578125, |
|
"logps/rejected": -450.13360595703125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.05043888092041, |
|
"rewards/margins": 20.296903610229492, |
|
"rewards/rejected": -25.347341537475586, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 6.754453911696359, |
|
"grad_norm": 0.0018112401012331247, |
|
"learning_rate": 0.00012350036297396154, |
|
"logits/chosen": -1.7569530010223389, |
|
"logits/rejected": -1.1236534118652344, |
|
"logps/chosen": -398.9664001464844, |
|
"logps/rejected": -440.2588806152344, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.544419288635254, |
|
"rewards/margins": 20.12918472290039, |
|
"rewards/rejected": -24.673603057861328, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 6.816421378776143, |
|
"grad_norm": 0.0009737831423990428, |
|
"learning_rate": 0.00012225209339563145, |
|
"logits/chosen": -1.709917664527893, |
|
"logits/rejected": -1.1064178943634033, |
|
"logps/chosen": -414.5459899902344, |
|
"logps/rejected": -465.4837341308594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.264222145080566, |
|
"rewards/margins": 20.37704849243164, |
|
"rewards/rejected": -25.64126968383789, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.878388845855926, |
|
"grad_norm": 0.000668133026920259, |
|
"learning_rate": 0.00012100016506928493, |
|
"logits/chosen": -1.733787178993225, |
|
"logits/rejected": -1.1450860500335693, |
|
"logps/chosen": -403.2812805175781, |
|
"logps/rejected": -477.0782165527344, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.641029357910156, |
|
"rewards/margins": 21.0471134185791, |
|
"rewards/rejected": -25.68814468383789, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 6.940356312935709, |
|
"grad_norm": 0.00028338556876406074, |
|
"learning_rate": 0.00011974478384028672, |
|
"logits/chosen": -1.703685998916626, |
|
"logits/rejected": -1.0926717519760132, |
|
"logps/chosen": -415.73248291015625, |
|
"logps/rejected": -474.7493591308594, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.833617210388184, |
|
"rewards/margins": 19.839744567871094, |
|
"rewards/rejected": -25.67336082458496, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 7.002323780015492, |
|
"grad_norm": 9.248249261872843e-05, |
|
"learning_rate": 0.00011848615612173688, |
|
"logits/chosen": -1.727691888809204, |
|
"logits/rejected": -1.1385018825531006, |
|
"logps/chosen": -404.37158203125, |
|
"logps/rejected": -455.1560974121094, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.189269065856934, |
|
"rewards/margins": 20.383289337158203, |
|
"rewards/rejected": -25.572555541992188, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 7.064291247095275, |
|
"grad_norm": 1.9335082470206544e-05, |
|
"learning_rate": 0.0001172244888605319, |
|
"logits/chosen": -1.687378168106079, |
|
"logits/rejected": -1.1057562828063965, |
|
"logps/chosen": -406.32733154296875, |
|
"logps/rejected": -474.8482360839844, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -4.787657737731934, |
|
"rewards/margins": 20.789146423339844, |
|
"rewards/rejected": -25.576807022094727, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 7.126258714175058, |
|
"grad_norm": 8.403878018725663e-05, |
|
"learning_rate": 0.00011595998950333793, |
|
"logits/chosen": -1.6789989471435547, |
|
"logits/rejected": -1.1095144748687744, |
|
"logps/chosen": -409.31524658203125, |
|
"logps/rejected": -472.5364685058594, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.127674579620361, |
|
"rewards/margins": 20.548160552978516, |
|
"rewards/rejected": -25.675832748413086, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 7.188226181254842, |
|
"grad_norm": 0.0001840272598201409, |
|
"learning_rate": 0.00011469286596248181, |
|
"logits/chosen": -1.7186450958251953, |
|
"logits/rejected": -1.0815023183822632, |
|
"logps/chosen": -402.4718322753906, |
|
"logps/rejected": -446.8160095214844, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.929797172546387, |
|
"rewards/margins": 20.37470245361328, |
|
"rewards/rejected": -25.304500579833984, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 7.2501936483346245, |
|
"grad_norm": 0.00030283021624200046, |
|
"learning_rate": 0.00011342332658176555, |
|
"logits/chosen": -1.7267248630523682, |
|
"logits/rejected": -1.1029185056686401, |
|
"logps/chosen": -407.1277160644531, |
|
"logps/rejected": -443.208251953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.898409843444824, |
|
"rewards/margins": 19.7962589263916, |
|
"rewards/rejected": -24.69466781616211, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 7.3121611154144075, |
|
"grad_norm": 0.000179938884684816, |
|
"learning_rate": 0.00011221521661813197, |
|
"logits/chosen": -1.7125059366226196, |
|
"logits/rejected": -1.107881784439087, |
|
"logps/chosen": -411.54571533203125, |
|
"logps/rejected": -468.47821044921875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.634856700897217, |
|
"rewards/margins": 20.49616050720215, |
|
"rewards/rejected": -26.131017684936523, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 7.3741285824941905, |
|
"grad_norm": 0.00018190982518717647, |
|
"learning_rate": 0.0001109415670719721, |
|
"logits/chosen": -1.6849457025527954, |
|
"logits/rejected": -1.0680724382400513, |
|
"logps/chosen": -408.02587890625, |
|
"logps/rejected": -460.41015625, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -4.917786598205566, |
|
"rewards/margins": 20.782718658447266, |
|
"rewards/rejected": -25.700504302978516, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 7.436096049573973, |
|
"grad_norm": 0.00010547572310315445, |
|
"learning_rate": 0.00010966611848443176, |
|
"logits/chosen": -1.6835496425628662, |
|
"logits/rejected": -1.0897111892700195, |
|
"logps/chosen": -407.20318603515625, |
|
"logps/rejected": -464.83935546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.396719932556152, |
|
"rewards/margins": 20.730510711669922, |
|
"rewards/rejected": -26.127233505249023, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.498063516653756, |
|
"grad_norm": 0.0002746889949776232, |
|
"learning_rate": 0.00010838908056813919, |
|
"logits/chosen": -1.7222875356674194, |
|
"logits/rejected": -1.0569690465927124, |
|
"logps/chosen": -397.06500244140625, |
|
"logps/rejected": -429.73663330078125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -4.969448566436768, |
|
"rewards/margins": 20.237773895263672, |
|
"rewards/rejected": -25.20722007751465, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 7.56003098373354, |
|
"grad_norm": 0.0010378537699580193, |
|
"learning_rate": 0.00010711066329704423, |
|
"logits/chosen": -1.7328182458877563, |
|
"logits/rejected": -1.0489845275878906, |
|
"logps/chosen": -410.6394958496094, |
|
"logps/rejected": -457.23126220703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.752233505249023, |
|
"rewards/margins": 20.957183837890625, |
|
"rewards/rejected": -25.70941734313965, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 7.621998450813323, |
|
"grad_norm": 0.00035315402783453465, |
|
"learning_rate": 0.00010583107687189388, |
|
"logits/chosen": -1.7303959131240845, |
|
"logits/rejected": -1.0627490282058716, |
|
"logps/chosen": -394.2586364746094, |
|
"logps/rejected": -438.1336975097656, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.017716407775879, |
|
"rewards/margins": 20.087886810302734, |
|
"rewards/rejected": -25.105602264404297, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 7.683965917893106, |
|
"grad_norm": 5.2913201216142625e-05, |
|
"learning_rate": 0.00010455053168567064, |
|
"logits/chosen": -1.701934814453125, |
|
"logits/rejected": -1.0837266445159912, |
|
"logps/chosen": -411.44390869140625, |
|
"logps/rejected": -451.9497985839844, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.648865699768066, |
|
"rewards/margins": 20.401885986328125, |
|
"rewards/rejected": -26.050750732421875, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 7.745933384972889, |
|
"grad_norm": 0.0004144099075347185, |
|
"learning_rate": 0.00010326923828899894, |
|
"logits/chosen": -1.66423761844635, |
|
"logits/rejected": -1.0931271314620972, |
|
"logps/chosen": -413.04266357421875, |
|
"logps/rejected": -468.1424255371094, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.445749282836914, |
|
"rewards/margins": 20.35373306274414, |
|
"rewards/rejected": -25.799480438232422, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.807900852052672, |
|
"grad_norm": 0.0005614625406451523, |
|
"learning_rate": 0.00010198740735552596, |
|
"logits/chosen": -1.7007503509521484, |
|
"logits/rejected": -1.0203969478607178, |
|
"logps/chosen": -409.26434326171875, |
|
"logps/rejected": -450.35284423828125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.619626522064209, |
|
"rewards/margins": 20.54979133605957, |
|
"rewards/rejected": -26.169414520263672, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 7.869868319132456, |
|
"grad_norm": 0.00046529798419214785, |
|
"learning_rate": 0.00010070524964728218, |
|
"logits/chosen": -1.6950366497039795, |
|
"logits/rejected": -1.0599762201309204, |
|
"logps/chosen": -388.9576416015625, |
|
"logps/rejected": -438.4559020996094, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.69763708114624, |
|
"rewards/margins": 19.549518585205078, |
|
"rewards/rejected": -25.247156143188477, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 7.931835786212239, |
|
"grad_norm": 0.0005010979948565364, |
|
"learning_rate": 9.942297598002714e-05, |
|
"logits/chosen": -1.6910135746002197, |
|
"logits/rejected": -1.088746190071106, |
|
"logps/chosen": -409.673583984375, |
|
"logps/rejected": -460.9344177246094, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.090248107910156, |
|
"rewards/margins": 20.458660125732422, |
|
"rewards/rejected": -25.548908233642578, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 7.993803253292022, |
|
"grad_norm": 2.1018489860580303e-05, |
|
"learning_rate": 9.814079718858677e-05, |
|
"logits/chosen": -1.6951793432235718, |
|
"logits/rejected": -1.1038161516189575, |
|
"logps/chosen": -427.29669189453125, |
|
"logps/rejected": -482.02362060546875, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.359341621398926, |
|
"rewards/margins": 20.788881301879883, |
|
"rewards/rejected": -26.148223876953125, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 8.055770720371806, |
|
"grad_norm": 0.00020114157814532518, |
|
"learning_rate": 9.685892409218717e-05, |
|
"logits/chosen": -1.702978491783142, |
|
"logits/rejected": -1.0864311456680298, |
|
"logps/chosen": -405.50567626953125, |
|
"logps/rejected": -455.3516540527344, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.847678184509277, |
|
"rewards/margins": 20.718107223510742, |
|
"rewards/rejected": -25.565786361694336, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 8.117738187451588, |
|
"grad_norm": 0.00014650092634838074, |
|
"learning_rate": 9.557756745979138e-05, |
|
"logits/chosen": -1.692112922668457, |
|
"logits/rejected": -1.106385588645935, |
|
"logps/chosen": -400.7706298828125, |
|
"logps/rejected": -458.6825256347656, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.732221603393555, |
|
"rewards/margins": 21.058570861816406, |
|
"rewards/rejected": -25.79079246520996, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 8.179705654531372, |
|
"grad_norm": 0.0003632131847552955, |
|
"learning_rate": 9.429693797544388e-05, |
|
"logits/chosen": -1.727189302444458, |
|
"logits/rejected": -1.0760419368743896, |
|
"logps/chosen": -401.86767578125, |
|
"logps/rejected": -446.3102111816406, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.723801612854004, |
|
"rewards/margins": 20.717304229736328, |
|
"rewards/rejected": -25.441104888916016, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 8.241673121611154, |
|
"grad_norm": 0.00047560204984620214, |
|
"learning_rate": 9.301724620362973e-05, |
|
"logits/chosen": -1.7449928522109985, |
|
"logits/rejected": -1.0541192293167114, |
|
"logps/chosen": -409.01959228515625, |
|
"logps/rejected": -449.57666015625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.505074501037598, |
|
"rewards/margins": 20.396114349365234, |
|
"rewards/rejected": -25.901187896728516, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 8.303640588690937, |
|
"grad_norm": 0.0010067891562357545, |
|
"learning_rate": 9.173870255465275e-05, |
|
"logits/chosen": -1.7413511276245117, |
|
"logits/rejected": -1.073628544807434, |
|
"logps/chosen": -413.9063415527344, |
|
"logps/rejected": -457.25042724609375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.8417158126831055, |
|
"rewards/margins": 20.952346801757812, |
|
"rewards/rejected": -25.7940616607666, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 8.36560805577072, |
|
"grad_norm": 0.0007608987507410347, |
|
"learning_rate": 9.046151725003931e-05, |
|
"logits/chosen": -1.738470435142517, |
|
"logits/rejected": -1.118428111076355, |
|
"logps/chosen": -406.96368408203125, |
|
"logps/rejected": -458.2310485839844, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.3068695068359375, |
|
"rewards/margins": 20.518783569335938, |
|
"rewards/rejected": -25.825653076171875, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 8.427575522850503, |
|
"grad_norm": 0.00037170801078900695, |
|
"learning_rate": 8.918590028797327e-05, |
|
"logits/chosen": -1.6667039394378662, |
|
"logits/rejected": -1.076485276222229, |
|
"logps/chosen": -417.1942443847656, |
|
"logps/rejected": -475.34478759765625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.047384262084961, |
|
"rewards/margins": 21.4394588470459, |
|
"rewards/rejected": -26.48684310913086, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 8.489542989930287, |
|
"grad_norm": 0.00017155329987872392, |
|
"learning_rate": 8.791206140876746e-05, |
|
"logits/chosen": -1.6952327489852905, |
|
"logits/rejected": -1.0440196990966797, |
|
"logps/chosen": -390.47991943359375, |
|
"logps/rejected": -446.51611328125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.408968448638916, |
|
"rewards/margins": 20.748926162719727, |
|
"rewards/rejected": -25.157894134521484, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 8.55151045701007, |
|
"grad_norm": 4.225455268169753e-05, |
|
"learning_rate": 8.664021006037762e-05, |
|
"logits/chosen": -1.7128692865371704, |
|
"logits/rejected": -1.0821470022201538, |
|
"logps/chosen": -424.44549560546875, |
|
"logps/rejected": -469.12652587890625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.529724597930908, |
|
"rewards/margins": 20.326000213623047, |
|
"rewards/rejected": -25.855722427368164, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 8.613477924089853, |
|
"grad_norm": 0.0004146189312450588, |
|
"learning_rate": 8.537055536396439e-05, |
|
"logits/chosen": -1.7189327478408813, |
|
"logits/rejected": -1.1234623193740845, |
|
"logps/chosen": -413.88092041015625, |
|
"logps/rejected": -489.74432373046875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.031737327575684, |
|
"rewards/margins": 20.76127815246582, |
|
"rewards/rejected": -26.793010711669922, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 8.675445391169635, |
|
"grad_norm": 0.0011191857047379017, |
|
"learning_rate": 8.410330607950913e-05, |
|
"logits/chosen": -1.6889803409576416, |
|
"logits/rejected": -1.0510902404785156, |
|
"logps/chosen": -409.9695739746094, |
|
"logps/rejected": -461.45257568359375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.462882041931152, |
|
"rewards/margins": 20.715688705444336, |
|
"rewards/rejected": -26.178569793701172, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.737412858249419, |
|
"grad_norm": 0.0015039819991216063, |
|
"learning_rate": 8.283867057148902e-05, |
|
"logits/chosen": -1.6871960163116455, |
|
"logits/rejected": -1.1272326707839966, |
|
"logps/chosen": -424.3963928222656, |
|
"logps/rejected": -478.30535888671875, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.292850971221924, |
|
"rewards/margins": 20.825016021728516, |
|
"rewards/rejected": -26.117868423461914, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 8.799380325329203, |
|
"grad_norm": 0.00024371009203605354, |
|
"learning_rate": 8.157685677461708e-05, |
|
"logits/chosen": -1.7314860820770264, |
|
"logits/rejected": -1.0632710456848145, |
|
"logps/chosen": -411.5020446777344, |
|
"logps/rejected": -450.3389587402344, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.654230117797852, |
|
"rewards/margins": 21.339710235595703, |
|
"rewards/rejected": -25.993938446044922, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 8.861347792408985, |
|
"grad_norm": 0.0004402414197102189, |
|
"learning_rate": 8.031807215965337e-05, |
|
"logits/chosen": -1.7364399433135986, |
|
"logits/rejected": -1.0983723402023315, |
|
"logps/chosen": -417.08746337890625, |
|
"logps/rejected": -472.83984375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.4446940422058105, |
|
"rewards/margins": 21.18663215637207, |
|
"rewards/rejected": -26.63132667541504, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 8.923315259488769, |
|
"grad_norm": 0.00047181983245536685, |
|
"learning_rate": 7.906252369929154e-05, |
|
"logits/chosen": -1.6905673742294312, |
|
"logits/rejected": -1.084665060043335, |
|
"logps/chosen": -393.9977111816406, |
|
"logps/rejected": -455.0557556152344, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.183560371398926, |
|
"rewards/margins": 20.739307403564453, |
|
"rewards/rejected": -25.922870635986328, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 8.98528272656855, |
|
"grad_norm": 0.0003129359392914921, |
|
"learning_rate": 7.781041783412845e-05, |
|
"logits/chosen": -1.6950937509536743, |
|
"logits/rejected": -1.0535084009170532, |
|
"logps/chosen": -418.62701416015625, |
|
"logps/rejected": -476.28387451171875, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.857310771942139, |
|
"rewards/margins": 21.914113998413086, |
|
"rewards/rejected": -26.771427154541016, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 9.047250193648335, |
|
"grad_norm": 0.0004019307089038193, |
|
"learning_rate": 7.656196043872012e-05, |
|
"logits/chosen": -1.7096707820892334, |
|
"logits/rejected": -1.1031239032745361, |
|
"logps/chosen": -416.05206298828125, |
|
"logps/rejected": -494.614990234375, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.534869194030762, |
|
"rewards/margins": 21.93942642211914, |
|
"rewards/rejected": -27.474294662475586, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 9.109217660728119, |
|
"grad_norm": 0.0007387935766018927, |
|
"learning_rate": 7.531735678773171e-05, |
|
"logits/chosen": -1.7090095281600952, |
|
"logits/rejected": -1.0878323316574097, |
|
"logps/chosen": -400.01513671875, |
|
"logps/rejected": -477.05535888671875, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.275289058685303, |
|
"rewards/margins": 21.69790267944336, |
|
"rewards/rejected": -26.973194122314453, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 9.1711851278079, |
|
"grad_norm": 0.00027141955797560513, |
|
"learning_rate": 7.407681152218535e-05, |
|
"logits/chosen": -1.6808192729949951, |
|
"logits/rejected": -1.0295798778533936, |
|
"logps/chosen": -404.32513427734375, |
|
"logps/rejected": -460.8975524902344, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.207651615142822, |
|
"rewards/margins": 20.58077049255371, |
|
"rewards/rejected": -25.788421630859375, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 9.233152594887684, |
|
"grad_norm": 0.0005088089383207262, |
|
"learning_rate": 7.284052861581288e-05, |
|
"logits/chosen": -1.7368125915527344, |
|
"logits/rejected": -1.0655357837677002, |
|
"logps/chosen": -410.697021484375, |
|
"logps/rejected": -453.0840759277344, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.395773410797119, |
|
"rewards/margins": 20.73539924621582, |
|
"rewards/rejected": -26.13117027282715, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 9.295120061967467, |
|
"grad_norm": 0.0002143807359971106, |
|
"learning_rate": 7.160871134151775e-05, |
|
"logits/chosen": -1.6661646366119385, |
|
"logits/rejected": -1.092222809791565, |
|
"logps/chosen": -405.39154052734375, |
|
"logps/rejected": -485.67578125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.456831455230713, |
|
"rewards/margins": 21.252620697021484, |
|
"rewards/rejected": -26.70945167541504, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.35708752904725, |
|
"grad_norm": 8.41324872453697e-05, |
|
"learning_rate": 7.038156223795224e-05, |
|
"logits/chosen": -1.7362842559814453, |
|
"logits/rejected": -1.082162857055664, |
|
"logps/chosen": -410.0975646972656, |
|
"logps/rejected": -466.8894958496094, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.814949989318848, |
|
"rewards/margins": 21.61594009399414, |
|
"rewards/rejected": -26.430889129638672, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 9.419054996127032, |
|
"grad_norm": 2.4985982236103155e-05, |
|
"learning_rate": 6.915928307621584e-05, |
|
"logits/chosen": -1.7000200748443604, |
|
"logits/rejected": -1.0128730535507202, |
|
"logps/chosen": -417.96405029296875, |
|
"logps/rejected": -461.15362548828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.359194278717041, |
|
"rewards/margins": 21.4404296875, |
|
"rewards/rejected": -25.79962158203125, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 9.481022463206816, |
|
"grad_norm": 0.0002187406353186816, |
|
"learning_rate": 6.794207482667918e-05, |
|
"logits/chosen": -1.6875083446502686, |
|
"logits/rejected": -1.0425808429718018, |
|
"logps/chosen": -409.68170166015625, |
|
"logps/rejected": -456.98114013671875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.01973295211792, |
|
"rewards/margins": 20.8963623046875, |
|
"rewards/rejected": -25.916095733642578, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 9.5429899302866, |
|
"grad_norm": 0.0001037058827932924, |
|
"learning_rate": 6.673013762594022e-05, |
|
"logits/chosen": -1.6812347173690796, |
|
"logits/rejected": -1.0920425653457642, |
|
"logps/chosen": -409.3445129394531, |
|
"logps/rejected": -463.01702880859375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.606844425201416, |
|
"rewards/margins": 20.97027015686035, |
|
"rewards/rejected": -26.57711410522461, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 9.604957397366382, |
|
"grad_norm": 6.546611984958872e-05, |
|
"learning_rate": 6.552367074391708e-05, |
|
"logits/chosen": -1.6708405017852783, |
|
"logits/rejected": -1.0272510051727295, |
|
"logps/chosen": -421.3130798339844, |
|
"logps/rejected": -468.8424377441406, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.817858695983887, |
|
"rewards/margins": 21.14541244506836, |
|
"rewards/rejected": -26.963272094726562, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 9.666924864446166, |
|
"grad_norm": 0.0009899769211187959, |
|
"learning_rate": 6.432287255108363e-05, |
|
"logits/chosen": -1.7139580249786377, |
|
"logits/rejected": -1.0682191848754883, |
|
"logps/chosen": -415.08154296875, |
|
"logps/rejected": -463.1947326660156, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.88477087020874, |
|
"rewards/margins": 20.44330596923828, |
|
"rewards/rejected": -26.328075408935547, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 9.728892331525948, |
|
"grad_norm": 0.0010677826358005404, |
|
"learning_rate": 6.312794048585286e-05, |
|
"logits/chosen": -1.6608006954193115, |
|
"logits/rejected": -1.0799270868301392, |
|
"logps/chosen": -393.5787353515625, |
|
"logps/rejected": -458.1851501464844, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.194777488708496, |
|
"rewards/margins": 20.60002899169922, |
|
"rewards/rejected": -25.7948055267334, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 9.790859798605732, |
|
"grad_norm": 0.00037055814755149186, |
|
"learning_rate": 6.193907102211358e-05, |
|
"logits/chosen": -1.700254201889038, |
|
"logits/rejected": -1.149086594581604, |
|
"logps/chosen": -414.83575439453125, |
|
"logps/rejected": -480.109375, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -6.013056755065918, |
|
"rewards/margins": 20.352540969848633, |
|
"rewards/rejected": -26.3655948638916, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 9.852827265685516, |
|
"grad_norm": 0.00012906199845019728, |
|
"learning_rate": 6.075645963692567e-05, |
|
"logits/chosen": -1.6764156818389893, |
|
"logits/rejected": -1.0942738056182861, |
|
"logps/chosen": -410.2710876464844, |
|
"logps/rejected": -480.7608337402344, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.723294734954834, |
|
"rewards/margins": 21.212993621826172, |
|
"rewards/rejected": -26.936288833618164, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 9.914794732765298, |
|
"grad_norm": 9.71817207755521e-05, |
|
"learning_rate": 5.9580300778379087e-05, |
|
"logits/chosen": -1.6972318887710571, |
|
"logits/rejected": -1.06034255027771, |
|
"logps/chosen": -414.45697021484375, |
|
"logps/rejected": -478.67608642578125, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -4.851905822753906, |
|
"rewards/margins": 22.140657424926758, |
|
"rewards/rejected": -26.992563247680664, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.976762199845082, |
|
"grad_norm": 0.0005355001194402575, |
|
"learning_rate": 5.8410787833622414e-05, |
|
"logits/chosen": -1.701051950454712, |
|
"logits/rejected": -1.0390212535858154, |
|
"logps/chosen": -392.62689208984375, |
|
"logps/rejected": -438.70660400390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.652411937713623, |
|
"rewards/margins": 21.09701156616211, |
|
"rewards/rejected": -25.749425888061523, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 10.038729666924864, |
|
"grad_norm": 0.0007227555033750832, |
|
"learning_rate": 5.724811309706547e-05, |
|
"logits/chosen": -1.7204704284667969, |
|
"logits/rejected": -1.0700039863586426, |
|
"logps/chosen": -430.43206787109375, |
|
"logps/rejected": -488.071044921875, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.367037296295166, |
|
"rewards/margins": 21.72504425048828, |
|
"rewards/rejected": -27.092077255249023, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 10.100697134004648, |
|
"grad_norm": 0.00017314284923486412, |
|
"learning_rate": 5.6092467738761776e-05, |
|
"logits/chosen": -1.6834897994995117, |
|
"logits/rejected": -1.0887248516082764, |
|
"logps/chosen": -416.51348876953125, |
|
"logps/rejected": -469.4505920410156, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.5038862228393555, |
|
"rewards/margins": 21.196359634399414, |
|
"rewards/rejected": -26.700244903564453, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 10.162664601084431, |
|
"grad_norm": 0.00027020045672543347, |
|
"learning_rate": 5.494404177297595e-05, |
|
"logits/chosen": -1.696730613708496, |
|
"logits/rejected": -1.0611952543258667, |
|
"logps/chosen": -399.0355529785156, |
|
"logps/rejected": -449.93646240234375, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.055383682250977, |
|
"rewards/margins": 20.96977996826172, |
|
"rewards/rejected": -26.025165557861328, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 10.224632068164214, |
|
"grad_norm": 0.0003596362948883325, |
|
"learning_rate": 5.380302402694104e-05, |
|
"logits/chosen": -1.7198495864868164, |
|
"logits/rejected": -1.0654425621032715, |
|
"logps/chosen": -390.9352722167969, |
|
"logps/rejected": -453.2206115722656, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.068055629730225, |
|
"rewards/margins": 20.917200088500977, |
|
"rewards/rejected": -25.98525619506836, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 10.286599535243997, |
|
"grad_norm": 2.4758495783316903e-05, |
|
"learning_rate": 5.266960210981089e-05, |
|
"logits/chosen": -1.664912462234497, |
|
"logits/rejected": -1.0661206245422363, |
|
"logps/chosen": -402.9308166503906, |
|
"logps/rejected": -467.4169921875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.694643020629883, |
|
"rewards/margins": 21.313457489013672, |
|
"rewards/rejected": -27.008098602294922, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 10.34856700232378, |
|
"grad_norm": 0.00036736109177581966, |
|
"learning_rate": 5.15439623818132e-05, |
|
"logits/chosen": -1.7021472454071045, |
|
"logits/rejected": -1.1036940813064575, |
|
"logps/chosen": -395.59149169921875, |
|
"logps/rejected": -463.43316650390625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.990979194641113, |
|
"rewards/margins": 20.853925704956055, |
|
"rewards/rejected": -26.84490394592285, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 10.410534469403563, |
|
"grad_norm": 0.00021753676992375404, |
|
"learning_rate": 5.042628992360755e-05, |
|
"logits/chosen": -1.6948877573013306, |
|
"logits/rejected": -1.0948389768600464, |
|
"logps/chosen": -417.33160400390625, |
|
"logps/rejected": -491.01483154296875, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -5.629961967468262, |
|
"rewards/margins": 21.473012924194336, |
|
"rewards/rejected": -27.102975845336914, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 10.472501936483347, |
|
"grad_norm": 0.0005015567876398563, |
|
"learning_rate": 4.9316768505853864e-05, |
|
"logits/chosen": -1.7080516815185547, |
|
"logits/rejected": -1.0318862199783325, |
|
"logps/chosen": -397.1073913574219, |
|
"logps/rejected": -439.6314392089844, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.5096540451049805, |
|
"rewards/margins": 20.36575698852539, |
|
"rewards/rejected": -25.875408172607422, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 10.53446940356313, |
|
"grad_norm": 0.000426275102654472, |
|
"learning_rate": 4.8215580558996546e-05, |
|
"logits/chosen": -1.6764377355575562, |
|
"logits/rejected": -1.0771383047103882, |
|
"logps/chosen": -404.91937255859375, |
|
"logps/rejected": -485.12548828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.389082908630371, |
|
"rewards/margins": 21.155742645263672, |
|
"rewards/rejected": -26.54482650756836, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 10.596436870642913, |
|
"grad_norm": 0.00011274849384790286, |
|
"learning_rate": 4.7122907143268645e-05, |
|
"logits/chosen": -1.7037220001220703, |
|
"logits/rejected": -1.0873366594314575, |
|
"logps/chosen": -417.3395080566406, |
|
"logps/rejected": -485.4212951660156, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.40346622467041, |
|
"rewards/margins": 21.43330955505371, |
|
"rewards/rejected": -26.836772918701172, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 10.658404337722695, |
|
"grad_norm": 0.0008545616874471307, |
|
"learning_rate": 4.603892791892157e-05, |
|
"logits/chosen": -1.7251865863800049, |
|
"logits/rejected": -1.1108168363571167, |
|
"logps/chosen": -409.8521423339844, |
|
"logps/rejected": -483.19329833984375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.274342060089111, |
|
"rewards/margins": 22.360143661499023, |
|
"rewards/rejected": -26.634485244750977, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 10.720371804802479, |
|
"grad_norm": 0.0002442661498207599, |
|
"learning_rate": 4.4963821116684645e-05, |
|
"logits/chosen": -1.7168834209442139, |
|
"logits/rejected": -1.0469696521759033, |
|
"logps/chosen": -410.9766540527344, |
|
"logps/rejected": -462.96759033203125, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.01826286315918, |
|
"rewards/margins": 21.594696044921875, |
|
"rewards/rejected": -26.612957000732422, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 10.782339271882261, |
|
"grad_norm": 2.5067949536605738e-05, |
|
"learning_rate": 4.3897763508460235e-05, |
|
"logits/chosen": -1.6555604934692383, |
|
"logits/rejected": -1.067326307296753, |
|
"logps/chosen": -411.1241149902344, |
|
"logps/rejected": -471.122314453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.384194374084473, |
|
"rewards/margins": 20.667926788330078, |
|
"rewards/rejected": -26.052120208740234, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 10.844306738962045, |
|
"grad_norm": 9.07514404389076e-05, |
|
"learning_rate": 4.284093037825829e-05, |
|
"logits/chosen": -1.7002710103988647, |
|
"logits/rejected": -1.0244972705841064, |
|
"logps/chosen": -396.713623046875, |
|
"logps/rejected": -450.4693298339844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.3324480056762695, |
|
"rewards/margins": 20.980426788330078, |
|
"rewards/rejected": -26.312875747680664, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 10.906274206041829, |
|
"grad_norm": 0.0001592998596606776, |
|
"learning_rate": 4.179349549337557e-05, |
|
"logits/chosen": -1.704119086265564, |
|
"logits/rejected": -1.0116019248962402, |
|
"logps/chosen": -402.82666015625, |
|
"logps/rejected": -443.30157470703125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.895948886871338, |
|
"rewards/margins": 21.18239402770996, |
|
"rewards/rejected": -26.07834243774414, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 10.96824167312161, |
|
"grad_norm": 1.9538027117960155e-05, |
|
"learning_rate": 4.075563107582472e-05, |
|
"logits/chosen": -1.668092966079712, |
|
"logits/rejected": -1.065983533859253, |
|
"logps/chosen": -398.3217468261719, |
|
"logps/rejected": -477.6726989746094, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -5.416517734527588, |
|
"rewards/margins": 21.412036895751953, |
|
"rewards/rejected": -26.82855224609375, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 11.030209140201395, |
|
"grad_norm": 5.915413566981442e-05, |
|
"learning_rate": 3.9727507774016635e-05, |
|
"logits/chosen": -1.6671562194824219, |
|
"logits/rejected": -1.0572084188461304, |
|
"logps/chosen": -400.4344177246094, |
|
"logps/rejected": -474.96038818359375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.950907230377197, |
|
"rewards/margins": 20.902238845825195, |
|
"rewards/rejected": -26.8531494140625, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 11.092176607281177, |
|
"grad_norm": 0.0006108521483838558, |
|
"learning_rate": 3.8709294634702376e-05, |
|
"logits/chosen": -1.7030471563339233, |
|
"logits/rejected": -1.0317370891571045, |
|
"logps/chosen": -398.74090576171875, |
|
"logps/rejected": -459.75, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.71872615814209, |
|
"rewards/margins": 22.286239624023438, |
|
"rewards/rejected": -27.00496482849121, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 11.15414407436096, |
|
"grad_norm": 0.000467544246930629, |
|
"learning_rate": 3.770115907517773e-05, |
|
"logits/chosen": -1.6686887741088867, |
|
"logits/rejected": -1.0782063007354736, |
|
"logps/chosen": -406.98138427734375, |
|
"logps/rejected": -482.86572265625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.86759090423584, |
|
"rewards/margins": 21.316923141479492, |
|
"rewards/rejected": -27.184513092041016, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 11.216111541440744, |
|
"grad_norm": 0.0004900813801214099, |
|
"learning_rate": 3.670326685575632e-05, |
|
"logits/chosen": -1.7124903202056885, |
|
"logits/rejected": -1.0398648977279663, |
|
"logps/chosen": -415.08648681640625, |
|
"logps/rejected": -477.70709228515625, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.952596187591553, |
|
"rewards/margins": 22.07376480102539, |
|
"rewards/rejected": -27.026357650756836, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 11.278079008520526, |
|
"grad_norm": 0.0002428332227282226, |
|
"learning_rate": 3.571578205251459e-05, |
|
"logits/chosen": -1.7211148738861084, |
|
"logits/rejected": -1.1097770929336548, |
|
"logps/chosen": -406.6622009277344, |
|
"logps/rejected": -460.78643798828125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.563107490539551, |
|
"rewards/margins": 21.05852699279785, |
|
"rewards/rejected": -26.621633529663086, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 11.34004647560031, |
|
"grad_norm": 0.0004079696664121002, |
|
"learning_rate": 3.4738867030314235e-05, |
|
"logits/chosen": -1.7017863988876343, |
|
"logits/rejected": -1.0735719203948975, |
|
"logps/chosen": -414.16339111328125, |
|
"logps/rejected": -490.61944580078125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.069756507873535, |
|
"rewards/margins": 22.46738052368164, |
|
"rewards/rejected": -27.53713607788086, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 11.402013942680092, |
|
"grad_norm": 0.0001673255901550874, |
|
"learning_rate": 3.377268241610555e-05, |
|
"logits/chosen": -1.692521095275879, |
|
"logits/rejected": -1.0149263143539429, |
|
"logps/chosen": -412.38507080078125, |
|
"logps/rejected": -467.0577697753906, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.867552280426025, |
|
"rewards/margins": 20.83139991760254, |
|
"rewards/rejected": -26.698949813842773, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 11.463981409759876, |
|
"grad_norm": 0.00012532217078842223, |
|
"learning_rate": 3.2817387072516726e-05, |
|
"logits/chosen": -1.7133913040161133, |
|
"logits/rejected": -1.1119440793991089, |
|
"logps/chosen": -401.7035217285156, |
|
"logps/rejected": -476.5845642089844, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.1463212966918945, |
|
"rewards/margins": 22.046228408813477, |
|
"rewards/rejected": -27.192550659179688, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 11.52594887683966, |
|
"grad_norm": 0.0002491988998372108, |
|
"learning_rate": 3.18731380717334e-05, |
|
"logits/chosen": -1.6776504516601562, |
|
"logits/rejected": -1.0443401336669922, |
|
"logps/chosen": -402.75933837890625, |
|
"logps/rejected": -455.70068359375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.209097385406494, |
|
"rewards/margins": 21.239925384521484, |
|
"rewards/rejected": -26.449024200439453, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 11.587916343919442, |
|
"grad_norm": 0.0005044552381150424, |
|
"learning_rate": 3.0940090669672215e-05, |
|
"logits/chosen": -1.6772470474243164, |
|
"logits/rejected": -1.0744705200195312, |
|
"logps/chosen": -400.09912109375, |
|
"logps/rejected": -477.5372619628906, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.949058532714844, |
|
"rewards/margins": 21.821866989135742, |
|
"rewards/rejected": -26.770925521850586, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 11.649883810999226, |
|
"grad_norm": 4.5204073103377596e-05, |
|
"learning_rate": 3.001839828045342e-05, |
|
"logits/chosen": -1.7325446605682373, |
|
"logits/rejected": -1.063987135887146, |
|
"logps/chosen": -415.75592041015625, |
|
"logps/rejected": -452.0940856933594, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.492778778076172, |
|
"rewards/margins": 20.81328582763672, |
|
"rewards/rejected": -26.30606460571289, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 11.711851278079008, |
|
"grad_norm": 0.0002700432378333062, |
|
"learning_rate": 2.9108212451176033e-05, |
|
"logits/chosen": -1.7305303812026978, |
|
"logits/rejected": -1.083184003829956, |
|
"logps/chosen": -400.70635986328125, |
|
"logps/rejected": -472.36114501953125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.0615034103393555, |
|
"rewards/margins": 22.031635284423828, |
|
"rewards/rejected": -27.093135833740234, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 11.773818745158792, |
|
"grad_norm": 0.00013194057100918144, |
|
"learning_rate": 2.8209682837000072e-05, |
|
"logits/chosen": -1.6789268255233765, |
|
"logits/rejected": -1.0528620481491089, |
|
"logps/chosen": -403.6865539550781, |
|
"logps/rejected": -479.7601623535156, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.470952033996582, |
|
"rewards/margins": 21.67144775390625, |
|
"rewards/rejected": -27.14239501953125, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 11.835786212238574, |
|
"grad_norm": 0.0002364068350289017, |
|
"learning_rate": 2.7322957176539777e-05, |
|
"logits/chosen": -1.6753734350204468, |
|
"logits/rejected": -1.0195820331573486, |
|
"logps/chosen": -417.6498107910156, |
|
"logps/rejected": -472.09844970703125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.569521903991699, |
|
"rewards/margins": 20.978273391723633, |
|
"rewards/rejected": -26.54779624938965, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 11.897753679318358, |
|
"grad_norm": 0.00013174403284210712, |
|
"learning_rate": 2.6448181267572226e-05, |
|
"logits/chosen": -1.6455790996551514, |
|
"logits/rejected": -1.046744465827942, |
|
"logps/chosen": -410.19134521484375, |
|
"logps/rejected": -483.3438415527344, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.728828430175781, |
|
"rewards/margins": 21.940776824951172, |
|
"rewards/rejected": -27.669601440429688, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 11.959721146398142, |
|
"grad_norm": 0.00042892919736914337, |
|
"learning_rate": 2.5585498943064724e-05, |
|
"logits/chosen": -1.6926710605621338, |
|
"logits/rejected": -1.0491944551467896, |
|
"logps/chosen": -415.20550537109375, |
|
"logps/rejected": -482.228271484375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.751172065734863, |
|
"rewards/margins": 21.466909408569336, |
|
"rewards/rejected": -27.21807861328125, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 12.021688613477924, |
|
"grad_norm": 8.727656677365303e-05, |
|
"learning_rate": 2.4735052047525398e-05, |
|
"logits/chosen": -1.7163196802139282, |
|
"logits/rejected": -1.059697151184082, |
|
"logps/chosen": -422.93359375, |
|
"logps/rejected": -472.23583984375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.125914573669434, |
|
"rewards/margins": 21.549646377563477, |
|
"rewards/rejected": -26.675561904907227, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 12.083656080557708, |
|
"grad_norm": 5.139048516866751e-05, |
|
"learning_rate": 2.389698041368089e-05, |
|
"logits/chosen": -1.682549238204956, |
|
"logits/rejected": -1.0410518646240234, |
|
"logps/chosen": -419.48529052734375, |
|
"logps/rejected": -488.83154296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.320895195007324, |
|
"rewards/margins": 22.32204246520996, |
|
"rewards/rejected": -27.6429386138916, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 12.14562354763749, |
|
"grad_norm": 0.00013814242265652865, |
|
"learning_rate": 2.3071421839484554e-05, |
|
"logits/chosen": -1.6900997161865234, |
|
"logits/rejected": -1.0404036045074463, |
|
"logps/chosen": -399.94854736328125, |
|
"logps/rejected": -466.58642578125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.632657051086426, |
|
"rewards/margins": 21.346328735351562, |
|
"rewards/rejected": -26.978984832763672, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 12.207591014717273, |
|
"grad_norm": 0.0001951899757841602, |
|
"learning_rate": 2.2258512065459448e-05, |
|
"logits/chosen": -1.6699708700180054, |
|
"logits/rejected": -1.058363437652588, |
|
"logps/chosen": -421.36419677734375, |
|
"logps/rejected": -490.47100830078125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.72733211517334, |
|
"rewards/margins": 21.7630672454834, |
|
"rewards/rejected": -27.490398406982422, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 12.269558481797057, |
|
"grad_norm": 0.001167879207059741, |
|
"learning_rate": 2.1458384752379357e-05, |
|
"logits/chosen": -1.6963287591934204, |
|
"logits/rejected": -1.078595757484436, |
|
"logps/chosen": -400.4660339355469, |
|
"logps/rejected": -470.71710205078125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.399907112121582, |
|
"rewards/margins": 21.62917709350586, |
|
"rewards/rejected": -27.02908706665039, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 12.33152594887684, |
|
"grad_norm": 9.643881639931351e-06, |
|
"learning_rate": 2.067117145929216e-05, |
|
"logits/chosen": -1.688515305519104, |
|
"logits/rejected": -1.08303964138031, |
|
"logps/chosen": -402.33795166015625, |
|
"logps/rejected": -477.7525329589844, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -4.999421119689941, |
|
"rewards/margins": 22.334285736083984, |
|
"rewards/rejected": -27.333709716796875, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 12.393493415956623, |
|
"grad_norm": 0.0006664241082035005, |
|
"learning_rate": 1.9897001621888434e-05, |
|
"logits/chosen": -1.7171924114227295, |
|
"logits/rejected": -1.0485467910766602, |
|
"logps/chosen": -409.967529296875, |
|
"logps/rejected": -477.21551513671875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.805159568786621, |
|
"rewards/margins": 22.3187198638916, |
|
"rewards/rejected": -27.123876571655273, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 12.455460883036405, |
|
"grad_norm": 5.3627591114491224e-06, |
|
"learning_rate": 1.913600253121919e-05, |
|
"logits/chosen": -1.677496314048767, |
|
"logits/rejected": -1.0768311023712158, |
|
"logps/chosen": -421.8292541503906, |
|
"logps/rejected": -494.90606689453125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.249929904937744, |
|
"rewards/margins": 21.906986236572266, |
|
"rewards/rejected": -27.15691566467285, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 12.51742835011619, |
|
"grad_norm": 3.554378781700507e-05, |
|
"learning_rate": 1.838829931276653e-05, |
|
"logits/chosen": -1.6907306909561157, |
|
"logits/rejected": -1.0432696342468262, |
|
"logps/chosen": -398.9062805175781, |
|
"logps/rejected": -465.7071228027344, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -4.694939136505127, |
|
"rewards/margins": 22.108684539794922, |
|
"rewards/rejected": -26.80362319946289, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 12.579395817195973, |
|
"grad_norm": 6.133209535619244e-05, |
|
"learning_rate": 1.7654014905870098e-05, |
|
"logits/chosen": -1.6698366403579712, |
|
"logits/rejected": -1.0069531202316284, |
|
"logps/chosen": -417.49237060546875, |
|
"logps/rejected": -470.18902587890625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.820713520050049, |
|
"rewards/margins": 21.33327865600586, |
|
"rewards/rejected": -27.15399169921875, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 12.641363284275755, |
|
"grad_norm": 0.00020697916625067592, |
|
"learning_rate": 1.6933270043513083e-05, |
|
"logits/chosen": -1.677680253982544, |
|
"logits/rejected": -1.0464431047439575, |
|
"logps/chosen": -408.2115478515625, |
|
"logps/rejected": -478.3711853027344, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.883364200592041, |
|
"rewards/margins": 21.521183013916016, |
|
"rewards/rejected": -27.404544830322266, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 12.703330751355539, |
|
"grad_norm": 0.00018397132225800306, |
|
"learning_rate": 1.622618323247087e-05, |
|
"logits/chosen": -1.6993494033813477, |
|
"logits/rejected": -1.0857021808624268, |
|
"logps/chosen": -405.2132873535156, |
|
"logps/rejected": -485.60321044921875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.594387531280518, |
|
"rewards/margins": 21.590347290039062, |
|
"rewards/rejected": -27.184734344482422, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 12.765298218435321, |
|
"grad_norm": 0.00029773233109153807, |
|
"learning_rate": 1.553287073382609e-05, |
|
"logits/chosen": -1.7119516134262085, |
|
"logits/rejected": -1.0656880140304565, |
|
"logps/chosen": -405.5570373535156, |
|
"logps/rejected": -462.2611389160156, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.116886615753174, |
|
"rewards/margins": 21.456085205078125, |
|
"rewards/rejected": -26.57297134399414, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 12.827265685515105, |
|
"grad_norm": 0.0001080308502423577, |
|
"learning_rate": 1.485344654385239e-05, |
|
"logits/chosen": -1.6709296703338623, |
|
"logits/rejected": -1.053741693496704, |
|
"logps/chosen": -428.66839599609375, |
|
"logps/rejected": -500.01092529296875, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.277214050292969, |
|
"rewards/margins": 22.146846771240234, |
|
"rewards/rejected": -28.424060821533203, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 12.889233152594887, |
|
"grad_norm": 6.432453665183857e-05, |
|
"learning_rate": 1.418802237527106e-05, |
|
"logits/chosen": -1.68827223777771, |
|
"logits/rejected": -1.0494086742401123, |
|
"logps/chosen": -424.75286865234375, |
|
"logps/rejected": -481.1043395996094, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.699560165405273, |
|
"rewards/margins": 21.662763595581055, |
|
"rewards/rejected": -27.362323760986328, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 12.95120061967467, |
|
"grad_norm": 0.0004029480624012649, |
|
"learning_rate": 1.3536707638882872e-05, |
|
"logits/chosen": -1.6849908828735352, |
|
"logits/rejected": -1.0281345844268799, |
|
"logps/chosen": -419.80010986328125, |
|
"logps/rejected": -460.801025390625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.415833950042725, |
|
"rewards/margins": 20.73134422302246, |
|
"rewards/rejected": -26.147180557250977, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 13.013168086754455, |
|
"grad_norm": 0.0002039131213678047, |
|
"learning_rate": 1.289960942557844e-05, |
|
"logits/chosen": -1.686678171157837, |
|
"logits/rejected": -1.041481852531433, |
|
"logps/chosen": -418.22686767578125, |
|
"logps/rejected": -488.3094787597656, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.922072410583496, |
|
"rewards/margins": 21.746536254882812, |
|
"rewards/rejected": -27.66861343383789, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 13.075135553834237, |
|
"grad_norm": 0.00016347317432519048, |
|
"learning_rate": 1.2276832488730094e-05, |
|
"logits/chosen": -1.7182451486587524, |
|
"logits/rejected": -1.0532605648040771, |
|
"logps/chosen": -441.8271484375, |
|
"logps/rejected": -510.87628173828125, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.668587684631348, |
|
"rewards/margins": 22.97989273071289, |
|
"rewards/rejected": -28.648479461669922, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 13.13710302091402, |
|
"grad_norm": 0.00020034710178151727, |
|
"learning_rate": 1.1668479226967965e-05, |
|
"logits/chosen": -1.6925156116485596, |
|
"logits/rejected": -1.0687302350997925, |
|
"logps/chosen": -399.3315124511719, |
|
"logps/rejected": -474.7539978027344, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.459714889526367, |
|
"rewards/margins": 21.628223419189453, |
|
"rewards/rejected": -27.087936401367188, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 13.199070487993803, |
|
"grad_norm": 0.00026680485461838543, |
|
"learning_rate": 1.1074649667343506e-05, |
|
"logits/chosen": -1.6791460514068604, |
|
"logits/rejected": -1.0547727346420288, |
|
"logps/chosen": -412.1854553222656, |
|
"logps/rejected": -474.461181640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.000552654266357, |
|
"rewards/margins": 21.563953399658203, |
|
"rewards/rejected": -26.564502716064453, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 13.261037955073586, |
|
"grad_norm": 9.416981629328802e-05, |
|
"learning_rate": 1.0495441448882571e-05, |
|
"logits/chosen": -1.6752477884292603, |
|
"logits/rejected": -1.0648829936981201, |
|
"logps/chosen": -413.24609375, |
|
"logps/rejected": -496.79327392578125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.392674922943115, |
|
"rewards/margins": 22.125301361083984, |
|
"rewards/rejected": -27.51797866821289, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 13.32300542215337, |
|
"grad_norm": 0.00027022938593290746, |
|
"learning_rate": 9.930949806531509e-06, |
|
"logits/chosen": -1.6898155212402344, |
|
"logits/rejected": -1.0595139265060425, |
|
"logps/chosen": -410.2594299316406, |
|
"logps/rejected": -469.86883544921875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.0617547035217285, |
|
"rewards/margins": 21.85466194152832, |
|
"rewards/rejected": -26.916418075561523, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 13.384972889233152, |
|
"grad_norm": 5.3291834774427116e-05, |
|
"learning_rate": 9.38126755549832e-06, |
|
"logits/chosen": -1.6853482723236084, |
|
"logits/rejected": -1.0476603507995605, |
|
"logps/chosen": -411.350830078125, |
|
"logps/rejected": -470.8251037597656, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.509891510009766, |
|
"rewards/margins": 21.421506881713867, |
|
"rewards/rejected": -26.931400299072266, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 13.446940356312936, |
|
"grad_norm": 8.903396519599482e-05, |
|
"learning_rate": 8.846485075991728e-06, |
|
"logits/chosen": -1.6736446619033813, |
|
"logits/rejected": -1.0330798625946045, |
|
"logps/chosen": -417.89044189453125, |
|
"logps/rejected": -477.1280822753906, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.157768726348877, |
|
"rewards/margins": 21.815839767456055, |
|
"rewards/rejected": -26.973608016967773, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 13.508907823392718, |
|
"grad_norm": 0.0006522313342429698, |
|
"learning_rate": 8.326690298360639e-06, |
|
"logits/chosen": -1.679149866104126, |
|
"logits/rejected": -1.0622096061706543, |
|
"logps/chosen": -403.9975891113281, |
|
"logps/rejected": -478.72174072265625, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.251322269439697, |
|
"rewards/margins": 21.613903045654297, |
|
"rewards/rejected": -26.8652286529541, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 13.570875290472502, |
|
"grad_norm": 0.0001527480490040034, |
|
"learning_rate": 7.821968688636383e-06, |
|
"logits/chosen": -1.7000373601913452, |
|
"logits/rejected": -1.0500789880752563, |
|
"logps/chosen": -400.9742431640625, |
|
"logps/rejected": -477.05450439453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.700057506561279, |
|
"rewards/margins": 21.45535659790039, |
|
"rewards/rejected": -27.155414581298828, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 13.632842757552286, |
|
"grad_norm": 0.0005368488491512835, |
|
"learning_rate": 7.332403234480223e-06, |
|
"logits/chosen": -1.683445692062378, |
|
"logits/rejected": -1.0166078805923462, |
|
"logps/chosen": -401.72607421875, |
|
"logps/rejected": -456.4202575683594, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.469435691833496, |
|
"rewards/margins": 21.11139488220215, |
|
"rewards/rejected": -26.580829620361328, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 13.694810224632068, |
|
"grad_norm": 0.0005580181023105979, |
|
"learning_rate": 6.858074431538164e-06, |
|
"logits/chosen": -1.6824891567230225, |
|
"logits/rejected": -1.0271477699279785, |
|
"logps/chosen": -399.6391296386719, |
|
"logps/rejected": -451.330078125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.243688583374023, |
|
"rewards/margins": 21.208574295043945, |
|
"rewards/rejected": -26.452260971069336, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 13.756777691711852, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.421646080196197e-06, |
|
"logits/chosen": -1.6686054468154907, |
|
"logits/rejected": -1.0693179368972778, |
|
"logps/chosen": -401.59844970703125, |
|
"logps/rejected": -474.7311096191406, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.454672336578369, |
|
"rewards/margins": 21.39242172241211, |
|
"rewards/rejected": -26.847095489501953, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 13.818745158791634, |
|
"grad_norm": 1.7149226550827734e-05, |
|
"learning_rate": 5.9772507736462145e-06, |
|
"logits/chosen": -1.710008978843689, |
|
"logits/rejected": -1.0888980627059937, |
|
"logps/chosen": -407.61260986328125, |
|
"logps/rejected": -481.07550048828125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.6366987228393555, |
|
"rewards/margins": 21.678539276123047, |
|
"rewards/rejected": -27.315237045288086, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 13.880712625871418, |
|
"grad_norm": 2.4136075808200985e-05, |
|
"learning_rate": 5.54831493606015e-06, |
|
"logits/chosen": -1.6713101863861084, |
|
"logits/rejected": -1.0732184648513794, |
|
"logps/chosen": -424.976806640625, |
|
"logps/rejected": -506.0423889160156, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.765892028808594, |
|
"rewards/margins": 22.11074447631836, |
|
"rewards/rejected": -27.876636505126953, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 13.9426800929512, |
|
"grad_norm": 7.025560626061633e-05, |
|
"learning_rate": 5.134909094202267e-06, |
|
"logits/chosen": -1.699441909790039, |
|
"logits/rejected": -1.0467607975006104, |
|
"logps/chosen": -401.03375244140625, |
|
"logps/rejected": -447.85308837890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.419959545135498, |
|
"rewards/margins": 20.893884658813477, |
|
"rewards/rejected": -26.313846588134766, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 14.004647560030984, |
|
"grad_norm": 0.0002559265703894198, |
|
"learning_rate": 4.7371012213538235e-06, |
|
"logits/chosen": -1.6893657445907593, |
|
"logits/rejected": -1.0456167459487915, |
|
"logps/chosen": -425.73895263671875, |
|
"logps/rejected": -486.43890380859375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.418589115142822, |
|
"rewards/margins": 22.638408660888672, |
|
"rewards/rejected": -28.0570011138916, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 14.066615027110767, |
|
"grad_norm": 0.00043519827886484563, |
|
"learning_rate": 4.35495672613685e-06, |
|
"logits/chosen": -1.6840267181396484, |
|
"logits/rejected": -1.0660759210586548, |
|
"logps/chosen": -420.65692138671875, |
|
"logps/rejected": -481.805419921875, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -5.826098442077637, |
|
"rewards/margins": 21.706336975097656, |
|
"rewards/rejected": -27.53243637084961, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 14.12858249419055, |
|
"grad_norm": 0.0004038415208924562, |
|
"learning_rate": 3.988538441759382e-06, |
|
"logits/chosen": -1.673048973083496, |
|
"logits/rejected": -1.0200636386871338, |
|
"logps/chosen": -403.9557189941406, |
|
"logps/rejected": -461.65179443359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.021474361419678, |
|
"rewards/margins": 21.59840965270996, |
|
"rewards/rejected": -26.619884490966797, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 14.190549961270333, |
|
"grad_norm": 0.00038054597098380327, |
|
"learning_rate": 3.637906615684328e-06, |
|
"logits/chosen": -1.6679537296295166, |
|
"logits/rejected": -1.0269415378570557, |
|
"logps/chosen": -410.174072265625, |
|
"logps/rejected": -484.68865966796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.3633928298950195, |
|
"rewards/margins": 22.351978302001953, |
|
"rewards/rejected": -27.715368270874023, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 14.252517428350115, |
|
"grad_norm": 5.562596925301477e-05, |
|
"learning_rate": 3.3031188997233676e-06, |
|
"logits/chosen": -1.6873247623443604, |
|
"logits/rejected": -1.0105091333389282, |
|
"logps/chosen": -405.04132080078125, |
|
"logps/rejected": -454.36920166015625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.410122871398926, |
|
"rewards/margins": 21.17348289489746, |
|
"rewards/rejected": -26.583606719970703, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 14.3144848954299, |
|
"grad_norm": 4.7735171392560005e-05, |
|
"learning_rate": 2.9842303405577366e-06, |
|
"logits/chosen": -1.6932716369628906, |
|
"logits/rejected": -1.026926040649414, |
|
"logps/chosen": -416.610595703125, |
|
"logps/rejected": -469.50335693359375, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.13016414642334, |
|
"rewards/margins": 20.862241744995117, |
|
"rewards/rejected": -26.99240493774414, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 14.376452362509683, |
|
"grad_norm": 0.00047004391672089696, |
|
"learning_rate": 2.6812933706872545e-06, |
|
"logits/chosen": -1.6934292316436768, |
|
"logits/rejected": -1.063394546508789, |
|
"logps/chosen": -415.4750061035156, |
|
"logps/rejected": -489.5491638183594, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.2422404289245605, |
|
"rewards/margins": 22.516773223876953, |
|
"rewards/rejected": -27.759014129638672, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 14.438419829589465, |
|
"grad_norm": 0.0008643981418572366, |
|
"learning_rate": 2.394357799809277e-06, |
|
"logits/chosen": -1.735192894935608, |
|
"logits/rejected": -1.069784164428711, |
|
"logps/chosen": -409.0735168457031, |
|
"logps/rejected": -455.7366638183594, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.419035911560059, |
|
"rewards/margins": 21.468860626220703, |
|
"rewards/rejected": -26.887897491455078, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 14.500387296669249, |
|
"grad_norm": 0.0002557814004831016, |
|
"learning_rate": 2.123470806628858e-06, |
|
"logits/chosen": -1.6932361125946045, |
|
"logits/rejected": -1.03562331199646, |
|
"logps/chosen": -404.10223388671875, |
|
"logps/rejected": -452.8517150878906, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.449051856994629, |
|
"rewards/margins": 21.111392974853516, |
|
"rewards/rejected": -26.560443878173828, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 14.562354763749031, |
|
"grad_norm": 0.00017765916709322482, |
|
"learning_rate": 1.868676931101465e-06, |
|
"logits/chosen": -1.6715888977050781, |
|
"logits/rejected": -1.057328462600708, |
|
"logps/chosen": -411.4977111816406, |
|
"logps/rejected": -486.6917419433594, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.450153827667236, |
|
"rewards/margins": 22.20999526977539, |
|
"rewards/rejected": -27.6601505279541, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 14.624322230828815, |
|
"grad_norm": 0.0006002355949021876, |
|
"learning_rate": 1.6300180671096288e-06, |
|
"logits/chosen": -1.6742595434188843, |
|
"logits/rejected": -1.0468966960906982, |
|
"logps/chosen": -414.0707092285156, |
|
"logps/rejected": -482.42657470703125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.493812561035156, |
|
"rewards/margins": 21.657306671142578, |
|
"rewards/rejected": -27.151119232177734, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 14.686289697908599, |
|
"grad_norm": 0.00020658239373005927, |
|
"learning_rate": 1.4075334555746055e-06, |
|
"logits/chosen": -1.662987470626831, |
|
"logits/rejected": -1.016445279121399, |
|
"logps/chosen": -407.02423095703125, |
|
"logps/rejected": -467.1194763183594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.999020576477051, |
|
"rewards/margins": 20.836938858032227, |
|
"rewards/rejected": -26.83595848083496, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 14.748257164988381, |
|
"grad_norm": 6.777382805012167e-05, |
|
"learning_rate": 1.2012596780043627e-06, |
|
"logits/chosen": -1.6404949426651, |
|
"logits/rejected": -1.0619919300079346, |
|
"logps/chosen": -394.98443603515625, |
|
"logps/rejected": -479.7742614746094, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.746143341064453, |
|
"rewards/margins": 21.60362434387207, |
|
"rewards/rejected": -27.349767684936523, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 14.810224632068165, |
|
"grad_norm": 0.00017278394079767168, |
|
"learning_rate": 1.011230650478634e-06, |
|
"logits/chosen": -1.6573286056518555, |
|
"logits/rejected": -1.0122966766357422, |
|
"logps/chosen": -396.2731018066406, |
|
"logps/rejected": -456.626220703125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.127909183502197, |
|
"rewards/margins": 21.664600372314453, |
|
"rewards/rejected": -26.79250717163086, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 14.872192099147947, |
|
"grad_norm": 0.00017635834228713065, |
|
"learning_rate": 8.374776180724575e-07, |
|
"logits/chosen": -1.7095073461532593, |
|
"logits/rejected": -1.0201966762542725, |
|
"logps/chosen": -402.76763916015625, |
|
"logps/rejected": -461.19903564453125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.404868125915527, |
|
"rewards/margins": 21.330501556396484, |
|
"rewards/rejected": -26.735370635986328, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 14.93415956622773, |
|
"grad_norm": 0.0006217029877007008, |
|
"learning_rate": 6.800291497187083e-07, |
|
"logits/chosen": -1.7389657497406006, |
|
"logits/rejected": -1.0253870487213135, |
|
"logps/chosen": -406.7480163574219, |
|
"logps/rejected": -461.8179626464844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.920414447784424, |
|
"rewards/margins": 21.916866302490234, |
|
"rewards/rejected": -26.8372802734375, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 14.996127033307513, |
|
"grad_norm": 0.0001935044419951737, |
|
"learning_rate": 5.389111335107556e-07, |
|
"logits/chosen": -1.696392297744751, |
|
"logits/rejected": -1.0922819375991821, |
|
"logps/chosen": -414.5367736816406, |
|
"logps/rejected": -476.94012451171875, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.998685359954834, |
|
"rewards/margins": 21.558393478393555, |
|
"rewards/rejected": -27.557079315185547, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 15.058094500387297, |
|
"grad_norm": 4.989042645320296e-05, |
|
"learning_rate": 4.1414677244584477e-07, |
|
"logits/chosen": -1.690422773361206, |
|
"logits/rejected": -1.0694575309753418, |
|
"logps/chosen": -417.68487548828125, |
|
"logps/rejected": -490.20989990234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.380603313446045, |
|
"rewards/margins": 21.939071655273438, |
|
"rewards/rejected": -27.31967544555664, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 15.12006196746708, |
|
"grad_norm": 0.0008857127977535129, |
|
"learning_rate": 3.0575658061001713e-07, |
|
"logits/chosen": -1.692728042602539, |
|
"logits/rejected": -1.0653448104858398, |
|
"logps/chosen": -414.1552734375, |
|
"logps/rejected": -490.3134765625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -6.1294355392456055, |
|
"rewards/margins": 21.649871826171875, |
|
"rewards/rejected": -27.779308319091797, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 15.182029434546862, |
|
"grad_norm": 7.71297054598108e-05, |
|
"learning_rate": 2.1375837980512904e-07, |
|
"logits/chosen": -1.687190294265747, |
|
"logits/rejected": -1.0721074342727661, |
|
"logps/chosen": -410.22161865234375, |
|
"logps/rejected": -491.24835205078125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.182304859161377, |
|
"rewards/margins": 22.23093032836914, |
|
"rewards/rejected": -27.41323471069336, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 15.243996901626646, |
|
"grad_norm": 0.00017248830408789217, |
|
"learning_rate": 1.38167296618541e-07, |
|
"logits/chosen": -1.682885766029358, |
|
"logits/rejected": -1.0524094104766846, |
|
"logps/chosen": -410.17681884765625, |
|
"logps/rejected": -472.13885498046875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.831109046936035, |
|
"rewards/margins": 21.399702072143555, |
|
"rewards/rejected": -27.230810165405273, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 15.305964368706428, |
|
"grad_norm": 0.0008164289756678045, |
|
"learning_rate": 7.899575993597363e-08, |
|
"logits/chosen": -1.6627308130264282, |
|
"logits/rejected": -0.9520984888076782, |
|
"logps/chosen": -395.6473693847656, |
|
"logps/rejected": -433.9269104003906, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -5.2272748947143555, |
|
"rewards/margins": 20.858642578125, |
|
"rewards/rejected": -26.08591651916504, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 15.367931835786212, |
|
"grad_norm": 0.00019182954565621912, |
|
"learning_rate": 3.6253498897886873e-08, |
|
"logits/chosen": -1.6554197072982788, |
|
"logits/rejected": -1.0059171915054321, |
|
"logps/chosen": -394.91973876953125, |
|
"logps/rejected": -451.76312255859375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -5.4583234786987305, |
|
"rewards/margins": 21.091644287109375, |
|
"rewards/rejected": -26.549968719482422, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 15.429899302865996, |
|
"grad_norm": 0.00014239229494705796, |
|
"learning_rate": 9.947541299837327e-09, |
|
"logits/chosen": -1.7060569524765015, |
|
"logits/rejected": -1.0418967008590698, |
|
"logps/chosen": -427.88525390625, |
|
"logps/rejected": -482.782958984375, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -5.570733070373535, |
|
"rewards/margins": 21.934314727783203, |
|
"rewards/rejected": -27.505046844482422, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 15.491866769945778, |
|
"grad_norm": 0.0005336150643415749, |
|
"learning_rate": 8.221243689154889e-11, |
|
"logits/chosen": -1.6255543231964111, |
|
"logits/rejected": -1.027090311050415, |
|
"logps/chosen": -393.7467956542969, |
|
"logps/rejected": -484.93804931640625, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 0.996874988079071, |
|
"rewards/chosen": -5.538996696472168, |
|
"rewards/margins": 21.719022750854492, |
|
"rewards/rejected": -27.25801658630371, |
|
"step": 5000 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 16, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|