|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00026171159382360636, |
|
"grad_norm": 2.4548187255859375, |
|
"learning_rate": 1.3054830287206268e-08, |
|
"logits/chosen": -2.4529099464416504, |
|
"logits/rejected": -2.357592821121216, |
|
"logps/chosen": -290.4953308105469, |
|
"logps/rejected": -374.6131591796875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0026171159382360636, |
|
"grad_norm": 2.4077019691467285, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": -2.281538724899292, |
|
"logits/rejected": -2.181558132171631, |
|
"logps/chosen": -279.58404541015625, |
|
"logps/rejected": -245.3978729248047, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": -0.00010121504601556808, |
|
"rewards/margins": 3.008971361850854e-05, |
|
"rewards/rejected": -0.00013130476872902364, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.005234231876472127, |
|
"grad_norm": 2.513380527496338, |
|
"learning_rate": 2.610966057441253e-07, |
|
"logits/chosen": -2.2868807315826416, |
|
"logits/rejected": -2.13252329826355, |
|
"logps/chosen": -305.4561462402344, |
|
"logps/rejected": -237.63320922851562, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.003805191023275256, |
|
"rewards/margins": 0.0015581005718559027, |
|
"rewards/rejected": 0.00224709021858871, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007851347814708191, |
|
"grad_norm": 2.3212156295776367, |
|
"learning_rate": 3.9164490861618804e-07, |
|
"logits/chosen": -2.271130084991455, |
|
"logits/rejected": -2.2239737510681152, |
|
"logps/chosen": -251.1245574951172, |
|
"logps/rejected": -251.23959350585938, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.012071892619132996, |
|
"rewards/margins": 0.0013887647073715925, |
|
"rewards/rejected": 0.010683128610253334, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 1.9601231813430786, |
|
"learning_rate": 5.221932114882506e-07, |
|
"logits/chosen": -2.166848659515381, |
|
"logits/rejected": -2.1315042972564697, |
|
"logps/chosen": -216.14614868164062, |
|
"logps/rejected": -221.613037109375, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.018966395407915115, |
|
"rewards/margins": 0.003130494151264429, |
|
"rewards/rejected": 0.015835899859666824, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01308557969118032, |
|
"grad_norm": 2.087867259979248, |
|
"learning_rate": 6.527415143603135e-07, |
|
"logits/chosen": -2.212444305419922, |
|
"logits/rejected": -2.173527956008911, |
|
"logps/chosen": -266.7479553222656, |
|
"logps/rejected": -234.2369384765625, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.029817840084433556, |
|
"rewards/margins": 0.004731935448944569, |
|
"rewards/rejected": 0.02508590742945671, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.015702695629416383, |
|
"grad_norm": 2.1439507007598877, |
|
"learning_rate": 7.832898172323761e-07, |
|
"logits/chosen": -2.169727325439453, |
|
"logits/rejected": -2.1059727668762207, |
|
"logps/chosen": -252.1941375732422, |
|
"logps/rejected": -226.5443115234375, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.032556358724832535, |
|
"rewards/margins": 0.006436903961002827, |
|
"rewards/rejected": 0.026119451969861984, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.018319811567652448, |
|
"grad_norm": 2.062434673309326, |
|
"learning_rate": 9.138381201044387e-07, |
|
"logits/chosen": -2.3089659214019775, |
|
"logits/rejected": -2.1857352256774902, |
|
"logps/chosen": -271.83209228515625, |
|
"logps/rejected": -246.536376953125, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.042484961450099945, |
|
"rewards/margins": 0.011770189739763737, |
|
"rewards/rejected": 0.03071477636694908, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 2.387071132659912, |
|
"learning_rate": 1.0443864229765013e-06, |
|
"logits/chosen": -2.2028708457946777, |
|
"logits/rejected": -2.112464427947998, |
|
"logps/chosen": -257.39007568359375, |
|
"logps/rejected": -246.71914672851562, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.038935337215662, |
|
"rewards/margins": 0.011790206655859947, |
|
"rewards/rejected": 0.027145132422447205, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.023554043444124574, |
|
"grad_norm": 2.23413348197937, |
|
"learning_rate": 1.1749347258485642e-06, |
|
"logits/chosen": -2.209949493408203, |
|
"logits/rejected": -2.136049509048462, |
|
"logps/chosen": -249.93435668945312, |
|
"logps/rejected": -234.4086151123047, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0436517670750618, |
|
"rewards/margins": 0.01937195286154747, |
|
"rewards/rejected": 0.02427981235086918, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"grad_norm": 2.0995676517486572, |
|
"learning_rate": 1.305483028720627e-06, |
|
"logits/chosen": -2.250483751296997, |
|
"logits/rejected": -2.1787195205688477, |
|
"logps/chosen": -246.7249755859375, |
|
"logps/rejected": -230.78726196289062, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.04966636374592781, |
|
"rewards/margins": 0.025339443236589432, |
|
"rewards/rejected": 0.02432691864669323, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"eval_logits/chosen": -2.1487553119659424, |
|
"eval_logits/rejected": -2.0557751655578613, |
|
"eval_logps/chosen": -259.4205627441406, |
|
"eval_logps/rejected": -241.98690795898438, |
|
"eval_loss": 0.6806859970092773, |
|
"eval_rewards/accuracies": 0.6579999923706055, |
|
"eval_rewards/chosen": 0.051943570375442505, |
|
"eval_rewards/margins": 0.026209397241473198, |
|
"eval_rewards/rejected": 0.02573416940867901, |
|
"eval_runtime": 1628.166, |
|
"eval_samples_per_second": 1.228, |
|
"eval_steps_per_second": 0.154, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.028788275320596704, |
|
"grad_norm": 2.388845682144165, |
|
"learning_rate": 1.4360313315926894e-06, |
|
"logits/chosen": -2.241703987121582, |
|
"logits/rejected": -2.1246142387390137, |
|
"logps/chosen": -284.23797607421875, |
|
"logps/rejected": -239.15945434570312, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.05070078372955322, |
|
"rewards/margins": 0.034246720373630524, |
|
"rewards/rejected": 0.016454065218567848, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 2.2597317695617676, |
|
"learning_rate": 1.5665796344647521e-06, |
|
"logits/chosen": -2.2740626335144043, |
|
"logits/rejected": -2.1607062816619873, |
|
"logps/chosen": -287.3439025878906, |
|
"logps/rejected": -272.5523681640625, |
|
"loss": 0.6695, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.05601048469543457, |
|
"rewards/margins": 0.049991391599178314, |
|
"rewards/rejected": 0.00601908378303051, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03402250719706883, |
|
"grad_norm": 2.850572109222412, |
|
"learning_rate": 1.6971279373368146e-06, |
|
"logits/chosen": -2.2888760566711426, |
|
"logits/rejected": -2.1915862560272217, |
|
"logps/chosen": -250.40792846679688, |
|
"logps/rejected": -254.3070831298828, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.046189673244953156, |
|
"rewards/margins": 0.06280346214771271, |
|
"rewards/rejected": -0.016613787040114403, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.036639623135304895, |
|
"grad_norm": 2.827874183654785, |
|
"learning_rate": 1.8276762402088774e-06, |
|
"logits/chosen": -2.284651041030884, |
|
"logits/rejected": -2.0745301246643066, |
|
"logps/chosen": -272.6170349121094, |
|
"logps/rejected": -229.8065185546875, |
|
"loss": 0.6623, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.02093890681862831, |
|
"rewards/margins": 0.06832924485206604, |
|
"rewards/rejected": -0.047390345484018326, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03925673907354096, |
|
"grad_norm": 3.0178773403167725, |
|
"learning_rate": 1.9582245430809403e-06, |
|
"logits/chosen": -2.2895896434783936, |
|
"logits/rejected": -2.168560028076172, |
|
"logps/chosen": -283.7343444824219, |
|
"logps/rejected": -248.0662078857422, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.0016987703274935484, |
|
"rewards/margins": 0.07390020042657852, |
|
"rewards/rejected": -0.0722014307975769, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 3.1758434772491455, |
|
"learning_rate": 2.0887728459530026e-06, |
|
"logits/chosen": -2.209841251373291, |
|
"logits/rejected": -2.150552988052368, |
|
"logps/chosen": -262.22222900390625, |
|
"logps/rejected": -269.7354431152344, |
|
"loss": 0.6631, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0545073039829731, |
|
"rewards/margins": 0.07355803996324539, |
|
"rewards/rejected": -0.1280653327703476, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04449097095001309, |
|
"grad_norm": 3.9003469944000244, |
|
"learning_rate": 2.2193211488250653e-06, |
|
"logits/chosen": -2.1911962032318115, |
|
"logits/rejected": -2.110661506652832, |
|
"logps/chosen": -227.29232788085938, |
|
"logps/rejected": -236.85684204101562, |
|
"loss": 0.6631, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.0682467371225357, |
|
"rewards/margins": 0.07278834283351898, |
|
"rewards/rejected": -0.14103509485721588, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04710808688824915, |
|
"grad_norm": 4.6043620109558105, |
|
"learning_rate": 2.3498694516971284e-06, |
|
"logits/chosen": -2.207650661468506, |
|
"logits/rejected": -2.1264257431030273, |
|
"logps/chosen": -273.5342712402344, |
|
"logps/rejected": -269.66876220703125, |
|
"loss": 0.6548, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.17898549139499664, |
|
"rewards/margins": 0.09451718628406525, |
|
"rewards/rejected": -0.2735026478767395, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04972520282648522, |
|
"grad_norm": 4.391633987426758, |
|
"learning_rate": 2.4804177545691907e-06, |
|
"logits/chosen": -2.28275990486145, |
|
"logits/rejected": -2.165226936340332, |
|
"logps/chosen": -282.49395751953125, |
|
"logps/rejected": -268.0941467285156, |
|
"loss": 0.6364, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.12487462908029556, |
|
"rewards/margins": 0.13966041803359985, |
|
"rewards/rejected": -0.26453500986099243, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 5.526188373565674, |
|
"learning_rate": 2.610966057441254e-06, |
|
"logits/chosen": -2.2079992294311523, |
|
"logits/rejected": -2.086857318878174, |
|
"logps/chosen": -267.3877258300781, |
|
"logps/rejected": -241.93032836914062, |
|
"loss": 0.6438, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.17729662358760834, |
|
"rewards/margins": 0.12684157490730286, |
|
"rewards/rejected": -0.3041382431983948, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"eval_logits/chosen": -2.1026034355163574, |
|
"eval_logits/rejected": -2.0144851207733154, |
|
"eval_logps/chosen": -283.6620788574219, |
|
"eval_logps/rejected": -278.8497009277344, |
|
"eval_loss": 0.6350578665733337, |
|
"eval_rewards/accuracies": 0.6800000071525574, |
|
"eval_rewards/chosen": -0.1904720813035965, |
|
"eval_rewards/margins": 0.1524215042591095, |
|
"eval_rewards/rejected": -0.3428936302661896, |
|
"eval_runtime": 1725.681, |
|
"eval_samples_per_second": 1.159, |
|
"eval_steps_per_second": 0.145, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05495943470295734, |
|
"grad_norm": 4.445857524871826, |
|
"learning_rate": 2.741514360313316e-06, |
|
"logits/chosen": -2.2572193145751953, |
|
"logits/rejected": -2.1230695247650146, |
|
"logps/chosen": -278.8066101074219, |
|
"logps/rejected": -266.4608459472656, |
|
"loss": 0.6154, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1674547642469406, |
|
"rewards/margins": 0.19266971945762634, |
|
"rewards/rejected": -0.36012452840805054, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05757655064119341, |
|
"grad_norm": 6.93760871887207, |
|
"learning_rate": 2.872062663185379e-06, |
|
"logits/chosen": -2.1456832885742188, |
|
"logits/rejected": -2.0984296798706055, |
|
"logps/chosen": -272.310546875, |
|
"logps/rejected": -267.6050720214844, |
|
"loss": 0.6093, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.18377810716629028, |
|
"rewards/margins": 0.21743841469287872, |
|
"rewards/rejected": -0.4012165069580078, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06019366657942947, |
|
"grad_norm": 11.938814163208008, |
|
"learning_rate": 3.0026109660574416e-06, |
|
"logits/chosen": -2.285099744796753, |
|
"logits/rejected": -2.1850409507751465, |
|
"logps/chosen": -349.00244140625, |
|
"logps/rejected": -328.79248046875, |
|
"loss": 0.6575, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.4292556345462799, |
|
"rewards/margins": 0.15273931622505188, |
|
"rewards/rejected": -0.5819950103759766, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 10.254609107971191, |
|
"learning_rate": 3.1331592689295043e-06, |
|
"logits/chosen": -2.1714115142822266, |
|
"logits/rejected": -2.1014115810394287, |
|
"logps/chosen": -335.92095947265625, |
|
"logps/rejected": -342.66046142578125, |
|
"loss": 0.6287, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.39719343185424805, |
|
"rewards/margins": 0.21806569397449493, |
|
"rewards/rejected": -0.6152591109275818, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06542789845590159, |
|
"grad_norm": 5.5169548988342285, |
|
"learning_rate": 3.263707571801567e-06, |
|
"logits/chosen": -2.180147647857666, |
|
"logits/rejected": -2.158116579055786, |
|
"logps/chosen": -300.8548278808594, |
|
"logps/rejected": -300.14276123046875, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.4140791893005371, |
|
"rewards/margins": 0.2470981329679489, |
|
"rewards/rejected": -0.6611773371696472, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06804501439413765, |
|
"grad_norm": 8.337285995483398, |
|
"learning_rate": 3.3942558746736293e-06, |
|
"logits/chosen": -2.213538646697998, |
|
"logits/rejected": -2.089621067047119, |
|
"logps/chosen": -320.1325378417969, |
|
"logps/rejected": -318.0543518066406, |
|
"loss": 0.6311, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.478567510843277, |
|
"rewards/margins": 0.1991441696882248, |
|
"rewards/rejected": -0.6777117252349854, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07066213033237373, |
|
"grad_norm": 4.801840305328369, |
|
"learning_rate": 3.524804177545692e-06, |
|
"logits/chosen": -2.1710739135742188, |
|
"logits/rejected": -2.1057240962982178, |
|
"logps/chosen": -291.7244873046875, |
|
"logps/rejected": -287.46148681640625, |
|
"loss": 0.598, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3111107349395752, |
|
"rewards/margins": 0.27230924367904663, |
|
"rewards/rejected": -0.583419919013977, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 6.222449779510498, |
|
"learning_rate": 3.6553524804177547e-06, |
|
"logits/chosen": -2.206630229949951, |
|
"logits/rejected": -2.086956739425659, |
|
"logps/chosen": -302.73236083984375, |
|
"logps/rejected": -307.13238525390625, |
|
"loss": 0.6125, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.438936710357666, |
|
"rewards/margins": 0.25869089365005493, |
|
"rewards/rejected": -0.6976275444030762, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07589636220884585, |
|
"grad_norm": 4.925107955932617, |
|
"learning_rate": 3.7859007832898174e-06, |
|
"logits/chosen": -2.1828970909118652, |
|
"logits/rejected": -2.1146278381347656, |
|
"logps/chosen": -345.37225341796875, |
|
"logps/rejected": -351.8639221191406, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5980569124221802, |
|
"rewards/margins": 0.31618183851242065, |
|
"rewards/rejected": -0.9142388105392456, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"grad_norm": 9.889649391174316, |
|
"learning_rate": 3.9164490861618806e-06, |
|
"logits/chosen": -2.195115327835083, |
|
"logits/rejected": -2.0654757022857666, |
|
"logps/chosen": -292.487060546875, |
|
"logps/rejected": -312.0032043457031, |
|
"loss": 0.5829, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.48439502716064453, |
|
"rewards/margins": 0.3321578800678253, |
|
"rewards/rejected": -0.8165529370307922, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"eval_logits/chosen": -2.134176731109619, |
|
"eval_logits/rejected": -2.0507938861846924, |
|
"eval_logps/chosen": -309.2386474609375, |
|
"eval_logps/rejected": -315.8948974609375, |
|
"eval_loss": 0.6071631908416748, |
|
"eval_rewards/accuracies": 0.6779999732971191, |
|
"eval_rewards/chosen": -0.44623735547065735, |
|
"eval_rewards/margins": 0.26710858941078186, |
|
"eval_rewards/rejected": -0.7133459448814392, |
|
"eval_runtime": 1583.0322, |
|
"eval_samples_per_second": 1.263, |
|
"eval_steps_per_second": 0.158, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08113059408531798, |
|
"grad_norm": 7.277074813842773, |
|
"learning_rate": 4.046997389033943e-06, |
|
"logits/chosen": -2.3288633823394775, |
|
"logits/rejected": -2.2150847911834717, |
|
"logps/chosen": -340.07061767578125, |
|
"logps/rejected": -320.6544494628906, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.5125407576560974, |
|
"rewards/margins": 0.3419082760810852, |
|
"rewards/rejected": -0.8544490933418274, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 8.367517471313477, |
|
"learning_rate": 4.177545691906005e-06, |
|
"logits/chosen": -2.236116647720337, |
|
"logits/rejected": -2.1389379501342773, |
|
"logps/chosen": -327.5163879394531, |
|
"logps/rejected": -331.678955078125, |
|
"loss": 0.6384, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6931655406951904, |
|
"rewards/margins": 0.21217937767505646, |
|
"rewards/rejected": -0.9053448438644409, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08636482596179011, |
|
"grad_norm": 4.962728977203369, |
|
"learning_rate": 4.308093994778068e-06, |
|
"logits/chosen": -2.129941463470459, |
|
"logits/rejected": -2.0937793254852295, |
|
"logps/chosen": -362.57940673828125, |
|
"logps/rejected": -357.7266845703125, |
|
"loss": 0.631, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0307652950286865, |
|
"rewards/margins": 0.1927981674671173, |
|
"rewards/rejected": -1.2235634326934814, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08898194190002617, |
|
"grad_norm": 5.679270267486572, |
|
"learning_rate": 4.4386422976501306e-06, |
|
"logits/chosen": -2.1559038162231445, |
|
"logits/rejected": -2.1013383865356445, |
|
"logps/chosen": -398.1541442871094, |
|
"logps/rejected": -407.53564453125, |
|
"loss": 0.6093, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0855846405029297, |
|
"rewards/margins": 0.30799657106399536, |
|
"rewards/rejected": -1.3935811519622803, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09159905783826224, |
|
"grad_norm": 6.67193603515625, |
|
"learning_rate": 4.569190600522193e-06, |
|
"logits/chosen": -1.9782785177230835, |
|
"logits/rejected": -1.8998119831085205, |
|
"logps/chosen": -430.1962890625, |
|
"logps/rejected": -446.568359375, |
|
"loss": 0.6298, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.410946011543274, |
|
"rewards/margins": 0.2784286439418793, |
|
"rewards/rejected": -1.6893746852874756, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 6.463137149810791, |
|
"learning_rate": 4.699738903394257e-06, |
|
"logits/chosen": -1.8780485391616821, |
|
"logits/rejected": -1.8210369348526, |
|
"logps/chosen": -398.168212890625, |
|
"logps/rejected": -414.5179748535156, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.5719618797302246, |
|
"rewards/margins": 0.37264296412467957, |
|
"rewards/rejected": -1.9446048736572266, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09683328971473436, |
|
"grad_norm": 9.000224113464355, |
|
"learning_rate": 4.8302872062663196e-06, |
|
"logits/chosen": -1.8958412408828735, |
|
"logits/rejected": -1.7773358821868896, |
|
"logps/chosen": -420.69122314453125, |
|
"logps/rejected": -423.59820556640625, |
|
"loss": 0.5596, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.4388353824615479, |
|
"rewards/margins": 0.4672966003417969, |
|
"rewards/rejected": -1.9061321020126343, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09945040565297043, |
|
"grad_norm": 8.802579879760742, |
|
"learning_rate": 4.9608355091383814e-06, |
|
"logits/chosen": -1.8825538158416748, |
|
"logits/rejected": -1.7338628768920898, |
|
"logps/chosen": -393.0355529785156, |
|
"logps/rejected": -406.0670166015625, |
|
"loss": 0.5634, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9511327743530273, |
|
"rewards/margins": 0.46367520093917847, |
|
"rewards/rejected": -1.4148077964782715, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1020675215912065, |
|
"grad_norm": 13.02705192565918, |
|
"learning_rate": 4.9999488562447675e-06, |
|
"logits/chosen": -1.7599290609359741, |
|
"logits/rejected": -1.6825551986694336, |
|
"logps/chosen": -410.8023376464844, |
|
"logps/rejected": -442.77069091796875, |
|
"loss": 0.5788, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.298668622970581, |
|
"rewards/margins": 0.5200435519218445, |
|
"rewards/rejected": -1.8187124729156494, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 13.382437705993652, |
|
"learning_rate": 4.999698361256577e-06, |
|
"logits/chosen": -1.7113679647445679, |
|
"logits/rejected": -1.6131651401519775, |
|
"logps/chosen": -458.7721252441406, |
|
"logps/rejected": -455.18988037109375, |
|
"loss": 0.6201, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.9338643550872803, |
|
"rewards/margins": 0.3708694577217102, |
|
"rewards/rejected": -2.3047337532043457, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"eval_logits/chosen": -1.7201672792434692, |
|
"eval_logits/rejected": -1.637402057647705, |
|
"eval_logps/chosen": -413.682861328125, |
|
"eval_logps/rejected": -439.98870849609375, |
|
"eval_loss": 0.5892496705055237, |
|
"eval_rewards/accuracies": 0.684499979019165, |
|
"eval_rewards/chosen": -1.4906798601150513, |
|
"eval_rewards/margins": 0.4636039435863495, |
|
"eval_rewards/rejected": -1.9542837142944336, |
|
"eval_runtime": 1582.6906, |
|
"eval_samples_per_second": 1.264, |
|
"eval_steps_per_second": 0.158, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10730175346767862, |
|
"grad_norm": 5.950835704803467, |
|
"learning_rate": 4.999239142174581e-06, |
|
"logits/chosen": -1.8206182718276978, |
|
"logits/rejected": -1.76302969455719, |
|
"logps/chosen": -344.7633056640625, |
|
"logps/rejected": -375.6517639160156, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.0250967741012573, |
|
"rewards/margins": 0.3263424336910248, |
|
"rewards/rejected": -1.3514392375946045, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10991886940591468, |
|
"grad_norm": 7.535224437713623, |
|
"learning_rate": 4.99857123734344e-06, |
|
"logits/chosen": -1.721353530883789, |
|
"logits/rejected": -1.5896873474121094, |
|
"logps/chosen": -316.44989013671875, |
|
"logps/rejected": -364.8625183105469, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8779315948486328, |
|
"rewards/margins": 0.5743580460548401, |
|
"rewards/rejected": -1.4522895812988281, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11253598534415074, |
|
"grad_norm": 7.4984517097473145, |
|
"learning_rate": 4.997694702533016e-06, |
|
"logits/chosen": -1.5961456298828125, |
|
"logits/rejected": -1.5111815929412842, |
|
"logps/chosen": -389.32958984375, |
|
"logps/rejected": -431.39727783203125, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.118457555770874, |
|
"rewards/margins": 0.6571696400642395, |
|
"rewards/rejected": -1.7756273746490479, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 8.746241569519043, |
|
"learning_rate": 4.996609610933713e-06, |
|
"logits/chosen": -1.7189710140228271, |
|
"logits/rejected": -1.6461889743804932, |
|
"logps/chosen": -359.4372253417969, |
|
"logps/rejected": -380.6383361816406, |
|
"loss": 0.5959, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8736177682876587, |
|
"rewards/margins": 0.4956343173980713, |
|
"rewards/rejected": -1.3692519664764404, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11777021722062288, |
|
"grad_norm": 10.564971923828125, |
|
"learning_rate": 4.995316053150366e-06, |
|
"logits/chosen": -1.5438826084136963, |
|
"logits/rejected": -1.46512770652771, |
|
"logps/chosen": -384.11663818359375, |
|
"logps/rejected": -418.8710021972656, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1636836528778076, |
|
"rewards/margins": 0.5783780813217163, |
|
"rewards/rejected": -1.7420616149902344, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12038733315885894, |
|
"grad_norm": 11.805535316467285, |
|
"learning_rate": 4.9938141371946815e-06, |
|
"logits/chosen": -1.4091435670852661, |
|
"logits/rejected": -1.3203108310699463, |
|
"logps/chosen": -540.69287109375, |
|
"logps/rejected": -591.8529052734375, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.756392478942871, |
|
"rewards/margins": 0.657163679599762, |
|
"rewards/rejected": -3.4135565757751465, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.123004449097095, |
|
"grad_norm": 9.790424346923828, |
|
"learning_rate": 4.992103988476206e-06, |
|
"logits/chosen": -1.5596047639846802, |
|
"logits/rejected": -1.4357885122299194, |
|
"logps/chosen": -443.34210205078125, |
|
"logps/rejected": -497.86248779296875, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.9961204528808594, |
|
"rewards/margins": 0.6699923276901245, |
|
"rewards/rejected": -2.6661131381988525, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 5.368061542510986, |
|
"learning_rate": 4.990185749791866e-06, |
|
"logits/chosen": -1.7159227132797241, |
|
"logits/rejected": -1.5995564460754395, |
|
"logps/chosen": -349.0379943847656, |
|
"logps/rejected": -414.6690979003906, |
|
"loss": 0.5272, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9223679304122925, |
|
"rewards/margins": 0.6385096907615662, |
|
"rewards/rejected": -1.560877799987793, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12823868097356714, |
|
"grad_norm": 14.292590141296387, |
|
"learning_rate": 4.9880595813140395e-06, |
|
"logits/chosen": -1.705514669418335, |
|
"logits/rejected": -1.5731843709945679, |
|
"logps/chosen": -385.6563720703125, |
|
"logps/rejected": -410.178955078125, |
|
"loss": 0.5619, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0085517168045044, |
|
"rewards/margins": 0.5483412742614746, |
|
"rewards/rejected": -1.5568931102752686, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"grad_norm": 8.968839645385742, |
|
"learning_rate": 4.985725660577184e-06, |
|
"logits/chosen": -1.5214301347732544, |
|
"logits/rejected": -1.3472332954406738, |
|
"logps/chosen": -424.3779296875, |
|
"logps/rejected": -453.65496826171875, |
|
"loss": 0.5798, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.5070832967758179, |
|
"rewards/margins": 0.7050014734268188, |
|
"rewards/rejected": -2.2120845317840576, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"eval_logits/chosen": -1.3375675678253174, |
|
"eval_logits/rejected": -1.204632043838501, |
|
"eval_logps/chosen": -395.8431701660156, |
|
"eval_logps/rejected": -444.97088623046875, |
|
"eval_loss": 0.5667398571968079, |
|
"eval_rewards/accuracies": 0.7020000219345093, |
|
"eval_rewards/chosen": -1.3122824430465698, |
|
"eval_rewards/margins": 0.6918234825134277, |
|
"eval_rewards/rejected": -2.004106044769287, |
|
"eval_runtime": 1590.0362, |
|
"eval_samples_per_second": 1.258, |
|
"eval_steps_per_second": 0.157, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13347291285003926, |
|
"grad_norm": 18.905487060546875, |
|
"learning_rate": 4.983184182463009e-06, |
|
"logits/chosen": -1.4677969217300415, |
|
"logits/rejected": -1.3418806791305542, |
|
"logps/chosen": -405.25433349609375, |
|
"logps/rejected": -447.4716796875, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.2714810371398926, |
|
"rewards/margins": 0.791718602180481, |
|
"rewards/rejected": -2.063199520111084, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 14.580382347106934, |
|
"learning_rate": 4.980435359184203e-06, |
|
"logits/chosen": -1.5167419910430908, |
|
"logits/rejected": -1.4652340412139893, |
|
"logps/chosen": -385.03106689453125, |
|
"logps/rejected": -418.76470947265625, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.12017822265625, |
|
"rewards/margins": 0.5001946687698364, |
|
"rewards/rejected": -1.6203731298446655, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13870714472651138, |
|
"grad_norm": 6.707192420959473, |
|
"learning_rate": 4.9774794202667236e-06, |
|
"logits/chosen": -1.4496371746063232, |
|
"logits/rejected": -1.4301903247833252, |
|
"logps/chosen": -357.41998291015625, |
|
"logps/rejected": -416.819091796875, |
|
"loss": 0.5662, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9647631645202637, |
|
"rewards/margins": 0.5089520215988159, |
|
"rewards/rejected": -1.47371506690979, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14132426066474746, |
|
"grad_norm": 11.740205764770508, |
|
"learning_rate": 4.974316612530615e-06, |
|
"logits/chosen": -1.251502513885498, |
|
"logits/rejected": -1.0849131345748901, |
|
"logps/chosen": -420.04437255859375, |
|
"logps/rejected": -463.4141540527344, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.4166905879974365, |
|
"rewards/margins": 0.9010774493217468, |
|
"rewards/rejected": -2.317768096923828, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1439413766029835, |
|
"grad_norm": 13.1097412109375, |
|
"learning_rate": 4.970947200069416e-06, |
|
"logits/chosen": -1.1829249858856201, |
|
"logits/rejected": -1.1032363176345825, |
|
"logps/chosen": -457.55242919921875, |
|
"logps/rejected": -504.1400451660156, |
|
"loss": 0.6298, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.7912673950195312, |
|
"rewards/margins": 0.6638978719711304, |
|
"rewards/rejected": -2.455165386199951, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 5.982730388641357, |
|
"learning_rate": 4.967371464228096e-06, |
|
"logits/chosen": -1.4941701889038086, |
|
"logits/rejected": -1.3918794393539429, |
|
"logps/chosen": -399.75579833984375, |
|
"logps/rejected": -464.8260192871094, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.4229604005813599, |
|
"rewards/margins": 0.6473892331123352, |
|
"rewards/rejected": -2.0703494548797607, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.14917560847945563, |
|
"grad_norm": 8.946754455566406, |
|
"learning_rate": 4.963589703579569e-06, |
|
"logits/chosen": -1.5534603595733643, |
|
"logits/rejected": -1.4113835096359253, |
|
"logps/chosen": -472.273681640625, |
|
"logps/rejected": -499.8206481933594, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.722957968711853, |
|
"rewards/margins": 0.6225946545600891, |
|
"rewards/rejected": -2.345552921295166, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1517927244176917, |
|
"grad_norm": 10.59168529510498, |
|
"learning_rate": 4.9596022338997615e-06, |
|
"logits/chosen": -1.4608839750289917, |
|
"logits/rejected": -1.2478128671646118, |
|
"logps/chosen": -492.757080078125, |
|
"logps/rejected": -528.3729248046875, |
|
"loss": 0.5337, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.9507982730865479, |
|
"rewards/margins": 0.7664415240287781, |
|
"rewards/rejected": -2.7172398567199707, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15440984035592778, |
|
"grad_norm": 6.83862829208374, |
|
"learning_rate": 4.955409388141243e-06, |
|
"logits/chosen": -1.38741135597229, |
|
"logits/rejected": -1.2707990407943726, |
|
"logps/chosen": -408.0611572265625, |
|
"logps/rejected": -441.0558166503906, |
|
"loss": 0.5872, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.50876784324646, |
|
"rewards/margins": 0.5663283467292786, |
|
"rewards/rejected": -2.0750961303710938, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 5.814328193664551, |
|
"learning_rate": 4.951011516405429e-06, |
|
"logits/chosen": -1.5240575075149536, |
|
"logits/rejected": -1.4667167663574219, |
|
"logps/chosen": -355.43487548828125, |
|
"logps/rejected": -398.4039001464844, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.0472333431243896, |
|
"rewards/margins": 0.579511821269989, |
|
"rewards/rejected": -1.6267452239990234, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"eval_logits/chosen": -1.2780898809432983, |
|
"eval_logits/rejected": -1.1445080041885376, |
|
"eval_logps/chosen": -386.18792724609375, |
|
"eval_logps/rejected": -426.8257751464844, |
|
"eval_loss": 0.5524141192436218, |
|
"eval_rewards/accuracies": 0.703000009059906, |
|
"eval_rewards/chosen": -1.2157301902770996, |
|
"eval_rewards/margins": 0.6069244146347046, |
|
"eval_rewards/rejected": -1.8226546049118042, |
|
"eval_runtime": 1594.9357, |
|
"eval_samples_per_second": 1.254, |
|
"eval_steps_per_second": 0.157, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1596440722323999, |
|
"grad_norm": 9.641666412353516, |
|
"learning_rate": 4.946408985913344e-06, |
|
"logits/chosen": -1.3044933080673218, |
|
"logits/rejected": -1.1979707479476929, |
|
"logps/chosen": -385.60382080078125, |
|
"logps/rejected": -458.82373046875, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.398618221282959, |
|
"rewards/margins": 0.9089972376823425, |
|
"rewards/rejected": -2.3076155185699463, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16226118817063595, |
|
"grad_norm": 15.917795181274414, |
|
"learning_rate": 4.941602180974958e-06, |
|
"logits/chosen": -1.1524051427841187, |
|
"logits/rejected": -0.8983421325683594, |
|
"logps/chosen": -500.45880126953125, |
|
"logps/rejected": -547.509765625, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.1275360584259033, |
|
"rewards/margins": 1.056699275970459, |
|
"rewards/rejected": -3.1842353343963623, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16487830410887203, |
|
"grad_norm": 10.020596504211426, |
|
"learning_rate": 4.936591502957101e-06, |
|
"logits/chosen": -1.091322422027588, |
|
"logits/rejected": -0.9337531328201294, |
|
"logps/chosen": -459.0081481933594, |
|
"logps/rejected": -551.312255859375, |
|
"loss": 0.5273, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.1100564002990723, |
|
"rewards/margins": 1.0251764059066772, |
|
"rewards/rejected": -3.135232925415039, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 11.079347610473633, |
|
"learning_rate": 4.931377370249946e-06, |
|
"logits/chosen": -1.0583717823028564, |
|
"logits/rejected": -0.8333446383476257, |
|
"logps/chosen": -591.6964111328125, |
|
"logps/rejected": -650.4478149414062, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.2645416259765625, |
|
"rewards/margins": 0.8453465700149536, |
|
"rewards/rejected": -4.109888076782227, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17011253598534415, |
|
"grad_norm": 15.071267127990723, |
|
"learning_rate": 4.925960218232073e-06, |
|
"logits/chosen": -1.0942609310150146, |
|
"logits/rejected": -0.9619698524475098, |
|
"logps/chosen": -564.9473876953125, |
|
"logps/rejected": -646.3541259765625, |
|
"loss": 0.5535, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.0973496437072754, |
|
"rewards/margins": 0.9102842211723328, |
|
"rewards/rejected": -4.007634162902832, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17272965192358022, |
|
"grad_norm": 12.748456001281738, |
|
"learning_rate": 4.920340499234116e-06, |
|
"logits/chosen": -1.2404229640960693, |
|
"logits/rejected": -1.0331848859786987, |
|
"logps/chosen": -465.51190185546875, |
|
"logps/rejected": -491.4901428222656, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.9967219829559326, |
|
"rewards/margins": 0.611833393573761, |
|
"rewards/rejected": -2.608555555343628, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.17534676786181627, |
|
"grad_norm": 9.929147720336914, |
|
"learning_rate": 4.914518682500995e-06, |
|
"logits/chosen": -1.4407885074615479, |
|
"logits/rejected": -1.2742736339569092, |
|
"logps/chosen": -468.04180908203125, |
|
"logps/rejected": -500.22406005859375, |
|
"loss": 0.5299, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.8461347818374634, |
|
"rewards/margins": 0.7285885810852051, |
|
"rewards/rejected": -2.574723243713379, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 7.518274784088135, |
|
"learning_rate": 4.9084952541527315e-06, |
|
"logits/chosen": -1.2451258897781372, |
|
"logits/rejected": -1.081993818283081, |
|
"logps/chosen": -526.33740234375, |
|
"logps/rejected": -566.2431640625, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.5310587882995605, |
|
"rewards/margins": 0.8698722124099731, |
|
"rewards/rejected": -3.400930881500244, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.1805809997382884, |
|
"grad_norm": 9.582080841064453, |
|
"learning_rate": 4.902270717143858e-06, |
|
"logits/chosen": -1.1439664363861084, |
|
"logits/rejected": -1.0512256622314453, |
|
"logps/chosen": -527.6976318359375, |
|
"logps/rejected": -677.5025634765625, |
|
"loss": 0.4169, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.915743350982666, |
|
"rewards/margins": 1.3812744617462158, |
|
"rewards/rejected": -4.297018051147461, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"grad_norm": 6.840953350067139, |
|
"learning_rate": 4.895845591221427e-06, |
|
"logits/chosen": -1.0450663566589355, |
|
"logits/rejected": -0.9686886072158813, |
|
"logps/chosen": -566.4291381835938, |
|
"logps/rejected": -659.4857177734375, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.1321775913238525, |
|
"rewards/margins": 0.9749795794487, |
|
"rewards/rejected": -4.1071577072143555, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"eval_logits/chosen": -0.8394417762756348, |
|
"eval_logits/rejected": -0.6998772025108337, |
|
"eval_logps/chosen": -578.4380493164062, |
|
"eval_logps/rejected": -649.6521606445312, |
|
"eval_loss": 0.5335752964019775, |
|
"eval_rewards/accuracies": 0.7264999747276306, |
|
"eval_rewards/chosen": -3.138230800628662, |
|
"eval_rewards/margins": 0.9126878976821899, |
|
"eval_rewards/rejected": -4.050919055938721, |
|
"eval_runtime": 1582.5904, |
|
"eval_samples_per_second": 1.264, |
|
"eval_steps_per_second": 0.158, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18581523161476055, |
|
"grad_norm": 13.642376899719238, |
|
"learning_rate": 4.8892204128816e-06, |
|
"logits/chosen": -1.055434226989746, |
|
"logits/rejected": -0.9338513612747192, |
|
"logps/chosen": -568.8038940429688, |
|
"logps/rejected": -652.264404296875, |
|
"loss": 0.4835, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.012946844100952, |
|
"rewards/margins": 0.9706674814224243, |
|
"rewards/rejected": -3.983614683151245, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 12.86258316040039, |
|
"learning_rate": 4.882395735324864e-06, |
|
"logits/chosen": -0.9849473834037781, |
|
"logits/rejected": -0.825210452079773, |
|
"logps/chosen": -602.6118774414062, |
|
"logps/rejected": -685.4152221679688, |
|
"loss": 0.4941, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.3656258583068848, |
|
"rewards/margins": 1.0001194477081299, |
|
"rewards/rejected": -4.3657450675964355, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19104946349123267, |
|
"grad_norm": 16.07886505126953, |
|
"learning_rate": 4.87537212840983e-06, |
|
"logits/chosen": -0.8920208811759949, |
|
"logits/rejected": -0.7630427479743958, |
|
"logps/chosen": -712.3763427734375, |
|
"logps/rejected": -763.3992919921875, |
|
"loss": 0.5946, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -4.461544036865234, |
|
"rewards/margins": 0.8164655566215515, |
|
"rewards/rejected": -5.27800989151001, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19366657942946872, |
|
"grad_norm": 13.588937759399414, |
|
"learning_rate": 4.8681501786056545e-06, |
|
"logits/chosen": -1.0917376279830933, |
|
"logits/rejected": -0.9343040585517883, |
|
"logps/chosen": -462.0271911621094, |
|
"logps/rejected": -513.8302001953125, |
|
"loss": 0.4765, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.3388774394989014, |
|
"rewards/margins": 0.840278148651123, |
|
"rewards/rejected": -3.179155111312866, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.1962836953677048, |
|
"grad_norm": 14.81460189819336, |
|
"learning_rate": 4.860730488943068e-06, |
|
"logits/chosen": -1.0682129859924316, |
|
"logits/rejected": -1.003150224685669, |
|
"logps/chosen": -441.767333984375, |
|
"logps/rejected": -530.33349609375, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.0735113620758057, |
|
"rewards/margins": 0.9047049283981323, |
|
"rewards/rejected": -2.9782164096832275, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 11.297514915466309, |
|
"learning_rate": 4.853113678964022e-06, |
|
"logits/chosen": -1.09920072555542, |
|
"logits/rejected": -1.0103808641433716, |
|
"logps/chosen": -463.56842041015625, |
|
"logps/rejected": -555.3057861328125, |
|
"loss": 0.4855, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.9122604131698608, |
|
"rewards/margins": 0.9966481924057007, |
|
"rewards/rejected": -2.9089083671569824, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.20151792724417691, |
|
"grad_norm": 6.034220218658447, |
|
"learning_rate": 4.845300384669958e-06, |
|
"logits/chosen": -1.2919018268585205, |
|
"logits/rejected": -1.1613837480545044, |
|
"logps/chosen": -394.8362121582031, |
|
"logps/rejected": -444.3602600097656, |
|
"loss": 0.5366, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.4172089099884033, |
|
"rewards/margins": 0.7238287329673767, |
|
"rewards/rejected": -2.141037702560425, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.204135043182413, |
|
"grad_norm": 10.259706497192383, |
|
"learning_rate": 4.837291258468701e-06, |
|
"logits/chosen": -1.4326658248901367, |
|
"logits/rejected": -1.291441798210144, |
|
"logps/chosen": -437.5020446777344, |
|
"logps/rejected": -488.41802978515625, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5230964422225952, |
|
"rewards/margins": 0.7579478025436401, |
|
"rewards/rejected": -2.2810444831848145, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.20675215912064904, |
|
"grad_norm": 8.649606704711914, |
|
"learning_rate": 4.829086969119984e-06, |
|
"logits/chosen": -1.3141874074935913, |
|
"logits/rejected": -1.3148314952850342, |
|
"logps/chosen": -416.85675048828125, |
|
"logps/rejected": -487.4302673339844, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.662632703781128, |
|
"rewards/margins": 0.6876001954078674, |
|
"rewards/rejected": -2.3502330780029297, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 10.932877540588379, |
|
"learning_rate": 4.820688201679605e-06, |
|
"logits/chosen": -1.563239574432373, |
|
"logits/rejected": -1.2844064235687256, |
|
"logps/chosen": -417.02923583984375, |
|
"logps/rejected": -430.236572265625, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5448291301727295, |
|
"rewards/margins": 0.7562888264656067, |
|
"rewards/rejected": -2.3011181354522705, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"eval_logits/chosen": -1.2524374723434448, |
|
"eval_logits/rejected": -1.1249934434890747, |
|
"eval_logps/chosen": -448.3450012207031, |
|
"eval_logps/rejected": -507.1189270019531, |
|
"eval_loss": 0.5242464542388916, |
|
"eval_rewards/accuracies": 0.7245000004768372, |
|
"eval_rewards/chosen": -1.8373013734817505, |
|
"eval_rewards/margins": 0.788284957408905, |
|
"eval_rewards/rejected": -2.6255862712860107, |
|
"eval_runtime": 1587.9898, |
|
"eval_samples_per_second": 1.259, |
|
"eval_steps_per_second": 0.157, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21198639099712116, |
|
"grad_norm": 9.641347885131836, |
|
"learning_rate": 4.8120956574422315e-06, |
|
"logits/chosen": -1.4272372722625732, |
|
"logits/rejected": -1.4065699577331543, |
|
"logps/chosen": -465.9081115722656, |
|
"logps/rejected": -525.2551879882812, |
|
"loss": 0.5804, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.9209187030792236, |
|
"rewards/margins": 0.6635769605636597, |
|
"rewards/rejected": -2.5844955444335938, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21460350693535724, |
|
"grad_norm": 10.180907249450684, |
|
"learning_rate": 4.803310053882831e-06, |
|
"logits/chosen": -1.4811725616455078, |
|
"logits/rejected": -1.4760491847991943, |
|
"logps/chosen": -394.45654296875, |
|
"logps/rejected": -489.70751953125, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.6524245738983154, |
|
"rewards/margins": 0.8287159204483032, |
|
"rewards/rejected": -2.481140613555908, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2172206228735933, |
|
"grad_norm": 16.830577850341797, |
|
"learning_rate": 4.794332124596775e-06, |
|
"logits/chosen": -1.5256417989730835, |
|
"logits/rejected": -1.4408817291259766, |
|
"logps/chosen": -442.7582092285156, |
|
"logps/rejected": -510.44903564453125, |
|
"loss": 0.6008, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.7438207864761353, |
|
"rewards/margins": 0.7136639356613159, |
|
"rewards/rejected": -2.4574849605560303, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 9.129151344299316, |
|
"learning_rate": 4.785162619238575e-06, |
|
"logits/chosen": -1.5453526973724365, |
|
"logits/rejected": -1.4078967571258545, |
|
"logps/chosen": -414.2274475097656, |
|
"logps/rejected": -470.25115966796875, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.6049034595489502, |
|
"rewards/margins": 0.8330914378166199, |
|
"rewards/rejected": -2.437994956970215, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22245485475006543, |
|
"grad_norm": 9.432477951049805, |
|
"learning_rate": 4.775802303459288e-06, |
|
"logits/chosen": -1.3889689445495605, |
|
"logits/rejected": -1.3114643096923828, |
|
"logps/chosen": -435.85797119140625, |
|
"logps/rejected": -512.47705078125, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.8816181421279907, |
|
"rewards/margins": 0.8134034276008606, |
|
"rewards/rejected": -2.695021629333496, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.22507197068830148, |
|
"grad_norm": 9.855865478515625, |
|
"learning_rate": 4.766251958842589e-06, |
|
"logits/chosen": -1.3516982793807983, |
|
"logits/rejected": -1.249561071395874, |
|
"logps/chosen": -436.2867736816406, |
|
"logps/rejected": -491.6578674316406, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.637629508972168, |
|
"rewards/margins": 0.6592670679092407, |
|
"rewards/rejected": -2.296896457672119, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.22768908662653756, |
|
"grad_norm": 6.627049446105957, |
|
"learning_rate": 4.7565123828395066e-06, |
|
"logits/chosen": -1.2943369150161743, |
|
"logits/rejected": -1.1972229480743408, |
|
"logps/chosen": -425.5354919433594, |
|
"logps/rejected": -497.001953125, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.6972328424453735, |
|
"rewards/margins": 0.7206228971481323, |
|
"rewards/rejected": -2.417855739593506, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 8.324175834655762, |
|
"learning_rate": 4.746584388701831e-06, |
|
"logits/chosen": -1.277025818824768, |
|
"logits/rejected": -1.2231056690216064, |
|
"logps/chosen": -471.81292724609375, |
|
"logps/rejected": -548.893798828125, |
|
"loss": 0.4897, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.0869038105010986, |
|
"rewards/margins": 0.9123502969741821, |
|
"rewards/rejected": -2.999253749847412, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23292331850300968, |
|
"grad_norm": 16.953506469726562, |
|
"learning_rate": 4.736468805414218e-06, |
|
"logits/chosen": -1.0355793237686157, |
|
"logits/rejected": -0.9944553375244141, |
|
"logps/chosen": -496.1917419433594, |
|
"logps/rejected": -595.2552490234375, |
|
"loss": 0.6087, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.4879398345947266, |
|
"rewards/margins": 0.8530998229980469, |
|
"rewards/rejected": -3.3410396575927734, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"grad_norm": 16.194597244262695, |
|
"learning_rate": 4.7261664776249595e-06, |
|
"logits/chosen": -0.8758188486099243, |
|
"logits/rejected": -0.7574308514595032, |
|
"logps/chosen": -454.79925537109375, |
|
"logps/rejected": -559.8719482421875, |
|
"loss": 0.4794, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.2598698139190674, |
|
"rewards/margins": 1.1400644779205322, |
|
"rewards/rejected": -3.3999342918395996, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"eval_logits/chosen": -0.9944241046905518, |
|
"eval_logits/rejected": -0.8587719798088074, |
|
"eval_logps/chosen": -465.2022399902344, |
|
"eval_logps/rejected": -527.2197875976562, |
|
"eval_loss": 0.5245745778083801, |
|
"eval_rewards/accuracies": 0.7254999876022339, |
|
"eval_rewards/chosen": -2.005873441696167, |
|
"eval_rewards/margins": 0.8207210302352905, |
|
"eval_rewards/rejected": -2.826594591140747, |
|
"eval_runtime": 1581.4306, |
|
"eval_samples_per_second": 1.265, |
|
"eval_steps_per_second": 0.158, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2381575503794818, |
|
"grad_norm": 9.985879898071289, |
|
"learning_rate": 4.715678265575463e-06, |
|
"logits/chosen": -1.2251172065734863, |
|
"logits/rejected": -1.0086650848388672, |
|
"logps/chosen": -471.8104553222656, |
|
"logps/rejected": -480.17510986328125, |
|
"loss": 0.5554, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.8755344152450562, |
|
"rewards/margins": 0.6405603885650635, |
|
"rewards/rejected": -2.516094923019409, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 8.599123001098633, |
|
"learning_rate": 4.705005045028415e-06, |
|
"logits/chosen": -1.0334179401397705, |
|
"logits/rejected": -0.8868207931518555, |
|
"logps/chosen": -470.3492126464844, |
|
"logps/rejected": -531.8078002929688, |
|
"loss": 0.5416, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.072953701019287, |
|
"rewards/margins": 0.7891368865966797, |
|
"rewards/rejected": -2.862090587615967, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24339178225595393, |
|
"grad_norm": 10.509832382202148, |
|
"learning_rate": 4.694147707194659e-06, |
|
"logits/chosen": -1.0835293531417847, |
|
"logits/rejected": -0.979448139667511, |
|
"logps/chosen": -508.9417419433594, |
|
"logps/rejected": -565.7315063476562, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.3716158866882324, |
|
"rewards/margins": 0.7661987543106079, |
|
"rewards/rejected": -3.137814998626709, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.24600889819419, |
|
"grad_norm": 6.1488494873046875, |
|
"learning_rate": 4.683107158658782e-06, |
|
"logits/chosen": -1.15608811378479, |
|
"logits/rejected": -1.035563349723816, |
|
"logps/chosen": -482.892578125, |
|
"logps/rejected": -541.1561279296875, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.9270384311676025, |
|
"rewards/margins": 0.8703593015670776, |
|
"rewards/rejected": -2.7973976135253906, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.24862601413242608, |
|
"grad_norm": 7.954674243927002, |
|
"learning_rate": 4.671884321303407e-06, |
|
"logits/chosen": -1.3060693740844727, |
|
"logits/rejected": -1.1917331218719482, |
|
"logps/chosen": -448.90484619140625, |
|
"logps/rejected": -514.2643432617188, |
|
"loss": 0.507, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.0268380641937256, |
|
"rewards/margins": 0.797137975692749, |
|
"rewards/rejected": -2.8239760398864746, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 8.678433418273926, |
|
"learning_rate": 4.660480132232224e-06, |
|
"logits/chosen": -1.2542150020599365, |
|
"logits/rejected": -1.151984453201294, |
|
"logps/chosen": -519.7725830078125, |
|
"logps/rejected": -568.0989990234375, |
|
"loss": 0.5622, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.4804420471191406, |
|
"rewards/margins": 0.7246801853179932, |
|
"rewards/rejected": -3.2051219940185547, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25386024600889817, |
|
"grad_norm": 19.324024200439453, |
|
"learning_rate": 4.6488955436917414e-06, |
|
"logits/chosen": -1.0881518125534058, |
|
"logits/rejected": -0.8402830958366394, |
|
"logps/chosen": -611.234130859375, |
|
"logps/rejected": -676.3607177734375, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -3.3131954669952393, |
|
"rewards/margins": 1.1177177429199219, |
|
"rewards/rejected": -4.430912971496582, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.2564773619471343, |
|
"grad_norm": 5.164144992828369, |
|
"learning_rate": 4.6371315229917644e-06, |
|
"logits/chosen": -0.9441936612129211, |
|
"logits/rejected": -0.7941098213195801, |
|
"logps/chosen": -666.5744018554688, |
|
"logps/rejected": -751.6942138671875, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.8323254585266113, |
|
"rewards/margins": 1.055508017539978, |
|
"rewards/rejected": -4.887833118438721, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2590944778853703, |
|
"grad_norm": 19.875019073486328, |
|
"learning_rate": 4.625189052424638e-06, |
|
"logits/chosen": -0.7852658033370972, |
|
"logits/rejected": -0.6114678382873535, |
|
"logps/chosen": -659.4119873046875, |
|
"logps/rejected": -755.3154907226562, |
|
"loss": 0.4696, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.203821182250977, |
|
"rewards/margins": 1.186416745185852, |
|
"rewards/rejected": -5.390236854553223, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 7.329054355621338, |
|
"learning_rate": 4.613069129183218e-06, |
|
"logits/chosen": -1.0289504528045654, |
|
"logits/rejected": -0.8489596247673035, |
|
"logps/chosen": -642.2366943359375, |
|
"logps/rejected": -696.42138671875, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.388699769973755, |
|
"rewards/margins": 0.904699444770813, |
|
"rewards/rejected": -4.293398857116699, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"eval_logits/chosen": -0.819309413433075, |
|
"eval_logits/rejected": -0.6716277003288269, |
|
"eval_logps/chosen": -553.1188354492188, |
|
"eval_logps/rejected": -624.8492431640625, |
|
"eval_loss": 0.5109167098999023, |
|
"eval_rewards/accuracies": 0.7394999861717224, |
|
"eval_rewards/chosen": -2.8850390911102295, |
|
"eval_rewards/margins": 0.9178500771522522, |
|
"eval_rewards/rejected": -3.802889108657837, |
|
"eval_runtime": 1590.9987, |
|
"eval_samples_per_second": 1.257, |
|
"eval_steps_per_second": 0.157, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2643287097618425, |
|
"grad_norm": 11.057512283325195, |
|
"learning_rate": 4.600772765277607e-06, |
|
"logits/chosen": -0.9332865476608276, |
|
"logits/rejected": -0.8343530893325806, |
|
"logps/chosen": -501.17291259765625, |
|
"logps/rejected": -588.1627197265625, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.676569700241089, |
|
"rewards/margins": 0.9215409159660339, |
|
"rewards/rejected": -3.5981109142303467, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.2669458257000785, |
|
"grad_norm": 15.726754188537598, |
|
"learning_rate": 4.588300987450652e-06, |
|
"logits/chosen": -1.0615359544754028, |
|
"logits/rejected": -0.9324240684509277, |
|
"logps/chosen": -485.42474365234375, |
|
"logps/rejected": -538.6219482421875, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.318000316619873, |
|
"rewards/margins": 0.8668109774589539, |
|
"rewards/rejected": -3.1848113536834717, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.26956294163831457, |
|
"grad_norm": 8.744333267211914, |
|
"learning_rate": 4.5756548370922136e-06, |
|
"logits/chosen": -1.0338976383209229, |
|
"logits/rejected": -0.9279731512069702, |
|
"logps/chosen": -438.6556091308594, |
|
"logps/rejected": -530.7064208984375, |
|
"loss": 0.4732, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.980308175086975, |
|
"rewards/margins": 1.0102787017822266, |
|
"rewards/rejected": -2.990586757659912, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 18.558218002319336, |
|
"learning_rate": 4.562835370152206e-06, |
|
"logits/chosen": -1.0217876434326172, |
|
"logits/rejected": -0.7763082385063171, |
|
"logps/chosen": -592.7269897460938, |
|
"logps/rejected": -723.8841552734375, |
|
"loss": 0.4507, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.9158542156219482, |
|
"rewards/margins": 1.6223560571670532, |
|
"rewards/rejected": -4.538210391998291, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2747971735147867, |
|
"grad_norm": 13.077289581298828, |
|
"learning_rate": 4.54984365705243e-06, |
|
"logits/chosen": -0.9829725027084351, |
|
"logits/rejected": -0.8464131355285645, |
|
"logps/chosen": -542.563720703125, |
|
"logps/rejected": -691.7203369140625, |
|
"loss": 0.4689, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.7414119243621826, |
|
"rewards/margins": 1.5575182437896729, |
|
"rewards/rejected": -4.2989301681518555, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.27741428945302277, |
|
"grad_norm": 8.922407150268555, |
|
"learning_rate": 4.536680782597191e-06, |
|
"logits/chosen": -1.0383745431900024, |
|
"logits/rejected": -0.9313938021659851, |
|
"logps/chosen": -409.2937316894531, |
|
"logps/rejected": -496.7439880371094, |
|
"loss": 0.582, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.7329803705215454, |
|
"rewards/margins": 0.931407630443573, |
|
"rewards/rejected": -2.6643881797790527, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2800314053912588, |
|
"grad_norm": 11.105772018432617, |
|
"learning_rate": 4.523347845882718e-06, |
|
"logits/chosen": -1.2218599319458008, |
|
"logits/rejected": -0.9990504384040833, |
|
"logps/chosen": -422.4734802246094, |
|
"logps/rejected": -471.9090270996094, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3439210653305054, |
|
"rewards/margins": 0.9874658584594727, |
|
"rewards/rejected": -2.3313870429992676, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 8.625237464904785, |
|
"learning_rate": 4.50984596020539e-06, |
|
"logits/chosen": -0.8810186386108398, |
|
"logits/rejected": -0.8003977537155151, |
|
"logps/chosen": -499.4249572753906, |
|
"logps/rejected": -544.4656982421875, |
|
"loss": 0.5526, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.1937453746795654, |
|
"rewards/margins": 0.7729299664497375, |
|
"rewards/rejected": -2.9666755199432373, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.28526563726773096, |
|
"grad_norm": 6.088487148284912, |
|
"learning_rate": 4.4961762529687745e-06, |
|
"logits/chosen": -1.0772597789764404, |
|
"logits/rejected": -0.9252904653549194, |
|
"logps/chosen": -473.551025390625, |
|
"logps/rejected": -539.3673095703125, |
|
"loss": 0.509, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.1322884559631348, |
|
"rewards/margins": 0.8234016299247742, |
|
"rewards/rejected": -2.955690383911133, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"grad_norm": 7.50374174118042, |
|
"learning_rate": 4.482339865589492e-06, |
|
"logits/chosen": -1.0490493774414062, |
|
"logits/rejected": -0.8567570447921753, |
|
"logps/chosen": -522.2603149414062, |
|
"logps/rejected": -541.4241943359375, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.5611276626586914, |
|
"rewards/margins": 0.630212128162384, |
|
"rewards/rejected": -3.1913397312164307, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"eval_logits/chosen": -0.8244917988777161, |
|
"eval_logits/rejected": -0.6634394526481628, |
|
"eval_logps/chosen": -513.6636352539062, |
|
"eval_logps/rejected": -577.7298583984375, |
|
"eval_loss": 0.5050398707389832, |
|
"eval_rewards/accuracies": 0.737500011920929, |
|
"eval_rewards/chosen": -2.490487813949585, |
|
"eval_rewards/margins": 0.8412085175514221, |
|
"eval_rewards/rejected": -3.3316965103149414, |
|
"eval_runtime": 1582.1916, |
|
"eval_samples_per_second": 1.264, |
|
"eval_steps_per_second": 0.158, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2904998691442031, |
|
"grad_norm": 6.5892252922058105, |
|
"learning_rate": 4.468337953401909e-06, |
|
"logits/chosen": -1.082047700881958, |
|
"logits/rejected": -1.00830078125, |
|
"logps/chosen": -517.5233764648438, |
|
"logps/rejected": -578.21923828125, |
|
"loss": 0.5718, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.4717154502868652, |
|
"rewards/margins": 0.6562873721122742, |
|
"rewards/rejected": -3.128002882003784, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 8.295710563659668, |
|
"learning_rate": 4.45417168556166e-06, |
|
"logits/chosen": -1.0961480140686035, |
|
"logits/rejected": -0.9785219430923462, |
|
"logps/chosen": -446.82977294921875, |
|
"logps/rejected": -531.4866333007812, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.056777000427246, |
|
"rewards/margins": 0.8543047904968262, |
|
"rewards/rejected": -2.911081552505493, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.2957341010206752, |
|
"grad_norm": 9.79799747467041, |
|
"learning_rate": 4.439842244948036e-06, |
|
"logits/chosen": -1.0972565412521362, |
|
"logits/rejected": -0.9272262454032898, |
|
"logps/chosen": -524.0053100585938, |
|
"logps/rejected": -611.5361938476562, |
|
"loss": 0.5547, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.6295433044433594, |
|
"rewards/margins": 0.9316496849060059, |
|
"rewards/rejected": -3.5611929893493652, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.29835121695891126, |
|
"grad_norm": 31.351959228515625, |
|
"learning_rate": 4.425350828065204e-06, |
|
"logits/chosen": -1.2044315338134766, |
|
"logits/rejected": -0.9763747453689575, |
|
"logps/chosen": -536.931640625, |
|
"logps/rejected": -602.4816284179688, |
|
"loss": 0.4757, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.528210163116455, |
|
"rewards/margins": 1.1348176002502441, |
|
"rewards/rejected": -3.6630280017852783, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.30096833289714736, |
|
"grad_norm": 13.463656425476074, |
|
"learning_rate": 4.410698644942303e-06, |
|
"logits/chosen": -1.2988972663879395, |
|
"logits/rejected": -1.1377476453781128, |
|
"logps/chosen": -515.65380859375, |
|
"logps/rejected": -611.4737548828125, |
|
"loss": 0.4582, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.433767795562744, |
|
"rewards/margins": 1.198464274406433, |
|
"rewards/rejected": -3.632232189178467, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 13.99889850616455, |
|
"learning_rate": 4.395886919032406e-06, |
|
"logits/chosen": -1.2166707515716553, |
|
"logits/rejected": -1.0688965320587158, |
|
"logps/chosen": -525.2330932617188, |
|
"logps/rejected": -608.0958862304688, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.6150729656219482, |
|
"rewards/margins": 1.093909502029419, |
|
"rewards/rejected": -3.708981990814209, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.30620256477361946, |
|
"grad_norm": 15.137212753295898, |
|
"learning_rate": 4.380916887110366e-06, |
|
"logits/chosen": -1.260829210281372, |
|
"logits/rejected": -1.0494515895843506, |
|
"logps/chosen": -565.7590942382812, |
|
"logps/rejected": -643.6331787109375, |
|
"loss": 0.5416, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.06677508354187, |
|
"rewards/margins": 1.1827514171600342, |
|
"rewards/rejected": -4.249526500701904, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.30881968071185556, |
|
"grad_norm": 9.361719131469727, |
|
"learning_rate": 4.365789799169539e-06, |
|
"logits/chosen": -1.1176745891571045, |
|
"logits/rejected": -1.1530115604400635, |
|
"logps/chosen": -499.57470703125, |
|
"logps/rejected": -600.5289306640625, |
|
"loss": 0.5294, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.5301427841186523, |
|
"rewards/margins": 0.9564053416252136, |
|
"rewards/rejected": -3.4865479469299316, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3114367966500916, |
|
"grad_norm": 16.095386505126953, |
|
"learning_rate": 4.350506918317416e-06, |
|
"logits/chosen": -1.39276921749115, |
|
"logits/rejected": -1.2349721193313599, |
|
"logps/chosen": -434.96197509765625, |
|
"logps/rejected": -517.3797607421875, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.8744901418685913, |
|
"rewards/margins": 0.8959289789199829, |
|
"rewards/rejected": -2.770418882369995, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 9.526153564453125, |
|
"learning_rate": 4.335069520670149e-06, |
|
"logits/chosen": -1.138518214225769, |
|
"logits/rejected": -1.0583736896514893, |
|
"logps/chosen": -489.06463623046875, |
|
"logps/rejected": -578.7098999023438, |
|
"loss": 0.5911, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.59414005279541, |
|
"rewards/margins": 0.8143970370292664, |
|
"rewards/rejected": -3.4085373878479004, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"eval_logits/chosen": -1.1191861629486084, |
|
"eval_logits/rejected": -0.9870566725730896, |
|
"eval_logps/chosen": -491.9688415527344, |
|
"eval_logps/rejected": -566.8433837890625, |
|
"eval_loss": 0.4982523024082184, |
|
"eval_rewards/accuracies": 0.7384999990463257, |
|
"eval_rewards/chosen": -2.2735395431518555, |
|
"eval_rewards/margins": 0.9492914080619812, |
|
"eval_rewards/rejected": -3.2228307723999023, |
|
"eval_runtime": 1581.9568, |
|
"eval_samples_per_second": 1.264, |
|
"eval_steps_per_second": 0.158, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3166710285265637, |
|
"grad_norm": 7.82928991317749, |
|
"learning_rate": 4.319478895246e-06, |
|
"logits/chosen": -1.2663036584854126, |
|
"logits/rejected": -1.0805227756500244, |
|
"logps/chosen": -465.62579345703125, |
|
"logps/rejected": -534.9702758789062, |
|
"loss": 0.4779, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.185610294342041, |
|
"rewards/margins": 0.9405841827392578, |
|
"rewards/rejected": -3.126194477081299, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.3192881444647998, |
|
"grad_norm": 14.477264404296875, |
|
"learning_rate": 4.303736343857704e-06, |
|
"logits/chosen": -1.2190170288085938, |
|
"logits/rejected": -1.10158371925354, |
|
"logps/chosen": -563.8433227539062, |
|
"logps/rejected": -729.468017578125, |
|
"loss": 0.4598, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.0673668384552, |
|
"rewards/margins": 1.5377466678619385, |
|
"rewards/rejected": -4.6051130294799805, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32190526040303585, |
|
"grad_norm": 12.053476333618164, |
|
"learning_rate": 4.287843181003772e-06, |
|
"logits/chosen": -1.2529375553131104, |
|
"logits/rejected": -1.0980578660964966, |
|
"logps/chosen": -634.8171997070312, |
|
"logps/rejected": -681.6033935546875, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.410432815551758, |
|
"rewards/margins": 0.9311054944992065, |
|
"rewards/rejected": -4.341538429260254, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 8.703567504882812, |
|
"learning_rate": 4.27180073375873e-06, |
|
"logits/chosen": -1.3562941551208496, |
|
"logits/rejected": -1.2354745864868164, |
|
"logps/chosen": -528.8560791015625, |
|
"logps/rejected": -573.3671875, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.4158124923706055, |
|
"rewards/margins": 0.8610725402832031, |
|
"rewards/rejected": -3.2768847942352295, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.327139492279508, |
|
"grad_norm": 5.352301120758057, |
|
"learning_rate": 4.255610341662304e-06, |
|
"logits/chosen": -1.3785821199417114, |
|
"logits/rejected": -1.1847844123840332, |
|
"logps/chosen": -459.38037109375, |
|
"logps/rejected": -520.2581787109375, |
|
"loss": 0.5513, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.04193115234375, |
|
"rewards/margins": 0.8080309629440308, |
|
"rewards/rejected": -2.849961757659912, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.32975660821774405, |
|
"grad_norm": 10.721965789794922, |
|
"learning_rate": 4.2392733566075764e-06, |
|
"logits/chosen": -1.393056869506836, |
|
"logits/rejected": -1.2600148916244507, |
|
"logps/chosen": -454.79547119140625, |
|
"logps/rejected": -493.84405517578125, |
|
"loss": 0.5687, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.9885499477386475, |
|
"rewards/margins": 0.5462992787361145, |
|
"rewards/rejected": -2.534849166870117, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.3323737241559801, |
|
"grad_norm": 7.625462532043457, |
|
"learning_rate": 4.2227911427280975e-06, |
|
"logits/chosen": -1.2262321710586548, |
|
"logits/rejected": -1.0494863986968994, |
|
"logps/chosen": -501.2200622558594, |
|
"logps/rejected": -556.45849609375, |
|
"loss": 0.5118, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.510348081588745, |
|
"rewards/margins": 0.879997730255127, |
|
"rewards/rejected": -3.390345811843872, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 10.538474082946777, |
|
"learning_rate": 4.206165076283983e-06, |
|
"logits/chosen": -1.1859577894210815, |
|
"logits/rejected": -1.030694842338562, |
|
"logps/chosen": -578.6294555664062, |
|
"logps/rejected": -669.954833984375, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.32794189453125, |
|
"rewards/margins": 1.1238527297973633, |
|
"rewards/rejected": -4.451794624328613, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.33760795603245225, |
|
"grad_norm": 12.202057838439941, |
|
"learning_rate": 4.189396545546995e-06, |
|
"logits/chosen": -1.1280696392059326, |
|
"logits/rejected": -0.9983634948730469, |
|
"logps/chosen": -617.6463623046875, |
|
"logps/rejected": -723.6165771484375, |
|
"loss": 0.4987, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.684889554977417, |
|
"rewards/margins": 1.2597671747207642, |
|
"rewards/rejected": -4.9446563720703125, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"grad_norm": 13.77889633178711, |
|
"learning_rate": 4.172486950684627e-06, |
|
"logits/chosen": -1.147534728050232, |
|
"logits/rejected": -1.0692282915115356, |
|
"logps/chosen": -597.9291381835938, |
|
"logps/rejected": -696.7471923828125, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.4435715675354004, |
|
"rewards/margins": 1.0198230743408203, |
|
"rewards/rejected": -4.463394641876221, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"eval_logits/chosen": -0.9910528063774109, |
|
"eval_logits/rejected": -0.8540138006210327, |
|
"eval_logps/chosen": -616.756591796875, |
|
"eval_logps/rejected": -717.8565063476562, |
|
"eval_loss": 0.5000870227813721, |
|
"eval_rewards/accuracies": 0.7450000047683716, |
|
"eval_rewards/chosen": -3.5214157104492188, |
|
"eval_rewards/margins": 1.2115455865859985, |
|
"eval_rewards/rejected": -4.732961654663086, |
|
"eval_runtime": 1582.4164, |
|
"eval_samples_per_second": 1.264, |
|
"eval_steps_per_second": 0.158, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34284218790892435, |
|
"grad_norm": 7.990820407867432, |
|
"learning_rate": 4.155437703643182e-06, |
|
"logits/chosen": -1.223760724067688, |
|
"logits/rejected": -1.0244286060333252, |
|
"logps/chosen": -568.0660400390625, |
|
"logps/rejected": -653.6463623046875, |
|
"loss": 0.5074, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.2144699096679688, |
|
"rewards/margins": 1.1462501287460327, |
|
"rewards/rejected": -4.360719680786133, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 11.47555160522461, |
|
"learning_rate": 4.138250228029882e-06, |
|
"logits/chosen": -1.2019230127334595, |
|
"logits/rejected": -1.108407735824585, |
|
"logps/chosen": -539.8394775390625, |
|
"logps/rejected": -651.2247314453125, |
|
"loss": 0.4882, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.8300833702087402, |
|
"rewards/margins": 1.0788848400115967, |
|
"rewards/rejected": -3.908967971801758, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3480764197853965, |
|
"grad_norm": 5.791493892669678, |
|
"learning_rate": 4.120925958993994e-06, |
|
"logits/chosen": -1.1601794958114624, |
|
"logits/rejected": -1.067439317703247, |
|
"logps/chosen": -501.59100341796875, |
|
"logps/rejected": -596.958984375, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.677966356277466, |
|
"rewards/margins": 0.9972420930862427, |
|
"rewards/rejected": -3.675208568572998, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35069353572363254, |
|
"grad_norm": 13.772578239440918, |
|
"learning_rate": 4.103466343106999e-06, |
|
"logits/chosen": -1.3145829439163208, |
|
"logits/rejected": -1.1666548252105713, |
|
"logps/chosen": -544.5247192382812, |
|
"logps/rejected": -613.3075561523438, |
|
"loss": 0.5066, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.751533031463623, |
|
"rewards/margins": 0.9478660821914673, |
|
"rewards/rejected": -3.69939923286438, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35331065166186865, |
|
"grad_norm": 10.991410255432129, |
|
"learning_rate": 4.085872838241797e-06, |
|
"logits/chosen": -1.2049071788787842, |
|
"logits/rejected": -1.057510495185852, |
|
"logps/chosen": -566.255859375, |
|
"logps/rejected": -625.919921875, |
|
"loss": 0.5737, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -3.0400071144104004, |
|
"rewards/margins": 0.797439694404602, |
|
"rewards/rejected": -3.837446928024292, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 10.892525672912598, |
|
"learning_rate": 4.06814691345098e-06, |
|
"logits/chosen": -1.1951572895050049, |
|
"logits/rejected": -1.0272043943405151, |
|
"logps/chosen": -564.4779052734375, |
|
"logps/rejected": -638.2157592773438, |
|
"loss": 0.4798, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.090005397796631, |
|
"rewards/margins": 0.9581144452095032, |
|
"rewards/rejected": -4.048120021820068, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.35854488353834074, |
|
"grad_norm": 10.314249992370605, |
|
"learning_rate": 4.050290048844171e-06, |
|
"logits/chosen": -1.2887331247329712, |
|
"logits/rejected": -1.192067265510559, |
|
"logps/chosen": -587.147705078125, |
|
"logps/rejected": -671.292236328125, |
|
"loss": 0.5371, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -3.1862995624542236, |
|
"rewards/margins": 0.8927896618843079, |
|
"rewards/rejected": -4.079089164733887, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3611619994765768, |
|
"grad_norm": 9.613045692443848, |
|
"learning_rate": 4.032303735464422e-06, |
|
"logits/chosen": -1.3485846519470215, |
|
"logits/rejected": -1.1314103603363037, |
|
"logps/chosen": -622.3319091796875, |
|
"logps/rejected": -722.0323486328125, |
|
"loss": 0.4515, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.5042572021484375, |
|
"rewards/margins": 1.2276619672775269, |
|
"rewards/rejected": -4.731919288635254, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3637791154148129, |
|
"grad_norm": 13.027145385742188, |
|
"learning_rate": 4.014189475163727e-06, |
|
"logits/chosen": -1.1553242206573486, |
|
"logits/rejected": -1.0384472608566284, |
|
"logps/chosen": -590.9238891601562, |
|
"logps/rejected": -693.7357788085938, |
|
"loss": 0.4786, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.3637802600860596, |
|
"rewards/margins": 1.1092660427093506, |
|
"rewards/rejected": -4.47304630279541, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 13.17509651184082, |
|
"learning_rate": 3.995948780477605e-06, |
|
"logits/chosen": -1.3155999183654785, |
|
"logits/rejected": -1.1753036975860596, |
|
"logps/chosen": -568.0977783203125, |
|
"logps/rejected": -626.4328002929688, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.994460344314575, |
|
"rewards/margins": 0.8298677206039429, |
|
"rewards/rejected": -3.8243279457092285, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"eval_logits/chosen": -1.2062066793441772, |
|
"eval_logits/rejected": -1.0816161632537842, |
|
"eval_logps/chosen": -543.2669677734375, |
|
"eval_logps/rejected": -619.3544921875, |
|
"eval_loss": 0.4986713230609894, |
|
"eval_rewards/accuracies": 0.7475000023841858, |
|
"eval_rewards/chosen": -2.786520004272461, |
|
"eval_rewards/margins": 0.9614222049713135, |
|
"eval_rewards/rejected": -3.7479424476623535, |
|
"eval_runtime": 1581.7334, |
|
"eval_samples_per_second": 1.264, |
|
"eval_steps_per_second": 0.158, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.369013347291285, |
|
"grad_norm": 11.276718139648438, |
|
"learning_rate": 3.977583174498816e-06, |
|
"logits/chosen": -1.2875537872314453, |
|
"logits/rejected": -1.158477544784546, |
|
"logps/chosen": -564.5877075195312, |
|
"logps/rejected": -679.6033935546875, |
|
"loss": 0.3861, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.009406089782715, |
|
"rewards/margins": 1.3703964948654175, |
|
"rewards/rejected": -4.379802227020264, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.3716304632295211, |
|
"grad_norm": 12.995461463928223, |
|
"learning_rate": 3.959094190750172e-06, |
|
"logits/chosen": -1.2381871938705444, |
|
"logits/rejected": -1.0811434984207153, |
|
"logps/chosen": -635.1746826171875, |
|
"logps/rejected": -741.3914184570312, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.5006301403045654, |
|
"rewards/margins": 1.3320776224136353, |
|
"rewards/rejected": -4.832707405090332, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37424757916775714, |
|
"grad_norm": 12.000993728637695, |
|
"learning_rate": 3.9404833730564975e-06, |
|
"logits/chosen": -1.029284119606018, |
|
"logits/rejected": -0.9086493253707886, |
|
"logps/chosen": -637.097900390625, |
|
"logps/rejected": -769.3482666015625, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.841416120529175, |
|
"rewards/margins": 1.4375669956207275, |
|
"rewards/rejected": -5.278983116149902, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 32.742244720458984, |
|
"learning_rate": 3.921752275415712e-06, |
|
"logits/chosen": -1.1185188293457031, |
|
"logits/rejected": -1.0146936178207397, |
|
"logps/chosen": -668.8628540039062, |
|
"logps/rejected": -822.2618408203125, |
|
"loss": 0.4455, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -4.184566020965576, |
|
"rewards/margins": 1.7391846179962158, |
|
"rewards/rejected": -5.923750877380371, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.37948181104422923, |
|
"grad_norm": 9.557754516601562, |
|
"learning_rate": 3.902902461869079e-06, |
|
"logits/chosen": -1.2069816589355469, |
|
"logits/rejected": -1.0695757865905762, |
|
"logps/chosen": -616.6637573242188, |
|
"logps/rejected": -743.1511840820312, |
|
"loss": 0.5608, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.7714271545410156, |
|
"rewards/margins": 1.4299030303955078, |
|
"rewards/rejected": -5.201330184936523, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38209892698246534, |
|
"grad_norm": 15.0908784866333, |
|
"learning_rate": 3.883935506370605e-06, |
|
"logits/chosen": -1.3125331401824951, |
|
"logits/rejected": -1.2091357707977295, |
|
"logps/chosen": -542.5530395507812, |
|
"logps/rejected": -604.1723022460938, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.894252300262451, |
|
"rewards/margins": 0.8939558863639832, |
|
"rewards/rejected": -3.788208484649658, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.3847160429207014, |
|
"grad_norm": 4.927533149719238, |
|
"learning_rate": 3.864852992655617e-06, |
|
"logits/chosen": -1.4343416690826416, |
|
"logits/rejected": -1.3349957466125488, |
|
"logps/chosen": -462.3154296875, |
|
"logps/rejected": -552.6721801757812, |
|
"loss": 0.4451, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.1374027729034424, |
|
"rewards/margins": 1.026588797569275, |
|
"rewards/rejected": -3.1639912128448486, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 10.096611976623535, |
|
"learning_rate": 3.845656514108516e-06, |
|
"logits/chosen": -1.3481743335723877, |
|
"logits/rejected": -1.198091745376587, |
|
"logps/chosen": -515.7351684570312, |
|
"logps/rejected": -568.5164184570312, |
|
"loss": 0.473, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.5670249462127686, |
|
"rewards/margins": 1.077282190322876, |
|
"rewards/rejected": -3.6443073749542236, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.38995027479717354, |
|
"grad_norm": 13.104360580444336, |
|
"learning_rate": 3.826347673629738e-06, |
|
"logits/chosen": -1.2953057289123535, |
|
"logits/rejected": -1.1235467195510864, |
|
"logps/chosen": -528.2382202148438, |
|
"logps/rejected": -629.0557861328125, |
|
"loss": 0.502, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.7498528957366943, |
|
"rewards/margins": 1.271679162979126, |
|
"rewards/rejected": -4.02153205871582, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"grad_norm": 16.11091423034668, |
|
"learning_rate": 3.8069280835019062e-06, |
|
"logits/chosen": -1.3702330589294434, |
|
"logits/rejected": -1.2128852605819702, |
|
"logps/chosen": -517.0223999023438, |
|
"logps/rejected": -641.86962890625, |
|
"loss": 0.4495, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.527927875518799, |
|
"rewards/margins": 1.4398820400238037, |
|
"rewards/rejected": -3.9678096771240234, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"eval_logits/chosen": -1.3215845823287964, |
|
"eval_logits/rejected": -1.1934013366699219, |
|
"eval_logps/chosen": -510.6183776855469, |
|
"eval_logps/rejected": -609.4039306640625, |
|
"eval_loss": 0.5144493579864502, |
|
"eval_rewards/accuracies": 0.7329999804496765, |
|
"eval_rewards/chosen": -2.460034132003784, |
|
"eval_rewards/margins": 1.1884018182754517, |
|
"eval_rewards/rejected": -3.6484363079071045, |
|
"eval_runtime": 1581.1744, |
|
"eval_samples_per_second": 1.265, |
|
"eval_steps_per_second": 0.158, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39518450667364563, |
|
"grad_norm": 14.719096183776855, |
|
"learning_rate": 3.7873993652552077e-06, |
|
"logits/chosen": -1.391238808631897, |
|
"logits/rejected": -1.3038396835327148, |
|
"logps/chosen": -469.97021484375, |
|
"logps/rejected": -566.0907592773438, |
|
"loss": 0.5832, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.3631210327148438, |
|
"rewards/margins": 1.0193054676055908, |
|
"rewards/rejected": -3.3824265003204346, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 10.856192588806152, |
|
"learning_rate": 3.7677631495319953e-06, |
|
"logits/chosen": -1.5884860754013062, |
|
"logits/rejected": -1.493381381034851, |
|
"logps/chosen": -435.23876953125, |
|
"logps/rejected": -504.7139587402344, |
|
"loss": 0.5117, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.7161569595336914, |
|
"rewards/margins": 0.8754542469978333, |
|
"rewards/rejected": -2.5916106700897217, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4004187385501178, |
|
"grad_norm": 7.564871311187744, |
|
"learning_rate": 3.748021075950633e-06, |
|
"logits/chosen": -1.6664730310440063, |
|
"logits/rejected": -1.5948269367218018, |
|
"logps/chosen": -443.94915771484375, |
|
"logps/rejected": -495.65643310546875, |
|
"loss": 0.5975, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.69313645362854, |
|
"rewards/margins": 0.6239147186279297, |
|
"rewards/rejected": -2.3170511722564697, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.40303585448835383, |
|
"grad_norm": 11.641048431396484, |
|
"learning_rate": 3.7281747929685824e-06, |
|
"logits/chosen": -1.4826856851577759, |
|
"logits/rejected": -1.3677144050598145, |
|
"logps/chosen": -416.457275390625, |
|
"logps/rejected": -480.78118896484375, |
|
"loss": 0.5409, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7890077829360962, |
|
"rewards/margins": 0.8001953959465027, |
|
"rewards/rejected": -2.589203119277954, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4056529704265899, |
|
"grad_norm": 8.30944538116455, |
|
"learning_rate": 3.7082259577447604e-06, |
|
"logits/chosen": -1.5535342693328857, |
|
"logits/rejected": -1.4495346546173096, |
|
"logps/chosen": -458.6407165527344, |
|
"logps/rejected": -524.59423828125, |
|
"loss": 0.4768, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8754609823226929, |
|
"rewards/margins": 0.8596477508544922, |
|
"rewards/rejected": -2.7351086139678955, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 11.737417221069336, |
|
"learning_rate": 3.6881762360011688e-06, |
|
"logits/chosen": -1.492614984512329, |
|
"logits/rejected": -1.2994787693023682, |
|
"logps/chosen": -562.4512939453125, |
|
"logps/rejected": -632.38818359375, |
|
"loss": 0.5106, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.8106777667999268, |
|
"rewards/margins": 1.069096326828003, |
|
"rewards/rejected": -3.879774570465088, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.410887202303062, |
|
"grad_norm": 13.686888694763184, |
|
"learning_rate": 3.668027301883802e-06, |
|
"logits/chosen": -1.384356141090393, |
|
"logits/rejected": -1.24310302734375, |
|
"logps/chosen": -584.9012451171875, |
|
"logps/rejected": -698.5891723632812, |
|
"loss": 0.4669, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.33648419380188, |
|
"rewards/margins": 1.2853710651397705, |
|
"rewards/rejected": -4.621855735778809, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.4135043182412981, |
|
"grad_norm": 8.767433166503906, |
|
"learning_rate": 3.64778083782286e-06, |
|
"logits/chosen": -1.2861278057098389, |
|
"logits/rejected": -1.2656017541885376, |
|
"logps/chosen": -606.9259033203125, |
|
"logps/rejected": -742.4180908203125, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.5447895526885986, |
|
"rewards/margins": 1.0646626949310303, |
|
"rewards/rejected": -4.609452247619629, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.4161214341795342, |
|
"grad_norm": 9.344108581542969, |
|
"learning_rate": 3.627438534392268e-06, |
|
"logits/chosen": -1.4602949619293213, |
|
"logits/rejected": -1.431760311126709, |
|
"logps/chosen": -516.3866577148438, |
|
"logps/rejected": -624.79248046875, |
|
"loss": 0.4808, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.784986972808838, |
|
"rewards/margins": 1.016643762588501, |
|
"rewards/rejected": -3.801631212234497, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 8.162246704101562, |
|
"learning_rate": 3.607002090168506e-06, |
|
"logits/chosen": -1.3787976503372192, |
|
"logits/rejected": -1.3049051761627197, |
|
"logps/chosen": -540.64697265625, |
|
"logps/rejected": -601.52392578125, |
|
"loss": 0.5586, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.764016628265381, |
|
"rewards/margins": 0.8225992321968079, |
|
"rewards/rejected": -3.586615800857544, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"eval_logits/chosen": -1.3065768480300903, |
|
"eval_logits/rejected": -1.1838239431381226, |
|
"eval_logps/chosen": -514.4846801757812, |
|
"eval_logps/rejected": -594.8329467773438, |
|
"eval_loss": 0.493674635887146, |
|
"eval_rewards/accuracies": 0.7429999709129333, |
|
"eval_rewards/chosen": -2.4986977577209473, |
|
"eval_rewards/margins": 1.004028558731079, |
|
"eval_rewards/rejected": -3.5027263164520264, |
|
"eval_runtime": 1581.6015, |
|
"eval_samples_per_second": 1.265, |
|
"eval_steps_per_second": 0.158, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4213556660560063, |
|
"grad_norm": 5.075291156768799, |
|
"learning_rate": 3.586473211588787e-06, |
|
"logits/chosen": -1.412367582321167, |
|
"logits/rejected": -1.3295118808746338, |
|
"logps/chosen": -484.82781982421875, |
|
"logps/rejected": -605.0839233398438, |
|
"loss": 0.4373, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.4015586376190186, |
|
"rewards/margins": 1.136573076248169, |
|
"rewards/rejected": -3.5381317138671875, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4239727819942423, |
|
"grad_norm": 14.283614158630371, |
|
"learning_rate": 3.5658536128085623e-06, |
|
"logits/chosen": -1.3783130645751953, |
|
"logits/rejected": -1.2037944793701172, |
|
"logps/chosen": -573.0740966796875, |
|
"logps/rejected": -635.7131958007812, |
|
"loss": 0.5834, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -3.1027450561523438, |
|
"rewards/margins": 0.9221722483634949, |
|
"rewards/rejected": -4.024917125701904, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.4265898979324784, |
|
"grad_norm": 9.143744468688965, |
|
"learning_rate": 3.545145015558399e-06, |
|
"logits/chosen": -1.0775479078292847, |
|
"logits/rejected": -1.0551337003707886, |
|
"logps/chosen": -561.3260498046875, |
|
"logps/rejected": -659.1534423828125, |
|
"loss": 0.4713, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.2980690002441406, |
|
"rewards/margins": 1.12051522731781, |
|
"rewards/rejected": -4.41858434677124, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 5.6775970458984375, |
|
"learning_rate": 3.5243491490002056e-06, |
|
"logits/chosen": -1.1355875730514526, |
|
"logits/rejected": -1.0401685237884521, |
|
"logps/chosen": -623.6513671875, |
|
"logps/rejected": -720.0593872070312, |
|
"loss": 0.5648, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.7002768516540527, |
|
"rewards/margins": 1.0166988372802734, |
|
"rewards/rejected": -4.716975688934326, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.4318241298089505, |
|
"grad_norm": 8.781460762023926, |
|
"learning_rate": 3.503467749582857e-06, |
|
"logits/chosen": -1.1672102212905884, |
|
"logits/rejected": -0.9630798101425171, |
|
"logps/chosen": -615.6878051757812, |
|
"logps/rejected": -661.8714599609375, |
|
"loss": 0.5984, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -3.6454033851623535, |
|
"rewards/margins": 0.8021238446235657, |
|
"rewards/rejected": -4.4475274085998535, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.4344412457471866, |
|
"grad_norm": 17.43423843383789, |
|
"learning_rate": 3.4825025608971947e-06, |
|
"logits/chosen": -1.0625419616699219, |
|
"logits/rejected": -0.9829475283622742, |
|
"logps/chosen": -560.93896484375, |
|
"logps/rejected": -651.3380126953125, |
|
"loss": 0.5128, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.390519618988037, |
|
"rewards/margins": 0.8660950660705566, |
|
"rewards/rejected": -4.256614685058594, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.43705836168542267, |
|
"grad_norm": 6.469093322753906, |
|
"learning_rate": 3.4614553335304407e-06, |
|
"logits/chosen": -1.1648396253585815, |
|
"logits/rejected": -0.9342324137687683, |
|
"logps/chosen": -580.8231201171875, |
|
"logps/rejected": -656.4859008789062, |
|
"loss": 0.4754, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.1200098991394043, |
|
"rewards/margins": 1.074102759361267, |
|
"rewards/rejected": -4.194112300872803, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 9.7577486038208, |
|
"learning_rate": 3.4403278249200222e-06, |
|
"logits/chosen": -1.2089046239852905, |
|
"logits/rejected": -0.9787479639053345, |
|
"logps/chosen": -566.4661254882812, |
|
"logps/rejected": -651.2072143554688, |
|
"loss": 0.449, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.8394370079040527, |
|
"rewards/margins": 1.2666107416152954, |
|
"rewards/rejected": -4.106047630310059, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44229259356189476, |
|
"grad_norm": 17.261714935302734, |
|
"learning_rate": 3.4191217992068293e-06, |
|
"logits/chosen": -1.2693849802017212, |
|
"logits/rejected": -1.0484362840652466, |
|
"logps/chosen": -608.1680908203125, |
|
"logps/rejected": -667.4273681640625, |
|
"loss": 0.5398, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.3012607097625732, |
|
"rewards/margins": 1.0486795902252197, |
|
"rewards/rejected": -4.349940299987793, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"grad_norm": 12.436171531677246, |
|
"learning_rate": 3.3978390270879056e-06, |
|
"logits/chosen": -1.1414504051208496, |
|
"logits/rejected": -1.039656400680542, |
|
"logps/chosen": -577.3287353515625, |
|
"logps/rejected": -704.9564208984375, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.674631118774414, |
|
"rewards/margins": 1.2074581384658813, |
|
"rewards/rejected": -4.882089138031006, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"eval_logits/chosen": -1.106369972229004, |
|
"eval_logits/rejected": -0.9647883176803589, |
|
"eval_logps/chosen": -626.7305297851562, |
|
"eval_logps/rejected": -725.0693969726562, |
|
"eval_loss": 0.4948273003101349, |
|
"eval_rewards/accuracies": 0.7294999957084656, |
|
"eval_rewards/chosen": -3.6211562156677246, |
|
"eval_rewards/margins": 1.183934211730957, |
|
"eval_rewards/rejected": -4.805090427398682, |
|
"eval_runtime": 1583.0831, |
|
"eval_samples_per_second": 1.263, |
|
"eval_steps_per_second": 0.158, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4475268254383669, |
|
"grad_norm": 13.189878463745117, |
|
"learning_rate": 3.3764812856685995e-06, |
|
"logits/chosen": -1.2679539918899536, |
|
"logits/rejected": -1.2505522966384888, |
|
"logps/chosen": -562.8509521484375, |
|
"logps/rejected": -688.5130615234375, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.3842010498046875, |
|
"rewards/margins": 1.1209660768508911, |
|
"rewards/rejected": -4.505166530609131, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 10.39622974395752, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": -1.4502748250961304, |
|
"logits/rejected": -1.3018438816070557, |
|
"logps/chosen": -608.05517578125, |
|
"logps/rejected": -713.238037109375, |
|
"loss": 0.5054, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.393193006515503, |
|
"rewards/margins": 1.203611135482788, |
|
"rewards/rejected": -4.596804618835449, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45276105731483907, |
|
"grad_norm": 9.055331230163574, |
|
"learning_rate": 3.3335480345008907e-06, |
|
"logits/chosen": -1.3674051761627197, |
|
"logits/rejected": -1.2791941165924072, |
|
"logps/chosen": -541.0001831054688, |
|
"logps/rejected": -634.0692749023438, |
|
"loss": 0.4663, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.8097071647644043, |
|
"rewards/margins": 1.1776827573776245, |
|
"rewards/rejected": -3.9873898029327393, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.4553781732530751, |
|
"grad_norm": 11.12189769744873, |
|
"learning_rate": 3.3119761096666055e-06, |
|
"logits/chosen": -1.4076497554779053, |
|
"logits/rejected": -1.251043438911438, |
|
"logps/chosen": -572.8175659179688, |
|
"logps/rejected": -628.5023803710938, |
|
"loss": 0.5592, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.9791908264160156, |
|
"rewards/margins": 0.8598833084106445, |
|
"rewards/rejected": -3.839073896408081, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.45799528919131116, |
|
"grad_norm": 8.793429374694824, |
|
"learning_rate": 3.290336385060832e-06, |
|
"logits/chosen": -1.5849800109863281, |
|
"logits/rejected": -1.3935880661010742, |
|
"logps/chosen": -543.2794189453125, |
|
"logps/rejected": -622.4841918945312, |
|
"loss": 0.5157, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.9448046684265137, |
|
"rewards/margins": 0.9939256906509399, |
|
"rewards/rejected": -3.9387307167053223, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 11.67789077758789, |
|
"learning_rate": 3.268630667594348e-06, |
|
"logits/chosen": -1.357431173324585, |
|
"logits/rejected": -1.3213989734649658, |
|
"logps/chosen": -582.795654296875, |
|
"logps/rejected": -671.8525390625, |
|
"loss": 0.4753, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.2719674110412598, |
|
"rewards/margins": 1.1332902908325195, |
|
"rewards/rejected": -4.4052581787109375, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4632295210677833, |
|
"grad_norm": 10.402399063110352, |
|
"learning_rate": 3.2468607696883147e-06, |
|
"logits/chosen": -1.3948209285736084, |
|
"logits/rejected": -1.33302640914917, |
|
"logps/chosen": -597.5047607421875, |
|
"logps/rejected": -718.071044921875, |
|
"loss": 0.4639, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.4439845085144043, |
|
"rewards/margins": 1.1630891561508179, |
|
"rewards/rejected": -4.607073783874512, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.46584663700601936, |
|
"grad_norm": 8.766528129577637, |
|
"learning_rate": 3.225028509122944e-06, |
|
"logits/chosen": -1.4917911291122437, |
|
"logits/rejected": -1.3801844120025635, |
|
"logps/chosen": -554.3919677734375, |
|
"logps/rejected": -640.0030517578125, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.1797173023223877, |
|
"rewards/margins": 0.9688693881034851, |
|
"rewards/rejected": -4.148587226867676, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.4684637529442554, |
|
"grad_norm": 15.830283164978027, |
|
"learning_rate": 3.2031357088857083e-06, |
|
"logits/chosen": -1.43815279006958, |
|
"logits/rejected": -1.3467546701431274, |
|
"logps/chosen": -635.6055908203125, |
|
"logps/rejected": -746.7036743164062, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.6045749187469482, |
|
"rewards/margins": 1.145918607711792, |
|
"rewards/rejected": -4.75049352645874, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 15.555473327636719, |
|
"learning_rate": 3.181184197019127e-06, |
|
"logits/chosen": -1.162718653678894, |
|
"logits/rejected": -1.0536067485809326, |
|
"logps/chosen": -632.3372802734375, |
|
"logps/rejected": -805.0314331054688, |
|
"loss": 0.485, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -4.021511554718018, |
|
"rewards/margins": 1.4656221866607666, |
|
"rewards/rejected": -5.487133979797363, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"eval_logits/chosen": -1.1612821817398071, |
|
"eval_logits/rejected": -1.0275636911392212, |
|
"eval_logps/chosen": -666.7680053710938, |
|
"eval_logps/rejected": -767.4140625, |
|
"eval_loss": 0.4885352551937103, |
|
"eval_rewards/accuracies": 0.7524999976158142, |
|
"eval_rewards/chosen": -4.021530628204346, |
|
"eval_rewards/margins": 1.2070072889328003, |
|
"eval_rewards/rejected": -5.228537082672119, |
|
"eval_runtime": 1582.3542, |
|
"eval_samples_per_second": 1.264, |
|
"eval_steps_per_second": 0.158, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47369798482072756, |
|
"grad_norm": 19.3488712310791, |
|
"learning_rate": 3.159175806468126e-06, |
|
"logits/chosen": -1.1995022296905518, |
|
"logits/rejected": -1.0088884830474854, |
|
"logps/chosen": -648.7389526367188, |
|
"logps/rejected": -741.3822021484375, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -4.001690864562988, |
|
"rewards/margins": 1.2280538082122803, |
|
"rewards/rejected": -5.229744911193848, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.4763151007589636, |
|
"grad_norm": 12.791426658630371, |
|
"learning_rate": 3.1371123749269804e-06, |
|
"logits/chosen": -1.2882441282272339, |
|
"logits/rejected": -1.2072982788085938, |
|
"logps/chosen": -709.89990234375, |
|
"logps/rejected": -785.1103515625, |
|
"loss": 0.5788, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -4.266518592834473, |
|
"rewards/margins": 0.9120771288871765, |
|
"rewards/rejected": -5.178596496582031, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.4789322166971997, |
|
"grad_norm": 13.390192985534668, |
|
"learning_rate": 3.114995744685877e-06, |
|
"logits/chosen": -1.2616273164749146, |
|
"logits/rejected": -1.2146679162979126, |
|
"logps/chosen": -636.3683471679688, |
|
"logps/rejected": -724.3128051757812, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.891195774078369, |
|
"rewards/margins": 1.0095303058624268, |
|
"rewards/rejected": -4.900726318359375, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 6.8147382736206055, |
|
"learning_rate": 3.0928277624770743e-06, |
|
"logits/chosen": -1.444204330444336, |
|
"logits/rejected": -1.2651526927947998, |
|
"logps/chosen": -641.5262451171875, |
|
"logps/rejected": -739.78271484375, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.5686423778533936, |
|
"rewards/margins": 1.2668758630752563, |
|
"rewards/rejected": -4.835517883300781, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.4841664485736718, |
|
"grad_norm": 7.521138668060303, |
|
"learning_rate": 3.070610279320708e-06, |
|
"logits/chosen": -1.4095209836959839, |
|
"logits/rejected": -1.2402890920639038, |
|
"logps/chosen": -645.0303955078125, |
|
"logps/rejected": -744.2957153320312, |
|
"loss": 0.44, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.5808544158935547, |
|
"rewards/margins": 1.2336914539337158, |
|
"rewards/rejected": -4.81454610824585, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.48678356451190785, |
|
"grad_norm": 5.6627326011657715, |
|
"learning_rate": 3.0483451503702264e-06, |
|
"logits/chosen": -1.3021334409713745, |
|
"logits/rejected": -1.2168514728546143, |
|
"logps/chosen": -695.7568969726562, |
|
"logps/rejected": -792.8882446289062, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -4.097269535064697, |
|
"rewards/margins": 1.1725034713745117, |
|
"rewards/rejected": -5.269773006439209, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.48940068045014395, |
|
"grad_norm": 7.5200653076171875, |
|
"learning_rate": 3.0260342347574916e-06, |
|
"logits/chosen": -1.2390494346618652, |
|
"logits/rejected": -1.080314040184021, |
|
"logps/chosen": -702.23681640625, |
|
"logps/rejected": -841.40234375, |
|
"loss": 0.4339, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -4.300023555755615, |
|
"rewards/margins": 1.5825563669204712, |
|
"rewards/rejected": -5.882579803466797, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 11.655389785766602, |
|
"learning_rate": 3.0036793954375358e-06, |
|
"logits/chosen": -1.2104527950286865, |
|
"logits/rejected": -1.0451383590698242, |
|
"logps/chosen": -758.5480346679688, |
|
"logps/rejected": -856.0066528320312, |
|
"loss": 0.4526, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -4.885684967041016, |
|
"rewards/margins": 1.3967931270599365, |
|
"rewards/rejected": -6.282477855682373, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49463491232661605, |
|
"grad_norm": 12.27940845489502, |
|
"learning_rate": 2.981282499033009e-06, |
|
"logits/chosen": -1.2786993980407715, |
|
"logits/rejected": -1.154975175857544, |
|
"logps/chosen": -723.9029541015625, |
|
"logps/rejected": -814.64892578125, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.477005958557129, |
|
"rewards/margins": 1.1614168882369995, |
|
"rewards/rejected": -5.63842248916626, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"grad_norm": 7.594533920288086, |
|
"learning_rate": 2.9588454156783163e-06, |
|
"logits/chosen": -1.377384066581726, |
|
"logits/rejected": -1.2067997455596924, |
|
"logps/chosen": -656.7349243164062, |
|
"logps/rejected": -785.1405029296875, |
|
"loss": 0.4387, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.754779815673828, |
|
"rewards/margins": 1.5012847185134888, |
|
"rewards/rejected": -5.256064414978027, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"eval_logits/chosen": -1.2419021129608154, |
|
"eval_logits/rejected": -1.1075339317321777, |
|
"eval_logps/chosen": -645.9786376953125, |
|
"eval_logps/rejected": -748.0073852539062, |
|
"eval_loss": 0.4897352159023285, |
|
"eval_rewards/accuracies": 0.7459999918937683, |
|
"eval_rewards/chosen": -3.8136374950408936, |
|
"eval_rewards/margins": 1.2208337783813477, |
|
"eval_rewards/rejected": -5.034470558166504, |
|
"eval_runtime": 1582.9561, |
|
"eval_samples_per_second": 1.263, |
|
"eval_steps_per_second": 0.158, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4998691442030882, |
|
"grad_norm": 12.187158584594727, |
|
"learning_rate": 2.9363700188634597e-06, |
|
"logits/chosen": -1.3125779628753662, |
|
"logits/rejected": -1.1751724481582642, |
|
"logps/chosen": -683.4190063476562, |
|
"logps/rejected": -763.2318115234375, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -4.184360027313232, |
|
"rewards/margins": 1.1687465906143188, |
|
"rewards/rejected": -5.353107452392578, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 13.572044372558594, |
|
"learning_rate": 2.9138581852776053e-06, |
|
"logits/chosen": -1.2750508785247803, |
|
"logits/rejected": -1.1450278759002686, |
|
"logps/chosen": -725.9456176757812, |
|
"logps/rejected": -845.9400634765625, |
|
"loss": 0.51, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -4.662615776062012, |
|
"rewards/margins": 1.3297778367996216, |
|
"rewards/rejected": -5.992393970489502, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5051033760795604, |
|
"grad_norm": 7.278676509857178, |
|
"learning_rate": 2.8913117946523805e-06, |
|
"logits/chosen": -1.3194019794464111, |
|
"logits/rejected": -1.127657175064087, |
|
"logps/chosen": -717.7136840820312, |
|
"logps/rejected": -793.281982421875, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.51358699798584, |
|
"rewards/margins": 1.136637806892395, |
|
"rewards/rejected": -5.650224685668945, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5077204920177963, |
|
"grad_norm": 11.58935260772705, |
|
"learning_rate": 2.8687327296049126e-06, |
|
"logits/chosen": -1.3775203227996826, |
|
"logits/rejected": -1.272014856338501, |
|
"logps/chosen": -640.1255493164062, |
|
"logps/rejected": -746.0568237304688, |
|
"loss": 0.5109, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.8106136322021484, |
|
"rewards/margins": 1.1573151350021362, |
|
"rewards/rejected": -4.967928886413574, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5103376079560324, |
|
"grad_norm": 11.089369773864746, |
|
"learning_rate": 2.8461228754806376e-06, |
|
"logits/chosen": -1.464210867881775, |
|
"logits/rejected": -1.2921464443206787, |
|
"logps/chosen": -602.5208740234375, |
|
"logps/rejected": -670.5046997070312, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.2676520347595215, |
|
"rewards/margins": 0.9385223388671875, |
|
"rewards/rejected": -4.206175327301025, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 9.643657684326172, |
|
"learning_rate": 2.823484120195865e-06, |
|
"logits/chosen": -1.5414282083511353, |
|
"logits/rejected": -1.351072907447815, |
|
"logps/chosen": -588.3757934570312, |
|
"logps/rejected": -664.9412841796875, |
|
"loss": 0.4289, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.1064059734344482, |
|
"rewards/margins": 1.1096309423446655, |
|
"rewards/rejected": -4.216037273406982, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.5155718398325045, |
|
"grad_norm": 12.020472526550293, |
|
"learning_rate": 2.8008183540801486e-06, |
|
"logits/chosen": -1.2709811925888062, |
|
"logits/rejected": -1.1159567832946777, |
|
"logps/chosen": -681.0741577148438, |
|
"logps/rejected": -740.572509765625, |
|
"loss": 0.5049, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -4.075676918029785, |
|
"rewards/margins": 1.0485068559646606, |
|
"rewards/rejected": -5.124184608459473, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.5181889557707406, |
|
"grad_norm": 14.415773391723633, |
|
"learning_rate": 2.7781274697184353e-06, |
|
"logits/chosen": -1.0758979320526123, |
|
"logits/rejected": -1.0921449661254883, |
|
"logps/chosen": -699.5072631835938, |
|
"logps/rejected": -847.3580932617188, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -4.702380180358887, |
|
"rewards/margins": 1.275566816329956, |
|
"rewards/rejected": -5.9779462814331055, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.5208060717089767, |
|
"grad_norm": 8.904159545898438, |
|
"learning_rate": 2.7554133617930397e-06, |
|
"logits/chosen": -1.1817315816879272, |
|
"logits/rejected": -1.0418232679367065, |
|
"logps/chosen": -711.0933227539062, |
|
"logps/rejected": -818.5221557617188, |
|
"loss": 0.5067, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -4.63338565826416, |
|
"rewards/margins": 1.2256052494049072, |
|
"rewards/rejected": -5.858990669250488, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 13.392192840576172, |
|
"learning_rate": 2.7326779269254363e-06, |
|
"logits/chosen": -1.3758423328399658, |
|
"logits/rejected": -1.1924948692321777, |
|
"logps/chosen": -736.4212646484375, |
|
"logps/rejected": -802.3355712890625, |
|
"loss": 0.4613, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.471527099609375, |
|
"rewards/margins": 1.2190699577331543, |
|
"rewards/rejected": -5.690597057342529, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"eval_logits/chosen": -1.1241540908813477, |
|
"eval_logits/rejected": -0.9858745336532593, |
|
"eval_logps/chosen": -721.045654296875, |
|
"eval_logps/rejected": -814.3307495117188, |
|
"eval_loss": 0.494096577167511, |
|
"eval_rewards/accuracies": 0.7409999966621399, |
|
"eval_rewards/chosen": -4.56430721282959, |
|
"eval_rewards/margins": 1.133396863937378, |
|
"eval_rewards/rejected": -5.697703838348389, |
|
"eval_runtime": 1582.7861, |
|
"eval_samples_per_second": 1.264, |
|
"eval_steps_per_second": 0.158, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5260403035854488, |
|
"grad_norm": 9.46854019165039, |
|
"learning_rate": 2.7099230635178954e-06, |
|
"logits/chosen": -1.2051199674606323, |
|
"logits/rejected": -1.154191017150879, |
|
"logps/chosen": -719.0904541015625, |
|
"logps/rejected": -816.6644287109375, |
|
"loss": 0.5094, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -4.54345703125, |
|
"rewards/margins": 1.0406770706176758, |
|
"rewards/rejected": -5.584134101867676, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.528657419523685, |
|
"grad_norm": 10.047252655029297, |
|
"learning_rate": 2.6871506715949608e-06, |
|
"logits/chosen": -1.3990776538848877, |
|
"logits/rejected": -1.2440322637557983, |
|
"logps/chosen": -642.8494873046875, |
|
"logps/rejected": -737.8421020507812, |
|
"loss": 0.4694, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.8995118141174316, |
|
"rewards/margins": 1.1307276487350464, |
|
"rewards/rejected": -5.030240058898926, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5312745354619209, |
|
"grad_norm": 9.14461898803711, |
|
"learning_rate": 2.6643626526448063e-06, |
|
"logits/chosen": -1.4923516511917114, |
|
"logits/rejected": -1.3187439441680908, |
|
"logps/chosen": -675.9149169921875, |
|
"logps/rejected": -767.320068359375, |
|
"loss": 0.4364, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.8115932941436768, |
|
"rewards/margins": 1.337866187095642, |
|
"rewards/rejected": -5.149459362030029, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 10.868741989135742, |
|
"learning_rate": 2.6415609094604562e-06, |
|
"logits/chosen": -1.2933242321014404, |
|
"logits/rejected": -1.2320917844772339, |
|
"logps/chosen": -715.2813720703125, |
|
"logps/rejected": -814.0577392578125, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -4.448263168334961, |
|
"rewards/margins": 1.1784393787384033, |
|
"rewards/rejected": -5.626702308654785, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.5365087673383931, |
|
"grad_norm": 13.911977767944336, |
|
"learning_rate": 2.618747345980904e-06, |
|
"logits/chosen": -1.2531259059906006, |
|
"logits/rejected": -1.0494439601898193, |
|
"logps/chosen": -810.5252685546875, |
|
"logps/rejected": -883.2147216796875, |
|
"loss": 0.5478, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -5.621924877166748, |
|
"rewards/margins": 1.2322816848754883, |
|
"rewards/rejected": -6.8542070388793945, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5391258832766291, |
|
"grad_norm": 5.480376243591309, |
|
"learning_rate": 2.595923867132136e-06, |
|
"logits/chosen": -1.294999599456787, |
|
"logits/rejected": -1.1645463705062866, |
|
"logps/chosen": -820.8541870117188, |
|
"logps/rejected": -930.9850463867188, |
|
"loss": 0.4806, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -5.399382591247559, |
|
"rewards/margins": 1.3490444421768188, |
|
"rewards/rejected": -6.748426914215088, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5417429992148652, |
|
"grad_norm": 8.86125373840332, |
|
"learning_rate": 2.5730923786680672e-06, |
|
"logits/chosen": -1.2584960460662842, |
|
"logits/rejected": -1.2339789867401123, |
|
"logps/chosen": -752.2889404296875, |
|
"logps/rejected": -858.5090942382812, |
|
"loss": 0.545, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -5.008147716522217, |
|
"rewards/margins": 0.9858999252319336, |
|
"rewards/rejected": -5.99404764175415, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 6.723228931427002, |
|
"learning_rate": 2.5502547870114137e-06, |
|
"logits/chosen": -1.3701547384262085, |
|
"logits/rejected": -1.22635018825531, |
|
"logps/chosen": -713.291748046875, |
|
"logps/rejected": -783.838623046875, |
|
"loss": 0.5206, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.579865455627441, |
|
"rewards/margins": 1.0079872608184814, |
|
"rewards/rejected": -5.587852954864502, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5469772310913373, |
|
"grad_norm": 25.50057601928711, |
|
"learning_rate": 2.527412999094507e-06, |
|
"logits/chosen": -1.343469262123108, |
|
"logits/rejected": -1.181768774986267, |
|
"logps/chosen": -755.4833374023438, |
|
"logps/rejected": -878.3968505859375, |
|
"loss": 0.4539, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.6183061599731445, |
|
"rewards/margins": 1.3548907041549683, |
|
"rewards/rejected": -5.973196983337402, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"grad_norm": 15.880741119384766, |
|
"learning_rate": 2.504568922200064e-06, |
|
"logits/chosen": -1.2968125343322754, |
|
"logits/rejected": -1.1565604209899902, |
|
"logps/chosen": -708.8741455078125, |
|
"logps/rejected": -828.703125, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -4.676494598388672, |
|
"rewards/margins": 1.3593757152557373, |
|
"rewards/rejected": -6.035870552062988, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"eval_logits/chosen": -1.2699260711669922, |
|
"eval_logits/rejected": -1.1445250511169434, |
|
"eval_logps/chosen": -729.02099609375, |
|
"eval_logps/rejected": -829.7324829101562, |
|
"eval_loss": 0.4877359867095947, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -4.6440606117248535, |
|
"eval_rewards/margins": 1.2076616287231445, |
|
"eval_rewards/rejected": -5.85172176361084, |
|
"eval_runtime": 1582.8734, |
|
"eval_samples_per_second": 1.264, |
|
"eval_steps_per_second": 0.158, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5522114629678094, |
|
"grad_norm": 8.566736221313477, |
|
"learning_rate": 2.4817244638019333e-06, |
|
"logits/chosen": -1.408442735671997, |
|
"logits/rejected": -1.2569968700408936, |
|
"logps/chosen": -724.885498046875, |
|
"logps/rejected": -797.1796875, |
|
"loss": 0.5195, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -4.491677284240723, |
|
"rewards/margins": 1.1345134973526, |
|
"rewards/rejected": -5.626191139221191, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 12.80423355102539, |
|
"learning_rate": 2.4588815314058155e-06, |
|
"logits/chosen": -1.4017808437347412, |
|
"logits/rejected": -1.3316090106964111, |
|
"logps/chosen": -649.954345703125, |
|
"logps/rejected": -722.61083984375, |
|
"loss": 0.4704, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -4.044957160949707, |
|
"rewards/margins": 1.0687569379806519, |
|
"rewards/rejected": -5.11371374130249, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5574456948442816, |
|
"grad_norm": 9.04439926147461, |
|
"learning_rate": 2.4360420323899922e-06, |
|
"logits/chosen": -1.4811222553253174, |
|
"logits/rejected": -1.3649004697799683, |
|
"logps/chosen": -638.9652099609375, |
|
"logps/rejected": -694.5174560546875, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.63200306892395, |
|
"rewards/margins": 0.8811875581741333, |
|
"rewards/rejected": -4.513190746307373, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5600628107825176, |
|
"grad_norm": 7.143604278564453, |
|
"learning_rate": 2.4132078738460585e-06, |
|
"logits/chosen": -1.5229297876358032, |
|
"logits/rejected": -1.3823162317276, |
|
"logps/chosen": -608.2154541015625, |
|
"logps/rejected": -684.1043090820312, |
|
"loss": 0.4571, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.4388651847839355, |
|
"rewards/margins": 1.1439706087112427, |
|
"rewards/rejected": -4.582836151123047, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5626799267207537, |
|
"grad_norm": 12.864585876464844, |
|
"learning_rate": 2.3903809624196826e-06, |
|
"logits/chosen": -1.4179461002349854, |
|
"logits/rejected": -1.2970348596572876, |
|
"logps/chosen": -606.7810668945312, |
|
"logps/rejected": -675.0203857421875, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.7310357093811035, |
|
"rewards/margins": 1.007622480392456, |
|
"rewards/rejected": -4.7386579513549805, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 21.30654525756836, |
|
"learning_rate": 2.3675632041513978e-06, |
|
"logits/chosen": -1.4896656274795532, |
|
"logits/rejected": -1.2570466995239258, |
|
"logps/chosen": -715.8450317382812, |
|
"logps/rejected": -766.1806640625, |
|
"loss": 0.5043, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -4.289364814758301, |
|
"rewards/margins": 1.15054190158844, |
|
"rewards/rejected": -5.439906597137451, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5679141585972258, |
|
"grad_norm": 10.769180297851562, |
|
"learning_rate": 2.3447565043174533e-06, |
|
"logits/chosen": -1.3695234060287476, |
|
"logits/rejected": -1.2086126804351807, |
|
"logps/chosen": -677.4241943359375, |
|
"logps/rejected": -737.0889892578125, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -4.191761016845703, |
|
"rewards/margins": 0.9847186803817749, |
|
"rewards/rejected": -5.176480293273926, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5705312745354619, |
|
"grad_norm": 9.414416313171387, |
|
"learning_rate": 2.321962767270724e-06, |
|
"logits/chosen": -1.4165534973144531, |
|
"logits/rejected": -1.287619709968567, |
|
"logps/chosen": -636.1911010742188, |
|
"logps/rejected": -688.1452026367188, |
|
"loss": 0.5339, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.863328456878662, |
|
"rewards/margins": 0.8915898203849792, |
|
"rewards/rejected": -4.754918575286865, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.573148390473698, |
|
"grad_norm": 7.431769847869873, |
|
"learning_rate": 2.299183896281692e-06, |
|
"logits/chosen": -1.3594014644622803, |
|
"logits/rejected": -1.2485125064849854, |
|
"logps/chosen": -615.8621826171875, |
|
"logps/rejected": -705.419921875, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.6098227500915527, |
|
"rewards/margins": 0.9186857342720032, |
|
"rewards/rejected": -4.52850866317749, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 7.507672309875488, |
|
"learning_rate": 2.2764217933795297e-06, |
|
"logits/chosen": -1.4956846237182617, |
|
"logits/rejected": -1.378463864326477, |
|
"logps/chosen": -583.9468994140625, |
|
"logps/rejected": -668.4165649414062, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.225862503051758, |
|
"rewards/margins": 1.049989104270935, |
|
"rewards/rejected": -4.275851726531982, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"eval_logits/chosen": -1.3664623498916626, |
|
"eval_logits/rejected": -1.2438994646072388, |
|
"eval_logps/chosen": -592.4716186523438, |
|
"eval_logps/rejected": -673.7171020507812, |
|
"eval_loss": 0.4812549352645874, |
|
"eval_rewards/accuracies": 0.7484999895095825, |
|
"eval_rewards/chosen": -3.27856707572937, |
|
"eval_rewards/margins": 1.0130008459091187, |
|
"eval_rewards/rejected": -4.291567802429199, |
|
"eval_runtime": 1583.0348, |
|
"eval_samples_per_second": 1.263, |
|
"eval_steps_per_second": 0.158, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5783826223501701, |
|
"grad_norm": 5.585911273956299, |
|
"learning_rate": 2.2536783591932786e-06, |
|
"logits/chosen": -1.5299404859542847, |
|
"logits/rejected": -1.3702765703201294, |
|
"logps/chosen": -628.8416137695312, |
|
"logps/rejected": -697.6521606445312, |
|
"loss": 0.5367, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.5344300270080566, |
|
"rewards/margins": 0.8697922825813293, |
|
"rewards/rejected": -4.404222011566162, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5809997382884062, |
|
"grad_norm": 10.258821487426758, |
|
"learning_rate": 2.230955492793149e-06, |
|
"logits/chosen": -1.3099156618118286, |
|
"logits/rejected": -1.2557871341705322, |
|
"logps/chosen": -661.6383056640625, |
|
"logps/rejected": -749.5225830078125, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.8788514137268066, |
|
"rewards/margins": 1.0083012580871582, |
|
"rewards/rejected": -4.887152671813965, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5836168542266422, |
|
"grad_norm": 6.397181987762451, |
|
"learning_rate": 2.208255091531947e-06, |
|
"logits/chosen": -1.3133596181869507, |
|
"logits/rejected": -1.229486107826233, |
|
"logps/chosen": -645.9674682617188, |
|
"logps/rejected": -744.9403076171875, |
|
"loss": 0.459, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.690619707107544, |
|
"rewards/margins": 1.3393402099609375, |
|
"rewards/rejected": -5.0299601554870605, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 17.43966293334961, |
|
"learning_rate": 2.1855790508866435e-06, |
|
"logits/chosen": -1.4071886539459229, |
|
"logits/rejected": -1.3069418668746948, |
|
"logps/chosen": -654.7639770507812, |
|
"logps/rejected": -755.3448486328125, |
|
"loss": 0.5125, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.5785202980041504, |
|
"rewards/margins": 1.1814624071121216, |
|
"rewards/rejected": -4.759982109069824, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.5888510861031143, |
|
"grad_norm": 6.193937301635742, |
|
"learning_rate": 2.162929264300107e-06, |
|
"logits/chosen": -1.4731229543685913, |
|
"logits/rejected": -1.3733452558517456, |
|
"logps/chosen": -552.7366333007812, |
|
"logps/rejected": -673.0077514648438, |
|
"loss": 0.4034, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.83868670463562, |
|
"rewards/margins": 1.426152229309082, |
|
"rewards/rejected": -4.264839172363281, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5914682020413504, |
|
"grad_norm": 11.390615463256836, |
|
"learning_rate": 2.1403076230230006e-06, |
|
"logits/chosen": -1.4542146921157837, |
|
"logits/rejected": -1.3438105583190918, |
|
"logps/chosen": -562.8676147460938, |
|
"logps/rejected": -622.1143798828125, |
|
"loss": 0.5682, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.917558193206787, |
|
"rewards/margins": 0.870224118232727, |
|
"rewards/rejected": -3.7877821922302246, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5940853179795865, |
|
"grad_norm": 7.33363151550293, |
|
"learning_rate": 2.11771601595586e-06, |
|
"logits/chosen": -1.5080881118774414, |
|
"logits/rejected": -1.3906389474868774, |
|
"logps/chosen": -563.2923583984375, |
|
"logps/rejected": -613.376220703125, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.813906669616699, |
|
"rewards/margins": 1.0224448442459106, |
|
"rewards/rejected": -3.836350917816162, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 16.42394256591797, |
|
"learning_rate": 2.0951563294913737e-06, |
|
"logits/chosen": -1.4487019777297974, |
|
"logits/rejected": -1.2721295356750488, |
|
"logps/chosen": -563.4182739257812, |
|
"logps/rejected": -641.2840576171875, |
|
"loss": 0.4499, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.0196499824523926, |
|
"rewards/margins": 1.0839171409606934, |
|
"rewards/rejected": -4.103566646575928, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5993195498560586, |
|
"grad_norm": 8.327413558959961, |
|
"learning_rate": 2.0726304473568693e-06, |
|
"logits/chosen": -1.3579986095428467, |
|
"logits/rejected": -1.2439508438110352, |
|
"logps/chosen": -605.872314453125, |
|
"logps/rejected": -684.9946899414062, |
|
"loss": 0.4622, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.47078013420105, |
|
"rewards/margins": 1.1140120029449463, |
|
"rewards/rejected": -4.584792137145996, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"grad_norm": 10.106181144714355, |
|
"learning_rate": 2.050140250457023e-06, |
|
"logits/chosen": -1.427367091178894, |
|
"logits/rejected": -1.222748875617981, |
|
"logps/chosen": -683.3574829101562, |
|
"logps/rejected": -780.22509765625, |
|
"loss": 0.4682, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.059211730957031, |
|
"rewards/margins": 1.3089077472686768, |
|
"rewards/rejected": -5.368119716644287, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"eval_logits/chosen": -1.19517183303833, |
|
"eval_logits/rejected": -1.0666708946228027, |
|
"eval_logps/chosen": -680.9020385742188, |
|
"eval_logps/rejected": -799.8125610351562, |
|
"eval_loss": 0.48845258355140686, |
|
"eval_rewards/accuracies": 0.7455000281333923, |
|
"eval_rewards/chosen": -4.162871837615967, |
|
"eval_rewards/margins": 1.3896511793136597, |
|
"eval_rewards/rejected": -5.552523612976074, |
|
"eval_runtime": 1582.9401, |
|
"eval_samples_per_second": 1.263, |
|
"eval_steps_per_second": 0.158, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6045537817325307, |
|
"grad_norm": 16.23677635192871, |
|
"learning_rate": 2.0276876167168042e-06, |
|
"logits/chosen": -1.1654255390167236, |
|
"logits/rejected": -1.065344214439392, |
|
"logps/chosen": -639.0543212890625, |
|
"logps/rejected": -735.3461303710938, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.146035194396973, |
|
"rewards/margins": 1.2893322706222534, |
|
"rewards/rejected": -5.435367584228516, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 8.372858047485352, |
|
"learning_rate": 2.0052744209246682e-06, |
|
"logits/chosen": -1.2929928302764893, |
|
"logits/rejected": -1.1719223260879517, |
|
"logps/chosen": -675.3624877929688, |
|
"logps/rejected": -769.5217895507812, |
|
"loss": 0.4909, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.201422214508057, |
|
"rewards/margins": 1.270340919494629, |
|
"rewards/rejected": -5.471763610839844, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.6097880136090029, |
|
"grad_norm": 10.845846176147461, |
|
"learning_rate": 1.9829025345760127e-06, |
|
"logits/chosen": -1.3178662061691284, |
|
"logits/rejected": -1.2613779306411743, |
|
"logps/chosen": -671.625732421875, |
|
"logps/rejected": -765.293701171875, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -3.9272491931915283, |
|
"rewards/margins": 0.9973451495170593, |
|
"rewards/rejected": -4.924594402313232, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.6124051295472389, |
|
"grad_norm": 8.701257705688477, |
|
"learning_rate": 1.9605738257169115e-06, |
|
"logits/chosen": -1.2995572090148926, |
|
"logits/rejected": -1.156830906867981, |
|
"logps/chosen": -596.4144897460938, |
|
"logps/rejected": -711.1951904296875, |
|
"loss": 0.4876, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.6349895000457764, |
|
"rewards/margins": 1.2283639907836914, |
|
"rewards/rejected": -4.863353252410889, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.615022245485475, |
|
"grad_norm": 10.194114685058594, |
|
"learning_rate": 1.9382901587881275e-06, |
|
"logits/chosen": -1.3707363605499268, |
|
"logits/rejected": -1.2504899501800537, |
|
"logps/chosen": -594.62109375, |
|
"logps/rejected": -689.7848510742188, |
|
"loss": 0.4225, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.3595714569091797, |
|
"rewards/margins": 1.2741920948028564, |
|
"rewards/rejected": -4.633763313293457, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 13.661014556884766, |
|
"learning_rate": 1.916053394469437e-06, |
|
"logits/chosen": -1.4088404178619385, |
|
"logits/rejected": -1.216277837753296, |
|
"logps/chosen": -611.9348754882812, |
|
"logps/rejected": -714.869873046875, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.4869492053985596, |
|
"rewards/margins": 1.1751216650009155, |
|
"rewards/rejected": -4.662070274353027, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.6202564773619471, |
|
"grad_norm": 8.576478004455566, |
|
"learning_rate": 1.8938653895242604e-06, |
|
"logits/chosen": -1.350233793258667, |
|
"logits/rejected": -1.172080159187317, |
|
"logps/chosen": -622.2972412109375, |
|
"logps/rejected": -720.3619384765625, |
|
"loss": 0.4285, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.581017255783081, |
|
"rewards/margins": 1.2686632871627808, |
|
"rewards/rejected": -4.8496809005737305, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.6228735933001832, |
|
"grad_norm": 10.128191947937012, |
|
"learning_rate": 1.8717279966446267e-06, |
|
"logits/chosen": -1.1970648765563965, |
|
"logits/rejected": -1.1168218851089478, |
|
"logps/chosen": -642.9293823242188, |
|
"logps/rejected": -753.526123046875, |
|
"loss": 0.4417, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.944753646850586, |
|
"rewards/margins": 1.2011665105819702, |
|
"rewards/rejected": -5.145920753479004, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.6254907092384192, |
|
"grad_norm": 6.274438381195068, |
|
"learning_rate": 1.8496430642964698e-06, |
|
"logits/chosen": -1.257615327835083, |
|
"logits/rejected": -1.1408653259277344, |
|
"logps/chosen": -668.20361328125, |
|
"logps/rejected": -755.4804077148438, |
|
"loss": 0.504, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.9589202404022217, |
|
"rewards/margins": 1.1090304851531982, |
|
"rewards/rejected": -5.06795072555542, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 6.87614107131958, |
|
"learning_rate": 1.827612436565286e-06, |
|
"logits/chosen": -1.2781522274017334, |
|
"logits/rejected": -1.12553071975708, |
|
"logps/chosen": -630.0660400390625, |
|
"logps/rejected": -739.07177734375, |
|
"loss": 0.4582, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.671879291534424, |
|
"rewards/margins": 1.2957801818847656, |
|
"rewards/rejected": -4.967658996582031, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"eval_logits/chosen": -1.1734751462936401, |
|
"eval_logits/rejected": -1.0476148128509521, |
|
"eval_logps/chosen": -638.953369140625, |
|
"eval_logps/rejected": -742.967529296875, |
|
"eval_loss": 0.4858860671520233, |
|
"eval_rewards/accuracies": 0.7459999918937683, |
|
"eval_rewards/chosen": -3.74338436126709, |
|
"eval_rewards/margins": 1.2406867742538452, |
|
"eval_rewards/rejected": -4.984071254730225, |
|
"eval_runtime": 1583.062, |
|
"eval_samples_per_second": 1.263, |
|
"eval_steps_per_second": 0.158, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6307249411148914, |
|
"grad_norm": 26.042829513549805, |
|
"learning_rate": 1.8056379530021492e-06, |
|
"logits/chosen": -1.3303982019424438, |
|
"logits/rejected": -1.2409372329711914, |
|
"logps/chosen": -617.5029296875, |
|
"logps/rejected": -694.068359375, |
|
"loss": 0.5426, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.733428955078125, |
|
"rewards/margins": 1.014293909072876, |
|
"rewards/rejected": -4.747722625732422, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.6333420570531274, |
|
"grad_norm": 28.547435760498047, |
|
"learning_rate": 1.7837214484701154e-06, |
|
"logits/chosen": -1.376987099647522, |
|
"logits/rejected": -1.2434319257736206, |
|
"logps/chosen": -573.7362060546875, |
|
"logps/rejected": -679.9476318359375, |
|
"loss": 0.457, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.2147457599639893, |
|
"rewards/margins": 1.295339584350586, |
|
"rewards/rejected": -4.510085105895996, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.6359591729913635, |
|
"grad_norm": 13.484001159667969, |
|
"learning_rate": 1.7618647529910043e-06, |
|
"logits/chosen": -1.3759535551071167, |
|
"logits/rejected": -1.2478660345077515, |
|
"logps/chosen": -576.2813720703125, |
|
"logps/rejected": -690.5980834960938, |
|
"loss": 0.4886, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.1959147453308105, |
|
"rewards/margins": 1.2616323232650757, |
|
"rewards/rejected": -4.457546710968018, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 9.300795555114746, |
|
"learning_rate": 1.7400696915925996e-06, |
|
"logits/chosen": -1.3617148399353027, |
|
"logits/rejected": -1.17989182472229, |
|
"logps/chosen": -608.29052734375, |
|
"logps/rejected": -669.2510986328125, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.3776187896728516, |
|
"rewards/margins": 1.1922826766967773, |
|
"rewards/rejected": -4.569901943206787, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.6411934048678356, |
|
"grad_norm": 14.334630012512207, |
|
"learning_rate": 1.718338084156254e-06, |
|
"logits/chosen": -1.30707585811615, |
|
"logits/rejected": -1.1524229049682617, |
|
"logps/chosen": -616.8284912109375, |
|
"logps/rejected": -702.1578369140625, |
|
"loss": 0.4582, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.30047869682312, |
|
"rewards/margins": 1.238687515258789, |
|
"rewards/rejected": -4.53916597366333, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.6438105208060717, |
|
"grad_norm": 12.695516586303711, |
|
"learning_rate": 1.6966717452649372e-06, |
|
"logits/chosen": -1.3930574655532837, |
|
"logits/rejected": -1.2220638990402222, |
|
"logps/chosen": -628.47802734375, |
|
"logps/rejected": -705.5762939453125, |
|
"loss": 0.434, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.5162925720214844, |
|
"rewards/margins": 1.2897061109542847, |
|
"rewards/rejected": -4.805999279022217, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.6464276367443078, |
|
"grad_norm": 8.12979793548584, |
|
"learning_rate": 1.6750724840517103e-06, |
|
"logits/chosen": -1.309929609298706, |
|
"logits/rejected": -1.2359564304351807, |
|
"logps/chosen": -624.3968505859375, |
|
"logps/rejected": -734.6637573242188, |
|
"loss": 0.5156, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.716935634613037, |
|
"rewards/margins": 1.0615085363388062, |
|
"rewards/rejected": -4.778443813323975, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 12.232304573059082, |
|
"learning_rate": 1.6535421040486686e-06, |
|
"logits/chosen": -1.1189963817596436, |
|
"logits/rejected": -1.0223686695098877, |
|
"logps/chosen": -641.5491943359375, |
|
"logps/rejected": -750.5919189453125, |
|
"loss": 0.4207, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.883084535598755, |
|
"rewards/margins": 1.4329438209533691, |
|
"rewards/rejected": -5.316028118133545, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.6516618686207799, |
|
"grad_norm": 12.076385498046875, |
|
"learning_rate": 1.6320824030363458e-06, |
|
"logits/chosen": -1.2061289548873901, |
|
"logits/rejected": -1.1477553844451904, |
|
"logps/chosen": -615.6104736328125, |
|
"logps/rejected": -732.1640625, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.807772159576416, |
|
"rewards/margins": 1.32088303565979, |
|
"rewards/rejected": -5.1286540031433105, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"grad_norm": 16.170991897583008, |
|
"learning_rate": 1.6106951728936028e-06, |
|
"logits/chosen": -1.3174595832824707, |
|
"logits/rejected": -1.1845439672470093, |
|
"logps/chosen": -622.0203857421875, |
|
"logps/rejected": -727.765869140625, |
|
"loss": 0.4948, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.6488277912139893, |
|
"rewards/margins": 1.1369919776916504, |
|
"rewards/rejected": -4.785820007324219, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"eval_logits/chosen": -1.1780551671981812, |
|
"eval_logits/rejected": -1.0471770763397217, |
|
"eval_logps/chosen": -625.8917846679688, |
|
"eval_logps/rejected": -728.1769409179688, |
|
"eval_loss": 0.48169419169425964, |
|
"eval_rewards/accuracies": 0.7425000071525574, |
|
"eval_rewards/chosen": -3.6127684116363525, |
|
"eval_rewards/margins": 1.223397970199585, |
|
"eval_rewards/rejected": -4.8361663818359375, |
|
"eval_runtime": 1582.5825, |
|
"eval_samples_per_second": 1.264, |
|
"eval_steps_per_second": 0.158, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.656896100497252, |
|
"grad_norm": 8.167618751525879, |
|
"learning_rate": 1.5893821994479996e-06, |
|
"logits/chosen": -1.3206676244735718, |
|
"logits/rejected": -1.2007882595062256, |
|
"logps/chosen": -629.7269287109375, |
|
"logps/rejected": -714.9498901367188, |
|
"loss": 0.4634, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.523585796356201, |
|
"rewards/margins": 1.2378898859024048, |
|
"rewards/rejected": -4.761475563049316, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 8.540782928466797, |
|
"learning_rate": 1.5681452623266868e-06, |
|
"logits/chosen": -1.3026401996612549, |
|
"logits/rejected": -1.052721381187439, |
|
"logps/chosen": -674.0380249023438, |
|
"logps/rejected": -757.1068115234375, |
|
"loss": 0.4692, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.850877046585083, |
|
"rewards/margins": 1.4135150909423828, |
|
"rewards/rejected": -5.264391899108887, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6621303323737242, |
|
"grad_norm": 7.330903053283691, |
|
"learning_rate": 1.5469861348078014e-06, |
|
"logits/chosen": -1.2844129800796509, |
|
"logits/rejected": -1.130133867263794, |
|
"logps/chosen": -647.9868774414062, |
|
"logps/rejected": -788.3101806640625, |
|
"loss": 0.4094, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.012332439422607, |
|
"rewards/margins": 1.5098378658294678, |
|
"rewards/rejected": -5.522170066833496, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6647474483119602, |
|
"grad_norm": 11.852506637573242, |
|
"learning_rate": 1.5259065836724035e-06, |
|
"logits/chosen": -1.1574808359146118, |
|
"logits/rejected": -1.0841835737228394, |
|
"logps/chosen": -666.4259033203125, |
|
"logps/rejected": -808.6561279296875, |
|
"loss": 0.415, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -4.217291831970215, |
|
"rewards/margins": 1.4833755493164062, |
|
"rewards/rejected": -5.700667381286621, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6673645642501963, |
|
"grad_norm": 28.528125762939453, |
|
"learning_rate": 1.5049083690569456e-06, |
|
"logits/chosen": -1.2304925918579102, |
|
"logits/rejected": -1.1360973119735718, |
|
"logps/chosen": -650.3413696289062, |
|
"logps/rejected": -789.5084228515625, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -4.129073619842529, |
|
"rewards/margins": 1.4358268976211548, |
|
"rewards/rejected": -5.5649003982543945, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 12.270853042602539, |
|
"learning_rate": 1.4839932443063057e-06, |
|
"logits/chosen": -1.2561757564544678, |
|
"logits/rejected": -1.1032966375350952, |
|
"logps/chosen": -674.009765625, |
|
"logps/rejected": -758.3587036132812, |
|
"loss": 0.4531, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.820760726928711, |
|
"rewards/margins": 1.4071722030639648, |
|
"rewards/rejected": -5.227932453155518, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6725987961266684, |
|
"grad_norm": 18.18036460876465, |
|
"learning_rate": 1.4631629558273803e-06, |
|
"logits/chosen": -1.313661813735962, |
|
"logits/rejected": -1.1960715055465698, |
|
"logps/chosen": -597.9262084960938, |
|
"logps/rejected": -680.4265747070312, |
|
"loss": 0.6359, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -3.5625762939453125, |
|
"rewards/margins": 0.9416546821594238, |
|
"rewards/rejected": -4.504230976104736, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6752159120649045, |
|
"grad_norm": 5.7035298347473145, |
|
"learning_rate": 1.4424192429432657e-06, |
|
"logits/chosen": -1.433161973953247, |
|
"logits/rejected": -1.3360836505889893, |
|
"logps/chosen": -541.0769653320312, |
|
"logps/rejected": -661.7882690429688, |
|
"loss": 0.4728, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.8529908657073975, |
|
"rewards/margins": 1.1742432117462158, |
|
"rewards/rejected": -4.027234077453613, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6778330280031405, |
|
"grad_norm": 10.68341064453125, |
|
"learning_rate": 1.421763837748016e-06, |
|
"logits/chosen": -1.3998830318450928, |
|
"logits/rejected": -1.3137894868850708, |
|
"logps/chosen": -547.9580078125, |
|
"logps/rejected": -663.7359619140625, |
|
"loss": 0.4378, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.9742558002471924, |
|
"rewards/margins": 1.242157220840454, |
|
"rewards/rejected": -4.2164130210876465, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 9.300664901733398, |
|
"learning_rate": 1.401198464962021e-06, |
|
"logits/chosen": -1.3978197574615479, |
|
"logits/rejected": -1.243277668952942, |
|
"logps/chosen": -609.5036010742188, |
|
"logps/rejected": -684.1070556640625, |
|
"loss": 0.4588, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.410266876220703, |
|
"rewards/margins": 1.1315726041793823, |
|
"rewards/rejected": -4.541839599609375, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"eval_logits/chosen": -1.239994764328003, |
|
"eval_logits/rejected": -1.1158030033111572, |
|
"eval_logps/chosen": -624.4171142578125, |
|
"eval_logps/rejected": -730.1331176757812, |
|
"eval_loss": 0.48536595702171326, |
|
"eval_rewards/accuracies": 0.7429999709129333, |
|
"eval_rewards/chosen": -3.598021984100342, |
|
"eval_rewards/margins": 1.2577056884765625, |
|
"eval_rewards/rejected": -4.855727672576904, |
|
"eval_runtime": 1583.4838, |
|
"eval_samples_per_second": 1.263, |
|
"eval_steps_per_second": 0.158, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6830672598796127, |
|
"grad_norm": 7.191524028778076, |
|
"learning_rate": 1.3807248417879896e-06, |
|
"logits/chosen": -1.4120800495147705, |
|
"logits/rejected": -1.2935571670532227, |
|
"logps/chosen": -636.7886962890625, |
|
"logps/rejected": -754.2188110351562, |
|
"loss": 0.4331, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.6377861499786377, |
|
"rewards/margins": 1.3760086297988892, |
|
"rewards/rejected": -5.013794898986816, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6856843758178487, |
|
"grad_norm": 31.748910903930664, |
|
"learning_rate": 1.3603446777675665e-06, |
|
"logits/chosen": -1.2162339687347412, |
|
"logits/rejected": -1.0921854972839355, |
|
"logps/chosen": -682.8983154296875, |
|
"logps/rejected": -786.9542846679688, |
|
"loss": 0.5535, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -4.19767427444458, |
|
"rewards/margins": 1.2615830898284912, |
|
"rewards/rejected": -5.45925760269165, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6883014917560848, |
|
"grad_norm": 5.8050408363342285, |
|
"learning_rate": 1.3400596746385817e-06, |
|
"logits/chosen": -1.3429977893829346, |
|
"logits/rejected": -1.1918339729309082, |
|
"logps/chosen": -684.1312255859375, |
|
"logps/rejected": -784.0355224609375, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -4.149628639221191, |
|
"rewards/margins": 1.270444393157959, |
|
"rewards/rejected": -5.420073509216309, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 11.632438659667969, |
|
"learning_rate": 1.3198715261929587e-06, |
|
"logits/chosen": -1.3046501874923706, |
|
"logits/rejected": -1.1579644680023193, |
|
"logps/chosen": -673.1400146484375, |
|
"logps/rejected": -799.3960571289062, |
|
"loss": 0.4126, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -4.367093086242676, |
|
"rewards/margins": 1.3987313508987427, |
|
"rewards/rejected": -5.765824317932129, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6935357236325569, |
|
"grad_norm": 11.619688034057617, |
|
"learning_rate": 1.2997819181352823e-06, |
|
"logits/chosen": -1.3125207424163818, |
|
"logits/rejected": -1.147236704826355, |
|
"logps/chosen": -736.60009765625, |
|
"logps/rejected": -873.1632690429688, |
|
"loss": 0.425, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -4.3982462882995605, |
|
"rewards/margins": 1.6112134456634521, |
|
"rewards/rejected": -6.009459495544434, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.696152839570793, |
|
"grad_norm": 18.199705123901367, |
|
"learning_rate": 1.2797925279420454e-06, |
|
"logits/chosen": -1.274456262588501, |
|
"logits/rejected": -1.1420204639434814, |
|
"logps/chosen": -735.5014038085938, |
|
"logps/rejected": -867.34912109375, |
|
"loss": 0.4631, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -4.637304782867432, |
|
"rewards/margins": 1.4379584789276123, |
|
"rewards/rejected": -6.075263023376465, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6987699555090291, |
|
"grad_norm": 12.624371528625488, |
|
"learning_rate": 1.2599050247215764e-06, |
|
"logits/chosen": -1.2420213222503662, |
|
"logits/rejected": -1.128442406654358, |
|
"logps/chosen": -703.1854248046875, |
|
"logps/rejected": -820.5750732421875, |
|
"loss": 0.4843, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -4.453032493591309, |
|
"rewards/margins": 1.4042234420776367, |
|
"rewards/rejected": -5.857255935668945, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 12.940664291381836, |
|
"learning_rate": 1.2401210690746705e-06, |
|
"logits/chosen": -1.2617765665054321, |
|
"logits/rejected": -1.117337703704834, |
|
"logps/chosen": -708.4508056640625, |
|
"logps/rejected": -800.98046875, |
|
"loss": 0.505, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -4.413388252258301, |
|
"rewards/margins": 1.2580486536026, |
|
"rewards/rejected": -5.671436786651611, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7040041873855012, |
|
"grad_norm": 9.87313175201416, |
|
"learning_rate": 1.2204423129559306e-06, |
|
"logits/chosen": -1.303589105606079, |
|
"logits/rejected": -1.2480074167251587, |
|
"logps/chosen": -684.2908935546875, |
|
"logps/rejected": -803.417724609375, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -4.188497543334961, |
|
"rewards/margins": 1.2328007221221924, |
|
"rewards/rejected": -5.421298027038574, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"grad_norm": 20.87190818786621, |
|
"learning_rate": 1.20087039953583e-06, |
|
"logits/chosen": -1.3425134420394897, |
|
"logits/rejected": -1.2293293476104736, |
|
"logps/chosen": -675.4927368164062, |
|
"logps/rejected": -778.1267700195312, |
|
"loss": 0.5354, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.113883018493652, |
|
"rewards/margins": 1.2898164987564087, |
|
"rewards/rejected": -5.403698921203613, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"eval_logits/chosen": -1.1949888467788696, |
|
"eval_logits/rejected": -1.071972131729126, |
|
"eval_logps/chosen": -677.2342529296875, |
|
"eval_logps/rejected": -781.0516967773438, |
|
"eval_loss": 0.4856647849082947, |
|
"eval_rewards/accuracies": 0.7444999814033508, |
|
"eval_rewards/chosen": -4.126193523406982, |
|
"eval_rewards/margins": 1.2387206554412842, |
|
"eval_rewards/rejected": -5.3649139404296875, |
|
"eval_runtime": 1583.0821, |
|
"eval_samples_per_second": 1.263, |
|
"eval_steps_per_second": 0.158, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7092384192619733, |
|
"grad_norm": 11.783329963684082, |
|
"learning_rate": 1.181406963063507e-06, |
|
"logits/chosen": -1.2694941759109497, |
|
"logits/rejected": -1.2013720273971558, |
|
"logps/chosen": -656.7574462890625, |
|
"logps/rejected": -778.4323120117188, |
|
"loss": 0.4917, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.9103951454162598, |
|
"rewards/margins": 1.222727656364441, |
|
"rewards/rejected": -5.13312292098999, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 7.543945789337158, |
|
"learning_rate": 1.1620536287303052e-06, |
|
"logits/chosen": -1.3921228647232056, |
|
"logits/rejected": -1.2788370847702026, |
|
"logps/chosen": -665.0604248046875, |
|
"logps/rejected": -739.6992797851562, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.739894390106201, |
|
"rewards/margins": 1.048128604888916, |
|
"rewards/rejected": -4.788022994995117, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.7144726511384454, |
|
"grad_norm": 10.31935977935791, |
|
"learning_rate": 1.1428120125340717e-06, |
|
"logits/chosen": -1.3381649255752563, |
|
"logits/rejected": -1.1965323686599731, |
|
"logps/chosen": -610.3319091796875, |
|
"logps/rejected": -734.8633422851562, |
|
"loss": 0.382, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.563275098800659, |
|
"rewards/margins": 1.659253478050232, |
|
"rewards/rejected": -5.222528457641602, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.7170897670766815, |
|
"grad_norm": 8.897191047668457, |
|
"learning_rate": 1.123683721144223e-06, |
|
"logits/chosen": -1.3344717025756836, |
|
"logits/rejected": -1.2315446138381958, |
|
"logps/chosen": -659.2275390625, |
|
"logps/rejected": -775.5537109375, |
|
"loss": 0.4248, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.7568366527557373, |
|
"rewards/margins": 1.4870188236236572, |
|
"rewards/rejected": -5.243854999542236, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.7197068830149176, |
|
"grad_norm": 7.804056167602539, |
|
"learning_rate": 1.1046703517675848e-06, |
|
"logits/chosen": -1.3492381572723389, |
|
"logits/rejected": -1.2712219953536987, |
|
"logps/chosen": -637.6734619140625, |
|
"logps/rejected": -759.937744140625, |
|
"loss": 0.5158, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.8261501789093018, |
|
"rewards/margins": 1.1614983081817627, |
|
"rewards/rejected": -4.987648963928223, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 12.590385437011719, |
|
"learning_rate": 1.085773492015028e-06, |
|
"logits/chosen": -1.3491287231445312, |
|
"logits/rejected": -1.1939796209335327, |
|
"logps/chosen": -623.6396484375, |
|
"logps/rejected": -731.343994140625, |
|
"loss": 0.4266, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.7736754417419434, |
|
"rewards/margins": 1.4199963808059692, |
|
"rewards/rejected": -5.193671226501465, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.7249411148913897, |
|
"grad_norm": 18.164583206176758, |
|
"learning_rate": 1.0669947197689034e-06, |
|
"logits/chosen": -1.3109443187713623, |
|
"logits/rejected": -1.1731343269348145, |
|
"logps/chosen": -682.9834594726562, |
|
"logps/rejected": -781.8642578125, |
|
"loss": 0.5067, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.103693962097168, |
|
"rewards/margins": 1.2765446901321411, |
|
"rewards/rejected": -5.3802385330200195, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.7275582308296258, |
|
"grad_norm": 9.202481269836426, |
|
"learning_rate": 1.048335603051291e-06, |
|
"logits/chosen": -1.3194820880889893, |
|
"logits/rejected": -1.1984010934829712, |
|
"logps/chosen": -711.1646728515625, |
|
"logps/rejected": -828.0762939453125, |
|
"loss": 0.4534, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -4.235322952270508, |
|
"rewards/margins": 1.4504263401031494, |
|
"rewards/rejected": -5.685749530792236, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.7301753467678618, |
|
"grad_norm": 11.143651962280273, |
|
"learning_rate": 1.0297976998930665e-06, |
|
"logits/chosen": -1.3201286792755127, |
|
"logits/rejected": -1.204737901687622, |
|
"logps/chosen": -666.336669921875, |
|
"logps/rejected": -790.0221557617188, |
|
"loss": 0.454, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -4.058554172515869, |
|
"rewards/margins": 1.5139802694320679, |
|
"rewards/rejected": -5.572534561157227, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 9.879022598266602, |
|
"learning_rate": 1.0113825582038078e-06, |
|
"logits/chosen": -1.3698309659957886, |
|
"logits/rejected": -1.259245753288269, |
|
"logps/chosen": -664.8604736328125, |
|
"logps/rejected": -768.4254760742188, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.9975292682647705, |
|
"rewards/margins": 1.2025644779205322, |
|
"rewards/rejected": -5.2000932693481445, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"eval_logits/chosen": -1.2732905149459839, |
|
"eval_logits/rejected": -1.1544142961502075, |
|
"eval_logps/chosen": -650.2978515625, |
|
"eval_logps/rejected": -755.7132568359375, |
|
"eval_loss": 0.48224759101867676, |
|
"eval_rewards/accuracies": 0.7459999918937683, |
|
"eval_rewards/chosen": -3.85683012008667, |
|
"eval_rewards/margins": 1.2546993494033813, |
|
"eval_rewards/rejected": -5.111529350280762, |
|
"eval_runtime": 1582.0799, |
|
"eval_samples_per_second": 1.264, |
|
"eval_steps_per_second": 0.158, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.735409578644334, |
|
"grad_norm": 10.106141090393066, |
|
"learning_rate": 9.930917156425477e-07, |
|
"logits/chosen": -1.3600491285324097, |
|
"logits/rejected": -1.2630369663238525, |
|
"logps/chosen": -658.0189208984375, |
|
"logps/rejected": -777.9553833007812, |
|
"loss": 0.5103, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.969266176223755, |
|
"rewards/margins": 1.2290141582489014, |
|
"rewards/rejected": -5.198280334472656, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.73802669458257, |
|
"grad_norm": 17.606903076171875, |
|
"learning_rate": 9.749266994893756e-07, |
|
"logits/chosen": -1.3096013069152832, |
|
"logits/rejected": -1.1738802194595337, |
|
"logps/chosen": -621.0303955078125, |
|
"logps/rejected": -706.1574096679688, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.8049144744873047, |
|
"rewards/margins": 0.9849715232849121, |
|
"rewards/rejected": -4.789886474609375, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.7406438105208061, |
|
"grad_norm": 14.5999174118042, |
|
"learning_rate": 9.56889026517913e-07, |
|
"logits/chosen": -1.3722821474075317, |
|
"logits/rejected": -1.2702796459197998, |
|
"logps/chosen": -643.744873046875, |
|
"logps/rejected": -727.7888793945312, |
|
"loss": 0.5005, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.8248817920684814, |
|
"rewards/margins": 1.1005748510360718, |
|
"rewards/rejected": -4.925456523895264, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 9.080976486206055, |
|
"learning_rate": 9.389802028686617e-07, |
|
"logits/chosen": -1.44389009475708, |
|
"logits/rejected": -1.3529036045074463, |
|
"logps/chosen": -636.1320190429688, |
|
"logps/rejected": -695.2192993164062, |
|
"loss": 0.5432, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.754615068435669, |
|
"rewards/margins": 0.9210684895515442, |
|
"rewards/rejected": -4.67568302154541, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.7458780423972782, |
|
"grad_norm": 10.234068870544434, |
|
"learning_rate": 9.212017239232427e-07, |
|
"logits/chosen": -1.3670417070388794, |
|
"logits/rejected": -1.234607458114624, |
|
"logps/chosen": -646.2342529296875, |
|
"logps/rejected": -762.1930541992188, |
|
"loss": 0.4571, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.7331900596618652, |
|
"rewards/margins": 1.3892850875854492, |
|
"rewards/rejected": -5.122475624084473, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.7484951583355143, |
|
"grad_norm": 9.236120223999023, |
|
"learning_rate": 9.03555074179533e-07, |
|
"logits/chosen": -1.3387397527694702, |
|
"logits/rejected": -1.3080878257751465, |
|
"logps/chosen": -629.9612426757812, |
|
"logps/rejected": -772.1923828125, |
|
"loss": 0.4375, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.732499599456787, |
|
"rewards/margins": 1.3648207187652588, |
|
"rewards/rejected": -5.097320556640625, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.7511122742737504, |
|
"grad_norm": 9.961119651794434, |
|
"learning_rate": 8.860417271277067e-07, |
|
"logits/chosen": -1.4475278854370117, |
|
"logits/rejected": -1.3916738033294678, |
|
"logps/chosen": -651.3665161132812, |
|
"logps/rejected": -753.1768798828125, |
|
"loss": 0.4777, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.8218841552734375, |
|
"rewards/margins": 1.0978153944015503, |
|
"rewards/rejected": -4.919699192047119, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 11.848674774169922, |
|
"learning_rate": 8.686631451272029e-07, |
|
"logits/chosen": -1.408536672592163, |
|
"logits/rejected": -1.2724813222885132, |
|
"logps/chosen": -660.1790161132812, |
|
"logps/rejected": -770.7523193359375, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -4.0685577392578125, |
|
"rewards/margins": 1.3600984811782837, |
|
"rewards/rejected": -5.428656101226807, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.7563465061502225, |
|
"grad_norm": 8.26095199584961, |
|
"learning_rate": 8.514207792846168e-07, |
|
"logits/chosen": -1.429696798324585, |
|
"logits/rejected": -1.319048523902893, |
|
"logps/chosen": -654.4461669921875, |
|
"logps/rejected": -754.9195556640625, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -4.036130905151367, |
|
"rewards/margins": 1.2892920970916748, |
|
"rewards/rejected": -5.325423240661621, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"grad_norm": 8.51677417755127, |
|
"learning_rate": 8.343160693325356e-07, |
|
"logits/chosen": -1.330289602279663, |
|
"logits/rejected": -1.227264642715454, |
|
"logps/chosen": -666.0051879882812, |
|
"logps/rejected": -786.2086791992188, |
|
"loss": 0.5135, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -4.104722023010254, |
|
"rewards/margins": 1.2456867694854736, |
|
"rewards/rejected": -5.350409507751465, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"eval_logits/chosen": -1.296103835105896, |
|
"eval_logits/rejected": -1.1772661209106445, |
|
"eval_logps/chosen": -659.640625, |
|
"eval_logps/rejected": -767.6243896484375, |
|
"eval_loss": 0.4806530177593231, |
|
"eval_rewards/accuracies": 0.7475000023841858, |
|
"eval_rewards/chosen": -3.9502570629119873, |
|
"eval_rewards/margins": 1.2803831100463867, |
|
"eval_rewards/rejected": -5.230639457702637, |
|
"eval_runtime": 1581.8876, |
|
"eval_samples_per_second": 1.264, |
|
"eval_steps_per_second": 0.158, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7615807380266946, |
|
"grad_norm": 7.22358512878418, |
|
"learning_rate": 8.173504435093174e-07, |
|
"logits/chosen": -1.3467257022857666, |
|
"logits/rejected": -1.1828867197036743, |
|
"logps/chosen": -630.05615234375, |
|
"logps/rejected": -734.7799072265625, |
|
"loss": 0.4723, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.8855319023132324, |
|
"rewards/margins": 1.3687862157821655, |
|
"rewards/rejected": -5.254318714141846, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 8.41374683380127, |
|
"learning_rate": 8.00525318439836e-07, |
|
"logits/chosen": -1.383126974105835, |
|
"logits/rejected": -1.2696807384490967, |
|
"logps/chosen": -666.6580810546875, |
|
"logps/rejected": -765.6370849609375, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.899862766265869, |
|
"rewards/margins": 1.0650051832199097, |
|
"rewards/rejected": -4.964868068695068, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7668149699031667, |
|
"grad_norm": 7.640721321105957, |
|
"learning_rate": 7.838420990171927e-07, |
|
"logits/chosen": -1.4619683027267456, |
|
"logits/rejected": -1.3194328546524048, |
|
"logps/chosen": -648.1679077148438, |
|
"logps/rejected": -739.3377685546875, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.7884438037872314, |
|
"rewards/margins": 1.1673457622528076, |
|
"rewards/rejected": -4.955790042877197, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7694320858414028, |
|
"grad_norm": 6.787487030029297, |
|
"learning_rate": 7.673021782854084e-07, |
|
"logits/chosen": -1.3260360956192017, |
|
"logits/rejected": -1.2002280950546265, |
|
"logps/chosen": -652.7042236328125, |
|
"logps/rejected": -749.501220703125, |
|
"loss": 0.4834, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.9212698936462402, |
|
"rewards/margins": 1.38629150390625, |
|
"rewards/rejected": -5.307560920715332, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7720492017796389, |
|
"grad_norm": 12.033089637756348, |
|
"learning_rate": 7.509069373231039e-07, |
|
"logits/chosen": -1.3529894351959229, |
|
"logits/rejected": -1.241321086883545, |
|
"logps/chosen": -645.5877075195312, |
|
"logps/rejected": -721.0560913085938, |
|
"loss": 0.5339, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.9297664165496826, |
|
"rewards/margins": 1.0031468868255615, |
|
"rewards/rejected": -4.932913780212402, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 5.963113784790039, |
|
"learning_rate": 7.346577451281822e-07, |
|
"logits/chosen": -1.3783152103424072, |
|
"logits/rejected": -1.3106451034545898, |
|
"logps/chosen": -633.4246215820312, |
|
"logps/rejected": -740.6929931640625, |
|
"loss": 0.4649, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.7124524116516113, |
|
"rewards/margins": 1.314141035079956, |
|
"rewards/rejected": -5.0265936851501465, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.777283433656111, |
|
"grad_norm": 10.886439323425293, |
|
"learning_rate": 7.185559585035138e-07, |
|
"logits/chosen": -1.4251292943954468, |
|
"logits/rejected": -1.27992844581604, |
|
"logps/chosen": -660.6417846679688, |
|
"logps/rejected": -765.4755249023438, |
|
"loss": 0.4755, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.779376268386841, |
|
"rewards/margins": 1.207291841506958, |
|
"rewards/rejected": -4.986666679382324, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7799005495943471, |
|
"grad_norm": 7.657388210296631, |
|
"learning_rate": 7.026029219436504e-07, |
|
"logits/chosen": -1.442101240158081, |
|
"logits/rejected": -1.2947075366973877, |
|
"logps/chosen": -608.75390625, |
|
"logps/rejected": -730.2647705078125, |
|
"loss": 0.4571, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.5647411346435547, |
|
"rewards/margins": 1.344158411026001, |
|
"rewards/rejected": -4.908899784088135, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7825176655325831, |
|
"grad_norm": 8.993943214416504, |
|
"learning_rate": 6.867999675225523e-07, |
|
"logits/chosen": -1.4737087488174438, |
|
"logits/rejected": -1.3463550806045532, |
|
"logps/chosen": -584.3572387695312, |
|
"logps/rejected": -691.84130859375, |
|
"loss": 0.4735, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.5355136394500732, |
|
"rewards/margins": 1.2167141437530518, |
|
"rewards/rejected": -4.752227783203125, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 9.367300033569336, |
|
"learning_rate": 6.711484147823663e-07, |
|
"logits/chosen": -1.3742111921310425, |
|
"logits/rejected": -1.2975659370422363, |
|
"logps/chosen": -587.8654174804688, |
|
"logps/rejected": -723.0962524414062, |
|
"loss": 0.4613, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.5372672080993652, |
|
"rewards/margins": 1.3036311864852905, |
|
"rewards/rejected": -4.840898513793945, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"eval_logits/chosen": -1.312312126159668, |
|
"eval_logits/rejected": -1.1940079927444458, |
|
"eval_logps/chosen": -629.1587524414062, |
|
"eval_logps/rejected": -726.3348999023438, |
|
"eval_loss": 0.47825390100479126, |
|
"eval_rewards/accuracies": 0.7544999718666077, |
|
"eval_rewards/chosen": -3.645437717437744, |
|
"eval_rewards/margins": 1.1723082065582275, |
|
"eval_rewards/rejected": -4.817745685577393, |
|
"eval_runtime": 1582.4909, |
|
"eval_samples_per_second": 1.264, |
|
"eval_steps_per_second": 0.158, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7877518974090553, |
|
"grad_norm": 10.457430839538574, |
|
"learning_rate": 6.556495706232413e-07, |
|
"logits/chosen": -1.3809382915496826, |
|
"logits/rejected": -1.31160569190979, |
|
"logps/chosen": -637.9448852539062, |
|
"logps/rejected": -725.9009399414062, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.6930198669433594, |
|
"rewards/margins": 1.064775824546814, |
|
"rewards/rejected": -4.757795810699463, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7903690133472913, |
|
"grad_norm": 10.19312858581543, |
|
"learning_rate": 6.403047291942057e-07, |
|
"logits/chosen": -1.3200327157974243, |
|
"logits/rejected": -1.176688551902771, |
|
"logps/chosen": -585.4561157226562, |
|
"logps/rejected": -678.8817138671875, |
|
"loss": 0.4657, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.592146396636963, |
|
"rewards/margins": 1.1788526773452759, |
|
"rewards/rejected": -4.770998954772949, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7929861292855274, |
|
"grad_norm": 10.267387390136719, |
|
"learning_rate": 6.251151717851023e-07, |
|
"logits/chosen": -1.3837000131607056, |
|
"logits/rejected": -1.2987123727798462, |
|
"logps/chosen": -583.4530029296875, |
|
"logps/rejected": -682.2948608398438, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.5015385150909424, |
|
"rewards/margins": 1.1210825443267822, |
|
"rewards/rejected": -4.622621059417725, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 6.017190933227539, |
|
"learning_rate": 6.100821667196041e-07, |
|
"logits/chosen": -1.5390170812606812, |
|
"logits/rejected": -1.3035577535629272, |
|
"logps/chosen": -615.9330444335938, |
|
"logps/rejected": -661.38037109375, |
|
"loss": 0.4849, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.404139757156372, |
|
"rewards/margins": 1.1094605922698975, |
|
"rewards/rejected": -4.5136003494262695, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7982203611619995, |
|
"grad_norm": 7.074293613433838, |
|
"learning_rate": 5.952069692493062e-07, |
|
"logits/chosen": -1.3566112518310547, |
|
"logits/rejected": -1.2582927942276, |
|
"logps/chosen": -576.7879638671875, |
|
"logps/rejected": -713.3567504882812, |
|
"loss": 0.4162, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.4851574897766113, |
|
"rewards/margins": 1.3315637111663818, |
|
"rewards/rejected": -4.816721439361572, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8008374771002356, |
|
"grad_norm": 10.666440963745117, |
|
"learning_rate": 5.80490821448918e-07, |
|
"logits/chosen": -1.3122138977050781, |
|
"logits/rejected": -1.2932623624801636, |
|
"logps/chosen": -627.7955322265625, |
|
"logps/rejected": -814.8255615234375, |
|
"loss": 0.4273, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.653444290161133, |
|
"rewards/margins": 1.4460790157318115, |
|
"rewards/rejected": -5.099523067474365, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8034545930384716, |
|
"grad_norm": 9.67546558380127, |
|
"learning_rate": 5.659349521125459e-07, |
|
"logits/chosen": -1.4693623781204224, |
|
"logits/rejected": -1.4153467416763306, |
|
"logps/chosen": -655.6878662109375, |
|
"logps/rejected": -741.1046752929688, |
|
"loss": 0.5202, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.7438597679138184, |
|
"rewards/margins": 1.0592159032821655, |
|
"rewards/rejected": -4.803075790405273, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 7.3843865394592285, |
|
"learning_rate": 5.5154057665109e-07, |
|
"logits/chosen": -1.4262231588363647, |
|
"logits/rejected": -1.2912954092025757, |
|
"logps/chosen": -656.511962890625, |
|
"logps/rejected": -770.8526611328125, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.956704616546631, |
|
"rewards/margins": 1.4080426692962646, |
|
"rewards/rejected": -5.364747524261475, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.8086888249149438, |
|
"grad_norm": 7.734601020812988, |
|
"learning_rate": 5.373088969907586e-07, |
|
"logits/chosen": -1.4562673568725586, |
|
"logits/rejected": -1.2930481433868408, |
|
"logps/chosen": -664.8043823242188, |
|
"logps/rejected": -734.4459228515625, |
|
"loss": 0.4492, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.8809738159179688, |
|
"rewards/margins": 1.1982970237731934, |
|
"rewards/rejected": -5.079270362854004, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"grad_norm": 8.992291450500488, |
|
"learning_rate": 5.23241101472709e-07, |
|
"logits/chosen": -1.376122236251831, |
|
"logits/rejected": -1.2622703313827515, |
|
"logps/chosen": -655.6407470703125, |
|
"logps/rejected": -750.63818359375, |
|
"loss": 0.4904, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.794213056564331, |
|
"rewards/margins": 1.1705824136734009, |
|
"rewards/rejected": -4.9647955894470215, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"eval_logits/chosen": -1.2847076654434204, |
|
"eval_logits/rejected": -1.1654341220855713, |
|
"eval_logps/chosen": -653.8601684570312, |
|
"eval_logps/rejected": -760.7857055664062, |
|
"eval_loss": 0.4786832332611084, |
|
"eval_rewards/accuracies": 0.7534999847412109, |
|
"eval_rewards/chosen": -3.892453193664551, |
|
"eval_rewards/margins": 1.2698006629943848, |
|
"eval_rewards/rejected": -5.162253379821777, |
|
"eval_runtime": 1582.3299, |
|
"eval_samples_per_second": 1.264, |
|
"eval_steps_per_second": 0.158, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8139230567914159, |
|
"grad_norm": 9.587347030639648, |
|
"learning_rate": 5.09338364753818e-07, |
|
"logits/chosen": -1.4489879608154297, |
|
"logits/rejected": -1.285023808479309, |
|
"logps/chosen": -667.4219360351562, |
|
"logps/rejected": -779.0870361328125, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.8209068775177, |
|
"rewards/margins": 1.2750650644302368, |
|
"rewards/rejected": -5.095972061157227, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 9.464641571044922, |
|
"learning_rate": 4.956018477086005e-07, |
|
"logits/chosen": -1.4130933284759521, |
|
"logits/rejected": -1.2669379711151123, |
|
"logps/chosen": -666.3409423828125, |
|
"logps/rejected": -763.8473510742188, |
|
"loss": 0.5186, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.9732048511505127, |
|
"rewards/margins": 1.2480720281600952, |
|
"rewards/rejected": -5.221276760101318, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.819157288667888, |
|
"grad_norm": 11.495794296264648, |
|
"learning_rate": 4.820326973322764e-07, |
|
"logits/chosen": -1.332169771194458, |
|
"logits/rejected": -1.248384952545166, |
|
"logps/chosen": -647.7191162109375, |
|
"logps/rejected": -768.2628173828125, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.9689033031463623, |
|
"rewards/margins": 1.2653844356536865, |
|
"rewards/rejected": -5.234287738800049, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.821774404606124, |
|
"grad_norm": 13.087740898132324, |
|
"learning_rate": 4.686320466449981e-07, |
|
"logits/chosen": -1.3172610998153687, |
|
"logits/rejected": -1.1501275300979614, |
|
"logps/chosen": -616.0255126953125, |
|
"logps/rejected": -763.9444580078125, |
|
"loss": 0.4235, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.7686569690704346, |
|
"rewards/margins": 1.5006383657455444, |
|
"rewards/rejected": -5.2692952156066895, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.8243915205443602, |
|
"grad_norm": 7.49388313293457, |
|
"learning_rate": 4.554010145972418e-07, |
|
"logits/chosen": -1.4834858179092407, |
|
"logits/rejected": -1.3113733530044556, |
|
"logps/chosen": -653.4777221679688, |
|
"logps/rejected": -768.167236328125, |
|
"loss": 0.5502, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.8982043266296387, |
|
"rewards/margins": 1.2591768503189087, |
|
"rewards/rejected": -5.157381534576416, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 9.854113578796387, |
|
"learning_rate": 4.4234070597637455e-07, |
|
"logits/chosen": -1.33645761013031, |
|
"logits/rejected": -1.2524462938308716, |
|
"logps/chosen": -650.8976440429688, |
|
"logps/rejected": -750.16796875, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.754307270050049, |
|
"rewards/margins": 1.112375020980835, |
|
"rewards/rejected": -4.866682052612305, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.8296257524208323, |
|
"grad_norm": 6.365306377410889, |
|
"learning_rate": 4.2945221131440783e-07, |
|
"logits/chosen": -1.3347636461257935, |
|
"logits/rejected": -1.1535985469818115, |
|
"logps/chosen": -623.1888427734375, |
|
"logps/rejected": -730.3753662109375, |
|
"loss": 0.4036, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.575324296951294, |
|
"rewards/margins": 1.3884552717208862, |
|
"rewards/rejected": -4.963778972625732, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.8322428683590684, |
|
"grad_norm": 10.03058910369873, |
|
"learning_rate": 4.167366067969381e-07, |
|
"logits/chosen": -1.4377801418304443, |
|
"logits/rejected": -1.3695132732391357, |
|
"logps/chosen": -578.0193481445312, |
|
"logps/rejected": -705.9163208007812, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.5517592430114746, |
|
"rewards/margins": 1.0413546562194824, |
|
"rewards/rejected": -4.593113899230957, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.8348599842973043, |
|
"grad_norm": 5.4473876953125, |
|
"learning_rate": 4.041949541732826e-07, |
|
"logits/chosen": -1.4346039295196533, |
|
"logits/rejected": -1.3620599508285522, |
|
"logps/chosen": -621.5526733398438, |
|
"logps/rejected": -724.8057861328125, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.5769572257995605, |
|
"rewards/margins": 1.1758462190628052, |
|
"rewards/rejected": -4.752803325653076, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 11.63176441192627, |
|
"learning_rate": 3.9182830066782614e-07, |
|
"logits/chosen": -1.3472647666931152, |
|
"logits/rejected": -1.330804467201233, |
|
"logps/chosen": -613.8287963867188, |
|
"logps/rejected": -755.5730590820312, |
|
"loss": 0.4706, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.5616798400878906, |
|
"rewards/margins": 1.2930762767791748, |
|
"rewards/rejected": -4.854755878448486, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"eval_logits/chosen": -1.3301115036010742, |
|
"eval_logits/rejected": -1.2139098644256592, |
|
"eval_logps/chosen": -613.1915283203125, |
|
"eval_logps/rejected": -714.2922973632812, |
|
"eval_loss": 0.4755466878414154, |
|
"eval_rewards/accuracies": 0.7524999976158142, |
|
"eval_rewards/chosen": -3.4857659339904785, |
|
"eval_rewards/margins": 1.2115534543991089, |
|
"eval_rewards/rejected": -4.697319507598877, |
|
"eval_runtime": 1591.0618, |
|
"eval_samples_per_second": 1.257, |
|
"eval_steps_per_second": 0.157, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8400942161737766, |
|
"grad_norm": 6.076069355010986, |
|
"learning_rate": 3.796376788925771e-07, |
|
"logits/chosen": -1.354421854019165, |
|
"logits/rejected": -1.2948524951934814, |
|
"logps/chosen": -597.9942016601562, |
|
"logps/rejected": -688.4937133789062, |
|
"loss": 0.4835, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.366337299346924, |
|
"rewards/margins": 1.1454763412475586, |
|
"rewards/rejected": -4.511813163757324, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.8427113321120125, |
|
"grad_norm": 7.7770161628723145, |
|
"learning_rate": 3.676241067609465e-07, |
|
"logits/chosen": -1.432493805885315, |
|
"logits/rejected": -1.3264445066452026, |
|
"logps/chosen": -639.21142578125, |
|
"logps/rejected": -703.9612426757812, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.4550623893737793, |
|
"rewards/margins": 1.0796080827713013, |
|
"rewards/rejected": -4.534670352935791, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.8453284480502486, |
|
"grad_norm": 15.151912689208984, |
|
"learning_rate": 3.5578858740274976e-07, |
|
"logits/chosen": -1.369700312614441, |
|
"logits/rejected": -1.2841273546218872, |
|
"logps/chosen": -613.7825927734375, |
|
"logps/rejected": -702.5382080078125, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.5211188793182373, |
|
"rewards/margins": 1.0141242742538452, |
|
"rewards/rejected": -4.535243034362793, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 11.273491859436035, |
|
"learning_rate": 3.44132109080447e-07, |
|
"logits/chosen": -1.5450893640518188, |
|
"logits/rejected": -1.3859776258468628, |
|
"logps/chosen": -600.0426025390625, |
|
"logps/rejected": -696.7976684570312, |
|
"loss": 0.4268, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.348644256591797, |
|
"rewards/margins": 1.3427956104278564, |
|
"rewards/rejected": -4.691439628601074, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.8505626799267207, |
|
"grad_norm": 7.93589973449707, |
|
"learning_rate": 3.3265564510662344e-07, |
|
"logits/chosen": -1.4886678457260132, |
|
"logits/rejected": -1.3508810997009277, |
|
"logps/chosen": -630.6646728515625, |
|
"logps/rejected": -736.4685668945312, |
|
"loss": 0.4298, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.374323606491089, |
|
"rewards/margins": 1.3093299865722656, |
|
"rewards/rejected": -4.683653354644775, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8531797958649568, |
|
"grad_norm": 17.22613525390625, |
|
"learning_rate": 3.213601537627195e-07, |
|
"logits/chosen": -1.3882197141647339, |
|
"logits/rejected": -1.2806700468063354, |
|
"logps/chosen": -637.3515014648438, |
|
"logps/rejected": -732.1290283203125, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.802922010421753, |
|
"rewards/margins": 1.1185327768325806, |
|
"rewards/rejected": -4.921454906463623, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.8557969118031928, |
|
"grad_norm": 14.590155601501465, |
|
"learning_rate": 3.1024657821901063e-07, |
|
"logits/chosen": -1.4339938163757324, |
|
"logits/rejected": -1.3640520572662354, |
|
"logps/chosen": -598.9895629882812, |
|
"logps/rejected": -707.3479614257812, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.513320207595825, |
|
"rewards/margins": 1.2418614625930786, |
|
"rewards/rejected": -4.755181789398193, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 12.836420059204102, |
|
"learning_rate": 2.9931584645585654e-07, |
|
"logits/chosen": -1.3704925775527954, |
|
"logits/rejected": -1.3468918800354004, |
|
"logps/chosen": -630.4825439453125, |
|
"logps/rejected": -753.4363403320312, |
|
"loss": 0.4829, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.5602269172668457, |
|
"rewards/margins": 1.2000439167022705, |
|
"rewards/rejected": -4.760270595550537, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.861031143679665, |
|
"grad_norm": 5.077442169189453, |
|
"learning_rate": 2.885688711862136e-07, |
|
"logits/chosen": -1.4080350399017334, |
|
"logits/rejected": -1.3973747491836548, |
|
"logps/chosen": -639.9348754882812, |
|
"logps/rejected": -779.698974609375, |
|
"loss": 0.5049, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.790173053741455, |
|
"rewards/margins": 1.416594386100769, |
|
"rewards/rejected": -5.206767559051514, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"grad_norm": 6.491420269012451, |
|
"learning_rate": 2.7800654977942486e-07, |
|
"logits/chosen": -1.3865365982055664, |
|
"logits/rejected": -1.275614857673645, |
|
"logps/chosen": -620.8766479492188, |
|
"logps/rejected": -736.026611328125, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.636903762817383, |
|
"rewards/margins": 1.1893088817596436, |
|
"rewards/rejected": -4.8262128829956055, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"eval_logits/chosen": -1.3147002458572388, |
|
"eval_logits/rejected": -1.1985875368118286, |
|
"eval_logps/chosen": -633.2412109375, |
|
"eval_logps/rejected": -738.4901123046875, |
|
"eval_loss": 0.4761752188205719, |
|
"eval_rewards/accuracies": 0.7524999976158142, |
|
"eval_rewards/chosen": -3.686262607574463, |
|
"eval_rewards/margins": 1.2530354261398315, |
|
"eval_rewards/rejected": -4.939297676086426, |
|
"eval_runtime": 1583.9739, |
|
"eval_samples_per_second": 1.263, |
|
"eval_steps_per_second": 0.158, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.8662653755561371, |
|
"grad_norm": 19.23656463623047, |
|
"learning_rate": 2.6762976418628797e-07, |
|
"logits/chosen": -1.409970998764038, |
|
"logits/rejected": -1.2858604192733765, |
|
"logps/chosen": -586.6197509765625, |
|
"logps/rejected": -659.4794921875, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.5687224864959717, |
|
"rewards/margins": 1.1649234294891357, |
|
"rewards/rejected": -4.733645915985107, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 9.18576431274414, |
|
"learning_rate": 2.5743938086541354e-07, |
|
"logits/chosen": -1.3930479288101196, |
|
"logits/rejected": -1.2830320596694946, |
|
"logps/chosen": -632.0997314453125, |
|
"logps/rejected": -728.1014404296875, |
|
"loss": 0.5094, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.6787362098693848, |
|
"rewards/margins": 1.2093604803085327, |
|
"rewards/rejected": -4.888096809387207, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.8714996074326092, |
|
"grad_norm": 14.547459602355957, |
|
"learning_rate": 2.4743625071087574e-07, |
|
"logits/chosen": -1.539896011352539, |
|
"logits/rejected": -1.3763868808746338, |
|
"logps/chosen": -630.2366943359375, |
|
"logps/rejected": -743.1935424804688, |
|
"loss": 0.4683, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.5275490283966064, |
|
"rewards/margins": 1.4253052473068237, |
|
"rewards/rejected": -4.952854156494141, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8741167233708453, |
|
"grad_norm": 10.659337043762207, |
|
"learning_rate": 2.3762120898116498e-07, |
|
"logits/chosen": -1.4196306467056274, |
|
"logits/rejected": -1.3246088027954102, |
|
"logps/chosen": -650.2425537109375, |
|
"logps/rejected": -748.0562744140625, |
|
"loss": 0.4816, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.8152756690979004, |
|
"rewards/margins": 1.0581191778182983, |
|
"rewards/rejected": -4.873394966125488, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.8767338393090814, |
|
"grad_norm": 7.751733779907227, |
|
"learning_rate": 2.2799507522944048e-07, |
|
"logits/chosen": -1.364739179611206, |
|
"logits/rejected": -1.269736409187317, |
|
"logps/chosen": -621.5891723632812, |
|
"logps/rejected": -746.6184692382812, |
|
"loss": 0.4506, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.5590720176696777, |
|
"rewards/margins": 1.3328505754470825, |
|
"rewards/rejected": -4.891922473907471, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 8.421998023986816, |
|
"learning_rate": 2.1855865323510056e-07, |
|
"logits/chosen": -1.4129679203033447, |
|
"logits/rejected": -1.2285114526748657, |
|
"logps/chosen": -637.7249755859375, |
|
"logps/rejected": -789.73779296875, |
|
"loss": 0.4224, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.650952100753784, |
|
"rewards/margins": 1.57891047000885, |
|
"rewards/rejected": -5.229863166809082, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8819680711855535, |
|
"grad_norm": 7.72039270401001, |
|
"learning_rate": 2.0931273093666575e-07, |
|
"logits/chosen": -1.3570077419281006, |
|
"logits/rejected": -1.2234935760498047, |
|
"logps/chosen": -617.6724243164062, |
|
"logps/rejected": -736.6064453125, |
|
"loss": 0.4279, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.775437831878662, |
|
"rewards/margins": 1.3919856548309326, |
|
"rewards/rejected": -5.167424201965332, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8845851871237895, |
|
"grad_norm": 12.305501937866211, |
|
"learning_rate": 2.002580803659873e-07, |
|
"logits/chosen": -1.3679782152175903, |
|
"logits/rejected": -1.2431459426879883, |
|
"logps/chosen": -638.044921875, |
|
"logps/rejected": -742.0861206054688, |
|
"loss": 0.469, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.8360562324523926, |
|
"rewards/margins": 1.2222352027893066, |
|
"rewards/rejected": -5.058291435241699, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8872023030620256, |
|
"grad_norm": 6.246249675750732, |
|
"learning_rate": 1.913954575837826e-07, |
|
"logits/chosen": -1.414441466331482, |
|
"logits/rejected": -1.2020373344421387, |
|
"logps/chosen": -651.7365112304688, |
|
"logps/rejected": -722.0252685546875, |
|
"loss": 0.4699, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.7970893383026123, |
|
"rewards/margins": 1.1998703479766846, |
|
"rewards/rejected": -4.996959209442139, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 8.567130088806152, |
|
"learning_rate": 1.827256026165028e-07, |
|
"logits/chosen": -1.42445707321167, |
|
"logits/rejected": -1.2594492435455322, |
|
"logps/chosen": -675.4766235351562, |
|
"logps/rejected": -760.4920654296875, |
|
"loss": 0.4446, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.605090618133545, |
|
"rewards/margins": 1.4216583967208862, |
|
"rewards/rejected": -5.026749134063721, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"eval_logits/chosen": -1.2863726615905762, |
|
"eval_logits/rejected": -1.1676445007324219, |
|
"eval_logps/chosen": -646.8135375976562, |
|
"eval_logps/rejected": -755.2252197265625, |
|
"eval_loss": 0.476205974817276, |
|
"eval_rewards/accuracies": 0.7534999847412109, |
|
"eval_rewards/chosen": -3.821986436843872, |
|
"eval_rewards/margins": 1.284662127494812, |
|
"eval_rewards/rejected": -5.1066484451293945, |
|
"eval_runtime": 1584.3019, |
|
"eval_samples_per_second": 1.262, |
|
"eval_steps_per_second": 0.158, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8924365349384977, |
|
"grad_norm": 9.226576805114746, |
|
"learning_rate": 1.7424923939454274e-07, |
|
"logits/chosen": -1.3717691898345947, |
|
"logits/rejected": -1.211722493171692, |
|
"logps/chosen": -668.0113525390625, |
|
"logps/rejected": -759.1041259765625, |
|
"loss": 0.4317, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.867192506790161, |
|
"rewards/margins": 1.3442370891571045, |
|
"rewards/rejected": -5.211429119110107, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8950536508767338, |
|
"grad_norm": 19.934711456298828, |
|
"learning_rate": 1.6596707569179304e-07, |
|
"logits/chosen": -1.4707757234573364, |
|
"logits/rejected": -1.3320478200912476, |
|
"logps/chosen": -664.7568969726562, |
|
"logps/rejected": -757.882568359375, |
|
"loss": 0.4794, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.836893081665039, |
|
"rewards/margins": 1.2826611995697021, |
|
"rewards/rejected": -5.119554042816162, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.8976707668149699, |
|
"grad_norm": 10.98475170135498, |
|
"learning_rate": 1.578798030665385e-07, |
|
"logits/chosen": -1.4144407510757446, |
|
"logits/rejected": -1.2425600290298462, |
|
"logps/chosen": -652.3900146484375, |
|
"logps/rejected": -788.5800170898438, |
|
"loss": 0.431, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.7777092456817627, |
|
"rewards/margins": 1.5329736471176147, |
|
"rewards/rejected": -5.310682773590088, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 8.336905479431152, |
|
"learning_rate": 1.499880968037165e-07, |
|
"logits/chosen": -1.3964354991912842, |
|
"logits/rejected": -1.2669428586959839, |
|
"logps/chosen": -629.9685668945312, |
|
"logps/rejected": -719.1429443359375, |
|
"loss": 0.5023, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.744743824005127, |
|
"rewards/margins": 1.254372000694275, |
|
"rewards/rejected": -4.999115467071533, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.902904998691442, |
|
"grad_norm": 13.396973609924316, |
|
"learning_rate": 1.4229261585852805e-07, |
|
"logits/chosen": -1.426830530166626, |
|
"logits/rejected": -1.3484631776809692, |
|
"logps/chosen": -632.507568359375, |
|
"logps/rejected": -737.5188598632812, |
|
"loss": 0.4408, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.6420531272888184, |
|
"rewards/margins": 1.3079338073730469, |
|
"rewards/rejected": -4.949986934661865, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.9055221146296781, |
|
"grad_norm": 12.532068252563477, |
|
"learning_rate": 1.3479400280141886e-07, |
|
"logits/chosen": -1.3498502969741821, |
|
"logits/rejected": -1.3053711652755737, |
|
"logps/chosen": -631.3232421875, |
|
"logps/rejected": -761.801025390625, |
|
"loss": 0.4882, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.851062059402466, |
|
"rewards/margins": 1.3252581357955933, |
|
"rewards/rejected": -5.1763200759887695, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.9081392305679141, |
|
"grad_norm": 9.887036323547363, |
|
"learning_rate": 1.2749288376442044e-07, |
|
"logits/chosen": -1.4401438236236572, |
|
"logits/rejected": -1.2534105777740479, |
|
"logps/chosen": -664.2423706054688, |
|
"logps/rejected": -740.9425048828125, |
|
"loss": 0.4562, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.6783053874969482, |
|
"rewards/margins": 1.3420859575271606, |
|
"rewards/rejected": -5.020391464233398, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 8.95275592803955, |
|
"learning_rate": 1.203898683888713e-07, |
|
"logits/chosen": -1.438189148902893, |
|
"logits/rejected": -1.3202855587005615, |
|
"logps/chosen": -631.1199951171875, |
|
"logps/rejected": -736.70458984375, |
|
"loss": 0.5591, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -3.8719773292541504, |
|
"rewards/margins": 1.087269902229309, |
|
"rewards/rejected": -4.959246635437012, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.9133734624443863, |
|
"grad_norm": 9.944456100463867, |
|
"learning_rate": 1.1348554977451132e-07, |
|
"logits/chosen": -1.4648234844207764, |
|
"logits/rejected": -1.3383164405822754, |
|
"logps/chosen": -654.9630737304688, |
|
"logps/rejected": -742.1627807617188, |
|
"loss": 0.5072, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -3.731118679046631, |
|
"rewards/margins": 1.2008678913116455, |
|
"rewards/rejected": -4.9319868087768555, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"grad_norm": 6.555506229400635, |
|
"learning_rate": 1.0678050442995802e-07, |
|
"logits/chosen": -1.4295841455459595, |
|
"logits/rejected": -1.2433637380599976, |
|
"logps/chosen": -656.9864501953125, |
|
"logps/rejected": -733.3885498046875, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.7627615928649902, |
|
"rewards/margins": 1.2152563333511353, |
|
"rewards/rejected": -4.978017807006836, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"eval_logits/chosen": -1.3105764389038086, |
|
"eval_logits/rejected": -1.1933128833770752, |
|
"eval_logps/chosen": -640.232666015625, |
|
"eval_logps/rejected": -749.0795288085938, |
|
"eval_loss": 0.47588038444519043, |
|
"eval_rewards/accuracies": 0.753000020980835, |
|
"eval_rewards/chosen": -3.7561774253845215, |
|
"eval_rewards/margins": 1.2890138626098633, |
|
"eval_rewards/rejected": -5.045191287994385, |
|
"eval_runtime": 1584.5871, |
|
"eval_samples_per_second": 1.262, |
|
"eval_steps_per_second": 0.158, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9186076943208584, |
|
"grad_norm": 10.787802696228027, |
|
"learning_rate": 1.0027529222456755e-07, |
|
"logits/chosen": -1.4012380838394165, |
|
"logits/rejected": -1.240236759185791, |
|
"logps/chosen": -624.1384887695312, |
|
"logps/rejected": -739.2446899414062, |
|
"loss": 0.4361, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.699873447418213, |
|
"rewards/margins": 1.3274140357971191, |
|
"rewards/rejected": -5.027287483215332, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 8.844759941101074, |
|
"learning_rate": 9.397045634168766e-08, |
|
"logits/chosen": -1.4404442310333252, |
|
"logits/rejected": -1.366231083869934, |
|
"logps/chosen": -637.5411987304688, |
|
"logps/rejected": -778.2499389648438, |
|
"loss": 0.4573, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.691349506378174, |
|
"rewards/margins": 1.4043484926223755, |
|
"rewards/rejected": -5.09569787979126, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.9238419261973305, |
|
"grad_norm": 17.08158302307129, |
|
"learning_rate": 8.78665232332998e-08, |
|
"logits/chosen": -1.366356611251831, |
|
"logits/rejected": -1.2875096797943115, |
|
"logps/chosen": -614.8327026367188, |
|
"logps/rejected": -738.70263671875, |
|
"loss": 0.4711, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.841702938079834, |
|
"rewards/margins": 1.2470335960388184, |
|
"rewards/rejected": -5.0887370109558105, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.9264590421355666, |
|
"grad_norm": 7.980059623718262, |
|
"learning_rate": 8.196400257606208e-08, |
|
"logits/chosen": -1.4690799713134766, |
|
"logits/rejected": -1.3165347576141357, |
|
"logps/chosen": -652.5242919921875, |
|
"logps/rejected": -803.9196166992188, |
|
"loss": 0.4108, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.6954245567321777, |
|
"rewards/margins": 1.5657516717910767, |
|
"rewards/rejected": -5.261176109313965, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.9290761580738026, |
|
"grad_norm": 10.548364639282227, |
|
"learning_rate": 7.626338722875076e-08, |
|
"logits/chosen": -1.4087066650390625, |
|
"logits/rejected": -1.3460147380828857, |
|
"logps/chosen": -621.637451171875, |
|
"logps/rejected": -753.1847534179688, |
|
"loss": 0.4788, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.676053524017334, |
|
"rewards/margins": 1.3061879873275757, |
|
"rewards/rejected": -4.982241630554199, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 6.714722633361816, |
|
"learning_rate": 7.076515319110688e-08, |
|
"logits/chosen": -1.4119006395339966, |
|
"logits/rejected": -1.3059628009796143, |
|
"logps/chosen": -620.5264892578125, |
|
"logps/rejected": -717.8798828125, |
|
"loss": 0.5122, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.6425423622131348, |
|
"rewards/margins": 1.3905235528945923, |
|
"rewards/rejected": -5.0330657958984375, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.9343103899502748, |
|
"grad_norm": 7.078303813934326, |
|
"learning_rate": 6.54697595640899e-08, |
|
"logits/chosen": -1.4229375123977661, |
|
"logits/rejected": -1.312577486038208, |
|
"logps/chosen": -663.3248291015625, |
|
"logps/rejected": -762.3607177734375, |
|
"loss": 0.4733, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.7122771739959717, |
|
"rewards/margins": 1.2408705949783325, |
|
"rewards/rejected": -4.953147888183594, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.9369275058885108, |
|
"grad_norm": 10.152729988098145, |
|
"learning_rate": 6.037764851154426e-08, |
|
"logits/chosen": -1.4115734100341797, |
|
"logits/rejected": -1.3518728017807007, |
|
"logps/chosen": -633.0020751953125, |
|
"logps/rejected": -763.7161865234375, |
|
"loss": 0.5006, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.6908345222473145, |
|
"rewards/margins": 1.2587831020355225, |
|
"rewards/rejected": -4.949617862701416, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.9395446218267469, |
|
"grad_norm": 6.685621738433838, |
|
"learning_rate": 5.548924522327748e-08, |
|
"logits/chosen": -1.401659369468689, |
|
"logits/rejected": -1.2891366481781006, |
|
"logps/chosen": -619.3512573242188, |
|
"logps/rejected": -733.2835083007812, |
|
"loss": 0.4568, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.577338457107544, |
|
"rewards/margins": 1.285097599029541, |
|
"rewards/rejected": -4.862435817718506, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 13.303339958190918, |
|
"learning_rate": 5.0804957879556915e-08, |
|
"logits/chosen": -1.3285419940948486, |
|
"logits/rejected": -1.2414448261260986, |
|
"logps/chosen": -591.6680908203125, |
|
"logps/rejected": -724.0158081054688, |
|
"loss": 0.4506, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.662086009979248, |
|
"rewards/margins": 1.2775993347167969, |
|
"rewards/rejected": -4.939684867858887, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"eval_logits/chosen": -1.3114778995513916, |
|
"eval_logits/rejected": -1.1944193840026855, |
|
"eval_logps/chosen": -635.4866943359375, |
|
"eval_logps/rejected": -744.0071411132812, |
|
"eval_loss": 0.4759487509727478, |
|
"eval_rewards/accuracies": 0.7534999847412109, |
|
"eval_rewards/chosen": -3.7087175846099854, |
|
"eval_rewards/margins": 1.2857497930526733, |
|
"eval_rewards/rejected": -4.994467735290527, |
|
"eval_runtime": 1593.4546, |
|
"eval_samples_per_second": 1.255, |
|
"eval_steps_per_second": 0.157, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.944778853703219, |
|
"grad_norm": 9.378813743591309, |
|
"learning_rate": 4.632517761702815e-08, |
|
"logits/chosen": -1.3526476621627808, |
|
"logits/rejected": -1.2188866138458252, |
|
"logps/chosen": -608.4993286132812, |
|
"logps/rejected": -749.1359252929688, |
|
"loss": 0.4235, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.732194423675537, |
|
"rewards/margins": 1.5286829471588135, |
|
"rewards/rejected": -5.26087760925293, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.9473959696414551, |
|
"grad_norm": 11.812514305114746, |
|
"learning_rate": 4.205027849605359e-08, |
|
"logits/chosen": -1.379093050956726, |
|
"logits/rejected": -1.2838860750198364, |
|
"logps/chosen": -625.9976806640625, |
|
"logps/rejected": -716.8546752929688, |
|
"loss": 0.5215, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.781675338745117, |
|
"rewards/margins": 1.2094558477401733, |
|
"rewards/rejected": -4.991130828857422, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.9500130855796912, |
|
"grad_norm": 9.726532936096191, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": -1.4842337369918823, |
|
"logits/rejected": -1.3593313694000244, |
|
"logps/chosen": -630.210693359375, |
|
"logps/rejected": -723.7564697265625, |
|
"loss": 0.4643, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.6620631217956543, |
|
"rewards/margins": 1.3267091512680054, |
|
"rewards/rejected": -4.988772392272949, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 10.711019515991211, |
|
"learning_rate": 3.411653435283158e-08, |
|
"logits/chosen": -1.411237120628357, |
|
"logits/rejected": -1.2247424125671387, |
|
"logps/chosen": -640.1253662109375, |
|
"logps/rejected": -703.46875, |
|
"loss": 0.4769, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.6698365211486816, |
|
"rewards/margins": 1.190108299255371, |
|
"rewards/rejected": -4.8599443435668945, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.9552473174561633, |
|
"grad_norm": 7.913322925567627, |
|
"learning_rate": 3.04583517959367e-08, |
|
"logits/chosen": -1.4571387767791748, |
|
"logits/rejected": -1.3205856084823608, |
|
"logps/chosen": -603.8260498046875, |
|
"logps/rejected": -697.1012573242188, |
|
"loss": 0.4523, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.5181479454040527, |
|
"rewards/margins": 1.2571742534637451, |
|
"rewards/rejected": -4.775322914123535, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.9578644333943994, |
|
"grad_norm": 14.019694328308105, |
|
"learning_rate": 2.7006375255985984e-08, |
|
"logits/chosen": -1.3986600637435913, |
|
"logits/rejected": -1.3672528266906738, |
|
"logps/chosen": -645.9337158203125, |
|
"logps/rejected": -747.8303833007812, |
|
"loss": 0.5895, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -3.8122050762176514, |
|
"rewards/margins": 1.0134917497634888, |
|
"rewards/rejected": -4.82569694519043, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.9604815493326354, |
|
"grad_norm": 11.917638778686523, |
|
"learning_rate": 2.3760892972027328e-08, |
|
"logits/chosen": -1.5023940801620483, |
|
"logits/rejected": -1.3516952991485596, |
|
"logps/chosen": -656.4842529296875, |
|
"logps/rejected": -742.977783203125, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.840627670288086, |
|
"rewards/margins": 1.2045114040374756, |
|
"rewards/rejected": -5.045139789581299, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 12.40701961517334, |
|
"learning_rate": 2.072217594089765e-08, |
|
"logits/chosen": -1.368158221244812, |
|
"logits/rejected": -1.353477120399475, |
|
"logps/chosen": -638.237060546875, |
|
"logps/rejected": -766.6362915039062, |
|
"loss": 0.4165, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.7673258781433105, |
|
"rewards/margins": 1.40687096118927, |
|
"rewards/rejected": -5.174197196960449, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.9657157812091076, |
|
"grad_norm": 10.349726676940918, |
|
"learning_rate": 1.789047789459375e-08, |
|
"logits/chosen": -1.4667888879776, |
|
"logits/rejected": -1.289011001586914, |
|
"logps/chosen": -689.1021728515625, |
|
"logps/rejected": -770.5333862304688, |
|
"loss": 0.5123, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.7222888469696045, |
|
"rewards/margins": 1.3046228885650635, |
|
"rewards/rejected": -5.026911735534668, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"grad_norm": 9.133951187133789, |
|
"learning_rate": 1.5266035279088708e-08, |
|
"logits/chosen": -1.3182872533798218, |
|
"logits/rejected": -1.2082844972610474, |
|
"logps/chosen": -675.6629638671875, |
|
"logps/rejected": -779.8258666992188, |
|
"loss": 0.4732, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.791264295578003, |
|
"rewards/margins": 1.3038253784179688, |
|
"rewards/rejected": -5.095089912414551, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"eval_logits/chosen": -1.3108527660369873, |
|
"eval_logits/rejected": -1.1938129663467407, |
|
"eval_logps/chosen": -633.6405029296875, |
|
"eval_logps/rejected": -741.50830078125, |
|
"eval_loss": 0.4758020043373108, |
|
"eval_rewards/accuracies": 0.7540000081062317, |
|
"eval_rewards/chosen": -3.6902551651000977, |
|
"eval_rewards/margins": 1.2792243957519531, |
|
"eval_rewards/rejected": -4.969479560852051, |
|
"eval_runtime": 1596.3049, |
|
"eval_samples_per_second": 1.253, |
|
"eval_steps_per_second": 0.157, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9709500130855797, |
|
"grad_norm": 13.42652702331543, |
|
"learning_rate": 1.2849067234584623e-08, |
|
"logits/chosen": -1.3121452331542969, |
|
"logits/rejected": -1.2471363544464111, |
|
"logps/chosen": -600.2552490234375, |
|
"logps/rejected": -731.0992431640625, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.6395773887634277, |
|
"rewards/margins": 1.3849769830703735, |
|
"rewards/rejected": -5.02455472946167, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 7.9909467697143555, |
|
"learning_rate": 1.0639775577218625e-08, |
|
"logits/chosen": -1.3058593273162842, |
|
"logits/rejected": -1.1494419574737549, |
|
"logps/chosen": -619.4830932617188, |
|
"logps/rejected": -719.6461181640625, |
|
"loss": 0.4847, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.703446865081787, |
|
"rewards/margins": 1.3608992099761963, |
|
"rewards/rejected": -5.0643463134765625, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.9761842449620518, |
|
"grad_norm": 9.135430335998535, |
|
"learning_rate": 8.638344782207486e-09, |
|
"logits/chosen": -1.3282991647720337, |
|
"logits/rejected": -1.2410192489624023, |
|
"logps/chosen": -604.8692016601562, |
|
"logps/rejected": -705.7180786132812, |
|
"loss": 0.4839, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.5968875885009766, |
|
"rewards/margins": 1.2363134622573853, |
|
"rewards/rejected": -4.833200931549072, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.9788013609002879, |
|
"grad_norm": 9.719812393188477, |
|
"learning_rate": 6.84494196844715e-09, |
|
"logits/chosen": -1.385698914527893, |
|
"logits/rejected": -1.2765476703643799, |
|
"logps/chosen": -639.7982788085938, |
|
"logps/rejected": -776.7741088867188, |
|
"loss": 0.4564, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.678164005279541, |
|
"rewards/margins": 1.4851183891296387, |
|
"rewards/rejected": -5.163282871246338, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.9814184768385239, |
|
"grad_norm": 10.63892936706543, |
|
"learning_rate": 5.259716884556121e-09, |
|
"logits/chosen": -1.4346338510513306, |
|
"logits/rejected": -1.3097164630889893, |
|
"logps/chosen": -635.2260131835938, |
|
"logps/rejected": -746.8245849609375, |
|
"loss": 0.46, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.7078888416290283, |
|
"rewards/margins": 1.264432668685913, |
|
"rewards/rejected": -4.972321510314941, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 9.860692024230957, |
|
"learning_rate": 3.882801896372967e-09, |
|
"logits/chosen": -1.433598279953003, |
|
"logits/rejected": -1.3519879579544067, |
|
"logps/chosen": -632.1627197265625, |
|
"logps/rejected": -731.175048828125, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.6687378883361816, |
|
"rewards/margins": 1.3088436126708984, |
|
"rewards/rejected": -4.97758150100708, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.9866527087149961, |
|
"grad_norm": 11.127714157104492, |
|
"learning_rate": 2.7143119759026614e-09, |
|
"logits/chosen": -1.444789171218872, |
|
"logits/rejected": -1.2916604280471802, |
|
"logps/chosen": -641.5234985351562, |
|
"logps/rejected": -745.976318359375, |
|
"loss": 0.413, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.573714017868042, |
|
"rewards/margins": 1.2991065979003906, |
|
"rewards/rejected": -4.8728203773498535, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9892698246532321, |
|
"grad_norm": 12.815695762634277, |
|
"learning_rate": 1.754344691717591e-09, |
|
"logits/chosen": -1.3558757305145264, |
|
"logits/rejected": -1.311942219734192, |
|
"logps/chosen": -625.3250122070312, |
|
"logps/rejected": -757.5562744140625, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -3.7559523582458496, |
|
"rewards/margins": 1.1017048358917236, |
|
"rewards/rejected": -4.857657432556152, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9918869405914682, |
|
"grad_norm": 16.819839477539062, |
|
"learning_rate": 1.0029802008096335e-09, |
|
"logits/chosen": -1.3806864023208618, |
|
"logits/rejected": -1.2327523231506348, |
|
"logps/chosen": -646.9813842773438, |
|
"logps/rejected": -753.9891357421875, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.727670669555664, |
|
"rewards/margins": 1.29872465133667, |
|
"rewards/rejected": -5.026394844055176, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 15.355550765991211, |
|
"learning_rate": 4.602812418974534e-10, |
|
"logits/chosen": -1.4688384532928467, |
|
"logits/rejected": -1.3403863906860352, |
|
"logps/chosen": -656.9666748046875, |
|
"logps/rejected": -763.0733642578125, |
|
"loss": 0.5041, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.752246856689453, |
|
"rewards/margins": 1.3061609268188477, |
|
"rewards/rejected": -5.058407783508301, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"eval_logits/chosen": -1.3093682527542114, |
|
"eval_logits/rejected": -1.1921626329421997, |
|
"eval_logps/chosen": -633.0255737304688, |
|
"eval_logps/rejected": -740.7546997070312, |
|
"eval_loss": 0.4757540225982666, |
|
"eval_rewards/accuracies": 0.7544999718666077, |
|
"eval_rewards/chosen": -3.6841063499450684, |
|
"eval_rewards/margins": 1.2778375148773193, |
|
"eval_rewards/rejected": -4.961943626403809, |
|
"eval_runtime": 1598.9917, |
|
"eval_samples_per_second": 1.251, |
|
"eval_steps_per_second": 0.156, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9971211724679403, |
|
"grad_norm": 11.665594100952148, |
|
"learning_rate": 1.2629313018819312e-10, |
|
"logits/chosen": -1.3954050540924072, |
|
"logits/rejected": -1.2826766967773438, |
|
"logps/chosen": -617.3712158203125, |
|
"logps/rejected": -716.8834228515625, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -3.650341033935547, |
|
"rewards/margins": 1.1511954069137573, |
|
"rewards/rejected": -4.8015360832214355, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"grad_norm": 16.381610870361328, |
|
"learning_rate": 1.0437535929996855e-12, |
|
"logits/chosen": -1.3866708278656006, |
|
"logits/rejected": -1.2333214282989502, |
|
"logps/chosen": -657.7440795898438, |
|
"logps/rejected": -766.9520874023438, |
|
"loss": 0.4571, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.7108898162841797, |
|
"rewards/margins": 1.5390161275863647, |
|
"rewards/rejected": -5.249906063079834, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5140745321264819, |
|
"train_runtime": 164862.9896, |
|
"train_samples_per_second": 0.371, |
|
"train_steps_per_second": 0.023 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|