|
{ |
|
"best_metric": 1.4484930038452148, |
|
"best_model_checkpoint": "saves/Falcon-7B-Instruct/lora/orpo-salt/checkpoint-1500", |
|
"epoch": 2.9969690846635686, |
|
"eval_steps": 500, |
|
"global_step": 1854, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01616488179430188, |
|
"grad_norm": 0.5467122793197632, |
|
"learning_rate": 4.999648198770648e-06, |
|
"logits/chosen": -14.078092575073242, |
|
"logits/rejected": -14.159353256225586, |
|
"logps/chosen": -1.7583353519439697, |
|
"logps/rejected": -1.8469493389129639, |
|
"loss": 1.8299, |
|
"odds_ratio_loss": 0.7155797481536865, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.17583352327346802, |
|
"rewards/margins": 0.008861413225531578, |
|
"rewards/rejected": -0.18469493091106415, |
|
"sft_loss": 1.7583353519439697, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03232976358860376, |
|
"grad_norm": 0.495731920003891, |
|
"learning_rate": 4.998578646361359e-06, |
|
"logits/chosen": -14.073513984680176, |
|
"logits/rejected": -14.144752502441406, |
|
"logps/chosen": -1.9236218929290771, |
|
"logps/rejected": -1.9451425075531006, |
|
"loss": 2.0003, |
|
"odds_ratio_loss": 0.766566812992096, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.1923622190952301, |
|
"rewards/margins": 0.002152049448341131, |
|
"rewards/rejected": -0.19451424479484558, |
|
"sft_loss": 1.9236218929290771, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04849464538290564, |
|
"grad_norm": 0.6057537198066711, |
|
"learning_rate": 4.996791614004449e-06, |
|
"logits/chosen": -14.302851676940918, |
|
"logits/rejected": -14.224812507629395, |
|
"logps/chosen": -1.8387420177459717, |
|
"logps/rejected": -1.910175085067749, |
|
"loss": 1.9128, |
|
"odds_ratio_loss": 0.7409650087356567, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.1838742196559906, |
|
"rewards/margins": 0.007143297698348761, |
|
"rewards/rejected": -0.1910175085067749, |
|
"sft_loss": 1.8387420177459717, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06465952717720752, |
|
"grad_norm": 0.5634093284606934, |
|
"learning_rate": 4.994287614855618e-06, |
|
"logits/chosen": -14.0798921585083, |
|
"logits/rejected": -14.19922161102295, |
|
"logps/chosen": -1.947654366493225, |
|
"logps/rejected": -1.9009010791778564, |
|
"loss": 2.0298, |
|
"odds_ratio_loss": 0.8212669491767883, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.19476543366909027, |
|
"rewards/margins": -0.004675320815294981, |
|
"rewards/rejected": -0.1900901347398758, |
|
"sft_loss": 1.947654366493225, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0808244089715094, |
|
"grad_norm": 0.7957186698913574, |
|
"learning_rate": 4.991067367951343e-06, |
|
"logits/chosen": -14.371423721313477, |
|
"logits/rejected": -14.266546249389648, |
|
"logps/chosen": -2.017087697982788, |
|
"logps/rejected": -2.0035624504089355, |
|
"loss": 2.0958, |
|
"odds_ratio_loss": 0.7871265411376953, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.20170876383781433, |
|
"rewards/margins": -0.0013525458052754402, |
|
"rewards/rejected": -0.20035621523857117, |
|
"sft_loss": 2.017087697982788, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09698929076581128, |
|
"grad_norm": 0.5418820381164551, |
|
"learning_rate": 4.987131798002389e-06, |
|
"logits/chosen": -14.21721076965332, |
|
"logits/rejected": -14.099153518676758, |
|
"logps/chosen": -1.8751760721206665, |
|
"logps/rejected": -1.8855310678482056, |
|
"loss": 1.9577, |
|
"odds_ratio_loss": 0.8254929780960083, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.18751761317253113, |
|
"rewards/margins": 0.001035516383126378, |
|
"rewards/rejected": -0.188553124666214, |
|
"sft_loss": 1.8751760721206665, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11315417256011315, |
|
"grad_norm": 1.0633864402770996, |
|
"learning_rate": 4.982482035128285e-06, |
|
"logits/chosen": -14.105901718139648, |
|
"logits/rejected": -14.193835258483887, |
|
"logps/chosen": -2.0220446586608887, |
|
"logps/rejected": -1.9594541788101196, |
|
"loss": 2.1089, |
|
"odds_ratio_loss": 0.8683654069900513, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.20220446586608887, |
|
"rewards/margins": -0.00625905767083168, |
|
"rewards/rejected": -0.19594541192054749, |
|
"sft_loss": 2.0220446586608887, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12931905435441504, |
|
"grad_norm": 1.0158140659332275, |
|
"learning_rate": 4.9771194145328e-06, |
|
"logits/chosen": -14.075093269348145, |
|
"logits/rejected": -14.02421760559082, |
|
"logps/chosen": -1.6751682758331299, |
|
"logps/rejected": -1.7500627040863037, |
|
"loss": 1.7468, |
|
"odds_ratio_loss": 0.716758668422699, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.167516827583313, |
|
"rewards/margins": 0.00748945539817214, |
|
"rewards/rejected": -0.17500628530979156, |
|
"sft_loss": 1.6751682758331299, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1454839361487169, |
|
"grad_norm": 1.3243364095687866, |
|
"learning_rate": 4.971045476120532e-06, |
|
"logits/chosen": -14.14300537109375, |
|
"logits/rejected": -14.079290390014648, |
|
"logps/chosen": -1.8245623111724854, |
|
"logps/rejected": -1.760660171508789, |
|
"loss": 1.9067, |
|
"odds_ratio_loss": 0.8211291432380676, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.18245622515678406, |
|
"rewards/margins": -0.006390226539224386, |
|
"rewards/rejected": -0.17606601119041443, |
|
"sft_loss": 1.8245623111724854, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1616488179430188, |
|
"grad_norm": 0.7163342237472534, |
|
"learning_rate": 4.964261964054713e-06, |
|
"logits/chosen": -14.068964958190918, |
|
"logits/rejected": -14.082951545715332, |
|
"logps/chosen": -1.7527011632919312, |
|
"logps/rejected": -1.8138408660888672, |
|
"loss": 1.8297, |
|
"odds_ratio_loss": 0.7703070044517517, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.17527012526988983, |
|
"rewards/margins": 0.0061139510944485664, |
|
"rewards/rejected": -0.18138407170772552, |
|
"sft_loss": 1.7527011632919312, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17781369973732067, |
|
"grad_norm": 1.006773829460144, |
|
"learning_rate": 4.956770826256372e-06, |
|
"logits/chosen": -14.166906356811523, |
|
"logits/rejected": -14.120782852172852, |
|
"logps/chosen": -1.7077207565307617, |
|
"logps/rejected": -1.7365996837615967, |
|
"loss": 1.7844, |
|
"odds_ratio_loss": 0.7667573690414429, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.17077207565307617, |
|
"rewards/margins": 0.002887908834964037, |
|
"rewards/rejected": -0.17365998029708862, |
|
"sft_loss": 1.7077207565307617, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.19397858153162256, |
|
"grad_norm": 0.8139289617538452, |
|
"learning_rate": 4.94857421384497e-06, |
|
"logits/chosen": -14.175407409667969, |
|
"logits/rejected": -14.165875434875488, |
|
"logps/chosen": -1.692577600479126, |
|
"logps/rejected": -1.8239320516586304, |
|
"loss": 1.7682, |
|
"odds_ratio_loss": 0.7562084794044495, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1692577451467514, |
|
"rewards/margins": 0.013135453686118126, |
|
"rewards/rejected": -0.18239320814609528, |
|
"sft_loss": 1.692577600479126, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.21014346332592443, |
|
"grad_norm": 1.0950274467468262, |
|
"learning_rate": 4.939674480520701e-06, |
|
"logits/chosen": -14.055421829223633, |
|
"logits/rejected": -14.265202522277832, |
|
"logps/chosen": -1.65860915184021, |
|
"logps/rejected": -1.6671603918075562, |
|
"loss": 1.7352, |
|
"odds_ratio_loss": 0.7663736939430237, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.16586092114448547, |
|
"rewards/margins": 0.0008551125647500157, |
|
"rewards/rejected": -0.16671602427959442, |
|
"sft_loss": 1.65860915184021, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2263083451202263, |
|
"grad_norm": 0.6190826892852783, |
|
"learning_rate": 4.930074181888613e-06, |
|
"logits/chosen": -14.116220474243164, |
|
"logits/rejected": -14.158090591430664, |
|
"logps/chosen": -1.7475076913833618, |
|
"logps/rejected": -1.736114501953125, |
|
"loss": 1.8234, |
|
"odds_ratio_loss": 0.7589074373245239, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.17475078999996185, |
|
"rewards/margins": -0.0011393536115065217, |
|
"rewards/rejected": -0.17361143231391907, |
|
"sft_loss": 1.7475076913833618, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2424732269145282, |
|
"grad_norm": 0.8096482157707214, |
|
"learning_rate": 4.91977607472475e-06, |
|
"logits/chosen": -14.182394027709961, |
|
"logits/rejected": -14.252290725708008, |
|
"logps/chosen": -1.6399564743041992, |
|
"logps/rejected": -1.6184114217758179, |
|
"loss": 1.7178, |
|
"odds_ratio_loss": 0.778221607208252, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.1639956533908844, |
|
"rewards/margins": -0.0021545083727687597, |
|
"rewards/rejected": -0.16184113919734955, |
|
"sft_loss": 1.6399564743041992, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2586381087088301, |
|
"grad_norm": 1.5372618436813354, |
|
"learning_rate": 4.908783116184534e-06, |
|
"logits/chosen": -14.110807418823242, |
|
"logits/rejected": -14.087692260742188, |
|
"logps/chosen": -1.613721489906311, |
|
"logps/rejected": -1.7073653936386108, |
|
"loss": 1.6837, |
|
"odds_ratio_loss": 0.6995801329612732, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.1613721400499344, |
|
"rewards/margins": 0.009364412166178226, |
|
"rewards/rejected": -0.17073655128479004, |
|
"sft_loss": 1.613721489906311, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.27480299050313195, |
|
"grad_norm": 1.0400787591934204, |
|
"learning_rate": 4.897098462953598e-06, |
|
"logits/chosen": -14.309249877929688, |
|
"logits/rejected": -14.144041061401367, |
|
"logps/chosen": -1.572377324104309, |
|
"logps/rejected": -1.679239273071289, |
|
"loss": 1.6438, |
|
"odds_ratio_loss": 0.7143967747688293, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.15723773837089539, |
|
"rewards/margins": 0.010686198249459267, |
|
"rewards/rejected": -0.1679239273071289, |
|
"sft_loss": 1.572377324104309, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2909678722974338, |
|
"grad_norm": 0.6752244234085083, |
|
"learning_rate": 4.884725470341331e-06, |
|
"logits/chosen": -14.362325668334961, |
|
"logits/rejected": -14.368985176086426, |
|
"logps/chosen": -1.5275907516479492, |
|
"logps/rejected": -1.6322838068008423, |
|
"loss": 1.5969, |
|
"odds_ratio_loss": 0.6928091645240784, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.15275909006595612, |
|
"rewards/margins": 0.01046929694712162, |
|
"rewards/rejected": -0.1632283627986908, |
|
"sft_loss": 1.5275907516479492, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3071327540917357, |
|
"grad_norm": 1.5551739931106567, |
|
"learning_rate": 4.871667691317377e-06, |
|
"logits/chosen": -14.23143196105957, |
|
"logits/rejected": -14.168081283569336, |
|
"logps/chosen": -1.5617109537124634, |
|
"logps/rejected": -1.516629934310913, |
|
"loss": 1.6442, |
|
"odds_ratio_loss": 0.8246932029724121, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.15617111325263977, |
|
"rewards/margins": -0.004508105106651783, |
|
"rewards/rejected": -0.15166299045085907, |
|
"sft_loss": 1.5617109537124634, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3232976358860376, |
|
"grad_norm": 4.873908519744873, |
|
"learning_rate": 4.857928875491392e-06, |
|
"logits/chosen": -14.317342758178711, |
|
"logits/rejected": -14.135493278503418, |
|
"logps/chosen": -1.4843647480010986, |
|
"logps/rejected": -1.5346746444702148, |
|
"loss": 1.5575, |
|
"odds_ratio_loss": 0.7314870953559875, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.14843648672103882, |
|
"rewards/margins": 0.005030992440879345, |
|
"rewards/rejected": -0.15346747636795044, |
|
"sft_loss": 1.4843647480010986, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.33946251768033947, |
|
"grad_norm": 1.1008872985839844, |
|
"learning_rate": 4.843512968036314e-06, |
|
"logits/chosen": -13.899968147277832, |
|
"logits/rejected": -13.980463027954102, |
|
"logps/chosen": -1.4831616878509521, |
|
"logps/rejected": -1.464994192123413, |
|
"loss": 1.5606, |
|
"odds_ratio_loss": 0.7743188738822937, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1483161747455597, |
|
"rewards/margins": -0.0018167542293667793, |
|
"rewards/rejected": -0.1464994251728058, |
|
"sft_loss": 1.4831616878509521, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.35562739947464134, |
|
"grad_norm": 2.111262083053589, |
|
"learning_rate": 4.828424108555486e-06, |
|
"logits/chosen": -14.277219772338867, |
|
"logits/rejected": -14.1966552734375, |
|
"logps/chosen": -1.5998783111572266, |
|
"logps/rejected": -1.7076078653335571, |
|
"loss": 1.6726, |
|
"odds_ratio_loss": 0.727408230304718, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.15998782217502594, |
|
"rewards/margins": 0.010772952809929848, |
|
"rewards/rejected": -0.17076078057289124, |
|
"sft_loss": 1.5998783111572266, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3717922812689432, |
|
"grad_norm": 0.6497421264648438, |
|
"learning_rate": 4.812666629893957e-06, |
|
"logits/chosen": -14.255824089050293, |
|
"logits/rejected": -14.233850479125977, |
|
"logps/chosen": -1.5216138362884521, |
|
"logps/rejected": -1.4904725551605225, |
|
"loss": 1.599, |
|
"odds_ratio_loss": 0.7741049528121948, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.1521613895893097, |
|
"rewards/margins": -0.0031141184736043215, |
|
"rewards/rejected": -0.14904727041721344, |
|
"sft_loss": 1.5216138362884521, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3879571630632451, |
|
"grad_norm": 1.4030089378356934, |
|
"learning_rate": 4.796245056894273e-06, |
|
"logits/chosen": -13.990198135375977, |
|
"logits/rejected": -14.032785415649414, |
|
"logps/chosen": -1.5593761205673218, |
|
"logps/rejected": -1.5817941427230835, |
|
"loss": 1.6382, |
|
"odds_ratio_loss": 0.7885618805885315, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.15593759715557098, |
|
"rewards/margins": 0.0022418068256229162, |
|
"rewards/rejected": -0.1581794172525406, |
|
"sft_loss": 1.5593761205673218, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.404122044857547, |
|
"grad_norm": 1.03659987449646, |
|
"learning_rate": 4.779164105097148e-06, |
|
"logits/chosen": -14.23992919921875, |
|
"logits/rejected": -14.331039428710938, |
|
"logps/chosen": -1.4630193710327148, |
|
"logps/rejected": -1.6595561504364014, |
|
"loss": 1.5308, |
|
"odds_ratio_loss": 0.6777212023735046, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.14630195498466492, |
|
"rewards/margins": 0.019653689116239548, |
|
"rewards/rejected": -0.16595561802387238, |
|
"sft_loss": 1.4630193710327148, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.42028692665184886, |
|
"grad_norm": 1.1558053493499756, |
|
"learning_rate": 4.761428679387373e-06, |
|
"logits/chosen": -14.19200611114502, |
|
"logits/rejected": -14.27843189239502, |
|
"logps/chosen": -1.4934606552124023, |
|
"logps/rejected": -1.5448919534683228, |
|
"loss": 1.5664, |
|
"odds_ratio_loss": 0.7296234369277954, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.14934605360031128, |
|
"rewards/margins": 0.005143154412508011, |
|
"rewards/rejected": -0.154489204287529, |
|
"sft_loss": 1.4934606552124023, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4364518084461507, |
|
"grad_norm": 1.3478955030441284, |
|
"learning_rate": 4.7430438725853515e-06, |
|
"logits/chosen": -14.099308967590332, |
|
"logits/rejected": -14.247446060180664, |
|
"logps/chosen": -1.5219833850860596, |
|
"logps/rejected": -1.7108709812164307, |
|
"loss": 1.5916, |
|
"odds_ratio_loss": 0.6957148313522339, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.15219834446907043, |
|
"rewards/margins": 0.01888876222074032, |
|
"rewards/rejected": -0.1710870862007141, |
|
"sft_loss": 1.5219833850860596, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4526166902404526, |
|
"grad_norm": 1.0543924570083618, |
|
"learning_rate": 4.724014963984669e-06, |
|
"logits/chosen": -14.321874618530273, |
|
"logits/rejected": -14.308130264282227, |
|
"logps/chosen": -1.4753090143203735, |
|
"logps/rejected": -1.6179271936416626, |
|
"loss": 1.5473, |
|
"odds_ratio_loss": 0.7201633453369141, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.14753088355064392, |
|
"rewards/margins": 0.014261829666793346, |
|
"rewards/rejected": -0.16179272532463074, |
|
"sft_loss": 1.4753090143203735, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4687815720347545, |
|
"grad_norm": 1.6008622646331787, |
|
"learning_rate": 4.704347417836116e-06, |
|
"logits/chosen": -14.192815780639648, |
|
"logits/rejected": -14.182914733886719, |
|
"logps/chosen": -1.373263955116272, |
|
"logps/rejected": -1.4777114391326904, |
|
"loss": 1.4462, |
|
"odds_ratio_loss": 0.7295758128166199, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.13732638955116272, |
|
"rewards/margins": 0.010444764979183674, |
|
"rewards/rejected": -0.14777114987373352, |
|
"sft_loss": 1.373263955116272, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4849464538290564, |
|
"grad_norm": 1.0440045595169067, |
|
"learning_rate": 4.684046881778603e-06, |
|
"logits/chosen": -13.9605131149292, |
|
"logits/rejected": -14.021821975708008, |
|
"logps/chosen": -1.3839852809906006, |
|
"logps/rejected": -1.4472886323928833, |
|
"loss": 1.456, |
|
"odds_ratio_loss": 0.719718337059021, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.13839852809906006, |
|
"rewards/margins": 0.006330335047096014, |
|
"rewards/rejected": -0.1447288691997528, |
|
"sft_loss": 1.3839852809906006, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5011113356233583, |
|
"grad_norm": 0.8026280999183655, |
|
"learning_rate": 4.663119185217409e-06, |
|
"logits/chosen": -14.247451782226562, |
|
"logits/rejected": -14.332074165344238, |
|
"logps/chosen": -1.4372491836547852, |
|
"logps/rejected": -1.5869617462158203, |
|
"loss": 1.5057, |
|
"odds_ratio_loss": 0.684893012046814, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.1437249332666397, |
|
"rewards/margins": 0.014971258118748665, |
|
"rewards/rejected": -0.15869615972042084, |
|
"sft_loss": 1.4372491836547852, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5172762174176602, |
|
"grad_norm": 1.054210901260376, |
|
"learning_rate": 4.641570337650232e-06, |
|
"logits/chosen": -14.101099967956543, |
|
"logits/rejected": -14.234477043151855, |
|
"logps/chosen": -1.3175721168518066, |
|
"logps/rejected": -1.46291184425354, |
|
"loss": 1.3866, |
|
"odds_ratio_loss": 0.6904350519180298, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1317571997642517, |
|
"rewards/margins": 0.014533978886902332, |
|
"rewards/rejected": -0.14629118144512177, |
|
"sft_loss": 1.3175721168518066, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.533441099211962, |
|
"grad_norm": 1.6171979904174805, |
|
"learning_rate": 4.61940652694154e-06, |
|
"logits/chosen": -14.107089042663574, |
|
"logits/rejected": -14.126917839050293, |
|
"logps/chosen": -1.5025255680084229, |
|
"logps/rejected": -1.4795392751693726, |
|
"loss": 1.5835, |
|
"odds_ratio_loss": 0.8096711039543152, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.1502525359392166, |
|
"rewards/margins": -0.0022986275143921375, |
|
"rewards/rejected": -0.14795391261577606, |
|
"sft_loss": 1.5025255680084229, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5496059810062639, |
|
"grad_norm": 1.2122093439102173, |
|
"learning_rate": 4.596634117545689e-06, |
|
"logits/chosen": -14.346307754516602, |
|
"logits/rejected": -14.166845321655273, |
|
"logps/chosen": -1.5319068431854248, |
|
"logps/rejected": -1.624324083328247, |
|
"loss": 1.6054, |
|
"odds_ratio_loss": 0.735165536403656, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.15319068729877472, |
|
"rewards/margins": 0.00924170482903719, |
|
"rewards/rejected": -0.16243240237236023, |
|
"sft_loss": 1.5319068431854248, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5657708628005658, |
|
"grad_norm": 0.899023175239563, |
|
"learning_rate": 4.573259648679335e-06, |
|
"logits/chosen": -14.317461013793945, |
|
"logits/rejected": -14.103338241577148, |
|
"logps/chosen": -1.47697114944458, |
|
"logps/rejected": -1.648705244064331, |
|
"loss": 1.546, |
|
"odds_ratio_loss": 0.6902921199798584, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.14769712090492249, |
|
"rewards/margins": 0.017173420637845993, |
|
"rewards/rejected": -0.16487054526805878, |
|
"sft_loss": 1.47697114944458, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5819357445948676, |
|
"grad_norm": 2.3687381744384766, |
|
"learning_rate": 4.549289832443663e-06, |
|
"logits/chosen": -14.142545700073242, |
|
"logits/rejected": -14.211145401000977, |
|
"logps/chosen": -1.4514472484588623, |
|
"logps/rejected": -1.5542781352996826, |
|
"loss": 1.5233, |
|
"odds_ratio_loss": 0.7186037302017212, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1451447308063507, |
|
"rewards/margins": 0.010283084586262703, |
|
"rewards/rejected": -0.15542782843112946, |
|
"sft_loss": 1.4514472484588623, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5981006263891695, |
|
"grad_norm": 1.039651870727539, |
|
"learning_rate": 4.524731551896978e-06, |
|
"logits/chosen": -14.117040634155273, |
|
"logits/rejected": -14.164260864257812, |
|
"logps/chosen": -1.3633731603622437, |
|
"logps/rejected": -1.4127264022827148, |
|
"loss": 1.4381, |
|
"odds_ratio_loss": 0.7473303079605103, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.13633732497692108, |
|
"rewards/margins": 0.004935313947498798, |
|
"rewards/rejected": -0.1412726640701294, |
|
"sft_loss": 1.3633731603622437, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6142655081834714, |
|
"grad_norm": 2.077622413635254, |
|
"learning_rate": 4.4995918590781925e-06, |
|
"logits/chosen": -14.212381362915039, |
|
"logits/rejected": -14.251853942871094, |
|
"logps/chosen": -1.3631454706192017, |
|
"logps/rejected": -1.4832844734191895, |
|
"loss": 1.437, |
|
"odds_ratio_loss": 0.7388315200805664, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1363145411014557, |
|
"rewards/margins": 0.012013902887701988, |
|
"rewards/rejected": -0.14832845330238342, |
|
"sft_loss": 1.3631454706192017, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6304303899777733, |
|
"grad_norm": 0.6616309881210327, |
|
"learning_rate": 4.473877972981797e-06, |
|
"logits/chosen": -14.166543960571289, |
|
"logits/rejected": -14.008458137512207, |
|
"logps/chosen": -1.414536476135254, |
|
"logps/rejected": -1.5125486850738525, |
|
"loss": 1.4849, |
|
"odds_ratio_loss": 0.7040683031082153, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.14145365357398987, |
|
"rewards/margins": 0.009801235981285572, |
|
"rewards/rejected": -0.15125489234924316, |
|
"sft_loss": 1.414536476135254, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6465952717720752, |
|
"grad_norm": 1.2422401905059814, |
|
"learning_rate": 4.447597277484894e-06, |
|
"logits/chosen": -14.10089111328125, |
|
"logits/rejected": -14.177225112915039, |
|
"logps/chosen": -1.3244436979293823, |
|
"logps/rejected": -1.434922456741333, |
|
"loss": 1.3936, |
|
"odds_ratio_loss": 0.6911473274230957, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.132444366812706, |
|
"rewards/margins": 0.011047879233956337, |
|
"rewards/rejected": -0.14349225163459778, |
|
"sft_loss": 1.3244436979293823, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6627601535663771, |
|
"grad_norm": 1.3308875560760498, |
|
"learning_rate": 4.42075731922687e-06, |
|
"logits/chosen": -14.254026412963867, |
|
"logits/rejected": -14.150421142578125, |
|
"logps/chosen": -1.4931491613388062, |
|
"logps/rejected": -1.5233150720596313, |
|
"loss": 1.5684, |
|
"odds_ratio_loss": 0.7521846890449524, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.14931491017341614, |
|
"rewards/margins": 0.0030165952630341053, |
|
"rewards/rejected": -0.15233151614665985, |
|
"sft_loss": 1.4931491613388062, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6789250353606789, |
|
"grad_norm": 1.4143937826156616, |
|
"learning_rate": 4.3933658054423465e-06, |
|
"logits/chosen": -14.156329154968262, |
|
"logits/rejected": -14.047518730163574, |
|
"logps/chosen": -1.338627576828003, |
|
"logps/rejected": -1.4370090961456299, |
|
"loss": 1.4095, |
|
"odds_ratio_loss": 0.70883709192276, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.13386276364326477, |
|
"rewards/margins": 0.009838144294917583, |
|
"rewards/rejected": -0.14370091259479523, |
|
"sft_loss": 1.338627576828003, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6950899171549808, |
|
"grad_norm": 2.3574774265289307, |
|
"learning_rate": 4.365430601748003e-06, |
|
"logits/chosen": -14.235176086425781, |
|
"logits/rejected": -14.395864486694336, |
|
"logps/chosen": -1.564626932144165, |
|
"logps/rejected": -1.5344398021697998, |
|
"loss": 1.6431, |
|
"odds_ratio_loss": 0.7849880456924438, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.15646269917488098, |
|
"rewards/margins": -0.0030187165830284357, |
|
"rewards/rejected": -0.15344397723674774, |
|
"sft_loss": 1.564626932144165, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7112547989492827, |
|
"grad_norm": 3.739943504333496, |
|
"learning_rate": 4.336959729883925e-06, |
|
"logits/chosen": -14.274754524230957, |
|
"logits/rejected": -14.191232681274414, |
|
"logps/chosen": -1.3745372295379639, |
|
"logps/rejected": -1.405700445175171, |
|
"loss": 1.4506, |
|
"odds_ratio_loss": 0.7607132196426392, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.13745373487472534, |
|
"rewards/margins": 0.0031163152307271957, |
|
"rewards/rejected": -0.1405700445175171, |
|
"sft_loss": 1.3745372295379639, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7274196807435845, |
|
"grad_norm": 0.9312599301338196, |
|
"learning_rate": 4.307961365410118e-06, |
|
"logits/chosen": -14.044285774230957, |
|
"logits/rejected": -14.011823654174805, |
|
"logps/chosen": -1.4385414123535156, |
|
"logps/rejected": -1.4718294143676758, |
|
"loss": 1.5134, |
|
"odds_ratio_loss": 0.7482468485832214, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.14385412633419037, |
|
"rewards/margins": 0.003328789724037051, |
|
"rewards/rejected": -0.14718294143676758, |
|
"sft_loss": 1.4385414123535156, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7435845625378864, |
|
"grad_norm": 1.4249197244644165, |
|
"learning_rate": 4.278443835358854e-06, |
|
"logits/chosen": -14.115106582641602, |
|
"logits/rejected": -14.075739860534668, |
|
"logps/chosen": -1.3712975978851318, |
|
"logps/rejected": -1.5527522563934326, |
|
"loss": 1.4406, |
|
"odds_ratio_loss": 0.6929912567138672, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1371297538280487, |
|
"rewards/margins": 0.018145468086004257, |
|
"rewards/rejected": -0.15527524054050446, |
|
"sft_loss": 1.3712975978851318, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7597494443321883, |
|
"grad_norm": 1.1615644693374634, |
|
"learning_rate": 4.248415615843523e-06, |
|
"logits/chosen": -14.288152694702148, |
|
"logits/rejected": -14.206695556640625, |
|
"logps/chosen": -1.4021141529083252, |
|
"logps/rejected": -1.416723370552063, |
|
"loss": 1.4775, |
|
"odds_ratio_loss": 0.7538274526596069, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.14021141827106476, |
|
"rewards/margins": 0.001460921368561685, |
|
"rewards/rejected": -0.14167232811450958, |
|
"sft_loss": 1.4021141529083252, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7759143261264903, |
|
"grad_norm": 1.276267409324646, |
|
"learning_rate": 4.217885329624666e-06, |
|
"logits/chosen": -14.302003860473633, |
|
"logits/rejected": -14.307230949401855, |
|
"logps/chosen": -1.346254587173462, |
|
"logps/rejected": -1.4862271547317505, |
|
"loss": 1.4137, |
|
"odds_ratio_loss": 0.6745720505714417, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.13462546467781067, |
|
"rewards/margins": 0.013997259549796581, |
|
"rewards/rejected": -0.14862270653247833, |
|
"sft_loss": 1.346254587173462, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7920792079207921, |
|
"grad_norm": 1.6030430793762207, |
|
"learning_rate": 4.186861743633911e-06, |
|
"logits/chosen": -14.13404369354248, |
|
"logits/rejected": -14.251507759094238, |
|
"logps/chosen": -1.4151580333709717, |
|
"logps/rejected": -1.5721826553344727, |
|
"loss": 1.4904, |
|
"odds_ratio_loss": 0.7523505091667175, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1415158212184906, |
|
"rewards/margins": 0.015702461823821068, |
|
"rewards/rejected": -0.15721826255321503, |
|
"sft_loss": 1.4151580333709717, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.808244089715094, |
|
"grad_norm": 1.7222312688827515, |
|
"learning_rate": 4.155353766456497e-06, |
|
"logits/chosen": -14.4000825881958, |
|
"logits/rejected": -14.304115295410156, |
|
"logps/chosen": -1.433506727218628, |
|
"logps/rejected": -1.535611867904663, |
|
"loss": 1.5005, |
|
"odds_ratio_loss": 0.6703948378562927, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.14335067570209503, |
|
"rewards/margins": 0.010210518725216389, |
|
"rewards/rejected": -0.15356118977069855, |
|
"sft_loss": 1.433506727218628, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.808244089715094, |
|
"eval_logits/chosen": -14.227585792541504, |
|
"eval_logits/rejected": -14.265686988830566, |
|
"eval_logps/chosen": -1.4436272382736206, |
|
"eval_logps/rejected": -1.4898087978363037, |
|
"eval_loss": 1.5202080011367798, |
|
"eval_odds_ratio_loss": 0.7658076882362366, |
|
"eval_rewards/accuracies": 0.48181816935539246, |
|
"eval_rewards/chosen": -0.1443627029657364, |
|
"eval_rewards/margins": 0.004618145525455475, |
|
"eval_rewards/rejected": -0.14898087084293365, |
|
"eval_runtime": 207.676, |
|
"eval_samples_per_second": 5.297, |
|
"eval_sft_loss": 1.4436272382736206, |
|
"eval_steps_per_second": 2.648, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8244089715093958, |
|
"grad_norm": 1.143004059791565, |
|
"learning_rate": 4.123370445773134e-06, |
|
"logits/chosen": -14.356025695800781, |
|
"logits/rejected": -14.339376449584961, |
|
"logps/chosen": -1.4154841899871826, |
|
"logps/rejected": -1.4348183870315552, |
|
"loss": 1.4927, |
|
"odds_ratio_loss": 0.7723585963249207, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.14154842495918274, |
|
"rewards/margins": 0.001933417865075171, |
|
"rewards/rejected": -0.14348182082176208, |
|
"sft_loss": 1.4154841899871826, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8405738533036977, |
|
"grad_norm": 3.6751832962036133, |
|
"learning_rate": 4.090920965761906e-06, |
|
"logits/chosen": -14.4230375289917, |
|
"logits/rejected": -14.330423355102539, |
|
"logps/chosen": -1.4806926250457764, |
|
"logps/rejected": -1.4873076677322388, |
|
"loss": 1.559, |
|
"odds_ratio_loss": 0.7833209037780762, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.14806927740573883, |
|
"rewards/margins": 0.0006614929297938943, |
|
"rewards/rejected": -0.14873075485229492, |
|
"sft_loss": 1.4806926250457764, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8567387350979996, |
|
"grad_norm": 4.592033386230469, |
|
"learning_rate": 4.058014644460991e-06, |
|
"logits/chosen": -14.309356689453125, |
|
"logits/rejected": -14.266693115234375, |
|
"logps/chosen": -1.4232040643692017, |
|
"logps/rejected": -1.4629483222961426, |
|
"loss": 1.4967, |
|
"odds_ratio_loss": 0.7350074052810669, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.1423204094171524, |
|
"rewards/margins": 0.003974422812461853, |
|
"rewards/rejected": -0.14629481732845306, |
|
"sft_loss": 1.4232040643692017, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8729036168923014, |
|
"grad_norm": 1.3515141010284424, |
|
"learning_rate": 4.024660931092939e-06, |
|
"logits/chosen": -14.12739086151123, |
|
"logits/rejected": -14.135973930358887, |
|
"logps/chosen": -1.4027074575424194, |
|
"logps/rejected": -1.5116406679153442, |
|
"loss": 1.4748, |
|
"odds_ratio_loss": 0.7212173938751221, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.14027073979377747, |
|
"rewards/margins": 0.010893313214182854, |
|
"rewards/rejected": -0.15116406977176666, |
|
"sft_loss": 1.4027074575424194, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8890684986866033, |
|
"grad_norm": 3.3689217567443848, |
|
"learning_rate": 3.990869403351272e-06, |
|
"logits/chosen": -14.354001998901367, |
|
"logits/rejected": -14.225595474243164, |
|
"logps/chosen": -1.4652130603790283, |
|
"logps/rejected": -1.552912712097168, |
|
"loss": 1.5359, |
|
"odds_ratio_loss": 0.7067934274673462, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.14652130007743835, |
|
"rewards/margins": 0.008769966661930084, |
|
"rewards/rejected": -0.15529127418994904, |
|
"sft_loss": 1.4652130603790283, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9052333804809052, |
|
"grad_norm": 1.5204488039016724, |
|
"learning_rate": 3.956649764650206e-06, |
|
"logits/chosen": -14.487988471984863, |
|
"logits/rejected": -14.507904052734375, |
|
"logps/chosen": -1.4564487934112549, |
|
"logps/rejected": -1.5203144550323486, |
|
"loss": 1.5325, |
|
"odds_ratio_loss": 0.7608081102371216, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.1456448882818222, |
|
"rewards/margins": 0.006386570632457733, |
|
"rewards/rejected": -0.15203145146369934, |
|
"sft_loss": 1.4564487934112549, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9213982622752072, |
|
"grad_norm": 2.2319583892822266, |
|
"learning_rate": 3.92201184133826e-06, |
|
"logits/chosen": -14.393239974975586, |
|
"logits/rejected": -14.3502779006958, |
|
"logps/chosen": -1.3946270942687988, |
|
"logps/rejected": -1.444805383682251, |
|
"loss": 1.4679, |
|
"odds_ratio_loss": 0.7322729229927063, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13946272432804108, |
|
"rewards/margins": 0.005017831921577454, |
|
"rewards/rejected": -0.14448055624961853, |
|
"sft_loss": 1.3946270942687988, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.937563144069509, |
|
"grad_norm": 1.4617536067962646, |
|
"learning_rate": 3.886965579876572e-06, |
|
"logits/chosen": -14.353238105773926, |
|
"logits/rejected": -14.260797500610352, |
|
"logps/chosen": -1.3793189525604248, |
|
"logps/rejected": -1.445691704750061, |
|
"loss": 1.4501, |
|
"odds_ratio_loss": 0.7080078125, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13793189823627472, |
|
"rewards/margins": 0.006637275218963623, |
|
"rewards/rejected": -0.14456915855407715, |
|
"sft_loss": 1.3793189525604248, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9537280258638109, |
|
"grad_norm": 1.2430846691131592, |
|
"learning_rate": 3.851521043982716e-06, |
|
"logits/chosen": -14.31140422821045, |
|
"logits/rejected": -14.404243469238281, |
|
"logps/chosen": -1.424002766609192, |
|
"logps/rejected": -1.4054510593414307, |
|
"loss": 1.4998, |
|
"odds_ratio_loss": 0.7578663229942322, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.14240026473999023, |
|
"rewards/margins": -0.0018551532411947846, |
|
"rewards/rejected": -0.14054511487483978, |
|
"sft_loss": 1.424002766609192, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9698929076581128, |
|
"grad_norm": 1.5072684288024902, |
|
"learning_rate": 3.81568841174086e-06, |
|
"logits/chosen": -14.169085502624512, |
|
"logits/rejected": -14.1954345703125, |
|
"logps/chosen": -1.4412424564361572, |
|
"logps/rejected": -1.4657504558563232, |
|
"loss": 1.5191, |
|
"odds_ratio_loss": 0.7788038849830627, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.14412423968315125, |
|
"rewards/margins": 0.002450800035148859, |
|
"rewards/rejected": -0.14657504856586456, |
|
"sft_loss": 1.4412424564361572, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9860577894524146, |
|
"grad_norm": 1.2968331575393677, |
|
"learning_rate": 3.7794779726790664e-06, |
|
"logits/chosen": -14.130575180053711, |
|
"logits/rejected": -14.240781784057617, |
|
"logps/chosen": -1.3836543560028076, |
|
"logps/rejected": -1.457695722579956, |
|
"loss": 1.4561, |
|
"odds_ratio_loss": 0.7247332334518433, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.13836543262004852, |
|
"rewards/margins": 0.007404146250337362, |
|
"rewards/rejected": -0.14576958119869232, |
|
"sft_loss": 1.3836543560028076, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.0022226712467166, |
|
"grad_norm": 4.868699550628662, |
|
"learning_rate": 3.7429001248146096e-06, |
|
"logits/chosen": -14.240348815917969, |
|
"logits/rejected": -14.297922134399414, |
|
"logps/chosen": -1.4243017435073853, |
|
"logps/rejected": -1.5530868768692017, |
|
"loss": 1.4924, |
|
"odds_ratio_loss": 0.680776059627533, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1424301713705063, |
|
"rewards/margins": 0.012878507375717163, |
|
"rewards/rejected": -0.15530869364738464, |
|
"sft_loss": 1.4243017435073853, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0183875530410185, |
|
"grad_norm": 0.8127214312553406, |
|
"learning_rate": 3.7059653716681227e-06, |
|
"logits/chosen": -14.380844116210938, |
|
"logits/rejected": -14.255830764770508, |
|
"logps/chosen": -1.4107029438018799, |
|
"logps/rejected": -1.521928071975708, |
|
"loss": 1.4861, |
|
"odds_ratio_loss": 0.7541464567184448, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.14107032120227814, |
|
"rewards/margins": 0.01112250704318285, |
|
"rewards/rejected": -0.15219281613826752, |
|
"sft_loss": 1.4107029438018799, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.0345524348353203, |
|
"grad_norm": 3.8503897190093994, |
|
"learning_rate": 3.668684319247463e-06, |
|
"logits/chosen": -14.447845458984375, |
|
"logits/rejected": -14.433076858520508, |
|
"logps/chosen": -1.367375135421753, |
|
"logps/rejected": -1.548612356185913, |
|
"loss": 1.4348, |
|
"odds_ratio_loss": 0.6741297841072083, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.13673751056194305, |
|
"rewards/margins": 0.018123725429177284, |
|
"rewards/rejected": -0.1548612415790558, |
|
"sft_loss": 1.367375135421753, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.0507173166296222, |
|
"grad_norm": 0.9416384100914001, |
|
"learning_rate": 3.6310676730021373e-06, |
|
"logits/chosen": -14.3724946975708, |
|
"logits/rejected": -14.455398559570312, |
|
"logps/chosen": -1.3245970010757446, |
|
"logps/rejected": -1.3460277318954468, |
|
"loss": 1.3979, |
|
"odds_ratio_loss": 0.7330806255340576, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.13245970010757446, |
|
"rewards/margins": 0.002143078250810504, |
|
"rewards/rejected": -0.13460277020931244, |
|
"sft_loss": 1.3245970010757446, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.066882198423924, |
|
"grad_norm": 2.8321056365966797, |
|
"learning_rate": 3.593126234749178e-06, |
|
"logits/chosen": -14.317327499389648, |
|
"logits/rejected": -14.38727855682373, |
|
"logps/chosen": -1.423680067062378, |
|
"logps/rejected": -1.4616180658340454, |
|
"loss": 1.4976, |
|
"odds_ratio_loss": 0.739305853843689, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.14236800372600555, |
|
"rewards/margins": 0.0037938044406473637, |
|
"rewards/rejected": -0.14616182446479797, |
|
"sft_loss": 1.423680067062378, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.083047080218226, |
|
"grad_norm": 0.9518349766731262, |
|
"learning_rate": 3.554870899571343e-06, |
|
"logits/chosen": -14.144752502441406, |
|
"logits/rejected": -14.251813888549805, |
|
"logps/chosen": -1.4052397012710571, |
|
"logps/rejected": -1.5265625715255737, |
|
"loss": 1.4767, |
|
"odds_ratio_loss": 0.7148950695991516, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1405239850282669, |
|
"rewards/margins": 0.012132286094129086, |
|
"rewards/rejected": -0.15265627205371857, |
|
"sft_loss": 1.4052397012710571, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.0992119620125278, |
|
"grad_norm": 3.0823421478271484, |
|
"learning_rate": 3.5163126526885373e-06, |
|
"logits/chosen": -14.263737678527832, |
|
"logits/rejected": -14.341888427734375, |
|
"logps/chosen": -1.3758028745651245, |
|
"logps/rejected": -1.4713342189788818, |
|
"loss": 1.4506, |
|
"odds_ratio_loss": 0.748176097869873, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.1375802755355835, |
|
"rewards/margins": 0.009553151205182076, |
|
"rewards/rejected": -0.14713343977928162, |
|
"sft_loss": 1.3758028745651245, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.1153768438068297, |
|
"grad_norm": 1.1957412958145142, |
|
"learning_rate": 3.4774625663033484e-06, |
|
"logits/chosen": -14.262721061706543, |
|
"logits/rejected": -14.248212814331055, |
|
"logps/chosen": -1.4033539295196533, |
|
"logps/rejected": -1.4489859342575073, |
|
"loss": 1.4783, |
|
"odds_ratio_loss": 0.7493518590927124, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.14033538103103638, |
|
"rewards/margins": 0.004563204478472471, |
|
"rewards/rejected": -0.14489860832691193, |
|
"sft_loss": 1.4033539295196533, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.1315417256011315, |
|
"grad_norm": 1.0352710485458374, |
|
"learning_rate": 3.4383317964216067e-06, |
|
"logits/chosen": -14.168815612792969, |
|
"logits/rejected": -14.324069023132324, |
|
"logps/chosen": -1.3365106582641602, |
|
"logps/rejected": -1.3756332397460938, |
|
"loss": 1.4108, |
|
"odds_ratio_loss": 0.7429829835891724, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.13365106284618378, |
|
"rewards/margins": 0.0039122505113482475, |
|
"rewards/rejected": -0.1375633180141449, |
|
"sft_loss": 1.3365106582641602, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1477066073954334, |
|
"grad_norm": 2.4808411598205566, |
|
"learning_rate": 3.398931579648877e-06, |
|
"logits/chosen": -14.3150053024292, |
|
"logits/rejected": -14.531530380249023, |
|
"logps/chosen": -1.4491299390792847, |
|
"logps/rejected": -1.5492023229599, |
|
"loss": 1.5203, |
|
"odds_ratio_loss": 0.7113555669784546, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.14491300284862518, |
|
"rewards/margins": 0.010007232427597046, |
|
"rewards/rejected": -0.15492023527622223, |
|
"sft_loss": 1.4491299390792847, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.1638714891897353, |
|
"grad_norm": 1.2726991176605225, |
|
"learning_rate": 3.359273229963813e-06, |
|
"logits/chosen": -14.357129096984863, |
|
"logits/rejected": -14.291903495788574, |
|
"logps/chosen": -1.3459408283233643, |
|
"logps/rejected": -1.3911712169647217, |
|
"loss": 1.421, |
|
"odds_ratio_loss": 0.750839114189148, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.13459408283233643, |
|
"rewards/margins": 0.004523060750216246, |
|
"rewards/rejected": -0.13911715149879456, |
|
"sft_loss": 1.3459408283233643, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.1800363709840371, |
|
"grad_norm": 1.0978913307189941, |
|
"learning_rate": 3.319368135469285e-06, |
|
"logits/chosen": -14.36750602722168, |
|
"logits/rejected": -14.435731887817383, |
|
"logps/chosen": -1.3765571117401123, |
|
"logps/rejected": -1.4039866924285889, |
|
"loss": 1.4538, |
|
"odds_ratio_loss": 0.7719755172729492, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.13765572011470795, |
|
"rewards/margins": 0.002742946846410632, |
|
"rewards/rejected": -0.14039869606494904, |
|
"sft_loss": 1.3765571117401123, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.196201252778339, |
|
"grad_norm": 2.1035361289978027, |
|
"learning_rate": 3.279227755122228e-06, |
|
"logits/chosen": -14.316058158874512, |
|
"logits/rejected": -14.294093132019043, |
|
"logps/chosen": -1.320318579673767, |
|
"logps/rejected": -1.5284496545791626, |
|
"loss": 1.3866, |
|
"odds_ratio_loss": 0.6632006764411926, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.13203184306621552, |
|
"rewards/margins": 0.020813116803765297, |
|
"rewards/rejected": -0.15284495055675507, |
|
"sft_loss": 1.320318579673767, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.2123661345726409, |
|
"grad_norm": 3.223933696746826, |
|
"learning_rate": 3.2388636154431417e-06, |
|
"logits/chosen": -14.34916877746582, |
|
"logits/rejected": -14.280328750610352, |
|
"logps/chosen": -1.429145097732544, |
|
"logps/rejected": -1.5203419923782349, |
|
"loss": 1.502, |
|
"odds_ratio_loss": 0.7281750440597534, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.1429145336151123, |
|
"rewards/margins": 0.009119677357375622, |
|
"rewards/rejected": -0.152034193277359, |
|
"sft_loss": 1.429145097732544, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2285310163669427, |
|
"grad_norm": 1.1619030237197876, |
|
"learning_rate": 3.198287307206192e-06, |
|
"logits/chosen": -14.091611862182617, |
|
"logits/rejected": -14.187002182006836, |
|
"logps/chosen": -1.4056107997894287, |
|
"logps/rejected": -1.442886233329773, |
|
"loss": 1.4829, |
|
"odds_ratio_loss": 0.7725043296813965, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.1405610740184784, |
|
"rewards/margins": 0.003727543633431196, |
|
"rewards/rejected": -0.14428862929344177, |
|
"sft_loss": 1.4056107997894287, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.2446958981612446, |
|
"grad_norm": 1.0456814765930176, |
|
"learning_rate": 3.157510482110856e-06, |
|
"logits/chosen": -14.408856391906738, |
|
"logits/rejected": -14.243043899536133, |
|
"logps/chosen": -1.3281633853912354, |
|
"logps/rejected": -1.3863494396209717, |
|
"loss": 1.4004, |
|
"odds_ratio_loss": 0.7221428751945496, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.132816344499588, |
|
"rewards/margins": 0.005818599369376898, |
|
"rewards/rejected": -0.13863493502140045, |
|
"sft_loss": 1.3281633853912354, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.2608607799555465, |
|
"grad_norm": 1.2318408489227295, |
|
"learning_rate": 3.116544849436077e-06, |
|
"logits/chosen": -14.334813117980957, |
|
"logits/rejected": -14.20678997039795, |
|
"logps/chosen": -1.5153284072875977, |
|
"logps/rejected": -1.6125590801239014, |
|
"loss": 1.588, |
|
"odds_ratio_loss": 0.7266558408737183, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.151532843708992, |
|
"rewards/margins": 0.009723084978759289, |
|
"rewards/rejected": -0.16125592589378357, |
|
"sft_loss": 1.5153284072875977, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.2770256617498483, |
|
"grad_norm": 1.3976880311965942, |
|
"learning_rate": 3.0754021726778848e-06, |
|
"logits/chosen": -14.33143138885498, |
|
"logits/rejected": -14.257779121398926, |
|
"logps/chosen": -1.3455626964569092, |
|
"logps/rejected": -1.4571717977523804, |
|
"loss": 1.4162, |
|
"odds_ratio_loss": 0.7065266370773315, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.13455626368522644, |
|
"rewards/margins": 0.011160916648805141, |
|
"rewards/rejected": -0.14571718871593475, |
|
"sft_loss": 1.3455626964569092, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.2931905435441502, |
|
"grad_norm": 0.7877367734909058, |
|
"learning_rate": 3.0340942661714463e-06, |
|
"logits/chosen": -14.352252006530762, |
|
"logits/rejected": -14.257513046264648, |
|
"logps/chosen": -1.4310262203216553, |
|
"logps/rejected": -1.4348089694976807, |
|
"loss": 1.5077, |
|
"odds_ratio_loss": 0.7662674188613892, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.14310263097286224, |
|
"rewards/margins": 0.00037826746120117605, |
|
"rewards/rejected": -0.14348089694976807, |
|
"sft_loss": 1.4310262203216553, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3093554253384523, |
|
"grad_norm": 1.265386939048767, |
|
"learning_rate": 2.992632991698512e-06, |
|
"logits/chosen": -14.194437980651855, |
|
"logits/rejected": -14.312055587768555, |
|
"logps/chosen": -1.3498046398162842, |
|
"logps/rejected": -1.4344502687454224, |
|
"loss": 1.4207, |
|
"odds_ratio_loss": 0.7088189721107483, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1349804699420929, |
|
"rewards/margins": 0.008464555256068707, |
|
"rewards/rejected": -0.14344502985477448, |
|
"sft_loss": 1.3498046398162842, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.3255203071327541, |
|
"grad_norm": 1.7529423236846924, |
|
"learning_rate": 2.9510302550812537e-06, |
|
"logits/chosen": -14.307215690612793, |
|
"logits/rejected": -14.374090194702148, |
|
"logps/chosen": -1.3449764251708984, |
|
"logps/rejected": -1.5051848888397217, |
|
"loss": 1.4155, |
|
"odds_ratio_loss": 0.7051501274108887, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.13449765741825104, |
|
"rewards/margins": 0.016020851209759712, |
|
"rewards/rejected": -0.1505185067653656, |
|
"sft_loss": 1.3449764251708984, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.341685188927056, |
|
"grad_norm": 3.534449815750122, |
|
"learning_rate": 2.9092980027634325e-06, |
|
"logits/chosen": -14.194910049438477, |
|
"logits/rejected": -14.260457038879395, |
|
"logps/chosen": -1.3157680034637451, |
|
"logps/rejected": -1.39622163772583, |
|
"loss": 1.3858, |
|
"odds_ratio_loss": 0.7005105018615723, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.131576806306839, |
|
"rewards/margins": 0.008045351132750511, |
|
"rewards/rejected": -0.13962216675281525, |
|
"sft_loss": 1.3157680034637451, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.3578500707213579, |
|
"grad_norm": 1.6155622005462646, |
|
"learning_rate": 2.867448218379927e-06, |
|
"logits/chosen": -14.231335639953613, |
|
"logits/rejected": -14.248939514160156, |
|
"logps/chosen": -1.3620965480804443, |
|
"logps/rejected": -1.409558892250061, |
|
"loss": 1.4355, |
|
"odds_ratio_loss": 0.734248697757721, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.1362096518278122, |
|
"rewards/margins": 0.0047462377697229385, |
|
"rewards/rejected": -0.14095589518547058, |
|
"sft_loss": 1.3620965480804443, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.3740149525156597, |
|
"grad_norm": 4.540154933929443, |
|
"learning_rate": 2.825492919315559e-06, |
|
"logits/chosen": -14.306146621704102, |
|
"logits/rejected": -14.476399421691895, |
|
"logps/chosen": -1.4043729305267334, |
|
"logps/rejected": -1.4499131441116333, |
|
"loss": 1.4789, |
|
"odds_ratio_loss": 0.7450671195983887, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1404372900724411, |
|
"rewards/margins": 0.004554024897515774, |
|
"rewards/rejected": -0.14499132335186005, |
|
"sft_loss": 1.4043729305267334, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.3901798343099616, |
|
"grad_norm": 1.2316781282424927, |
|
"learning_rate": 2.7834441532542482e-06, |
|
"logits/chosen": -14.352537155151367, |
|
"logits/rejected": -14.446965217590332, |
|
"logps/chosen": -1.3581891059875488, |
|
"logps/rejected": -1.4636138677597046, |
|
"loss": 1.4297, |
|
"odds_ratio_loss": 0.7155886888504028, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.13581891357898712, |
|
"rewards/margins": 0.01054247748106718, |
|
"rewards/rejected": -0.14636139571666718, |
|
"sft_loss": 1.3581891059875488, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.4063447161042635, |
|
"grad_norm": 0.915081799030304, |
|
"learning_rate": 2.74131399471945e-06, |
|
"logits/chosen": -14.232261657714844, |
|
"logits/rejected": -14.369558334350586, |
|
"logps/chosen": -1.4017927646636963, |
|
"logps/rejected": -1.4412128925323486, |
|
"loss": 1.4755, |
|
"odds_ratio_loss": 0.7375406622886658, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.14017929136753082, |
|
"rewards/margins": 0.003942002542316914, |
|
"rewards/rejected": -0.14412127435207367, |
|
"sft_loss": 1.4017927646636963, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.4225095978985653, |
|
"grad_norm": 1.1700351238250732, |
|
"learning_rate": 2.6991145416068947e-06, |
|
"logits/chosen": -14.184051513671875, |
|
"logits/rejected": -14.361761093139648, |
|
"logps/chosen": -1.3888486623764038, |
|
"logps/rejected": -1.3866727352142334, |
|
"loss": 1.4645, |
|
"odds_ratio_loss": 0.7568970918655396, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.13888487219810486, |
|
"rewards/margins": -0.00021760519302915782, |
|
"rewards/rejected": -0.1386672556400299, |
|
"sft_loss": 1.3888486623764038, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.4386744796928672, |
|
"grad_norm": 0.7416606545448303, |
|
"learning_rate": 2.6568579117106143e-06, |
|
"logits/chosen": -14.222585678100586, |
|
"logits/rejected": -14.173550605773926, |
|
"logps/chosen": -1.321872591972351, |
|
"logps/rejected": -1.451570749282837, |
|
"loss": 1.3933, |
|
"odds_ratio_loss": 0.7138932943344116, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.13218727707862854, |
|
"rewards/margins": 0.012969812378287315, |
|
"rewards/rejected": -0.1451570689678192, |
|
"sft_loss": 1.321872591972351, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.454839361487169, |
|
"grad_norm": 0.7456266283988953, |
|
"learning_rate": 2.6145562392432544e-06, |
|
"logits/chosen": -14.201733589172363, |
|
"logits/rejected": -14.159896850585938, |
|
"logps/chosen": -1.371537446975708, |
|
"logps/rejected": -1.4001505374908447, |
|
"loss": 1.4466, |
|
"odds_ratio_loss": 0.7501237392425537, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.137153759598732, |
|
"rewards/margins": 0.002861298155039549, |
|
"rewards/rejected": -0.14001503586769104, |
|
"sft_loss": 1.371537446975708, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.471004243281471, |
|
"grad_norm": 1.7800395488739014, |
|
"learning_rate": 2.5722216713516682e-06, |
|
"logits/chosen": -14.122312545776367, |
|
"logits/rejected": -14.1841402053833, |
|
"logps/chosen": -1.2916905879974365, |
|
"logps/rejected": -1.3739659786224365, |
|
"loss": 1.3653, |
|
"odds_ratio_loss": 0.7365130186080933, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.1291690617799759, |
|
"rewards/margins": 0.008227519690990448, |
|
"rewards/rejected": -0.13739657402038574, |
|
"sft_loss": 1.2916905879974365, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.4871691250757728, |
|
"grad_norm": 3.366191864013672, |
|
"learning_rate": 2.5298663646288064e-06, |
|
"logits/chosen": -14.279853820800781, |
|
"logits/rejected": -14.313766479492188, |
|
"logps/chosen": -1.3366254568099976, |
|
"logps/rejected": -1.4743283987045288, |
|
"loss": 1.4084, |
|
"odds_ratio_loss": 0.7178291082382202, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.13366253674030304, |
|
"rewards/margins": 0.013770299032330513, |
|
"rewards/rejected": -0.1474328488111496, |
|
"sft_loss": 1.3366254568099976, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.503334006870075, |
|
"grad_norm": 1.793541431427002, |
|
"learning_rate": 2.487502481622879e-06, |
|
"logits/chosen": -14.228408813476562, |
|
"logits/rejected": -14.142854690551758, |
|
"logps/chosen": -1.3270151615142822, |
|
"logps/rejected": -1.4341893196105957, |
|
"loss": 1.3983, |
|
"odds_ratio_loss": 0.7129431366920471, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.13270151615142822, |
|
"rewards/margins": 0.010717417113482952, |
|
"rewards/rejected": -0.14341893792152405, |
|
"sft_loss": 1.3270151615142822, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.5194988886643768, |
|
"grad_norm": 2.546449661254883, |
|
"learning_rate": 2.4451421873448253e-06, |
|
"logits/chosen": -14.15150260925293, |
|
"logits/rejected": -14.336977005004883, |
|
"logps/chosen": -1.431612253189087, |
|
"logps/rejected": -1.4608542919158936, |
|
"loss": 1.508, |
|
"odds_ratio_loss": 0.7637500762939453, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.14316122233867645, |
|
"rewards/margins": 0.002924212021753192, |
|
"rewards/rejected": -0.1460854411125183, |
|
"sft_loss": 1.431612253189087, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.5356637704586786, |
|
"grad_norm": 2.0193891525268555, |
|
"learning_rate": 2.40279764577506e-06, |
|
"logits/chosen": -14.358665466308594, |
|
"logits/rejected": -14.505513191223145, |
|
"logps/chosen": -1.403634786605835, |
|
"logps/rejected": -1.4488627910614014, |
|
"loss": 1.48, |
|
"odds_ratio_loss": 0.7633059620857239, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.14036348462104797, |
|
"rewards/margins": 0.004522812552750111, |
|
"rewards/rejected": -0.14488628506660461, |
|
"sft_loss": 1.403634786605835, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.5518286522529805, |
|
"grad_norm": 1.2108488082885742, |
|
"learning_rate": 2.3604810163705242e-06, |
|
"logits/chosen": -14.17876148223877, |
|
"logits/rejected": -14.2489652633667, |
|
"logps/chosen": -1.306792140007019, |
|
"logps/rejected": -1.3910942077636719, |
|
"loss": 1.377, |
|
"odds_ratio_loss": 0.7023099660873413, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.1306792050600052, |
|
"rewards/margins": 0.00843021459877491, |
|
"rewards/rejected": -0.13910941779613495, |
|
"sft_loss": 1.306792140007019, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.5679935340472824, |
|
"grad_norm": 1.9210587739944458, |
|
"learning_rate": 2.3182044505730364e-06, |
|
"logits/chosen": -14.331990242004395, |
|
"logits/rejected": -14.305018424987793, |
|
"logps/chosen": -1.2632302045822144, |
|
"logps/rejected": -1.3584424257278442, |
|
"loss": 1.3349, |
|
"odds_ratio_loss": 0.7163167595863342, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.12632302939891815, |
|
"rewards/margins": 0.009521213360130787, |
|
"rewards/rejected": -0.13584424555301666, |
|
"sft_loss": 1.2632302045822144, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.5841584158415842, |
|
"grad_norm": 1.7603510618209839, |
|
"learning_rate": 2.275980088319941e-06, |
|
"logits/chosen": -14.362065315246582, |
|
"logits/rejected": -14.22284984588623, |
|
"logps/chosen": -1.269855260848999, |
|
"logps/rejected": -1.3405383825302124, |
|
"loss": 1.3406, |
|
"odds_ratio_loss": 0.7074419260025024, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.12698553502559662, |
|
"rewards/margins": 0.0070683010853827, |
|
"rewards/rejected": -0.13405382633209229, |
|
"sft_loss": 1.269855260848999, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.600323297635886, |
|
"grad_norm": 1.6920086145401, |
|
"learning_rate": 2.2338200545580577e-06, |
|
"logits/chosen": -14.224035263061523, |
|
"logits/rejected": -14.358423233032227, |
|
"logps/chosen": -1.2658283710479736, |
|
"logps/rejected": -1.4482189416885376, |
|
"loss": 1.3345, |
|
"odds_ratio_loss": 0.6871744990348816, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.12658283114433289, |
|
"rewards/margins": 0.01823904737830162, |
|
"rewards/rejected": -0.1448218822479248, |
|
"sft_loss": 1.2658283710479736, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.616488179430188, |
|
"grad_norm": 1.0991649627685547, |
|
"learning_rate": 2.191736455761947e-06, |
|
"logits/chosen": -14.324908256530762, |
|
"logits/rejected": -14.3560209274292, |
|
"logps/chosen": -1.2651708126068115, |
|
"logps/rejected": -1.290913701057434, |
|
"loss": 1.3401, |
|
"odds_ratio_loss": 0.749754786491394, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.12651710212230682, |
|
"rewards/margins": 0.0025742852594703436, |
|
"rewards/rejected": -0.12909138202667236, |
|
"sft_loss": 1.2651708126068115, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.616488179430188, |
|
"eval_logits/chosen": -14.268522262573242, |
|
"eval_logits/rejected": -14.308253288269043, |
|
"eval_logps/chosen": -1.3874938488006592, |
|
"eval_logps/rejected": -1.4423273801803589, |
|
"eval_loss": 1.4635207653045654, |
|
"eval_odds_ratio_loss": 0.7602682709693909, |
|
"eval_rewards/accuracies": 0.48363634943962097, |
|
"eval_rewards/chosen": -0.1387493908405304, |
|
"eval_rewards/margins": 0.00548336049541831, |
|
"eval_rewards/rejected": -0.14423276484012604, |
|
"eval_runtime": 207.8962, |
|
"eval_samples_per_second": 5.291, |
|
"eval_sft_loss": 1.3874938488006592, |
|
"eval_steps_per_second": 2.646, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6326530612244898, |
|
"grad_norm": 0.9229074716567993, |
|
"learning_rate": 2.1497413764574673e-06, |
|
"logits/chosen": -14.391751289367676, |
|
"logits/rejected": -14.302392959594727, |
|
"logps/chosen": -1.4207522869110107, |
|
"logps/rejected": -1.4941614866256714, |
|
"loss": 1.4937, |
|
"odds_ratio_loss": 0.7297941446304321, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.14207521080970764, |
|
"rewards/margins": 0.007340931333601475, |
|
"rewards/rejected": -0.14941613376140594, |
|
"sft_loss": 1.4207522869110107, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.6488179430187917, |
|
"grad_norm": 1.2489970922470093, |
|
"learning_rate": 2.1078468757516395e-06, |
|
"logits/chosen": -14.41105842590332, |
|
"logits/rejected": -14.309954643249512, |
|
"logps/chosen": -1.3737413883209229, |
|
"logps/rejected": -1.331855297088623, |
|
"loss": 1.453, |
|
"odds_ratio_loss": 0.7925962805747986, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.1373741328716278, |
|
"rewards/margins": -0.004188609775155783, |
|
"rewards/rejected": -0.1331855207681656, |
|
"sft_loss": 1.3737413883209229, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.6649828248130936, |
|
"grad_norm": 0.9103444814682007, |
|
"learning_rate": 2.0660649838698145e-06, |
|
"logits/chosen": -14.60859203338623, |
|
"logits/rejected": -14.583990097045898, |
|
"logps/chosen": -1.3282297849655151, |
|
"logps/rejected": -1.4166333675384521, |
|
"loss": 1.3999, |
|
"odds_ratio_loss": 0.7163518071174622, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.13282299041748047, |
|
"rewards/margins": 0.008840366266667843, |
|
"rewards/rejected": -0.1416633427143097, |
|
"sft_loss": 1.3282297849655151, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.6811477066073954, |
|
"grad_norm": 1.1333231925964355, |
|
"learning_rate": 2.0244076987011284e-06, |
|
"logits/chosen": -14.382695198059082, |
|
"logits/rejected": -14.247182846069336, |
|
"logps/chosen": -1.3871229887008667, |
|
"logps/rejected": -1.5080008506774902, |
|
"loss": 1.4558, |
|
"odds_ratio_loss": 0.68644779920578, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1387123018503189, |
|
"rewards/margins": 0.012087779119610786, |
|
"rewards/rejected": -0.15080007910728455, |
|
"sft_loss": 1.3871229887008667, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.6973125884016973, |
|
"grad_norm": 1.302032709121704, |
|
"learning_rate": 1.982886982353251e-06, |
|
"logits/chosen": -14.392558097839355, |
|
"logits/rejected": -14.241909980773926, |
|
"logps/chosen": -1.3640697002410889, |
|
"logps/rejected": -1.5009006261825562, |
|
"loss": 1.4359, |
|
"odds_ratio_loss": 0.7178789377212524, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.13640697300434113, |
|
"rewards/margins": 0.013683101162314415, |
|
"rewards/rejected": -0.1500900685787201, |
|
"sft_loss": 1.3640697002410889, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.7134774701959992, |
|
"grad_norm": 1.7859091758728027, |
|
"learning_rate": 1.941514757717392e-06, |
|
"logits/chosen": -14.138816833496094, |
|
"logits/rejected": -14.210226058959961, |
|
"logps/chosen": -1.3156766891479492, |
|
"logps/rejected": -1.4917762279510498, |
|
"loss": 1.3807, |
|
"odds_ratio_loss": 0.6497665643692017, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.13156768679618835, |
|
"rewards/margins": 0.01760994642972946, |
|
"rewards/rejected": -0.1491776406764984, |
|
"sft_loss": 1.3156766891479492, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.729642351990301, |
|
"grad_norm": 2.0628256797790527, |
|
"learning_rate": 1.9003029050445953e-06, |
|
"logits/chosen": -14.267855644226074, |
|
"logits/rejected": -14.399972915649414, |
|
"logps/chosen": -1.402465581893921, |
|
"logps/rejected": -1.4434514045715332, |
|
"loss": 1.4747, |
|
"odds_ratio_loss": 0.7224588990211487, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.14024657011032104, |
|
"rewards/margins": 0.004098571836948395, |
|
"rewards/rejected": -0.14434513449668884, |
|
"sft_loss": 1.402465581893921, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.745807233784603, |
|
"grad_norm": 1.5042709112167358, |
|
"learning_rate": 1.8592632585342523e-06, |
|
"logits/chosen": -14.195714950561523, |
|
"logits/rejected": -14.285571098327637, |
|
"logps/chosen": -1.3312032222747803, |
|
"logps/rejected": -1.412341833114624, |
|
"loss": 1.4047, |
|
"odds_ratio_loss": 0.7354634404182434, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.1331203281879425, |
|
"rewards/margins": 0.008113870397210121, |
|
"rewards/rejected": -0.14123418927192688, |
|
"sft_loss": 1.3312032222747803, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.7619721155789048, |
|
"grad_norm": 3.4297995567321777, |
|
"learning_rate": 1.8184076029358527e-06, |
|
"logits/chosen": -14.20643138885498, |
|
"logits/rejected": -14.019030570983887, |
|
"logps/chosen": -1.2683379650115967, |
|
"logps/rejected": -1.2236586809158325, |
|
"loss": 1.3443, |
|
"odds_ratio_loss": 0.7591326832771301, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.12683378159999847, |
|
"rewards/margins": -0.00446792459115386, |
|
"rewards/rejected": -0.12236586958169937, |
|
"sft_loss": 1.2683379650115967, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.7781369973732066, |
|
"grad_norm": 1.0218937397003174, |
|
"learning_rate": 1.7777476701649318e-06, |
|
"logits/chosen": -14.1577730178833, |
|
"logits/rejected": -14.125236511230469, |
|
"logps/chosen": -1.3477040529251099, |
|
"logps/rejected": -1.391446828842163, |
|
"loss": 1.4231, |
|
"odds_ratio_loss": 0.7540372610092163, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.13477042317390442, |
|
"rewards/margins": 0.004374279640614986, |
|
"rewards/rejected": -0.1391446888446808, |
|
"sft_loss": 1.3477040529251099, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.7943018791675085, |
|
"grad_norm": 1.4984055757522583, |
|
"learning_rate": 1.7372951359341925e-06, |
|
"logits/chosen": -14.369695663452148, |
|
"logits/rejected": -14.277885437011719, |
|
"logps/chosen": -1.2875721454620361, |
|
"logps/rejected": -1.3878809213638306, |
|
"loss": 1.3577, |
|
"odds_ratio_loss": 0.7012876272201538, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.12875720858573914, |
|
"rewards/margins": 0.01003087218850851, |
|
"rewards/rejected": -0.13878807425498962, |
|
"sft_loss": 1.2875721454620361, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.8104667609618104, |
|
"grad_norm": 3.3275625705718994, |
|
"learning_rate": 1.6970616164007547e-06, |
|
"logits/chosen": -14.229268074035645, |
|
"logits/rejected": -14.10546875, |
|
"logps/chosen": -1.364091396331787, |
|
"logps/rejected": -1.3946739435195923, |
|
"loss": 1.4435, |
|
"odds_ratio_loss": 0.7942220568656921, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.13640913367271423, |
|
"rewards/margins": 0.0030582635663449764, |
|
"rewards/rejected": -0.13946738839149475, |
|
"sft_loss": 1.364091396331787, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.8266316427561122, |
|
"grad_norm": 2.735656976699829, |
|
"learning_rate": 1.6570586648305276e-06, |
|
"logits/chosen": -14.143117904663086, |
|
"logits/rejected": -14.2241849899292, |
|
"logps/chosen": -1.344879150390625, |
|
"logps/rejected": -1.493446707725525, |
|
"loss": 1.4182, |
|
"odds_ratio_loss": 0.733532726764679, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.13448792695999146, |
|
"rewards/margins": 0.014856770634651184, |
|
"rewards/rejected": -0.14934466779232025, |
|
"sft_loss": 1.344879150390625, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.842796524550414, |
|
"grad_norm": 1.1568862199783325, |
|
"learning_rate": 1.6172977682806151e-06, |
|
"logits/chosen": -14.38661003112793, |
|
"logits/rejected": -14.517931938171387, |
|
"logps/chosen": -1.3603746891021729, |
|
"logps/rejected": -1.5093238353729248, |
|
"loss": 1.4288, |
|
"odds_ratio_loss": 0.68376624584198, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.1360374540090561, |
|
"rewards/margins": 0.014894920401275158, |
|
"rewards/rejected": -0.15093238651752472, |
|
"sft_loss": 1.3603746891021729, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.858961406344716, |
|
"grad_norm": 1.1773515939712524, |
|
"learning_rate": 1.5777903443007586e-06, |
|
"logits/chosen": -14.423624992370605, |
|
"logits/rejected": -14.032621383666992, |
|
"logps/chosen": -1.387117624282837, |
|
"logps/rejected": -1.4605300426483154, |
|
"loss": 1.4607, |
|
"odds_ratio_loss": 0.7362414598464966, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13871176540851593, |
|
"rewards/margins": 0.007341254502534866, |
|
"rewards/rejected": -0.1460530012845993, |
|
"sft_loss": 1.387117624282837, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.8751262881390178, |
|
"grad_norm": 1.5692604780197144, |
|
"learning_rate": 1.5385477376547226e-06, |
|
"logits/chosen": -14.410656929016113, |
|
"logits/rejected": -14.352084159851074, |
|
"logps/chosen": -1.3973274230957031, |
|
"logps/rejected": -1.4963886737823486, |
|
"loss": 1.4675, |
|
"odds_ratio_loss": 0.7020548582077026, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.1397327482700348, |
|
"rewards/margins": 0.009906120598316193, |
|
"rewards/rejected": -0.14963887631893158, |
|
"sft_loss": 1.3973274230957031, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.89129116993332, |
|
"grad_norm": 3.0858218669891357, |
|
"learning_rate": 1.4995812170625845e-06, |
|
"logits/chosen": -14.365419387817383, |
|
"logits/rejected": -14.341082572937012, |
|
"logps/chosen": -1.4526535272598267, |
|
"logps/rejected": -1.5791641473770142, |
|
"loss": 1.5265, |
|
"odds_ratio_loss": 0.7380681037902832, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1452653706073761, |
|
"rewards/margins": 0.012651054188609123, |
|
"rewards/rejected": -0.15791639685630798, |
|
"sft_loss": 1.4526535272598267, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.9074560517276218, |
|
"grad_norm": 2.4256625175476074, |
|
"learning_rate": 1.4609019719648666e-06, |
|
"logits/chosen": -14.359014511108398, |
|
"logits/rejected": -14.343942642211914, |
|
"logps/chosen": -1.365081787109375, |
|
"logps/rejected": -1.4730589389801025, |
|
"loss": 1.4336, |
|
"odds_ratio_loss": 0.685504138469696, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.13650815188884735, |
|
"rewards/margins": 0.010797703638672829, |
|
"rewards/rejected": -0.14730587601661682, |
|
"sft_loss": 1.365081787109375, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.9236209335219236, |
|
"grad_norm": 2.2215967178344727, |
|
"learning_rate": 1.42252110930943e-06, |
|
"logits/chosen": -14.144754409790039, |
|
"logits/rejected": -14.116401672363281, |
|
"logps/chosen": -1.2247555255889893, |
|
"logps/rejected": -1.2106770277023315, |
|
"loss": 1.3031, |
|
"odds_ratio_loss": 0.7834988832473755, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.12247554957866669, |
|
"rewards/margins": -0.0014078498352319002, |
|
"rewards/rejected": -0.12106770277023315, |
|
"sft_loss": 1.2247555255889893, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.9397858153162255, |
|
"grad_norm": 1.6026244163513184, |
|
"learning_rate": 1.3844496503620493e-06, |
|
"logits/chosen": -14.315832138061523, |
|
"logits/rejected": -14.499916076660156, |
|
"logps/chosen": -1.4833340644836426, |
|
"logps/rejected": -1.521794080734253, |
|
"loss": 1.5547, |
|
"odds_ratio_loss": 0.7132872343063354, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.14833340048789978, |
|
"rewards/margins": 0.0038460283540189266, |
|
"rewards/rejected": -0.15217943489551544, |
|
"sft_loss": 1.4833340644836426, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.9559506971105274, |
|
"grad_norm": 1.1467649936676025, |
|
"learning_rate": 1.3466985275416081e-06, |
|
"logits/chosen": -14.316365242004395, |
|
"logits/rejected": -14.039219856262207, |
|
"logps/chosen": -1.4100277423858643, |
|
"logps/rejected": -1.4868837594985962, |
|
"loss": 1.4848, |
|
"odds_ratio_loss": 0.7481211423873901, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.14100277423858643, |
|
"rewards/margins": 0.00768560403957963, |
|
"rewards/rejected": -0.14868836104869843, |
|
"sft_loss": 1.4100277423858643, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.9721155789048292, |
|
"grad_norm": 1.3261767625808716, |
|
"learning_rate": 1.309278581280791e-06, |
|
"logits/chosen": -14.425065994262695, |
|
"logits/rejected": -14.19542121887207, |
|
"logps/chosen": -1.258156418800354, |
|
"logps/rejected": -1.3927624225616455, |
|
"loss": 1.3258, |
|
"odds_ratio_loss": 0.6761429309844971, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.12581565976142883, |
|
"rewards/margins": 0.013460601679980755, |
|
"rewards/rejected": -0.13927623629570007, |
|
"sft_loss": 1.258156418800354, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.9882804606991311, |
|
"grad_norm": 0.8793450593948364, |
|
"learning_rate": 1.272200556913199e-06, |
|
"logits/chosen": -14.331692695617676, |
|
"logits/rejected": -14.390342712402344, |
|
"logps/chosen": -1.2902759313583374, |
|
"logps/rejected": -1.398531198501587, |
|
"loss": 1.3633, |
|
"odds_ratio_loss": 0.7302906513214111, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1290276050567627, |
|
"rewards/margins": 0.01082551758736372, |
|
"rewards/rejected": -0.1398531198501587, |
|
"sft_loss": 1.2902759313583374, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.004445342493433, |
|
"grad_norm": 2.07963228225708, |
|
"learning_rate": 1.2354751015877698e-06, |
|
"logits/chosen": -14.254411697387695, |
|
"logits/rejected": -14.420768737792969, |
|
"logps/chosen": -1.2709214687347412, |
|
"logps/rejected": -1.4514631032943726, |
|
"loss": 1.3403, |
|
"odds_ratio_loss": 0.6936594247817993, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.1270921379327774, |
|
"rewards/margins": 0.018054189160466194, |
|
"rewards/rejected": -0.14514632523059845, |
|
"sft_loss": 1.2709214687347412, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.020610224287735, |
|
"grad_norm": 2.574068069458008, |
|
"learning_rate": 1.1991127612113945e-06, |
|
"logits/chosen": -14.361371040344238, |
|
"logits/rejected": -14.495355606079102, |
|
"logps/chosen": -1.3789875507354736, |
|
"logps/rejected": -1.5034908056259155, |
|
"loss": 1.4475, |
|
"odds_ratio_loss": 0.6847060322761536, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.1378987580537796, |
|
"rewards/margins": 0.012450330890715122, |
|
"rewards/rejected": -0.15034906566143036, |
|
"sft_loss": 1.3789875507354736, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.036775106082037, |
|
"grad_norm": 1.4936628341674805, |
|
"learning_rate": 1.1631239774206035e-06, |
|
"logits/chosen": -14.19866943359375, |
|
"logits/rejected": -14.191067695617676, |
|
"logps/chosen": -1.347879409790039, |
|
"logps/rejected": -1.4048999547958374, |
|
"loss": 1.4251, |
|
"odds_ratio_loss": 0.7725744247436523, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.1347879320383072, |
|
"rewards/margins": 0.005702070891857147, |
|
"rewards/rejected": -0.14049001038074493, |
|
"sft_loss": 1.347879409790039, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.052939987876339, |
|
"grad_norm": 1.7168585062026978, |
|
"learning_rate": 1.1275190845831978e-06, |
|
"logits/chosen": -14.3424711227417, |
|
"logits/rejected": -14.3289213180542, |
|
"logps/chosen": -1.3685007095336914, |
|
"logps/rejected": -1.4727340936660767, |
|
"loss": 1.4389, |
|
"odds_ratio_loss": 0.7035232782363892, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.13685005903244019, |
|
"rewards/margins": 0.010423343628644943, |
|
"rewards/rejected": -0.14727340638637543, |
|
"sft_loss": 1.3685007095336914, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.0691048696706407, |
|
"grad_norm": 1.1820368766784668, |
|
"learning_rate": 1.0923083068306778e-06, |
|
"logits/chosen": -14.398675918579102, |
|
"logits/rejected": -14.118631362915039, |
|
"logps/chosen": -1.2939175367355347, |
|
"logps/rejected": -1.473049283027649, |
|
"loss": 1.3601, |
|
"odds_ratio_loss": 0.662093997001648, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.12939175963401794, |
|
"rewards/margins": 0.017913173884153366, |
|
"rewards/rejected": -0.14730492234230042, |
|
"sft_loss": 1.2939175367355347, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.0852697514649425, |
|
"grad_norm": 1.1745166778564453, |
|
"learning_rate": 1.0575017551223348e-06, |
|
"logits/chosen": -14.3531494140625, |
|
"logits/rejected": -14.198529243469238, |
|
"logps/chosen": -1.2511951923370361, |
|
"logps/rejected": -1.3217878341674805, |
|
"loss": 1.3224, |
|
"odds_ratio_loss": 0.7121993899345398, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.12511952221393585, |
|
"rewards/margins": 0.007059249095618725, |
|
"rewards/rejected": -0.13217875361442566, |
|
"sft_loss": 1.2511951923370361, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.1014346332592444, |
|
"grad_norm": 0.894344687461853, |
|
"learning_rate": 1.023109424341833e-06, |
|
"logits/chosen": -14.153393745422363, |
|
"logits/rejected": -14.245986938476562, |
|
"logps/chosen": -1.3667266368865967, |
|
"logps/rejected": -1.42815363407135, |
|
"loss": 1.4394, |
|
"odds_ratio_loss": 0.727142333984375, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13667264580726624, |
|
"rewards/margins": 0.006142704281955957, |
|
"rewards/rejected": -0.14281536638736725, |
|
"sft_loss": 1.3667266368865967, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.1175995150535463, |
|
"grad_norm": 1.5093544721603394, |
|
"learning_rate": 9.891411904271273e-07, |
|
"logits/chosen": -14.242596626281738, |
|
"logits/rejected": -14.327380180358887, |
|
"logps/chosen": -1.3282233476638794, |
|
"logps/rejected": -1.3852262496948242, |
|
"loss": 1.4007, |
|
"odds_ratio_loss": 0.7251249551773071, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.13282233476638794, |
|
"rewards/margins": 0.005700295325368643, |
|
"rewards/rejected": -0.13852263987064362, |
|
"sft_loss": 1.3282233476638794, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.133764396847848, |
|
"grad_norm": 0.8299040198326111, |
|
"learning_rate": 9.556068075345363e-07, |
|
"logits/chosen": -14.465705871582031, |
|
"logits/rejected": -14.254651069641113, |
|
"logps/chosen": -1.2607736587524414, |
|
"logps/rejected": -1.3249403238296509, |
|
"loss": 1.3327, |
|
"odds_ratio_loss": 0.7195707559585571, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.12607736885547638, |
|
"rewards/margins": 0.006416681222617626, |
|
"rewards/rejected": -0.13249404728412628, |
|
"sft_loss": 1.2607736587524414, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.14992927864215, |
|
"grad_norm": 1.5431737899780273, |
|
"learning_rate": 9.225159052377838e-07, |
|
"logits/chosen": -14.418218612670898, |
|
"logits/rejected": -14.442914009094238, |
|
"logps/chosen": -1.369145393371582, |
|
"logps/rejected": -1.4892218112945557, |
|
"loss": 1.4395, |
|
"odds_ratio_loss": 0.7034425735473633, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.13691455125808716, |
|
"rewards/margins": 0.012007640674710274, |
|
"rewards/rejected": -0.1489221751689911, |
|
"sft_loss": 1.369145393371582, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.166094160436452, |
|
"grad_norm": 2.125438928604126, |
|
"learning_rate": 8.898779857628184e-07, |
|
"logits/chosen": -14.263992309570312, |
|
"logits/rejected": -14.439204216003418, |
|
"logps/chosen": -1.2737493515014648, |
|
"logps/rejected": -1.307660698890686, |
|
"loss": 1.3488, |
|
"odds_ratio_loss": 0.7507684826850891, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.12737493216991425, |
|
"rewards/margins": 0.003391148056834936, |
|
"rewards/rejected": -0.13076607882976532, |
|
"sft_loss": 1.2737493515014648, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.1822590422307537, |
|
"grad_norm": 1.0558884143829346, |
|
"learning_rate": 8.577024212591975e-07, |
|
"logits/chosen": -14.523656845092773, |
|
"logits/rejected": -14.395648002624512, |
|
"logps/chosen": -1.3369591236114502, |
|
"logps/rejected": -1.402151346206665, |
|
"loss": 1.4081, |
|
"odds_ratio_loss": 0.7112525701522827, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.13369593024253845, |
|
"rewards/margins": 0.006519217975437641, |
|
"rewards/rejected": -0.14021514356136322, |
|
"sft_loss": 1.3369591236114502, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.1984239240250556, |
|
"grad_norm": 1.1882685422897339, |
|
"learning_rate": 8.259984511088276e-07, |
|
"logits/chosen": -14.409403800964355, |
|
"logits/rejected": -14.405116081237793, |
|
"logps/chosen": -1.3154635429382324, |
|
"logps/rejected": -1.4095304012298584, |
|
"loss": 1.3863, |
|
"odds_ratio_loss": 0.7081496715545654, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.13154636323451996, |
|
"rewards/margins": 0.009406678378582, |
|
"rewards/rejected": -0.14095303416252136, |
|
"sft_loss": 1.3154635429382324, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.2145888058193575, |
|
"grad_norm": 1.6390233039855957, |
|
"learning_rate": 7.947751792728237e-07, |
|
"logits/chosen": -14.409843444824219, |
|
"logits/rejected": -14.329424858093262, |
|
"logps/chosen": -1.3204478025436401, |
|
"logps/rejected": -1.4512555599212646, |
|
"loss": 1.3901, |
|
"odds_ratio_loss": 0.6965182423591614, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.13204479217529297, |
|
"rewards/margins": 0.013080772943794727, |
|
"rewards/rejected": -0.14512555301189423, |
|
"sft_loss": 1.3204478025436401, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.2307536876136593, |
|
"grad_norm": 1.7825186252593994, |
|
"learning_rate": 7.640415716772626e-07, |
|
"logits/chosen": -14.333005905151367, |
|
"logits/rejected": -14.429731369018555, |
|
"logps/chosen": -1.3603641986846924, |
|
"logps/rejected": -1.4518425464630127, |
|
"loss": 1.4331, |
|
"odds_ratio_loss": 0.7270913124084473, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.13603642582893372, |
|
"rewards/margins": 0.009147830307483673, |
|
"rewards/rejected": -0.1451842486858368, |
|
"sft_loss": 1.3603641986846924, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.246918569407961, |
|
"grad_norm": 1.125680685043335, |
|
"learning_rate": 7.338064536385722e-07, |
|
"logits/chosen": -14.394281387329102, |
|
"logits/rejected": -14.345739364624023, |
|
"logps/chosen": -1.3667652606964111, |
|
"logps/rejected": -1.5295965671539307, |
|
"loss": 1.435, |
|
"odds_ratio_loss": 0.6821550130844116, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.13667652010917664, |
|
"rewards/margins": 0.016283124685287476, |
|
"rewards/rejected": -0.1529596596956253, |
|
"sft_loss": 1.3667652606964111, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.263083451202263, |
|
"grad_norm": 1.7544102668762207, |
|
"learning_rate": 7.040785073292883e-07, |
|
"logits/chosen": -14.237360000610352, |
|
"logits/rejected": -14.33959674835205, |
|
"logps/chosen": -1.4276225566864014, |
|
"logps/rejected": -1.4824755191802979, |
|
"loss": 1.5027, |
|
"odds_ratio_loss": 0.750755786895752, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.14276224374771118, |
|
"rewards/margins": 0.00548530463129282, |
|
"rewards/rejected": -0.14824756979942322, |
|
"sft_loss": 1.4276225566864014, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.279248332996565, |
|
"grad_norm": 1.7468085289001465, |
|
"learning_rate": 6.748662692849297e-07, |
|
"logits/chosen": -14.5598726272583, |
|
"logits/rejected": -14.531698226928711, |
|
"logps/chosen": -1.3492968082427979, |
|
"logps/rejected": -1.4934823513031006, |
|
"loss": 1.4184, |
|
"odds_ratio_loss": 0.6912583112716675, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.13492968678474426, |
|
"rewards/margins": 0.014418545179069042, |
|
"rewards/rejected": -0.14934822916984558, |
|
"sft_loss": 1.3492968082427979, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.295413214790867, |
|
"grad_norm": 3.2176520824432373, |
|
"learning_rate": 6.46178127952686e-07, |
|
"logits/chosen": -14.288836479187012, |
|
"logits/rejected": -14.204765319824219, |
|
"logps/chosen": -1.299232840538025, |
|
"logps/rejected": -1.4280776977539062, |
|
"loss": 1.3673, |
|
"odds_ratio_loss": 0.6802908182144165, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.12992329895496368, |
|
"rewards/margins": 0.01288448553532362, |
|
"rewards/rejected": -0.1428077667951584, |
|
"sft_loss": 1.299232840538025, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.3115780965851687, |
|
"grad_norm": 2.5991835594177246, |
|
"learning_rate": 6.180223212826289e-07, |
|
"logits/chosen": -14.347335815429688, |
|
"logits/rejected": -14.187026977539062, |
|
"logps/chosen": -1.2904529571533203, |
|
"logps/rejected": -1.3600698709487915, |
|
"loss": 1.362, |
|
"odds_ratio_loss": 0.7157233953475952, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.1290452927350998, |
|
"rewards/margins": 0.006961710751056671, |
|
"rewards/rejected": -0.13600699603557587, |
|
"sft_loss": 1.2904529571533203, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.3277429783794705, |
|
"grad_norm": 0.8683578968048096, |
|
"learning_rate": 5.904069343621443e-07, |
|
"logits/chosen": -14.465449333190918, |
|
"logits/rejected": -14.325057983398438, |
|
"logps/chosen": -1.299377202987671, |
|
"logps/rejected": -1.401989459991455, |
|
"loss": 1.3706, |
|
"odds_ratio_loss": 0.7122213244438171, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.12993772327899933, |
|
"rewards/margins": 0.010261224582791328, |
|
"rewards/rejected": -0.14019893109798431, |
|
"sft_loss": 1.299377202987671, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.3439078601737724, |
|
"grad_norm": 1.7288964986801147, |
|
"learning_rate": 5.633398970942544e-07, |
|
"logits/chosen": -14.3145170211792, |
|
"logits/rejected": -14.42223834991455, |
|
"logps/chosen": -1.2952549457550049, |
|
"logps/rejected": -1.3960306644439697, |
|
"loss": 1.3675, |
|
"odds_ratio_loss": 0.7228525876998901, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.12952548265457153, |
|
"rewards/margins": 0.010077586397528648, |
|
"rewards/rejected": -0.13960307836532593, |
|
"sft_loss": 1.2952549457550049, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.3600727419680743, |
|
"grad_norm": 1.8580021858215332, |
|
"learning_rate": 5.368289819205069e-07, |
|
"logits/chosen": -14.319725036621094, |
|
"logits/rejected": -14.285405158996582, |
|
"logps/chosen": -1.2445900440216064, |
|
"logps/rejected": -1.3483976125717163, |
|
"loss": 1.3139, |
|
"odds_ratio_loss": 0.6927712559700012, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.12445902824401855, |
|
"rewards/margins": 0.010380755178630352, |
|
"rewards/rejected": -0.13483977317810059, |
|
"sft_loss": 1.2445900440216064, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.376237623762376, |
|
"grad_norm": 2.3416638374328613, |
|
"learning_rate": 5.108818015890785e-07, |
|
"logits/chosen": -14.468851089477539, |
|
"logits/rejected": -14.461502075195312, |
|
"logps/chosen": -1.3592495918273926, |
|
"logps/rejected": -1.4990885257720947, |
|
"loss": 1.4311, |
|
"odds_ratio_loss": 0.7181252241134644, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.13592496514320374, |
|
"rewards/margins": 0.013983884826302528, |
|
"rewards/rejected": -0.14990884065628052, |
|
"sft_loss": 1.3592495918273926, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.392402505556678, |
|
"grad_norm": 1.5794059038162231, |
|
"learning_rate": 4.855058069687291e-07, |
|
"logits/chosen": -14.158782958984375, |
|
"logits/rejected": -14.074625015258789, |
|
"logps/chosen": -1.324530839920044, |
|
"logps/rejected": -1.366247296333313, |
|
"loss": 1.3974, |
|
"odds_ratio_loss": 0.7290586233139038, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.1324530839920044, |
|
"rewards/margins": 0.004171643406152725, |
|
"rewards/rejected": -0.13662473857402802, |
|
"sft_loss": 1.324530839920044, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.40856738735098, |
|
"grad_norm": 2.1180176734924316, |
|
"learning_rate": 4.607082849092523e-07, |
|
"logits/chosen": -14.219759941101074, |
|
"logits/rejected": -14.182577133178711, |
|
"logps/chosen": -1.4282917976379395, |
|
"logps/rejected": -1.4976496696472168, |
|
"loss": 1.5016, |
|
"odds_ratio_loss": 0.7326869368553162, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.14282917976379395, |
|
"rewards/margins": 0.006935800425708294, |
|
"rewards/rejected": -0.14976496994495392, |
|
"sft_loss": 1.4282917976379395, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.4247322691452817, |
|
"grad_norm": 2.495347738265991, |
|
"learning_rate": 4.3649635614901405e-07, |
|
"logits/chosen": -14.16241455078125, |
|
"logits/rejected": -14.45665168762207, |
|
"logps/chosen": -1.3701971769332886, |
|
"logps/rejected": -1.3534958362579346, |
|
"loss": 1.446, |
|
"odds_ratio_loss": 0.7579734921455383, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.13701972365379333, |
|
"rewards/margins": -0.0016701335553079844, |
|
"rewards/rejected": -0.1353495866060257, |
|
"sft_loss": 1.3701971769332886, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4247322691452817, |
|
"eval_logits/chosen": -14.27784252166748, |
|
"eval_logits/rejected": -14.317824363708496, |
|
"eval_logps/chosen": -1.372594952583313, |
|
"eval_logps/rejected": -1.4290432929992676, |
|
"eval_loss": 1.4484930038452148, |
|
"eval_odds_ratio_loss": 0.7589808702468872, |
|
"eval_rewards/accuracies": 0.4809090793132782, |
|
"eval_rewards/chosen": -0.13725949823856354, |
|
"eval_rewards/margins": 0.005644842050969601, |
|
"eval_rewards/rejected": -0.1429043412208557, |
|
"eval_runtime": 396.2162, |
|
"eval_samples_per_second": 2.776, |
|
"eval_sft_loss": 1.372594952583313, |
|
"eval_steps_per_second": 1.388, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4408971509395836, |
|
"grad_norm": 1.8667449951171875, |
|
"learning_rate": 4.128769732701973e-07, |
|
"logits/chosen": -14.2674560546875, |
|
"logits/rejected": -14.17170524597168, |
|
"logps/chosen": -1.3341007232666016, |
|
"logps/rejected": -1.4468257427215576, |
|
"loss": 1.4053, |
|
"odds_ratio_loss": 0.7120139002799988, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.13341006636619568, |
|
"rewards/margins": 0.011272510513663292, |
|
"rewards/rejected": -0.14468258619308472, |
|
"sft_loss": 1.3341007232666016, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.4570620327338855, |
|
"grad_norm": 2.940946102142334, |
|
"learning_rate": 3.8985691870233046e-07, |
|
"logits/chosen": -14.28807258605957, |
|
"logits/rejected": -14.214245796203613, |
|
"logps/chosen": -1.3024286031723022, |
|
"logps/rejected": -1.4218701124191284, |
|
"loss": 1.3737, |
|
"odds_ratio_loss": 0.712692379951477, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.13024285435676575, |
|
"rewards/margins": 0.011944140307605267, |
|
"rewards/rejected": -0.1421869993209839, |
|
"sft_loss": 1.3024286031723022, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.4732269145281873, |
|
"grad_norm": 2.6948108673095703, |
|
"learning_rate": 3.6744280277467904e-07, |
|
"logits/chosen": -14.425226211547852, |
|
"logits/rejected": -14.381690979003906, |
|
"logps/chosen": -1.4246366024017334, |
|
"logps/rejected": -1.426334023475647, |
|
"loss": 1.5046, |
|
"odds_ratio_loss": 0.7999409437179565, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.14246365427970886, |
|
"rewards/margins": 0.00016971743025351316, |
|
"rewards/rejected": -0.14263339340686798, |
|
"sft_loss": 1.4246366024017334, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.489391796322489, |
|
"grad_norm": 1.6409363746643066, |
|
"learning_rate": 3.456410618180503e-07, |
|
"logits/chosen": -13.974553108215332, |
|
"logits/rejected": -14.2942533493042, |
|
"logps/chosen": -1.2257071733474731, |
|
"logps/rejected": -1.43178391456604, |
|
"loss": 1.2927, |
|
"odds_ratio_loss": 0.6698334217071533, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1225707158446312, |
|
"rewards/margins": 0.020607685670256615, |
|
"rewards/rejected": -0.14317841827869415, |
|
"sft_loss": 1.2257071733474731, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.5055566781167915, |
|
"grad_norm": 1.3992644548416138, |
|
"learning_rate": 3.244579563165753e-07, |
|
"logits/chosen": -14.36426830291748, |
|
"logits/rejected": -14.48327922821045, |
|
"logps/chosen": -1.2957897186279297, |
|
"logps/rejected": -1.4375650882720947, |
|
"loss": 1.3673, |
|
"odds_ratio_loss": 0.7152336239814758, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.12957896292209625, |
|
"rewards/margins": 0.014177536591887474, |
|
"rewards/rejected": -0.14375647902488708, |
|
"sft_loss": 1.2957897186279297, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.521721559911093, |
|
"grad_norm": 0.9756754636764526, |
|
"learning_rate": 3.038995691099697e-07, |
|
"logits/chosen": -14.465911865234375, |
|
"logits/rejected": -14.273321151733398, |
|
"logps/chosen": -1.3624980449676514, |
|
"logps/rejected": -1.5072979927062988, |
|
"loss": 1.4344, |
|
"odds_ratio_loss": 0.7189978361129761, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.13624981045722961, |
|
"rewards/margins": 0.014479981735348701, |
|
"rewards/rejected": -0.15072980523109436, |
|
"sft_loss": 1.3624980449676514, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.5378864417053952, |
|
"grad_norm": 2.6390867233276367, |
|
"learning_rate": 2.839718036468192e-07, |
|
"logits/chosen": -14.324618339538574, |
|
"logits/rejected": -14.362611770629883, |
|
"logps/chosen": -1.4562547206878662, |
|
"logps/rejected": -1.4829699993133545, |
|
"loss": 1.5307, |
|
"odds_ratio_loss": 0.7442874312400818, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.14562548696994781, |
|
"rewards/margins": 0.0026715078856796026, |
|
"rewards/rejected": -0.1482969969511032, |
|
"sft_loss": 1.4562547206878662, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.5540513234996967, |
|
"grad_norm": 1.9648209810256958, |
|
"learning_rate": 2.646803822893723e-07, |
|
"logits/chosen": -14.38152027130127, |
|
"logits/rejected": -14.392126083374023, |
|
"logps/chosen": -1.4547812938690186, |
|
"logps/rejected": -1.4928423166275024, |
|
"loss": 1.5325, |
|
"odds_ratio_loss": 0.7773637175559998, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.14547815918922424, |
|
"rewards/margins": 0.003806093242019415, |
|
"rewards/rejected": -0.1492842435836792, |
|
"sft_loss": 1.4547812938690186, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.570216205293999, |
|
"grad_norm": 1.1905252933502197, |
|
"learning_rate": 2.460308446703341e-07, |
|
"logits/chosen": -14.339777946472168, |
|
"logits/rejected": -14.1979398727417, |
|
"logps/chosen": -1.3354339599609375, |
|
"logps/rejected": -1.348439335823059, |
|
"loss": 1.4097, |
|
"odds_ratio_loss": 0.7425277829170227, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.13354340195655823, |
|
"rewards/margins": 0.0013005301589146256, |
|
"rewards/rejected": -0.13484393060207367, |
|
"sft_loss": 1.3354339599609375, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.5863810870883004, |
|
"grad_norm": 4.711751461029053, |
|
"learning_rate": 2.2802854610213143e-07, |
|
"logits/chosen": -14.302705764770508, |
|
"logits/rejected": -14.19762134552002, |
|
"logps/chosen": -1.3138768672943115, |
|
"logps/rejected": -1.4147188663482666, |
|
"loss": 1.3864, |
|
"odds_ratio_loss": 0.7257053256034851, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.13138769567012787, |
|
"rewards/margins": 0.010084209032356739, |
|
"rewards/rejected": -0.14147189259529114, |
|
"sft_loss": 1.3138768672943115, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.6025459688826027, |
|
"grad_norm": 4.042973518371582, |
|
"learning_rate": 2.106786560391072e-07, |
|
"logits/chosen": -14.2058744430542, |
|
"logits/rejected": -14.269085884094238, |
|
"logps/chosen": -1.3923499584197998, |
|
"logps/rejected": -1.3771612644195557, |
|
"loss": 1.4698, |
|
"odds_ratio_loss": 0.7747048139572144, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1392349898815155, |
|
"rewards/margins": -0.0015188835095614195, |
|
"rewards/rejected": -0.1377161294221878, |
|
"sft_loss": 1.3923499584197998, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.6187108506769046, |
|
"grad_norm": 1.3606544733047485, |
|
"learning_rate": 1.9398615659308255e-07, |
|
"logits/chosen": -14.2599515914917, |
|
"logits/rejected": -14.334997177124023, |
|
"logps/chosen": -1.3270127773284912, |
|
"logps/rejected": -1.3853967189788818, |
|
"loss": 1.3982, |
|
"odds_ratio_loss": 0.7119258046150208, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.13270129263401031, |
|
"rewards/margins": 0.005838391836732626, |
|
"rewards/rejected": -0.13853967189788818, |
|
"sft_loss": 1.3270127773284912, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.6348757324712064, |
|
"grad_norm": 1.4494473934173584, |
|
"learning_rate": 1.7795584110272184e-07, |
|
"logits/chosen": -14.470367431640625, |
|
"logits/rejected": -14.478838920593262, |
|
"logps/chosen": -1.3744457960128784, |
|
"logps/rejected": -1.4546699523925781, |
|
"loss": 1.4475, |
|
"odds_ratio_loss": 0.730518639087677, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.13744458556175232, |
|
"rewards/margins": 0.008022413589060307, |
|
"rewards/rejected": -0.14546698331832886, |
|
"sft_loss": 1.3744457960128784, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.6510406142655083, |
|
"grad_norm": 2.888951539993286, |
|
"learning_rate": 1.6259231275709636e-07, |
|
"logits/chosen": -14.41100788116455, |
|
"logits/rejected": -14.428006172180176, |
|
"logps/chosen": -1.3241318464279175, |
|
"logps/rejected": -1.318234920501709, |
|
"loss": 1.4028, |
|
"odds_ratio_loss": 0.7864112257957458, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.13241317868232727, |
|
"rewards/margins": -0.000589700706768781, |
|
"rewards/rejected": -0.13182349503040314, |
|
"sft_loss": 1.3241318464279175, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.66720549605981, |
|
"grad_norm": 1.5565133094787598, |
|
"learning_rate": 1.478999832738548e-07, |
|
"logits/chosen": -14.382177352905273, |
|
"logits/rejected": -14.320945739746094, |
|
"logps/chosen": -1.297300934791565, |
|
"logps/rejected": -1.4187005758285522, |
|
"loss": 1.368, |
|
"odds_ratio_loss": 0.7067518830299377, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.12973010540008545, |
|
"rewards/margins": 0.01213997695595026, |
|
"rewards/rejected": -0.14187008142471313, |
|
"sft_loss": 1.297300934791565, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.683370377854112, |
|
"grad_norm": 2.0713951587677, |
|
"learning_rate": 1.338830716323769e-07, |
|
"logits/chosen": -14.337793350219727, |
|
"logits/rejected": -14.350440979003906, |
|
"logps/chosen": -1.3087949752807617, |
|
"logps/rejected": -1.350098967552185, |
|
"loss": 1.383, |
|
"odds_ratio_loss": 0.7419986724853516, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.1308794915676117, |
|
"rewards/margins": 0.004130417015403509, |
|
"rewards/rejected": -0.13500989973545074, |
|
"sft_loss": 1.3087949752807617, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.699535259648414, |
|
"grad_norm": 2.8654770851135254, |
|
"learning_rate": 1.205456028622723e-07, |
|
"logits/chosen": -14.387499809265137, |
|
"logits/rejected": -14.384310722351074, |
|
"logps/chosen": -1.2575846910476685, |
|
"logps/rejected": -1.4380841255187988, |
|
"loss": 1.3249, |
|
"odds_ratio_loss": 0.6730828285217285, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.12575848400592804, |
|
"rewards/margins": 0.018049929291009903, |
|
"rewards/rejected": -0.14380840957164764, |
|
"sft_loss": 1.2575846910476685, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.7157001414427158, |
|
"grad_norm": 2.644263505935669, |
|
"learning_rate": 1.0789140688756805e-07, |
|
"logits/chosen": -14.564410209655762, |
|
"logits/rejected": -14.484796524047852, |
|
"logps/chosen": -1.331872582435608, |
|
"logps/rejected": -1.4917659759521484, |
|
"loss": 1.3983, |
|
"odds_ratio_loss": 0.6643630862236023, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.13318723440170288, |
|
"rewards/margins": 0.015989361330866814, |
|
"rewards/rejected": -0.14917659759521484, |
|
"sft_loss": 1.331872582435608, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.7318650232370176, |
|
"grad_norm": 1.8434594869613647, |
|
"learning_rate": 9.592411742693098e-08, |
|
"logits/chosen": -14.349563598632812, |
|
"logits/rejected": -14.297950744628906, |
|
"logps/chosen": -1.284172773361206, |
|
"logps/rejected": -1.3313789367675781, |
|
"loss": 1.3598, |
|
"odds_ratio_loss": 0.7563740611076355, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.12841728329658508, |
|
"rewards/margins": 0.004720622207969427, |
|
"rewards/rejected": -0.13313789665699005, |
|
"sft_loss": 1.284172773361206, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.7480299050313195, |
|
"grad_norm": 0.9198280572891235, |
|
"learning_rate": 8.464717095022168e-08, |
|
"logits/chosen": -14.535560607910156, |
|
"logits/rejected": -14.29857349395752, |
|
"logps/chosen": -1.291333794593811, |
|
"logps/rejected": -1.4038417339324951, |
|
"loss": 1.3626, |
|
"odds_ratio_loss": 0.7129305601119995, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12913337349891663, |
|
"rewards/margins": 0.011250784620642662, |
|
"rewards/rejected": -0.14038416743278503, |
|
"sft_loss": 1.291333794593811, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.7641947868256214, |
|
"grad_norm": 1.85430908203125, |
|
"learning_rate": 7.406380569169841e-08, |
|
"logits/chosen": -14.304112434387207, |
|
"logits/rejected": -14.291776657104492, |
|
"logps/chosen": -1.3815504312515259, |
|
"logps/rejected": -1.3685299158096313, |
|
"loss": 1.4574, |
|
"odds_ratio_loss": 0.7585769891738892, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.13815505802631378, |
|
"rewards/margins": -0.0013020627666264772, |
|
"rewards/rejected": -0.13685297966003418, |
|
"sft_loss": 1.3815504312515259, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.7803596686199232, |
|
"grad_norm": 7.879937171936035, |
|
"learning_rate": 6.417706072013808e-08, |
|
"logits/chosen": -14.357699394226074, |
|
"logits/rejected": -14.520744323730469, |
|
"logps/chosen": -1.4151430130004883, |
|
"logps/rejected": -1.4842795133590698, |
|
"loss": 1.4887, |
|
"odds_ratio_loss": 0.7356118559837341, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.14151428639888763, |
|
"rewards/margins": 0.006913675460964441, |
|
"rewards/rejected": -0.14842796325683594, |
|
"sft_loss": 1.4151430130004883, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.796524550414225, |
|
"grad_norm": 2.3623361587524414, |
|
"learning_rate": 5.498977506615294e-08, |
|
"logits/chosen": -14.438512802124023, |
|
"logits/rejected": -14.370248794555664, |
|
"logps/chosen": -1.4021018743515015, |
|
"logps/rejected": -1.3835337162017822, |
|
"loss": 1.4818, |
|
"odds_ratio_loss": 0.796977698802948, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.14021018147468567, |
|
"rewards/margins": -0.0018568048253655434, |
|
"rewards/rejected": -0.1383533775806427, |
|
"sft_loss": 1.4021018743515015, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.812689432208527, |
|
"grad_norm": 1.0650444030761719, |
|
"learning_rate": 4.6504586906947756e-08, |
|
"logits/chosen": -14.35010051727295, |
|
"logits/rejected": -14.401901245117188, |
|
"logps/chosen": -1.3507376909255981, |
|
"logps/rejected": -1.4280903339385986, |
|
"loss": 1.4204, |
|
"odds_ratio_loss": 0.6963773369789124, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.13507376611232758, |
|
"rewards/margins": 0.007735266350209713, |
|
"rewards/rejected": -0.14280903339385986, |
|
"sft_loss": 1.3507376909255981, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"grad_norm": 5.588193893432617, |
|
"learning_rate": 3.8723932808754914e-08, |
|
"logits/chosen": -14.620956420898438, |
|
"logits/rejected": -14.591873168945312, |
|
"logps/chosen": -1.4141243696212769, |
|
"logps/rejected": -1.4447482824325562, |
|
"loss": 1.4888, |
|
"odds_ratio_loss": 0.7466815710067749, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.1414124220609665, |
|
"rewards/margins": 0.003062391420826316, |
|
"rewards/rejected": -0.1444748193025589, |
|
"sft_loss": 1.4141243696212769, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.8450191957971307, |
|
"grad_norm": 2.8461813926696777, |
|
"learning_rate": 3.1650047027158014e-08, |
|
"logits/chosen": -14.406710624694824, |
|
"logits/rejected": -14.431941032409668, |
|
"logps/chosen": -1.3235969543457031, |
|
"logps/rejected": -1.378565788269043, |
|
"loss": 1.3941, |
|
"odds_ratio_loss": 0.7055075764656067, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.13235969841480255, |
|
"rewards/margins": 0.005496888421475887, |
|
"rewards/rejected": -0.13785657286643982, |
|
"sft_loss": 1.3235969543457031, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.8611840775914326, |
|
"grad_norm": 1.4648724794387817, |
|
"learning_rate": 2.5284960865517848e-08, |
|
"logits/chosen": -14.247715950012207, |
|
"logits/rejected": -14.30573844909668, |
|
"logps/chosen": -1.2652337551116943, |
|
"logps/rejected": -1.3874812126159668, |
|
"loss": 1.3373, |
|
"odds_ratio_loss": 0.7210808992385864, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.12652337551116943, |
|
"rewards/margins": 0.012224750593304634, |
|
"rewards/rejected": -0.13874812424182892, |
|
"sft_loss": 1.2652337551116943, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.8773489593857344, |
|
"grad_norm": 1.2711795568466187, |
|
"learning_rate": 1.9630502091670388e-08, |
|
"logits/chosen": -14.345422744750977, |
|
"logits/rejected": -14.210649490356445, |
|
"logps/chosen": -1.3347010612487793, |
|
"logps/rejected": -1.4864898920059204, |
|
"loss": 1.4034, |
|
"odds_ratio_loss": 0.686531126499176, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.13347011804580688, |
|
"rewards/margins": 0.015178876928985119, |
|
"rewards/rejected": -0.14864897727966309, |
|
"sft_loss": 1.3347010612487793, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.8935138411800363, |
|
"grad_norm": 4.285287857055664, |
|
"learning_rate": 1.4688294413074677e-08, |
|
"logits/chosen": -14.240816116333008, |
|
"logits/rejected": -14.293863296508789, |
|
"logps/chosen": -1.2230440378189087, |
|
"logps/rejected": -1.3717424869537354, |
|
"loss": 1.2918, |
|
"odds_ratio_loss": 0.6871523857116699, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.12230439484119415, |
|
"rewards/margins": 0.014869834296405315, |
|
"rewards/rejected": -0.13717423379421234, |
|
"sft_loss": 1.2230440378189087, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.909678722974338, |
|
"grad_norm": 1.111965298652649, |
|
"learning_rate": 1.0459757010556626e-08, |
|
"logits/chosen": -14.294512748718262, |
|
"logits/rejected": -14.2905912399292, |
|
"logps/chosen": -1.3162596225738525, |
|
"logps/rejected": -1.357807993888855, |
|
"loss": 1.3902, |
|
"odds_ratio_loss": 0.7398349046707153, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1316259652376175, |
|
"rewards/margins": 0.0041548521257936954, |
|
"rewards/rejected": -0.13578079640865326, |
|
"sft_loss": 1.3162596225738525, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.92584360476864, |
|
"grad_norm": 1.985671043395996, |
|
"learning_rate": 6.94610413078306e-09, |
|
"logits/chosen": -14.099322319030762, |
|
"logits/rejected": -14.289319038391113, |
|
"logps/chosen": -1.3942023515701294, |
|
"logps/rejected": -1.5463578701019287, |
|
"loss": 1.4669, |
|
"odds_ratio_loss": 0.7267955541610718, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.13942024111747742, |
|
"rewards/margins": 0.015215557999908924, |
|
"rewards/rejected": -0.15463578701019287, |
|
"sft_loss": 1.3942023515701294, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.942008486562942, |
|
"grad_norm": 1.1975542306900024, |
|
"learning_rate": 4.14834473758563e-09, |
|
"logits/chosen": -14.166104316711426, |
|
"logits/rejected": -14.219152450561523, |
|
"logps/chosen": -1.2467665672302246, |
|
"logps/rejected": -1.3985602855682373, |
|
"loss": 1.3162, |
|
"odds_ratio_loss": 0.6939627528190613, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1246766597032547, |
|
"rewards/margins": 0.015179386362433434, |
|
"rewards/rejected": -0.13985604047775269, |
|
"sft_loss": 1.2467665672302246, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.9581733683572438, |
|
"grad_norm": 1.3036004304885864, |
|
"learning_rate": 2.067282222230349e-09, |
|
"logits/chosen": -14.375224113464355, |
|
"logits/rejected": -14.571484565734863, |
|
"logps/chosen": -1.326818585395813, |
|
"logps/rejected": -1.477850317955017, |
|
"loss": 1.3957, |
|
"odds_ratio_loss": 0.6886210441589355, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.13268187642097473, |
|
"rewards/margins": 0.015103173442184925, |
|
"rewards/rejected": -0.1477850377559662, |
|
"sft_loss": 1.326818585395813, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.9743382501515456, |
|
"grad_norm": 6.394278049468994, |
|
"learning_rate": 7.035141727212979e-10, |
|
"logits/chosen": -14.3215913772583, |
|
"logits/rejected": -14.438852310180664, |
|
"logps/chosen": -1.256394386291504, |
|
"logps/rejected": -1.3541960716247559, |
|
"loss": 1.3287, |
|
"odds_ratio_loss": 0.7228869199752808, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1256394237279892, |
|
"rewards/margins": 0.009780170395970345, |
|
"rewards/rejected": -0.13541960716247559, |
|
"sft_loss": 1.256394386291504, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.9905031319458475, |
|
"grad_norm": 2.8705546855926514, |
|
"learning_rate": 5.743220219761592e-11, |
|
"logits/chosen": -14.366948127746582, |
|
"logits/rejected": -14.415715217590332, |
|
"logps/chosen": -1.3598301410675049, |
|
"logps/rejected": -1.40765380859375, |
|
"loss": 1.4375, |
|
"odds_ratio_loss": 0.7764675617218018, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.13598300516605377, |
|
"rewards/margins": 0.004782381001859903, |
|
"rewards/rejected": -0.14076539874076843, |
|
"sft_loss": 1.3598301410675049, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.9969690846635686, |
|
"step": 1854, |
|
"total_flos": 1.9131711497471508e+18, |
|
"train_loss": 1.4823461713142765, |
|
"train_runtime": 22122.5243, |
|
"train_samples_per_second": 1.342, |
|
"train_steps_per_second": 0.084 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1854, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 1.9131711497471508e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|