|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 861, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003484320557491289, |
|
"grad_norm": 358.6503200029449, |
|
"learning_rate": 3.4482758620689654e-09, |
|
"logits/chosen": -2.5345611572265625, |
|
"logits/rejected": -2.581700563430786, |
|
"logps/chosen": -60.002105712890625, |
|
"logps/rejected": -99.98374938964844, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03484320557491289, |
|
"grad_norm": 335.50819535600175, |
|
"learning_rate": 3.448275862068965e-08, |
|
"logits/chosen": -2.5634875297546387, |
|
"logits/rejected": -2.562131881713867, |
|
"logps/chosen": -59.66706085205078, |
|
"logps/rejected": -73.39751434326172, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.2222222238779068, |
|
"rewards/chosen": -0.00458882749080658, |
|
"rewards/margins": 0.005699412431567907, |
|
"rewards/rejected": -0.010288238525390625, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06968641114982578, |
|
"grad_norm": 431.2142268974989, |
|
"learning_rate": 6.89655172413793e-08, |
|
"logits/chosen": -2.6049625873565674, |
|
"logits/rejected": -2.563797950744629, |
|
"logps/chosen": -104.11083984375, |
|
"logps/rejected": -94.92049407958984, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": 0.010060709901154041, |
|
"rewards/margins": 0.026405800133943558, |
|
"rewards/rejected": -0.016345087438821793, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.10452961672473868, |
|
"grad_norm": 412.06640261613535, |
|
"learning_rate": 1.0344827586206897e-07, |
|
"logits/chosen": -2.591465711593628, |
|
"logits/rejected": -2.5714292526245117, |
|
"logps/chosen": -82.47346496582031, |
|
"logps/rejected": -91.5473403930664, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": 0.014319317415356636, |
|
"rewards/margins": 0.018754545599222183, |
|
"rewards/rejected": -0.0044352286495268345, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13937282229965156, |
|
"grad_norm": 362.45244079994615, |
|
"learning_rate": 1.379310344827586e-07, |
|
"logits/chosen": -2.498384475708008, |
|
"logits/rejected": -2.496023654937744, |
|
"logps/chosen": -77.94225311279297, |
|
"logps/rejected": -73.06587219238281, |
|
"loss": 0.679, |
|
"rewards/accuracies": 0.23125000298023224, |
|
"rewards/chosen": -0.01477863360196352, |
|
"rewards/margins": 0.02524409256875515, |
|
"rewards/rejected": -0.040022727102041245, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.17421602787456447, |
|
"grad_norm": 320.1865662448919, |
|
"learning_rate": 1.7241379310344825e-07, |
|
"logits/chosen": -2.533860683441162, |
|
"logits/rejected": -2.537620782852173, |
|
"logps/chosen": -63.8255729675293, |
|
"logps/rejected": -76.09891510009766, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.2562499940395355, |
|
"rewards/chosen": 0.0704144611954689, |
|
"rewards/margins": 0.06760600954294205, |
|
"rewards/rejected": 0.0028084414079785347, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.20905923344947736, |
|
"grad_norm": 315.3790948714663, |
|
"learning_rate": 2.0689655172413793e-07, |
|
"logits/chosen": -2.5066819190979004, |
|
"logits/rejected": -2.500108480453491, |
|
"logps/chosen": -72.56768798828125, |
|
"logps/rejected": -67.95696258544922, |
|
"loss": 0.6664, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": 0.3960721492767334, |
|
"rewards/margins": 0.11608312278985977, |
|
"rewards/rejected": 0.2799890339374542, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.24390243902439024, |
|
"grad_norm": 311.35919136752943, |
|
"learning_rate": 2.413793103448276e-07, |
|
"logits/chosen": -2.529073715209961, |
|
"logits/rejected": -2.5245697498321533, |
|
"logps/chosen": -63.03422164916992, |
|
"logps/rejected": -67.55048370361328, |
|
"loss": 0.6571, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": 0.7452259063720703, |
|
"rewards/margins": 0.20201978087425232, |
|
"rewards/rejected": 0.5432060956954956, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2787456445993031, |
|
"grad_norm": 380.6379547994258, |
|
"learning_rate": 2.758620689655172e-07, |
|
"logits/chosen": -2.4771764278411865, |
|
"logits/rejected": -2.4673564434051514, |
|
"logps/chosen": -74.3353271484375, |
|
"logps/rejected": -76.72254943847656, |
|
"loss": 0.6584, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.8243219256401062, |
|
"rewards/margins": 0.3330293893814087, |
|
"rewards/rejected": 0.49129247665405273, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.313588850174216, |
|
"grad_norm": 290.2304014635751, |
|
"learning_rate": 2.9922480620155034e-07, |
|
"logits/chosen": -2.4918248653411865, |
|
"logits/rejected": -2.505871295928955, |
|
"logps/chosen": -65.13265228271484, |
|
"logps/rejected": -69.66169738769531, |
|
"loss": 0.6595, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": 0.5267654657363892, |
|
"rewards/margins": 0.24630899727344513, |
|
"rewards/rejected": 0.28045645356178284, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.34843205574912894, |
|
"grad_norm": 362.13853654937515, |
|
"learning_rate": 2.96640826873385e-07, |
|
"logits/chosen": -2.475275754928589, |
|
"logits/rejected": -2.4764583110809326, |
|
"logps/chosen": -74.12995910644531, |
|
"logps/rejected": -80.4610366821289, |
|
"loss": 0.6629, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.7075541019439697, |
|
"rewards/margins": 0.3413715958595276, |
|
"rewards/rejected": 0.36618250608444214, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.34843205574912894, |
|
"eval_logits/chosen": -2.5614964962005615, |
|
"eval_logits/rejected": -2.545496940612793, |
|
"eval_logps/chosen": -73.26775360107422, |
|
"eval_logps/rejected": -80.48373413085938, |
|
"eval_loss": 0.6374358534812927, |
|
"eval_rewards/accuracies": 0.3452380895614624, |
|
"eval_rewards/chosen": 0.7585535645484924, |
|
"eval_rewards/margins": 0.3588891923427582, |
|
"eval_rewards/rejected": 0.39966443181037903, |
|
"eval_runtime": 113.5456, |
|
"eval_samples_per_second": 17.614, |
|
"eval_steps_per_second": 0.555, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3832752613240418, |
|
"grad_norm": 402.8431072761315, |
|
"learning_rate": 2.940568475452196e-07, |
|
"logits/chosen": -2.5034003257751465, |
|
"logits/rejected": -2.465451717376709, |
|
"logps/chosen": -72.49577331542969, |
|
"logps/rejected": -62.814613342285156, |
|
"loss": 0.6492, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": 0.41803866624832153, |
|
"rewards/margins": 0.29653844237327576, |
|
"rewards/rejected": 0.12150021642446518, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.4181184668989547, |
|
"grad_norm": 276.5279613655658, |
|
"learning_rate": 2.9147286821705423e-07, |
|
"logits/chosen": -2.528160333633423, |
|
"logits/rejected": -2.497156858444214, |
|
"logps/chosen": -76.75882720947266, |
|
"logps/rejected": -66.54231262207031, |
|
"loss": 0.6394, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": 0.4357032775878906, |
|
"rewards/margins": 0.3555503487586975, |
|
"rewards/rejected": 0.08015286922454834, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4529616724738676, |
|
"grad_norm": 438.9130471992716, |
|
"learning_rate": 2.888888888888889e-07, |
|
"logits/chosen": -2.5648016929626465, |
|
"logits/rejected": -2.5454888343811035, |
|
"logps/chosen": -83.2737808227539, |
|
"logps/rejected": -87.85270690917969, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": 0.3763393759727478, |
|
"rewards/margins": 0.5897358655929565, |
|
"rewards/rejected": -0.21339651942253113, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": 322.57184174769804, |
|
"learning_rate": 2.863049095607235e-07, |
|
"logits/chosen": -2.4603538513183594, |
|
"logits/rejected": -2.4488110542297363, |
|
"logps/chosen": -80.25636291503906, |
|
"logps/rejected": -70.93235778808594, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.5512245893478394, |
|
"rewards/margins": 0.6554535627365112, |
|
"rewards/rejected": -0.10422901809215546, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.5226480836236934, |
|
"grad_norm": 359.54734158147613, |
|
"learning_rate": 2.837209302325581e-07, |
|
"logits/chosen": -2.5226199626922607, |
|
"logits/rejected": -2.4783759117126465, |
|
"logps/chosen": -78.67804718017578, |
|
"logps/rejected": -79.57527160644531, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": 0.4999012351036072, |
|
"rewards/margins": 0.5150824785232544, |
|
"rewards/rejected": -0.01518118567764759, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5574912891986062, |
|
"grad_norm": 312.3603092848705, |
|
"learning_rate": 2.811369509043928e-07, |
|
"logits/chosen": -2.486689805984497, |
|
"logits/rejected": -2.507362127304077, |
|
"logps/chosen": -64.26698303222656, |
|
"logps/rejected": -72.27865600585938, |
|
"loss": 0.6722, |
|
"rewards/accuracies": 0.26249998807907104, |
|
"rewards/chosen": 0.15987662971019745, |
|
"rewards/margins": 0.3494376838207245, |
|
"rewards/rejected": -0.18956105411052704, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5923344947735192, |
|
"grad_norm": 391.74254314560164, |
|
"learning_rate": 2.7855297157622735e-07, |
|
"logits/chosen": -2.4955577850341797, |
|
"logits/rejected": -2.48069167137146, |
|
"logps/chosen": -69.2925796508789, |
|
"logps/rejected": -77.45768737792969, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.20161184668540955, |
|
"rewards/margins": 0.5074432492256165, |
|
"rewards/rejected": -0.7090551257133484, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.627177700348432, |
|
"grad_norm": 354.9896897534272, |
|
"learning_rate": 2.75968992248062e-07, |
|
"logits/chosen": -2.545736789703369, |
|
"logits/rejected": -2.534653663635254, |
|
"logps/chosen": -91.68567657470703, |
|
"logps/rejected": -86.991943359375, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": 0.1063336730003357, |
|
"rewards/margins": 0.40485674142837524, |
|
"rewards/rejected": -0.29852309823036194, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.662020905923345, |
|
"grad_norm": 251.86613571396995, |
|
"learning_rate": 2.733850129198967e-07, |
|
"logits/chosen": -2.5427966117858887, |
|
"logits/rejected": -2.5337371826171875, |
|
"logps/chosen": -70.1039810180664, |
|
"logps/rejected": -81.1055908203125, |
|
"loss": 0.6537, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": 0.587446928024292, |
|
"rewards/margins": 0.3254398703575134, |
|
"rewards/rejected": 0.26200705766677856, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.6968641114982579, |
|
"grad_norm": 448.52473154376446, |
|
"learning_rate": 2.7080103359173124e-07, |
|
"logits/chosen": -2.5728795528411865, |
|
"logits/rejected": -2.580472469329834, |
|
"logps/chosen": -88.97058868408203, |
|
"logps/rejected": -91.36177062988281, |
|
"loss": 0.7044, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": 1.0451823472976685, |
|
"rewards/margins": 0.6180461645126343, |
|
"rewards/rejected": 0.42713627219200134, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6968641114982579, |
|
"eval_logits/chosen": -2.548659324645996, |
|
"eval_logits/rejected": -2.5324509143829346, |
|
"eval_logps/chosen": -73.5050048828125, |
|
"eval_logps/rejected": -80.9368667602539, |
|
"eval_loss": 0.6785250902175903, |
|
"eval_rewards/accuracies": 0.335317462682724, |
|
"eval_rewards/chosen": 0.6114616394042969, |
|
"eval_rewards/margins": 0.4927373230457306, |
|
"eval_rewards/rejected": 0.11872433125972748, |
|
"eval_runtime": 113.5716, |
|
"eval_samples_per_second": 17.61, |
|
"eval_steps_per_second": 0.555, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 539.9357015144649, |
|
"learning_rate": 2.682170542635659e-07, |
|
"logits/chosen": -2.5559327602386475, |
|
"logits/rejected": -2.529869794845581, |
|
"logps/chosen": -68.37545013427734, |
|
"logps/rejected": -63.614356994628906, |
|
"loss": 0.7042, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": 0.58504319190979, |
|
"rewards/margins": 0.7152014970779419, |
|
"rewards/rejected": -0.13015827536582947, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.7665505226480837, |
|
"grad_norm": 286.2044305000258, |
|
"learning_rate": 2.6563307493540046e-07, |
|
"logits/chosen": -2.591370105743408, |
|
"logits/rejected": -2.5699965953826904, |
|
"logps/chosen": -72.25666809082031, |
|
"logps/rejected": -70.89804077148438, |
|
"loss": 0.6966, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.74493408203125, |
|
"rewards/margins": 0.2705448865890503, |
|
"rewards/rejected": 0.4743892252445221, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.8013937282229965, |
|
"grad_norm": 398.39083722167163, |
|
"learning_rate": 2.6304909560723513e-07, |
|
"logits/chosen": -2.5968384742736816, |
|
"logits/rejected": -2.574525833129883, |
|
"logps/chosen": -88.1639633178711, |
|
"logps/rejected": -87.76924133300781, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": 1.1387383937835693, |
|
"rewards/margins": 0.9033845663070679, |
|
"rewards/rejected": 0.23535379767417908, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.8362369337979094, |
|
"grad_norm": 339.80066659715754, |
|
"learning_rate": 2.6046511627906974e-07, |
|
"logits/chosen": -2.589068651199341, |
|
"logits/rejected": -2.5533642768859863, |
|
"logps/chosen": -85.19193267822266, |
|
"logps/rejected": -79.42808532714844, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": 0.9607963562011719, |
|
"rewards/margins": 0.5218255519866943, |
|
"rewards/rejected": 0.4389708638191223, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"grad_norm": 341.3674177311404, |
|
"learning_rate": 2.5788113695090435e-07, |
|
"logits/chosen": -2.6001765727996826, |
|
"logits/rejected": -2.56406831741333, |
|
"logps/chosen": -94.24848937988281, |
|
"logps/rejected": -89.58869934082031, |
|
"loss": 0.6149, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": 0.7444799542427063, |
|
"rewards/margins": 0.5228375196456909, |
|
"rewards/rejected": 0.2216424196958542, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.9059233449477352, |
|
"grad_norm": 283.98484381974555, |
|
"learning_rate": 2.55297157622739e-07, |
|
"logits/chosen": -2.506063461303711, |
|
"logits/rejected": -2.5218896865844727, |
|
"logps/chosen": -57.92702102661133, |
|
"logps/rejected": -65.19326782226562, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": 0.6958502531051636, |
|
"rewards/margins": 0.4261382520198822, |
|
"rewards/rejected": 0.26971206068992615, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.9407665505226481, |
|
"grad_norm": 372.62325379603294, |
|
"learning_rate": 2.5271317829457363e-07, |
|
"logits/chosen": -2.6025278568267822, |
|
"logits/rejected": -2.6028153896331787, |
|
"logps/chosen": -67.53540802001953, |
|
"logps/rejected": -82.33738708496094, |
|
"loss": 0.6525, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": 0.8337510228157043, |
|
"rewards/margins": 0.713237464427948, |
|
"rewards/rejected": 0.12051346153020859, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 314.12151123620725, |
|
"learning_rate": 2.5012919896640824e-07, |
|
"logits/chosen": -2.5111606121063232, |
|
"logits/rejected": -2.487565517425537, |
|
"logps/chosen": -66.56291198730469, |
|
"logps/rejected": -70.64192199707031, |
|
"loss": 0.6164, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": 0.7883111238479614, |
|
"rewards/margins": 0.668962836265564, |
|
"rewards/rejected": 0.11934838443994522, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.0104529616724738, |
|
"grad_norm": 72.54916768701123, |
|
"learning_rate": 2.475452196382429e-07, |
|
"logits/chosen": -2.524649143218994, |
|
"logits/rejected": -2.4943947792053223, |
|
"logps/chosen": -69.41844177246094, |
|
"logps/rejected": -65.09283447265625, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": 1.642530083656311, |
|
"rewards/margins": 1.75347900390625, |
|
"rewards/rejected": -0.11094935238361359, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.0452961672473868, |
|
"grad_norm": 25.060931202079587, |
|
"learning_rate": 2.4496124031007747e-07, |
|
"logits/chosen": -2.5531129837036133, |
|
"logits/rejected": -2.5395359992980957, |
|
"logps/chosen": -62.507972717285156, |
|
"logps/rejected": -72.49549865722656, |
|
"loss": 0.3945, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 2.5053844451904297, |
|
"rewards/margins": 4.567110061645508, |
|
"rewards/rejected": -2.06172513961792, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0452961672473868, |
|
"eval_logits/chosen": -2.575260877609253, |
|
"eval_logits/rejected": -2.559643507003784, |
|
"eval_logps/chosen": -73.25456237792969, |
|
"eval_logps/rejected": -80.9556655883789, |
|
"eval_loss": 0.6974567770957947, |
|
"eval_rewards/accuracies": 0.3551587164402008, |
|
"eval_rewards/chosen": 0.7667317986488342, |
|
"eval_rewards/margins": 0.6596661806106567, |
|
"eval_rewards/rejected": 0.10706562548875809, |
|
"eval_runtime": 113.4908, |
|
"eval_samples_per_second": 17.623, |
|
"eval_steps_per_second": 0.555, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0801393728222997, |
|
"grad_norm": 19.10824587006278, |
|
"learning_rate": 2.4237726098191214e-07, |
|
"logits/chosen": -2.530690908432007, |
|
"logits/rejected": -2.5344767570495605, |
|
"logps/chosen": -63.4401969909668, |
|
"logps/rejected": -79.88385772705078, |
|
"loss": 0.4065, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 2.601799488067627, |
|
"rewards/margins": 5.476699352264404, |
|
"rewards/rejected": -2.8749003410339355, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.1149825783972125, |
|
"grad_norm": 13.055135863878375, |
|
"learning_rate": 2.397932816537468e-07, |
|
"logits/chosen": -2.583162546157837, |
|
"logits/rejected": -2.5704259872436523, |
|
"logps/chosen": -70.44621276855469, |
|
"logps/rejected": -82.24694061279297, |
|
"loss": 0.3998, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 2.1784868240356445, |
|
"rewards/margins": 4.678074359893799, |
|
"rewards/rejected": -2.499587059020996, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.1498257839721253, |
|
"grad_norm": 73.70458680382093, |
|
"learning_rate": 2.3720930232558136e-07, |
|
"logits/chosen": -2.574796438217163, |
|
"logits/rejected": -2.550279140472412, |
|
"logps/chosen": -79.08229064941406, |
|
"logps/rejected": -82.61952209472656, |
|
"loss": 0.3644, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 3.124291181564331, |
|
"rewards/margins": 5.5998125076293945, |
|
"rewards/rejected": -2.4755213260650635, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.1846689895470384, |
|
"grad_norm": 28.850587508009358, |
|
"learning_rate": 2.34625322997416e-07, |
|
"logits/chosen": -2.557753562927246, |
|
"logits/rejected": -2.5632052421569824, |
|
"logps/chosen": -78.73472595214844, |
|
"logps/rejected": -99.98568725585938, |
|
"loss": 0.389, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 3.1080851554870605, |
|
"rewards/margins": 6.023845672607422, |
|
"rewards/rejected": -2.9157605171203613, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"grad_norm": 85.64045808230581, |
|
"learning_rate": 2.3204134366925064e-07, |
|
"logits/chosen": -2.5850272178649902, |
|
"logits/rejected": -2.551978588104248, |
|
"logps/chosen": -64.38944244384766, |
|
"logps/rejected": -68.43348693847656, |
|
"loss": 0.3897, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 2.608705759048462, |
|
"rewards/margins": 4.316413402557373, |
|
"rewards/rejected": -1.7077077627182007, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.254355400696864, |
|
"grad_norm": 52.17293262552292, |
|
"learning_rate": 2.2945736434108528e-07, |
|
"logits/chosen": -2.5734505653381348, |
|
"logits/rejected": -2.5436837673187256, |
|
"logps/chosen": -67.31871032714844, |
|
"logps/rejected": -67.99309539794922, |
|
"loss": 0.3729, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 2.8094611167907715, |
|
"rewards/margins": 4.253398895263672, |
|
"rewards/rejected": -1.44393789768219, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.289198606271777, |
|
"grad_norm": 73.55818831421092, |
|
"learning_rate": 2.268733850129199e-07, |
|
"logits/chosen": -2.5357134342193604, |
|
"logits/rejected": -2.556293487548828, |
|
"logps/chosen": -65.90876770019531, |
|
"logps/rejected": -77.08678436279297, |
|
"loss": 0.4065, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 3.1975178718566895, |
|
"rewards/margins": 4.904288291931152, |
|
"rewards/rejected": -1.7067703008651733, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.32404181184669, |
|
"grad_norm": 63.56754205504431, |
|
"learning_rate": 2.2428940568475453e-07, |
|
"logits/chosen": -2.543696880340576, |
|
"logits/rejected": -2.542275905609131, |
|
"logps/chosen": -79.8168716430664, |
|
"logps/rejected": -90.68196105957031, |
|
"loss": 0.383, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 4.3757524490356445, |
|
"rewards/margins": 6.989476680755615, |
|
"rewards/rejected": -2.61372447013855, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.3588850174216027, |
|
"grad_norm": 84.5304582436996, |
|
"learning_rate": 2.2170542635658914e-07, |
|
"logits/chosen": -2.616854190826416, |
|
"logits/rejected": -2.6028048992156982, |
|
"logps/chosen": -62.09540939331055, |
|
"logps/rejected": -74.197265625, |
|
"loss": 0.3743, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 3.0379724502563477, |
|
"rewards/margins": 4.858659744262695, |
|
"rewards/rejected": -1.8206875324249268, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.3937282229965158, |
|
"grad_norm": 45.47639364698017, |
|
"learning_rate": 2.1912144702842375e-07, |
|
"logits/chosen": -2.5946788787841797, |
|
"logits/rejected": -2.564770221710205, |
|
"logps/chosen": -80.40516662597656, |
|
"logps/rejected": -101.68209075927734, |
|
"loss": 0.3859, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 3.559967041015625, |
|
"rewards/margins": 5.592529296875, |
|
"rewards/rejected": -2.032562732696533, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3937282229965158, |
|
"eval_logits/chosen": -2.5995094776153564, |
|
"eval_logits/rejected": -2.5833675861358643, |
|
"eval_logps/chosen": -72.12498474121094, |
|
"eval_logps/rejected": -80.21580505371094, |
|
"eval_loss": 0.7395845651626587, |
|
"eval_rewards/accuracies": 0.3571428656578064, |
|
"eval_rewards/chosen": 1.4670709371566772, |
|
"eval_rewards/margins": 0.9012959599494934, |
|
"eval_rewards/rejected": 0.5657750368118286, |
|
"eval_runtime": 113.4528, |
|
"eval_samples_per_second": 17.628, |
|
"eval_steps_per_second": 0.555, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 9.035779538567592, |
|
"learning_rate": 2.1653746770025842e-07, |
|
"logits/chosen": -2.587205410003662, |
|
"logits/rejected": -2.577908515930176, |
|
"logps/chosen": -76.38627624511719, |
|
"logps/rejected": -81.7760009765625, |
|
"loss": 0.3719, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 3.8607609272003174, |
|
"rewards/margins": 4.7285661697387695, |
|
"rewards/rejected": -0.8678053021430969, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.4634146341463414, |
|
"grad_norm": 31.70641333825559, |
|
"learning_rate": 2.1395348837209303e-07, |
|
"logits/chosen": -2.637145519256592, |
|
"logits/rejected": -2.637343168258667, |
|
"logps/chosen": -69.94471740722656, |
|
"logps/rejected": -86.88435363769531, |
|
"loss": 0.3975, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 3.812042236328125, |
|
"rewards/margins": 5.059989929199219, |
|
"rewards/rejected": -1.2479479312896729, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.4982578397212545, |
|
"grad_norm": 65.50905056120632, |
|
"learning_rate": 2.1136950904392762e-07, |
|
"logits/chosen": -2.594949960708618, |
|
"logits/rejected": -2.582991123199463, |
|
"logps/chosen": -62.43513870239258, |
|
"logps/rejected": -74.3114013671875, |
|
"loss": 0.3952, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 3.8608689308166504, |
|
"rewards/margins": 4.990710258483887, |
|
"rewards/rejected": -1.1298413276672363, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.533101045296167, |
|
"grad_norm": 103.25920597551605, |
|
"learning_rate": 2.0878552971576226e-07, |
|
"logits/chosen": -2.587477207183838, |
|
"logits/rejected": -2.596736192703247, |
|
"logps/chosen": -64.37332916259766, |
|
"logps/rejected": -75.4191665649414, |
|
"loss": 0.401, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": 3.475306987762451, |
|
"rewards/margins": 4.665290832519531, |
|
"rewards/rejected": -1.1899840831756592, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.5679442508710801, |
|
"grad_norm": 46.90858224938077, |
|
"learning_rate": 2.062015503875969e-07, |
|
"logits/chosen": -2.641456127166748, |
|
"logits/rejected": -2.6066718101501465, |
|
"logps/chosen": -84.70106506347656, |
|
"logps/rejected": -90.63847351074219, |
|
"loss": 0.3861, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 3.8073742389678955, |
|
"rewards/margins": 6.706219673156738, |
|
"rewards/rejected": -2.8988451957702637, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.6027874564459932, |
|
"grad_norm": 73.13862861246201, |
|
"learning_rate": 2.0361757105943153e-07, |
|
"logits/chosen": -2.6171023845672607, |
|
"logits/rejected": -2.5963072776794434, |
|
"logps/chosen": -70.6004867553711, |
|
"logps/rejected": -81.99185943603516, |
|
"loss": 0.4031, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 3.4589905738830566, |
|
"rewards/margins": 6.551025390625, |
|
"rewards/rejected": -3.0920345783233643, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.6376306620209058, |
|
"grad_norm": 55.407817129230025, |
|
"learning_rate": 2.0103359173126615e-07, |
|
"logits/chosen": -2.617323637008667, |
|
"logits/rejected": -2.6086113452911377, |
|
"logps/chosen": -57.063804626464844, |
|
"logps/rejected": -75.29525756835938, |
|
"loss": 0.4097, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": 2.6279244422912598, |
|
"rewards/margins": 4.8750505447387695, |
|
"rewards/rejected": -2.247126817703247, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.6724738675958188, |
|
"grad_norm": 95.66039903353, |
|
"learning_rate": 1.9844961240310078e-07, |
|
"logits/chosen": -2.636000156402588, |
|
"logits/rejected": -2.619654893875122, |
|
"logps/chosen": -50.774810791015625, |
|
"logps/rejected": -55.16005325317383, |
|
"loss": 0.4027, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": 2.037757158279419, |
|
"rewards/margins": 4.008673667907715, |
|
"rewards/rejected": -1.970916986465454, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.7073170731707317, |
|
"grad_norm": 194.80847235958515, |
|
"learning_rate": 1.958656330749354e-07, |
|
"logits/chosen": -2.6303064823150635, |
|
"logits/rejected": -2.617170810699463, |
|
"logps/chosen": -70.10054779052734, |
|
"logps/rejected": -74.9827880859375, |
|
"loss": 0.4809, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": 1.9351723194122314, |
|
"rewards/margins": 4.985955715179443, |
|
"rewards/rejected": -3.05078387260437, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"grad_norm": 134.98085943724894, |
|
"learning_rate": 1.9328165374677e-07, |
|
"logits/chosen": -2.5582988262176514, |
|
"logits/rejected": -2.551264524459839, |
|
"logps/chosen": -73.02742004394531, |
|
"logps/rejected": -89.30897521972656, |
|
"loss": 0.3893, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 2.0215065479278564, |
|
"rewards/margins": 5.4846272468566895, |
|
"rewards/rejected": -3.463120222091675, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"eval_logits/chosen": -2.6659271717071533, |
|
"eval_logits/rejected": -2.6499409675598145, |
|
"eval_logps/chosen": -75.26073455810547, |
|
"eval_logps/rejected": -83.39617919921875, |
|
"eval_loss": 0.7903804183006287, |
|
"eval_rewards/accuracies": 0.3492063581943512, |
|
"eval_rewards/chosen": -0.47709161043167114, |
|
"eval_rewards/margins": 0.9289572834968567, |
|
"eval_rewards/rejected": -1.4060487747192383, |
|
"eval_runtime": 121.402, |
|
"eval_samples_per_second": 16.474, |
|
"eval_steps_per_second": 0.519, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.7770034843205575, |
|
"grad_norm": 28.87847596006846, |
|
"learning_rate": 1.9069767441860465e-07, |
|
"logits/chosen": -2.623152732849121, |
|
"logits/rejected": -2.60357666015625, |
|
"logps/chosen": -68.32408142089844, |
|
"logps/rejected": -75.5625991821289, |
|
"loss": 0.3872, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 1.5080443620681763, |
|
"rewards/margins": 5.214726448059082, |
|
"rewards/rejected": -3.706681728363037, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.8118466898954704, |
|
"grad_norm": 24.57794510996642, |
|
"learning_rate": 1.8811369509043926e-07, |
|
"logits/chosen": -2.635575532913208, |
|
"logits/rejected": -2.630188226699829, |
|
"logps/chosen": -71.8682861328125, |
|
"logps/rejected": -82.45121002197266, |
|
"loss": 0.3947, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 1.4898183345794678, |
|
"rewards/margins": 4.737631797790527, |
|
"rewards/rejected": -3.247814178466797, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.8466898954703832, |
|
"grad_norm": 43.47081299094027, |
|
"learning_rate": 1.8552971576227387e-07, |
|
"logits/chosen": -2.6130404472351074, |
|
"logits/rejected": -2.6072745323181152, |
|
"logps/chosen": -67.63585662841797, |
|
"logps/rejected": -76.81507873535156, |
|
"loss": 0.4098, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 2.294175386428833, |
|
"rewards/margins": 5.674102306365967, |
|
"rewards/rejected": -3.379927158355713, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.8815331010452963, |
|
"grad_norm": 379.9980856009288, |
|
"learning_rate": 1.829457364341085e-07, |
|
"logits/chosen": -2.6233439445495605, |
|
"logits/rejected": -2.6407742500305176, |
|
"logps/chosen": -59.39795684814453, |
|
"logps/rejected": -78.36451721191406, |
|
"loss": 0.4218, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 2.6863512992858887, |
|
"rewards/margins": 5.80073881149292, |
|
"rewards/rejected": -3.114386796951294, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.916376306620209, |
|
"grad_norm": 36.16567370320362, |
|
"learning_rate": 1.8036175710594315e-07, |
|
"logits/chosen": -2.6051318645477295, |
|
"logits/rejected": -2.5861499309539795, |
|
"logps/chosen": -85.02639770507812, |
|
"logps/rejected": -92.01643371582031, |
|
"loss": 0.3918, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 3.6012024879455566, |
|
"rewards/margins": 6.781991004943848, |
|
"rewards/rejected": -3.180788993835449, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.951219512195122, |
|
"grad_norm": 113.77212344428047, |
|
"learning_rate": 1.7777777777777776e-07, |
|
"logits/chosen": -2.601555347442627, |
|
"logits/rejected": -2.6108827590942383, |
|
"logps/chosen": -59.21348190307617, |
|
"logps/rejected": -73.62416076660156, |
|
"loss": 0.4326, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 2.7940502166748047, |
|
"rewards/margins": 4.857416152954102, |
|
"rewards/rejected": -2.063365936279297, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.986062717770035, |
|
"grad_norm": 84.29111668630037, |
|
"learning_rate": 1.7519379844961235e-07, |
|
"logits/chosen": -2.676542282104492, |
|
"logits/rejected": -2.6477246284484863, |
|
"logps/chosen": -60.66288375854492, |
|
"logps/rejected": -65.19253540039062, |
|
"loss": 0.4049, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": 2.821211338043213, |
|
"rewards/margins": 4.4517645835876465, |
|
"rewards/rejected": -1.6305538415908813, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.0209059233449476, |
|
"grad_norm": 1.1269554094711256, |
|
"learning_rate": 1.7260981912144704e-07, |
|
"logits/chosen": -2.685316801071167, |
|
"logits/rejected": -2.6868529319763184, |
|
"logps/chosen": -73.15747833251953, |
|
"logps/rejected": -84.87666320800781, |
|
"loss": 0.3613, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 3.4528770446777344, |
|
"rewards/margins": 6.801962852478027, |
|
"rewards/rejected": -3.349086046218872, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.0557491289198606, |
|
"grad_norm": 25.976934863727717, |
|
"learning_rate": 1.7002583979328165e-07, |
|
"logits/chosen": -2.589168071746826, |
|
"logits/rejected": -2.530686378479004, |
|
"logps/chosen": -87.9363021850586, |
|
"logps/rejected": -83.9460220336914, |
|
"loss": 0.343, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 4.061544895172119, |
|
"rewards/margins": 7.5588788986206055, |
|
"rewards/rejected": -3.4973349571228027, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.0905923344947737, |
|
"grad_norm": 4.291125808983435, |
|
"learning_rate": 1.6744186046511627e-07, |
|
"logits/chosen": -2.6059463024139404, |
|
"logits/rejected": -2.5761430263519287, |
|
"logps/chosen": -57.32917404174805, |
|
"logps/rejected": -64.40930938720703, |
|
"loss": 0.3749, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 2.808424234390259, |
|
"rewards/margins": 5.987191200256348, |
|
"rewards/rejected": -3.1787662506103516, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.0905923344947737, |
|
"eval_logits/chosen": -2.6320791244506836, |
|
"eval_logits/rejected": -2.6158933639526367, |
|
"eval_logps/chosen": -73.58616638183594, |
|
"eval_logps/rejected": -81.91004180908203, |
|
"eval_loss": 0.8125157952308655, |
|
"eval_rewards/accuracies": 0.363095223903656, |
|
"eval_rewards/chosen": 0.5611402988433838, |
|
"eval_rewards/margins": 1.0457921028137207, |
|
"eval_rewards/rejected": -0.48465171456336975, |
|
"eval_runtime": 113.5505, |
|
"eval_samples_per_second": 17.613, |
|
"eval_steps_per_second": 0.555, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.1254355400696863, |
|
"grad_norm": 7.908116387433432, |
|
"learning_rate": 1.6485788113695088e-07, |
|
"logits/chosen": -2.655932664871216, |
|
"logits/rejected": -2.618626117706299, |
|
"logps/chosen": -77.79930114746094, |
|
"logps/rejected": -76.16334533691406, |
|
"loss": 0.3651, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 3.5249366760253906, |
|
"rewards/margins": 6.6772894859313965, |
|
"rewards/rejected": -3.152352809906006, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.1602787456445993, |
|
"grad_norm": 12.816821249204288, |
|
"learning_rate": 1.6227390180878554e-07, |
|
"logits/chosen": -2.6253597736358643, |
|
"logits/rejected": -2.570455551147461, |
|
"logps/chosen": -72.71697998046875, |
|
"logps/rejected": -84.75135040283203, |
|
"loss": 0.3665, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 3.5354607105255127, |
|
"rewards/margins": 7.969748020172119, |
|
"rewards/rejected": -4.434287071228027, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.1951219512195124, |
|
"grad_norm": 8.050269883900128, |
|
"learning_rate": 1.5968992248062013e-07, |
|
"logits/chosen": -2.5422072410583496, |
|
"logits/rejected": -2.559743881225586, |
|
"logps/chosen": -67.67036437988281, |
|
"logps/rejected": -95.14600372314453, |
|
"loss": 0.3449, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 3.445582866668701, |
|
"rewards/margins": 8.744329452514648, |
|
"rewards/rejected": -5.2987470626831055, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.229965156794425, |
|
"grad_norm": 5.758994331059265, |
|
"learning_rate": 1.5710594315245477e-07, |
|
"logits/chosen": -2.5750184059143066, |
|
"logits/rejected": -2.5732388496398926, |
|
"logps/chosen": -66.1601333618164, |
|
"logps/rejected": -90.6006851196289, |
|
"loss": 0.3713, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 2.1229605674743652, |
|
"rewards/margins": 6.844090938568115, |
|
"rewards/rejected": -4.721129894256592, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.264808362369338, |
|
"grad_norm": 4.106714414729963, |
|
"learning_rate": 1.5452196382428938e-07, |
|
"logits/chosen": -2.571296215057373, |
|
"logits/rejected": -2.5443153381347656, |
|
"logps/chosen": -76.41832733154297, |
|
"logps/rejected": -84.57535552978516, |
|
"loss": 0.3914, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 2.150641918182373, |
|
"rewards/margins": 6.811266899108887, |
|
"rewards/rejected": -4.660625457763672, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.2996515679442506, |
|
"grad_norm": 26.12193694012493, |
|
"learning_rate": 1.5193798449612402e-07, |
|
"logits/chosen": -2.601414203643799, |
|
"logits/rejected": -2.5880684852600098, |
|
"logps/chosen": -76.15614318847656, |
|
"logps/rejected": -84.27168273925781, |
|
"loss": 0.3595, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 1.7724649906158447, |
|
"rewards/margins": 7.21490478515625, |
|
"rewards/rejected": -5.442440986633301, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.3344947735191637, |
|
"grad_norm": 10.794985780975221, |
|
"learning_rate": 1.4935400516795863e-07, |
|
"logits/chosen": -2.593954563140869, |
|
"logits/rejected": -2.5915369987487793, |
|
"logps/chosen": -70.91958618164062, |
|
"logps/rejected": -83.38150787353516, |
|
"loss": 0.3772, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 1.1422145366668701, |
|
"rewards/margins": 7.140495300292969, |
|
"rewards/rejected": -5.998281002044678, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.3693379790940767, |
|
"grad_norm": 2.1208474931059706, |
|
"learning_rate": 1.4677002583979327e-07, |
|
"logits/chosen": -2.635155439376831, |
|
"logits/rejected": -2.622910499572754, |
|
"logps/chosen": -70.04927062988281, |
|
"logps/rejected": -82.35643005371094, |
|
"loss": 0.3572, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 2.0802841186523438, |
|
"rewards/margins": 8.326448440551758, |
|
"rewards/rejected": -6.246163845062256, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.40418118466899, |
|
"grad_norm": 10.487007281103297, |
|
"learning_rate": 1.4418604651162788e-07, |
|
"logits/chosen": -2.6031365394592285, |
|
"logits/rejected": -2.5714783668518066, |
|
"logps/chosen": -77.78074645996094, |
|
"logps/rejected": -97.24638366699219, |
|
"loss": 0.3613, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 2.6356489658355713, |
|
"rewards/margins": 8.40825080871582, |
|
"rewards/rejected": -5.7726030349731445, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"grad_norm": 7.007416212948012, |
|
"learning_rate": 1.4160206718346252e-07, |
|
"logits/chosen": -2.5922415256500244, |
|
"logits/rejected": -2.5560450553894043, |
|
"logps/chosen": -83.61808776855469, |
|
"logps/rejected": -89.99815368652344, |
|
"loss": 0.3662, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 3.2752013206481934, |
|
"rewards/margins": 8.457530975341797, |
|
"rewards/rejected": -5.182328701019287, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"eval_logits/chosen": -2.6111857891082764, |
|
"eval_logits/rejected": -2.594111442565918, |
|
"eval_logps/chosen": -75.47573852539062, |
|
"eval_logps/rejected": -84.49435424804688, |
|
"eval_loss": 0.8411857485771179, |
|
"eval_rewards/accuracies": 0.3650793731212616, |
|
"eval_rewards/chosen": -0.6103957891464233, |
|
"eval_rewards/margins": 1.476527214050293, |
|
"eval_rewards/rejected": -2.086923122406006, |
|
"eval_runtime": 113.5556, |
|
"eval_samples_per_second": 17.613, |
|
"eval_steps_per_second": 0.555, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.4738675958188154, |
|
"grad_norm": 0.4711439948410044, |
|
"learning_rate": 1.3901808785529716e-07, |
|
"logits/chosen": -2.632316827774048, |
|
"logits/rejected": -2.612363576889038, |
|
"logps/chosen": -91.01361083984375, |
|
"logps/rejected": -111.48197937011719, |
|
"loss": 0.3629, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 2.5902059078216553, |
|
"rewards/margins": 9.54753303527832, |
|
"rewards/rejected": -6.957326412200928, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.508710801393728, |
|
"grad_norm": 0.03547147611253739, |
|
"learning_rate": 1.3643410852713177e-07, |
|
"logits/chosen": -2.6036946773529053, |
|
"logits/rejected": -2.5696444511413574, |
|
"logps/chosen": -66.248779296875, |
|
"logps/rejected": -71.01225280761719, |
|
"loss": 0.4025, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 2.9773497581481934, |
|
"rewards/margins": 7.88436222076416, |
|
"rewards/rejected": -4.907011985778809, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.543554006968641, |
|
"grad_norm": 4.675015379292036, |
|
"learning_rate": 1.3385012919896641e-07, |
|
"logits/chosen": -2.6531801223754883, |
|
"logits/rejected": -2.628028392791748, |
|
"logps/chosen": -65.84535217285156, |
|
"logps/rejected": -67.12870025634766, |
|
"loss": 0.3706, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": 2.7976653575897217, |
|
"rewards/margins": 6.484448432922363, |
|
"rewards/rejected": -3.6867833137512207, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.578397212543554, |
|
"grad_norm": 15.399000464963422, |
|
"learning_rate": 1.3126614987080103e-07, |
|
"logits/chosen": -2.5426218509674072, |
|
"logits/rejected": -2.5461742877960205, |
|
"logps/chosen": -61.294456481933594, |
|
"logps/rejected": -87.54208374023438, |
|
"loss": 0.3653, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 2.92600154876709, |
|
"rewards/margins": 8.220406532287598, |
|
"rewards/rejected": -5.29440450668335, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.6132404181184667, |
|
"grad_norm": 1.423350223185656, |
|
"learning_rate": 1.2868217054263566e-07, |
|
"logits/chosen": -2.6260664463043213, |
|
"logits/rejected": -2.613114595413208, |
|
"logps/chosen": -62.37495040893555, |
|
"logps/rejected": -72.44084930419922, |
|
"loss": 0.35, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 3.621440887451172, |
|
"rewards/margins": 6.8329291343688965, |
|
"rewards/rejected": -3.2114882469177246, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.64808362369338, |
|
"grad_norm": 28.902176788755387, |
|
"learning_rate": 1.2609819121447028e-07, |
|
"logits/chosen": -2.54823637008667, |
|
"logits/rejected": -2.536100387573242, |
|
"logps/chosen": -48.50947189331055, |
|
"logps/rejected": -61.75891876220703, |
|
"loss": 0.3804, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": 2.9782872200012207, |
|
"rewards/margins": 6.0528998374938965, |
|
"rewards/rejected": -3.074612855911255, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.682926829268293, |
|
"grad_norm": 0.05604179114051096, |
|
"learning_rate": 1.2351421188630492e-07, |
|
"logits/chosen": -2.6780683994293213, |
|
"logits/rejected": -2.659198522567749, |
|
"logps/chosen": -71.71595764160156, |
|
"logps/rejected": -86.19725036621094, |
|
"loss": 0.3745, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 3.0666489601135254, |
|
"rewards/margins": 7.4923295974731445, |
|
"rewards/rejected": -4.425681114196777, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.7177700348432055, |
|
"grad_norm": 0.283574851628151, |
|
"learning_rate": 1.2093023255813953e-07, |
|
"logits/chosen": -2.6133499145507812, |
|
"logits/rejected": -2.612217426300049, |
|
"logps/chosen": -74.23127746582031, |
|
"logps/rejected": -91.57493591308594, |
|
"loss": 0.3447, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 4.144797325134277, |
|
"rewards/margins": 9.933080673217773, |
|
"rewards/rejected": -5.788283348083496, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.7526132404181185, |
|
"grad_norm": 0.8997563908630796, |
|
"learning_rate": 1.1834625322997414e-07, |
|
"logits/chosen": -2.6512093544006348, |
|
"logits/rejected": -2.6401140689849854, |
|
"logps/chosen": -64.2149429321289, |
|
"logps/rejected": -82.6252212524414, |
|
"loss": 0.3748, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 1.9805446863174438, |
|
"rewards/margins": 7.318342685699463, |
|
"rewards/rejected": -5.337798118591309, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.7874564459930316, |
|
"grad_norm": 20.667512714719013, |
|
"learning_rate": 1.1576227390180877e-07, |
|
"logits/chosen": -2.621532440185547, |
|
"logits/rejected": -2.5869758129119873, |
|
"logps/chosen": -75.92918395996094, |
|
"logps/rejected": -84.97443389892578, |
|
"loss": 0.3615, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 2.5894510746002197, |
|
"rewards/margins": 8.263704299926758, |
|
"rewards/rejected": -5.674252510070801, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.7874564459930316, |
|
"eval_logits/chosen": -2.6538193225860596, |
|
"eval_logits/rejected": -2.636691093444824, |
|
"eval_logps/chosen": -76.02718353271484, |
|
"eval_logps/rejected": -85.26797485351562, |
|
"eval_loss": 0.8766492605209351, |
|
"eval_rewards/accuracies": 0.3611111044883728, |
|
"eval_rewards/chosen": -0.9522846937179565, |
|
"eval_rewards/margins": 1.6142810583114624, |
|
"eval_rewards/rejected": -2.566565990447998, |
|
"eval_runtime": 113.6286, |
|
"eval_samples_per_second": 17.601, |
|
"eval_steps_per_second": 0.554, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.822299651567944, |
|
"grad_norm": 2.1853706150186754, |
|
"learning_rate": 1.131782945736434e-07, |
|
"logits/chosen": -2.6088814735412598, |
|
"logits/rejected": -2.5650715827941895, |
|
"logps/chosen": -85.81961822509766, |
|
"logps/rejected": -88.84381103515625, |
|
"loss": 0.3574, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 2.4687106609344482, |
|
"rewards/margins": 8.304216384887695, |
|
"rewards/rejected": -5.835506439208984, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 3.1905831999742365, |
|
"learning_rate": 1.1059431524547802e-07, |
|
"logits/chosen": -2.621854305267334, |
|
"logits/rejected": -2.6245625019073486, |
|
"logps/chosen": -87.5467529296875, |
|
"logps/rejected": -108.12459564208984, |
|
"loss": 0.3825, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 2.308243989944458, |
|
"rewards/margins": 8.762375831604004, |
|
"rewards/rejected": -6.454131126403809, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.89198606271777, |
|
"grad_norm": 0.7951775803731691, |
|
"learning_rate": 1.0801033591731266e-07, |
|
"logits/chosen": -2.5292835235595703, |
|
"logits/rejected": -2.5408291816711426, |
|
"logps/chosen": -59.091575622558594, |
|
"logps/rejected": -82.16059112548828, |
|
"loss": 0.3596, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 1.360400915145874, |
|
"rewards/margins": 7.509343147277832, |
|
"rewards/rejected": -6.148941993713379, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.926829268292683, |
|
"grad_norm": 1.1924411554401215, |
|
"learning_rate": 1.054263565891473e-07, |
|
"logits/chosen": -2.617638111114502, |
|
"logits/rejected": -2.5782742500305176, |
|
"logps/chosen": -76.57533264160156, |
|
"logps/rejected": -92.98815155029297, |
|
"loss": 0.3733, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 3.365124464035034, |
|
"rewards/margins": 9.208218574523926, |
|
"rewards/rejected": -5.843094825744629, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.961672473867596, |
|
"grad_norm": 0.8910844109249688, |
|
"learning_rate": 1.0284237726098191e-07, |
|
"logits/chosen": -2.6461727619171143, |
|
"logits/rejected": -2.616791009902954, |
|
"logps/chosen": -86.35088348388672, |
|
"logps/rejected": -93.9124526977539, |
|
"loss": 0.3609, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 3.4399936199188232, |
|
"rewards/margins": 8.806478500366211, |
|
"rewards/rejected": -5.366484642028809, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.996515679442509, |
|
"grad_norm": 43.871948824299515, |
|
"learning_rate": 1.0025839793281653e-07, |
|
"logits/chosen": -2.576416015625, |
|
"logits/rejected": -2.5809175968170166, |
|
"logps/chosen": -69.1751708984375, |
|
"logps/rejected": -86.57472229003906, |
|
"loss": 0.3403, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 3.5098342895507812, |
|
"rewards/margins": 9.559127807617188, |
|
"rewards/rejected": -6.049294471740723, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 861, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4762609164889266, |
|
"train_runtime": 9670.9508, |
|
"train_samples_per_second": 5.689, |
|
"train_steps_per_second": 0.089 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 861, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|